Add detection for IP addresses in the domain parser

This commit is contained in:
Alexander Bliskovsky
2020-02-05 15:07:47 -07:00
committed by Deimos
parent dccf3df4db
commit d75a6fc547
2 changed files with 23 additions and 1 deletions

View File

@@ -4,6 +4,7 @@
"""Consumer that generates content_metadata for topics."""
from typing import Any, Dict, Sequence
from ipaddress import ip_address
import publicsuffix
from sqlalchemy import cast, func
@@ -67,10 +68,22 @@ class TopicMetadataGenerator(EventStreamConsumer):
return {"word_count": word_count(extracted_text), "excerpt": excerpt}
def _domain_is_ip_address(self, domain: str) -> bool:
"""Return whether a "domain" is actually an IP address."""
try:
ip_address(domain)
return True
except ValueError:
return False
def _generate_link_metadata(self, topic: Topic) -> Dict[str, Any]:
    """Generate metadata for a link topic (domain).

    The host is extracted from the topic's link with ``get_domain_from_url``.
    A host that is an IP address is stored unchanged; any other host is passed
    through the public suffix list lookup first.

    Returns a dict with a single "domain" key.
    """
    parsed_domain = get_domain_from_url(topic.link)

    # The public suffix lookup is only performed for non-IP hosts. An IP
    # address is used as the domain as-is, so no lookup is done up front.
    if self._domain_is_ip_address(parsed_domain):
        domain = parsed_domain
    else:
        domain = self.public_suffix_list.get_public_suffix(parsed_domain)

    return {"domain": domain}

View File

@@ -84,6 +84,15 @@ def test_link_domain_on_link_topic(link_topic):
assert link_topic.link_domain == "example.com"
def test_link_ip_address_on_link_topic(session_user, session_group):
    """Check that a link topic pointing at an IP address reports it as the domain."""
    title = "IP address topic"
    url = "http://1.1.1.1"
    topic = Topic.create_link_topic(session_group, session_user, title, url)

    assert topic.link_domain == "1.1.1.1"
def test_edit_markdown_errors_on_link_topic(link_topic):
"""Ensure trying to edit the markdown of a link topic is an error."""
with raises(AttributeError):