Stop stripping periods from multi-sentence titles

If a topic title contains multiple sentences, stripping its trailing
period looks strange, so we only want to do that automatically when the
title is a single sentence.
This commit is contained in:
Deimos
2020-10-07 17:16:25 -06:00
parent ee78cd7760
commit e4a187eb56
2 changed files with 16 additions and 3 deletions

View File

@@ -46,12 +46,19 @@ def test_whitespace_trimmed(title_schema):
def test_trailing_periods_trimmed(title_schema):
    """Ensure trailing periods on a single-sentence title are removed."""
    # A lone sentence ending in a period: the loaded title must not keep it.
    title = "This is an interesting story."
    result = title_schema.load({"title": title})

    assert not result["title"].endswith(".")
def test_multisentence_trailing_period_kept(title_schema):
    """Check that a title made of several sentences keeps its final period."""
    # Three short sentences, each terminated by a period.
    loaded = title_schema.load({"title": "I came. I saw. I conquered."})

    assert loaded["title"].endswith(".")
def test_consecutive_whitespace_removed(title_schema):
"""Ensure consecutive whitespace in a title is compressed."""
title = "sure are \n a lot of spaces"

View File

@@ -45,8 +45,14 @@ class TopicSchema(Schema):
new_data = data.copy()
# strip any trailing periods
new_data["title"] = new_data["title"].rstrip(".")
split_title = re.split("[.?!]+", new_data["title"])
# the last string in the list will be empty if it ended with punctuation
num_sentences = len([piece for piece in split_title if piece])
# strip trailing periods off single-sentence titles
if num_sentences == 1:
new_data["title"] = new_data["title"].rstrip(".")
return new_data