mirror of
https://gitlab.com/tildes/tildes.git
synced 2026-04-16 06:18:34 +02:00
Hide the inner <details> text from comment excerpts but include the <summary> text.
Closes tildes-community/tildes-cf#4. See merge request tildes-community/tildes-cf!13.
This commit is contained in:
@@ -69,7 +69,9 @@ class TopicMetadataGenerator(EventStreamConsumer):
|
||||
if not topic.rendered_html:
|
||||
return {}
|
||||
|
||||
extracted_text = extract_text_from_html(topic.rendered_html)
|
||||
extracted_text = extract_text_from_html(
|
||||
topic.rendered_html, exclude_details_include_summary=True
|
||||
)
|
||||
|
||||
# create a short excerpt by truncating the extracted string
|
||||
excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")
|
||||
|
||||
@@ -154,6 +154,16 @@ def test_comment_excerpt_excludes_del(topic, session_user):
|
||||
assert comment.excerpt == "I really love it."
|
||||
|
||||
|
||||
def test_comment_excerpt_excludes_details(topic, session_user):
|
||||
"""Ensure that comment excerpts don't include text from <details> elements.
|
||||
|
||||
But ensure that the inner <summary> text *is* included.
|
||||
"""
|
||||
markdown = "<details>\n<summary>Spoilers!</summary>\n\nHide me!\n</details>"
|
||||
comment = Comment(topic, session_user, markdown)
|
||||
assert comment.excerpt == "Spoilers!"
|
||||
|
||||
|
||||
def test_comment_tree(db, topic, session_user):
|
||||
"""Ensure that building and pruning a comment tree works."""
|
||||
all_comments = []
|
||||
|
||||
@@ -7,6 +7,7 @@ from tildes.lib.string import (
|
||||
truncate_string,
|
||||
truncate_string_at_char,
|
||||
word_count,
|
||||
extract_text_from_html,
|
||||
)
|
||||
|
||||
|
||||
@@ -152,3 +153,23 @@ def test_basic_camelcase_to_snakecase():
|
||||
def test_camelcase_to_snakecase_with_acronym():
|
||||
"""Ensure CamelCase->snake_case works as expected with an acronym."""
|
||||
assert camelcase_to_snakecase("SomeHTTPThing") == "some_http_thing"
|
||||
|
||||
|
||||
def test_extract_text_from_html_include_details():
|
||||
"""Ensure extract_text_from_html behavior includes <details> elements by default."""
|
||||
html = "<details><summary>Spoilers!</summary> <p>Don't hide me!</p></details>"
|
||||
assert extract_text_from_html(html) == "Spoilers! Don't hide me!"
|
||||
|
||||
html = "<details><p>Don't hide me!</p></details>"
|
||||
assert extract_text_from_html(html) == "Don't hide me!"
|
||||
|
||||
|
||||
def test_extract_text_from_html_exclude_details():
|
||||
"""Ensure extract_text_from_html behavior excludes <details> elements when specified."""
|
||||
html = "<details><summary>Spoilers!</summary> <p>Hide me!</p></details>"
|
||||
text = extract_text_from_html(html, exclude_details_include_summary=True)
|
||||
assert text == "Spoilers!"
|
||||
|
||||
html = "<details><p>Hide me!</p></details>"
|
||||
text = extract_text_from_html(html, exclude_details_include_summary=True)
|
||||
assert text == "Details"
|
||||
|
||||
@@ -226,7 +226,11 @@ def separate_string(original: str, separator: str, segment_size: int) -> str:
|
||||
return separated
|
||||
|
||||
|
||||
def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) -> str:
|
||||
def extract_text_from_html(
|
||||
html: str,
|
||||
skip_tags: Optional[list[str]] = None,
|
||||
exclude_details_include_summary: bool = False,
|
||||
) -> str:
|
||||
"""Extract plain text content from the elements inside an HTML string."""
|
||||
|
||||
def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
|
||||
@@ -242,6 +246,14 @@ def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) ->
|
||||
if element.tag in skip_tags:
|
||||
return
|
||||
|
||||
if element.tag == "details" and exclude_details_include_summary:
|
||||
for subelement in element:
|
||||
if subelement.tag == "summary":
|
||||
yield from extract_text(subelement, skip_tags)
|
||||
return
|
||||
yield "Details"
|
||||
return
|
||||
|
||||
if element.text:
|
||||
yield element.text
|
||||
|
||||
|
||||
@@ -138,7 +138,9 @@ class Comment(DatabaseModel):
|
||||
self.rendered_html = convert_markdown_to_safe_html(new_markdown)
|
||||
|
||||
extracted_text = extract_text_from_html(
|
||||
self.rendered_html, skip_tags=["blockquote", "del"]
|
||||
self.rendered_html,
|
||||
skip_tags=["blockquote", "del"],
|
||||
exclude_details_include_summary=True,
|
||||
)
|
||||
self.excerpt = truncate_string(
|
||||
extracted_text, length=200, truncate_at_chars=" "
|
||||
|
||||
Reference in New Issue
Block a user