mirror of
https://gitlab.com/tildes/tildes.git
synced 2026-04-16 06:18:34 +02:00
Hide the inner <details> text from comment excerpts but include the <summary> text.
Closes tildes-community/tildes-cf#4. See merge request tildes-community/tildes-cf!13.
This commit is contained in:
@@ -69,7 +69,9 @@ class TopicMetadataGenerator(EventStreamConsumer):
|
|||||||
if not topic.rendered_html:
|
if not topic.rendered_html:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
extracted_text = extract_text_from_html(topic.rendered_html)
|
extracted_text = extract_text_from_html(
|
||||||
|
topic.rendered_html, exclude_details_include_summary=True
|
||||||
|
)
|
||||||
|
|
||||||
# create a short excerpt by truncating the extracted string
|
# create a short excerpt by truncating the extracted string
|
||||||
excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")
|
excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")
|
||||||
|
|||||||
@@ -154,6 +154,16 @@ def test_comment_excerpt_excludes_del(topic, session_user):
|
|||||||
assert comment.excerpt == "I really love it."
|
assert comment.excerpt == "I really love it."
|
||||||
|
|
||||||
|
|
||||||
|
def test_comment_excerpt_excludes_details(topic, session_user):
|
||||||
|
"""Ensure that comment excerpts don't include text from <details> elements.
|
||||||
|
|
||||||
|
But ensure that the inner <summary> text *is* included.
|
||||||
|
"""
|
||||||
|
markdown = "<details>\n<summary>Spoilers!</summary>\n\nHide me!\n</details>"
|
||||||
|
comment = Comment(topic, session_user, markdown)
|
||||||
|
assert comment.excerpt == "Spoilers!"
|
||||||
|
|
||||||
|
|
||||||
def test_comment_tree(db, topic, session_user):
|
def test_comment_tree(db, topic, session_user):
|
||||||
"""Ensure that building and pruning a comment tree works."""
|
"""Ensure that building and pruning a comment tree works."""
|
||||||
all_comments = []
|
all_comments = []
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from tildes.lib.string import (
|
|||||||
truncate_string,
|
truncate_string,
|
||||||
truncate_string_at_char,
|
truncate_string_at_char,
|
||||||
word_count,
|
word_count,
|
||||||
|
extract_text_from_html,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -152,3 +153,23 @@ def test_basic_camelcase_to_snakecase():
|
|||||||
def test_camelcase_to_snakecase_with_acronym():
|
def test_camelcase_to_snakecase_with_acronym():
|
||||||
"""Ensure CamelCase->snake_case works as expected with an acronym."""
|
"""Ensure CamelCase->snake_case works as expected with an acronym."""
|
||||||
assert camelcase_to_snakecase("SomeHTTPThing") == "some_http_thing"
|
assert camelcase_to_snakecase("SomeHTTPThing") == "some_http_thing"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_text_from_html_include_details():
|
||||||
|
"""Ensure extract_text_from_html behavior includes <details> elements by default."""
|
||||||
|
html = "<details><summary>Spoilers!</summary> <p>Don't hide me!</p></details>"
|
||||||
|
assert extract_text_from_html(html) == "Spoilers! Don't hide me!"
|
||||||
|
|
||||||
|
html = "<details><p>Don't hide me!</p></details>"
|
||||||
|
assert extract_text_from_html(html) == "Don't hide me!"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_text_from_html_exclude_details():
|
||||||
|
"""Ensure extract_text_from_html behavior excludes <details> elements when specified."""
|
||||||
|
html = "<details><summary>Spoilers!</summary> <p>Hide me!</p></details>"
|
||||||
|
text = extract_text_from_html(html, exclude_details_include_summary=True)
|
||||||
|
assert text == "Spoilers!"
|
||||||
|
|
||||||
|
html = "<details><p>Hide me!</p></details>"
|
||||||
|
text = extract_text_from_html(html, exclude_details_include_summary=True)
|
||||||
|
assert text == "Details"
|
||||||
|
|||||||
@@ -226,7 +226,11 @@ def separate_string(original: str, separator: str, segment_size: int) -> str:
|
|||||||
return separated
|
return separated
|
||||||
|
|
||||||
|
|
||||||
def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) -> str:
|
def extract_text_from_html(
|
||||||
|
html: str,
|
||||||
|
skip_tags: Optional[list[str]] = None,
|
||||||
|
exclude_details_include_summary: bool = False,
|
||||||
|
) -> str:
|
||||||
"""Extract plain text content from the elements inside an HTML string."""
|
"""Extract plain text content from the elements inside an HTML string."""
|
||||||
|
|
||||||
def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
|
def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
|
||||||
@@ -242,6 +246,14 @@ def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) ->
|
|||||||
if element.tag in skip_tags:
|
if element.tag in skip_tags:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if element.tag == "details" and exclude_details_include_summary:
|
||||||
|
for subelement in element:
|
||||||
|
if subelement.tag == "summary":
|
||||||
|
yield from extract_text(subelement, skip_tags)
|
||||||
|
return
|
||||||
|
yield "Details"
|
||||||
|
return
|
||||||
|
|
||||||
if element.text:
|
if element.text:
|
||||||
yield element.text
|
yield element.text
|
||||||
|
|
||||||
|
|||||||
@@ -138,7 +138,9 @@ class Comment(DatabaseModel):
|
|||||||
self.rendered_html = convert_markdown_to_safe_html(new_markdown)
|
self.rendered_html = convert_markdown_to_safe_html(new_markdown)
|
||||||
|
|
||||||
extracted_text = extract_text_from_html(
|
extracted_text = extract_text_from_html(
|
||||||
self.rendered_html, skip_tags=["blockquote", "del"]
|
self.rendered_html,
|
||||||
|
skip_tags=["blockquote", "del"],
|
||||||
|
exclude_details_include_summary=True,
|
||||||
)
|
)
|
||||||
self.excerpt = truncate_string(
|
self.excerpt = truncate_string(
|
||||||
extracted_text, length=200, truncate_at_chars=" "
|
extracted_text, length=200, truncate_at_chars=" "
|
||||||
|
|||||||
Reference in New Issue
Block a user