mirror of
https://gitlab.com/tildes/tildes.git
synced 2026-04-16 06:18:34 +02:00
Adjust zero-width joiner check to fix IndexError
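There was the potential for an IndexError here, with a string that started with a zero-width joiner and had at least one more character afterwards: the old bounds check still passed at index 0 while `final_characters` was empty, so looking up the character before the joiner raised.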
This commit is contained in:
@@ -189,19 +189,18 @@ def _sanitize_characters(original: str) -> str:
|
||||
# newlines, which are replaced with normal spaces
|
||||
if char == "\n":
|
||||
final_characters.append(" ")
|
||||
elif char == "\u200D":
|
||||
final_length = len(final_characters)
|
||||
# only check for the ZWJ if it's between two characters
|
||||
if final_length <= index < len(original) - 1:
|
||||
char_before_category = unicodedata.category(
|
||||
final_characters[final_length - 1]
|
||||
)
|
||||
char_after_category = unicodedata.category(original[index + 1])
|
||||
# only keep the ZWJ if it's between two symbol characters
|
||||
if char_before_category.startswith(
|
||||
"S"
|
||||
) and char_after_category.startswith("S"):
|
||||
final_characters.append("\u200D")
|
||||
|
||||
# Keep zero-width joiner only if it's between two symbol characters, so we
|
||||
# don't break certain emoji variants
|
||||
if char == "\u200D":
|
||||
try:
|
||||
before_category = unicodedata.category(final_characters[-1])
|
||||
after_category = unicodedata.category(original[index + 1])
|
||||
except IndexError:
|
||||
continue
|
||||
|
||||
if before_category.startswith("S") and after_category.startswith("S"):
|
||||
final_characters.append(char)
|
||||
else:
|
||||
# any other type of character, just keep it
|
||||
final_characters.append(char)
|
||||
|
||||
Reference in New Issue
Block a user