"""Render Telegram message entities back into HTML or Markdown markup."""
import html
import re
from dataclasses import dataclass
from struct import unpack
from typing import TYPE_CHECKING, AnyStr, Callable, Generator, Iterable, List, Optional

if TYPE_CHECKING:  # the entity type is referenced in annotations only
    from aiogram.api.types import MessageEntity

__all__ = ("TextDecoration", "html", "markdown", "add_surrogates", "remove_surrogates")


@dataclass
class TextDecoration:
    """
    Markup templates and an escaping function for one markup dialect.

    Every template is a ``str.format`` pattern that receives ``value``
    (the already rendered inner text); ``link`` additionally receives ``link``.
    ``quote`` escapes plain text so it cannot be mistaken for markup.
    """

    link: str
    bold: str
    italic: str
    code: str
    pre: str
    underline: str
    strikethrough: str
    quote: Callable[[AnyStr], AnyStr]

    def apply_entity(self, entity: "MessageEntity", text: str) -> str:
        """
        Apply single entity to text

        :param entity: entity describing the decoration
        :param text: inner text of the entity, already quoted by the caller
        :return: decorated text
        """
        kind = entity.type
        if kind in ("bold", "italic", "code", "pre", "underline", "strikethrough"):
            return getattr(self, kind).format(value=text)
        if kind == "text_mention":
            return self.link.format(value=text, link=f"tg://user?id={entity.user.id}")
        if kind == "text_link":
            # NOTE(review): entity.url is inserted verbatim - confirm whether it
            # needs escaping for the target markup.
            return self.link.format(value=text, link=entity.url)
        # Entities without markup of their own (url, mention, hashtag, ...).
        # `text` was quoted while the segments were collected, so quoting it
        # again here would escape it twice.
        return text

    def unparse(self, text, entities: Optional[List["MessageEntity"]] = None) -> str:
        """
        Unparse message entities

        :param text: raw text
        :param entities: Array of MessageEntities
        :return: text with the entities rendered as markup
        """
        # Walk entities left to right, outer before inner, whatever order they
        # were passed in.
        ordered = sorted(entities or (), key=lambda item: (item.offset, -item.length))
        # Entity offsets are counted in UTF-16 code units (see Bot API docs), so
        # expand astral characters to surrogate pairs before slicing.
        surrogated = add_surrogates(text)
        rendered = "".join(self._unparse_entities(surrogated, ordered))
        return remove_surrogates(rendered)

    def _unparse_entities(
        self,
        text: str,
        entities: Iterable["MessageEntity"],
        offset: Optional[int] = None,
        length: Optional[int] = None,
    ) -> Generator[str, None, None]:
        """
        Yield rendered pieces of ``text[offset:length]``.

        :param text: surrogate-expanded text
        :param entities: entities ordered by offset
        :param offset: index to start from, defaults to 0
        :param length: index to stop at (exclusive), defaults to ``len(text)``
        """
        entities = list(entities or ())  # sliced below, so a sequence is needed
        if offset is None:
            offset = 0
        if length is None:  # an explicit 0 is a valid (empty) window
            length = len(text)

        for index, entity in enumerate(entities):
            if entity.offset < offset:
                # Already rendered as part of an enclosing entity
                continue
            if entity.offset > offset:
                yield self.quote(text[offset : entity.offset])
            start = entity.offset
            end = start + entity.length

            nested = [item for item in entities[index + 1 :] if start <= item.offset < end]
            inner = "".join(self._unparse_entities(text, nested, offset=start, length=end))
            yield self.apply_entity(entity, inner)
            offset = end

        if offset < length:
            yield self.quote(text[offset:length])


# NOTE(review): the markup inside these templates was stripped from the reviewed
# text; it is restored to the tags of the Telegram Bot API HTML style - confirm.
html = TextDecoration(
    link='<a href="{link}">{value}</a>',
    bold="<b>{value}</b>",
    italic="<i>{value}</i>",
    code="<code>{value}</code>",
    pre="<pre>{value}</pre>",
    underline="<u>{value}</u>",
    strikethrough="<s>{value}</s>",
    quote=html.escape,  # evaluated before the name `html` is rebound
)

markdown = TextDecoration(
    link="[{value}]({link})",
    bold="*{value}*",
    italic="_{value}_",
    code="`{value}`",
    pre="```{value}```",
    underline="--{value}--",  # Is not supported
    strikethrough="~~{value}~~",  # Is not supported
    quote=lambda text: re.sub(
        pattern=r"([*_`\[])", repl=r"\\\1", string=text
    ),  # Is not always helpful
)  # Markdown is not recommended for usage. Use HTML instead

# Surrogates util was copied from Pyrogram code; it is under GPL v3 License.
# Source: https://github.com/pyrogram/pyrogram/blob/c5cc85f0076149fc6f3a6fc1d482affb01eeab21/pyrogram/client/parser/utils.py#L19-L37
# NOTE(review): both helper bodies were truncated in the reviewed text and are
# restored from the source linked above - confirm against it.

# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def add_surrogates(text):
    """Replace each SMP code point with a surrogate pair."""
    return SMP_RE.sub(
        lambda match: "".join(  # Split SMP in two surrogates
            chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))
        ),
        text,
    )


def remove_surrogates(text):
    """Replace each surrogate pair with a SMP code point."""
    return text.encode("utf-16", "surrogatepass").decode("utf-16")
# NOTE(review): the header of this test module (imports, class statement) and the
# first parametrized case(s) of test_apply_single_entity were lost in the reviewed
# text. The header below is rebuilt from the names the visible code uses; the lost
# cases are not guessed - confirm against the original test module.
from typing import List, Optional

import pytest

from aiogram.api.types import MessageEntity, User
from aiogram.utils.text_decorations import TextDecoration, html, markdown


class TestTextDecoration:
    """Checks for the ``html`` and ``markdown`` text decorations."""

    @pytest.mark.parametrize(
        "decorator,entity,result",
        [
            [html, MessageEntity(type="bold", offset=0, length=5), "<b>test</b>"],
            [html, MessageEntity(type="italic", offset=0, length=5), "<i>test</i>"],
            [html, MessageEntity(type="code", offset=0, length=5), "<code>test</code>"],
            [html, MessageEntity(type="pre", offset=0, length=5), "<pre>test</pre>"],
            [html, MessageEntity(type="underline", offset=0, length=5), "<u>test</u>"],
            [html, MessageEntity(type="strikethrough", offset=0, length=5), "<s>test</s>"],
            [html, MessageEntity(type="hashtag", offset=0, length=5), "test"],
            [html, MessageEntity(type="cashtag", offset=0, length=5), "test"],
            [html, MessageEntity(type="bot_command", offset=0, length=5), "test"],
            [html, MessageEntity(type="email", offset=0, length=5), "test"],
            [html, MessageEntity(type="phone_number", offset=0, length=5), "test"],
            [
                html,
                MessageEntity(
                    type="text_mention",
                    offset=0,
                    length=5,
                    user=User(id=42, first_name="Test", is_bot=False),
                ),
                '<a href="tg://user?id=42">test</a>',
            ],
            [html, MessageEntity(type="url", offset=0, length=5), "test"],
            [
                html,
                MessageEntity(type="text_link", offset=0, length=5, url="https://aiogram.dev"),
                '<a href="https://aiogram.dev">test</a>',
            ],
            [markdown, MessageEntity(type="bold", offset=0, length=5), "*test*"],
            [markdown, MessageEntity(type="italic", offset=0, length=5), "_test_"],
            [markdown, MessageEntity(type="code", offset=0, length=5), "`test`"],
            [markdown, MessageEntity(type="pre", offset=0, length=5), "```test```"],
            [markdown, MessageEntity(type="underline", offset=0, length=5), "--test--"],
            [markdown, MessageEntity(type="strikethrough", offset=0, length=5), "~~test~~"],
            [markdown, MessageEntity(type="hashtag", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="cashtag", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="bot_command", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="email", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="phone_number", offset=0, length=5), "test"],
            [
                markdown,
                MessageEntity(
                    type="text_mention",
                    offset=0,
                    length=5,
                    user=User(id=42, first_name="Test", is_bot=False),
                ),
                "[test](tg://user?id=42)",
            ],
        ],
    )
    def test_apply_single_entity(
        self, decorator: TextDecoration, entity: MessageEntity, result: str
    ):
        """A single entity wraps the given text in the decoration's markup."""
        assert decorator.apply_entity(entity, "test") == result

    @pytest.mark.parametrize(
        "decorator,before,after",
        [
            [html, "test", "test"],
            [html, "test < test", "test &lt; test"],
            [html, "test > test", "test &gt; test"],
            [html, "test & test", "test &amp; test"],
            [html, "test @ test", "test @ test"],
            [markdown, "test", "test"],
            [markdown, "[test]", "\\[test]"],
            [markdown, "test ` test", "test \\` test"],
            [markdown, "test * test", "test \\* test"],
            [markdown, "test _ test", "test \\_ test"],
        ],
    )
    def test_quote(self, decorator: TextDecoration, before: str, after: str):
        """Markup-significant characters are escaped, everything else is kept."""
        assert decorator.quote(before) == after

    @pytest.mark.parametrize(
        "decorator,text,entities,result",
        [
            [html, "test", None, "test"],
            [
                html,
                "test1 test2 test3 test4 test5 test6 test7",
                [
                    MessageEntity(type="bold", offset=6, length=29),
                    MessageEntity(type="underline", offset=12, length=5),
                    MessageEntity(type="italic", offset=24, length=5),
                ],
                "test1 <b>test2 <u>test3</u> test4 <i>test5</i> test6</b> test7",
            ],
            [
                html,
                "test1 test2 test3 test4 test5",
                [
                    MessageEntity(type="bold", offset=6, length=17),
                    MessageEntity(type="underline", offset=12, length=5),
                ],
                "test1 <b>test2 <u>test3</u> test4</b> test5",
            ],
            [
                html,
                "test1 test2 test3 test4",
                [
                    MessageEntity(type="bold", offset=6, length=11),
                    MessageEntity(type="underline", offset=12, length=5),
                ],
                "test1 <b>test2 <u>test3</u></b> test4",
            ],
            [
                html,
                "test1 test2 test3",
                [MessageEntity(type="bold", offset=6, length=6)],
                "test1 <b>test2 </b>test3",
            ],
            [
                html,
                "test1 test2",
                [MessageEntity(type="bold", offset=0, length=5)],
                "<b>test1</b> test2",
            ],
            [
                html,
                "test te👍🏿st test",
                # Offsets and lengths are UTF-16 code units: each of the two emoji
                # code points takes two units, so "te👍🏿st" is 8 units long.
                [MessageEntity(type="bold", offset=5, length=8, url=None, user=None)],
                "test <b>te👍🏿st</b> test",
            ],
        ],
    )
    def test_unparse(
        self,
        decorator: TextDecoration,
        text: str,
        entities: Optional[List[MessageEntity]],
        result: str,
    ):
        """Whole-message rendering, including nested entities."""
        assert decorator.unparse(text, entities) == result