From a64359a6de86a5dccdadf311d9c6f8adc7e463d2 Mon Sep 17 00:00:00 2001 From: Alex Root Junior Date: Sat, 2 May 2020 01:56:01 +0300 Subject: [PATCH] Update text decorations. Fix 'mention' generator and emoji offsets. --- aiogram/utils/markdown.py | 49 ++---- aiogram/utils/text_decorations.py | 180 ++++++++++++++-------- poetry.lock | 14 +- tests/test_utils/test_text_decorations.py | 31 +++- 4 files changed, 165 insertions(+), 109 deletions(-) diff --git a/aiogram/utils/markdown.py b/aiogram/utils/markdown.py index 7b217b4f..5daae546 100644 --- a/aiogram/utils/markdown.py +++ b/aiogram/utils/markdown.py @@ -1,22 +1,5 @@ from .text_decorations import html_decoration, markdown_decoration -LIST_MD_SYMBOLS = "*_`[" - -MD_SYMBOLS = ( - (LIST_MD_SYMBOLS[0], LIST_MD_SYMBOLS[0]), - (LIST_MD_SYMBOLS[1], LIST_MD_SYMBOLS[1]), - (LIST_MD_SYMBOLS[2], LIST_MD_SYMBOLS[2]), - (LIST_MD_SYMBOLS[2] * 3 + "\n", "\n" + LIST_MD_SYMBOLS[2] * 3), - ("<b>", "</b>"), - ("<i>", "</i>"), - ("<code>", "</code>"), - ("<pre>", "</pre>"), -) - -HTML_QUOTES_MAP = {"<": "&lt;", ">": "&gt;", "&": "&amp;", '"': "&quot;"} - -_HQS = HTML_QUOTES_MAP.keys() # HQS for HTML QUOTES SYMBOLS - def _join(*content, sep=" "): return sep.join(map(str, content)) @@ -41,7 +24,7 @@ def bold(*content, sep=" "): :param sep: :return: """ - return markdown_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep))) + return markdown_decoration.bold(value=html_decoration.quote(_join(*content, sep=sep))) def hbold(*content, sep=" "): @@ -52,7 +35,7 @@ def hbold(*content, sep=" "): :param sep: :return: """ - return html_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep))) + return html_decoration.bold(value=html_decoration.quote(_join(*content, sep=sep))) def italic(*content, sep=" "): @@ -63,7 +46,7 @@ def italic(*content, sep=" "): :param sep: :return: """ - return markdown_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep))) + return markdown_decoration.italic(value=html_decoration.quote(_join(*content, sep=sep))) def hitalic(*content, sep=" "): @@ -74,7 +57,7 @@ def hitalic(*content, sep=" "): :param sep: :return: """ - return html_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep))) + return html_decoration.italic(value=html_decoration.quote(_join(*content, sep=sep))) def code(*content, sep=" "): @@ -85,7 +68,7 @@ def code(*content, sep=" "): :param sep: :return: """ - return markdown_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep))) + return markdown_decoration.code(value=html_decoration.quote(_join(*content, sep=sep))) def hcode(*content, sep=" "): @@ -96,7 +79,7 @@ def hcode(*content, sep=" "): :param sep: :return: """ - return html_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep))) + return html_decoration.code(value=html_decoration.quote(_join(*content, sep=sep))) def pre(*content, sep="\n"): @@ -107,7 +90,7 @@ def pre(*content, sep="\n"): :param sep: :return: """ - return
markdown_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep))) + return markdown_decoration.pre(value=html_decoration.quote(_join(*content, sep=sep))) def hpre(*content, sep="\n"): @@ -118,7 +101,7 @@ def hpre(*content, sep="\n"): :param sep: :return: """ - return html_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep))) + return html_decoration.pre(value=html_decoration.quote(_join(*content, sep=sep))) def underline(*content, sep=" "): @@ -129,9 +112,7 @@ def underline(*content, sep=" "): :param sep: :return: """ - return markdown_decoration.underline.format( - value=markdown_decoration.quote(_join(*content, sep=sep)) - ) + return markdown_decoration.underline(value=markdown_decoration.quote(_join(*content, sep=sep))) def hunderline(*content, sep=" "): @@ -142,7 +123,7 @@ def hunderline(*content, sep=" "): :param sep: :return: """ - return html_decoration.underline.format(value=html_decoration.quote(_join(*content, sep=sep))) + return html_decoration.underline(value=html_decoration.quote(_join(*content, sep=sep))) def strikethrough(*content, sep=" "): @@ -153,7 +134,7 @@ def strikethrough(*content, sep=" "): :param sep: :return: """ - return markdown_decoration.strikethrough.format( + return markdown_decoration.strikethrough( value=markdown_decoration.quote(_join(*content, sep=sep)) ) @@ -166,9 +147,7 @@ def hstrikethrough(*content, sep=" "): :param sep: :return: """ - return html_decoration.strikethrough.format( - value=html_decoration.quote(_join(*content, sep=sep)) - ) + return html_decoration.strikethrough(value=html_decoration.quote(_join(*content, sep=sep))) def link(title: str, url: str) -> str: @@ -179,7 +158,7 @@ def link(title: str, url: str) -> str: :param url: :return: """ - return markdown_decoration.link.format(value=html_decoration.quote(title), link=url) + return markdown_decoration.link(value=html_decoration.quote(title), link=url) def hlink(title: str, url: str) -> str: @@ -190,7 +169,7 @@ def 
hlink(title: str, url: str) -> str: :param url: :return: """ - return html_decoration.link.format(value=html_decoration.quote(title), link=url) + return html_decoration.link(value=html_decoration.quote(title), link=url) def hide_link(url: str) -> str: diff --git a/aiogram/utils/text_decorations.py b/aiogram/utils/text_decorations.py index 125547e8..4e01bc8b 100644 --- a/aiogram/utils/text_decorations.py +++ b/aiogram/utils/text_decorations.py @@ -1,32 +1,23 @@ +from __future__ import annotations + import html import re -import struct -from dataclasses import dataclass -from typing import AnyStr, Callable, Generator, Iterable, List, Optional +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Generator, List, Optional, Pattern, cast -from aiogram.api.types import MessageEntity +if TYPE_CHECKING: # pragma: no cover + from aiogram.api.types import MessageEntity __all__ = ( "TextDecoration", + "HtmlDecoration", + "MarkdownDecoration", "html_decoration", "markdown_decoration", - "add_surrogate", - "remove_surrogate", ) -@dataclass -class TextDecoration: - link: str - bold: str - italic: str - code: str - pre: str - pre_language: str - underline: str - strikethrough: str - quote: Callable[[AnyStr], AnyStr] - +class TextDecoration(ABC): def apply_entity(self, entity: MessageEntity, text: str) -> str: """ Apply single entity to text @@ -36,20 +27,27 @@ class TextDecoration: :return: """ if entity.type in ("bold", "italic", "code", "underline", "strikethrough"): - return getattr(self, entity.type).format(value=text) + return cast(str, getattr(self, entity.type)(value=text)) if entity.type == "pre": - return (self.pre_language if entity.language else self.pre).format( - value=text, language=entity.language + return ( + self.pre_language(value=text, language=entity.language) + if entity.language + else self.pre(value=text) ) elif entity.type == "text_mention": - return self.link.format(value=text, link=f"tg://user?id={entity.user.id}") + from 
aiogram.api.types import User + + user = cast(User, entity.user) + return self.link(value=text, link=f"tg://user?id={user.id}") + elif entity.type == "mention": + return text elif entity.type == "text_link": - return self.link.format(value=text, link=entity.url) + return self.link(value=text, link=cast(str, entity.url)) elif entity.type == "url": return text return self.quote(text) - def unparse(self, text, entities: Optional[List[MessageEntity]] = None) -> str: + def unparse(self, text: str, entities: Optional[List[MessageEntity]] = None) -> str: """ Unparse message entities @@ -57,22 +55,22 @@ class TextDecoration: :param entities: Array of MessageEntities :return: """ - text = add_surrogate(text) result = "".join( self._unparse_entities( text, sorted(entities, key=lambda item: item.offset) if entities else [] ) ) - return remove_surrogate(result) + return result def _unparse_entities( self, text: str, - entities: Iterable[MessageEntity], + entities: List[MessageEntity], offset: Optional[int] = None, length: Optional[int] = None, ) -> Generator[str, None, None]: - offset = offset or 0 + if offset is None: + offset = 0 length = length or len(text) for index, entity in enumerate(entities): @@ -83,7 +81,7 @@ class TextDecoration: start = entity.offset offset = entity.offset + entity.length - sub_entities = list(filter(lambda e: e.offset < offset, entities[index + 1 :])) + sub_entities = list(filter(lambda e: e.offset < (offset or 0), entities[index + 1 :])) yield self.apply_entity( entity, "".join(self._unparse_entities(text, sub_entities, offset=start, length=offset)), @@ -92,42 +90,102 @@ class TextDecoration: if offset < length: yield self.quote(text[offset:length]) + @abstractmethod + def link(self, value: str, link: str) -> str: # pragma: no cover + pass -html_decoration = TextDecoration( - link='<a href="{link}">{value}</a>', - bold="<b>{value}</b>", - italic="<i>{value}</i>", - code="<code>{value}</code>", - pre="<pre>{value}</pre>", - pre_language='<pre><code class="language-{language}">{value}</code></pre>', - underline="<u>{value}</u>", - strikethrough="<s>{value}</s>", - quote=html.escape, -) + @abstractmethod + def bold(self, value: str) -> str: # pragma: no cover + pass -MARKDOWN_QUOTE_PATTERN = re.compile(r"([_*\[\]()~`>#+\-|{}.!])") + @abstractmethod + def italic(self, value: str) -> str: # pragma: no cover + pass -markdown_decoration = TextDecoration( - link="[{value}]({link})", - bold="*{value}*", - italic="_{value}_\r", - code="`{value}`", - pre="```{value}```", - pre_language="```{language}\n{value}\n```", - underline="__{value}__", - strikethrough="~{value}~", - quote=lambda text: re.sub(pattern=MARKDOWN_QUOTE_PATTERN, repl=r"\\\1", string=text), -) + @abstractmethod + def code(self, value: str) -> str: # pragma: no cover + pass + + @abstractmethod + def pre(self, value: str) -> str: # pragma: no cover + pass + + @abstractmethod + def pre_language(self, value: str, language: str) -> str: # pragma: no cover + pass + + @abstractmethod + def underline(self, value: str) -> str: # pragma: no cover + pass + + @abstractmethod + def strikethrough(self, value: str) -> str: # pragma: no cover + pass + + @abstractmethod + def quote(self, value: str) -> str: # pragma: no cover + pass -def add_surrogate(text: str) -> str: - return "".join( - "".join(chr(d) for d in struct.unpack("<HH", s.encode("utf-16-le"))) - if (0x10000 <= ord(s) <= 0x10FFFF) - else s - for s in text - ) +class HtmlDecoration(TextDecoration): + def link(self, value: str, link: str) -> str: + return f'<a href="{link}">{value}</a>' + + def bold(self, value: str) -> str: + return f"<b>{value}</b>" + + def italic(self, value: str) -> str: + return f"<i>{value}</i>" + + def code(self, value: str) -> str: + return f"<code>{value}</code>" + + def pre(self, value: str) -> str: + return f"<pre>{value}</pre>" + def pre_language(self, value: str, language: str) -> str: + return f'<pre><code class="language-{language}">{value}</code></pre>' + def underline(self, value: str) -> str: + return f"<u>{value}</u>" + + def strikethrough(self, value: str) -> str: + return f"<s>{value}</s>" + + def quote(self, value: str) -> str: + return html.escape(value) -def remove_surrogate(text: str) -> str: - return text.encode("utf-16", "surrogatepass").decode("utf-16") +class MarkdownDecoration(TextDecoration): + MARKDOWN_QUOTE_PATTERN: Pattern[str] = re.compile(r"([_*\[\]()~`>#+\-|{}.!])") + + def link(self, value: str, link: str) -> str: + return f"[{value}]({link})" + + def bold(self, value: str) -> str: + return f"*{value}*" + + def italic(self, value: str) -> str: + return f"_{value}_\r" + + def code(self, value: str) -> str: + return f"`{value}`" + + def pre(self, value: str) -> str: + return f"```{value}```" + + def pre_language(self, value: str, language: str) -> str: + return f"```{language}\n{value}\n```" + + def underline(self, value: str) -> str: + return f"__{value}__" + + def strikethrough(self, value: str) -> str: + return f"~{value}~" + + def quote(self, value: str) -> str: + return re.sub(pattern=self.MARKDOWN_QUOTE_PATTERN, repl=r"\\\1", string=value) + + +html_decoration = HtmlDecoration() +markdown_decoration = MarkdownDecoration() diff --git a/poetry.lock b/poetry.lock index ac94265a..ef800fbf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -85,7 +85,7 @@ marker = "python_version >= \"3.5\" and sys_platform == \"win32\" or sys_platfor name = "atomicwrites" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "1.3.0" +version = "1.4.0" [[package]] category = "main" @@ -257,7 +257,7 @@ description = "IPython: Productive Interactive Computing" name = "ipython" optional = false python-versions = ">=3.6" -version = "7.13.0" +version = "7.14.0" [package.dependencies] appnope = "*" @@ -273,7 +273,7 @@ setuptools = ">=18.5" traitlets = ">=4.2" [package.extras] -all = ["numpy (>=1.14)", "testpath", "notebook", "nose (>=0.10.1)", "nbconvert", "requests", "ipywidgets", "qtconsole",
"ipyparallel", "Sphinx (>=1.3)", "pygments", "nbformat", "ipykernel"] +all = ["nose (>=0.10.1)", "Sphinx (>=1.3)", "testpath", "nbformat", "ipywidgets", "qtconsole", "numpy (>=1.14)", "notebook", "ipyparallel", "ipykernel", "pygments", "requests", "nbconvert"] doc = ["Sphinx (>=1.3)"] kernel = ["ipykernel"] nbconvert = ["nbconvert"] @@ -979,8 +979,8 @@ asynctest = [ {file = "asynctest-0.13.0.tar.gz", hash = "sha256:c27862842d15d83e6a34eb0b2866c323880eb3a75e4485b079ea11748fd77fac"}, ] atomicwrites = [ - {file = "atomicwrites-1.3.0-py2.py3-none-any.whl", hash = "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4"}, - {file = "atomicwrites-1.3.0.tar.gz", hash = "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"}, + {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, + {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, ] attrs = [ {file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"}, @@ -1071,8 +1071,8 @@ importlib-metadata = [ {file = "importlib_metadata-1.6.0.tar.gz", hash = "sha256:34513a8a0c4962bc66d35b359558fd8a5e10cd472d37aec5f66858addef32c1e"}, ] ipython = [ - {file = "ipython-7.13.0-py3-none-any.whl", hash = "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333"}, - {file = "ipython-7.13.0.tar.gz", hash = "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a"}, + {file = "ipython-7.14.0-py3-none-any.whl", hash = "sha256:5b241b84bbf0eb085d43ae9d46adf38a13b45929ca7774a740990c2c242534bb"}, + {file = "ipython-7.14.0.tar.gz", hash = "sha256:f0126781d0f959da852fb3089e170ed807388e986a8dd4e6ac44855845b0fb1c"}, ] ipython-genutils = [ {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, 
diff --git a/tests/test_utils/test_text_decorations.py b/tests/test_utils/test_text_decorations.py index ad822c8f..5501c08f 100644 --- a/tests/test_utils/test_text_decorations.py +++ b/tests/test_utils/test_text_decorations.py @@ -180,12 +180,31 @@ class TestTextDecoration: ], "strikeboldunder", ], - # [ - # html, - # "test te👍🏿st test", - # [MessageEntity(type="bold", offset=5, length=6, url=None, user=None),], - # "test <b>te👍🏿st</b> test", - # ], + [ + html_decoration, + "@username", + [ + MessageEntity( + type="mention", offset=0, length=9, url=None, user=None, language=None + ), + MessageEntity( + type="bold", offset=0, length=9, url=None, user=None, language=None + ), + ], + "<b>@username</b>", + ], + [ + html_decoration, + "test te👍🏿st test", + [MessageEntity(type="bold", offset=5, length=6, url=None, user=None)], + "test <b>te👍🏿st</b> test", + ], + [ + html_decoration, + "👋🏾 Hi!", + [MessageEntity(type="bold", offset=0, length=8, url=None, user=None)], + "<b>👋🏾 Hi!</b>", + ], ], ) def test_unparse(