Backport of text decoration utils from 3.0

This commit is contained in:
Alex Root Junior 2020-01-01 16:39:31 +02:00
parent ce026dfa71
commit 9115a44be6
5 changed files with 242 additions and 109 deletions

View file

@ -3,6 +3,7 @@ import contextlib
import io
import ssl
import typing
import warnings
from contextvars import ContextVar
from typing import Dict, List, Optional, Union
@ -269,6 +270,10 @@ class BaseBot:
if value not in ParseMode.all():
raise ValueError(f"Parse mode must be one of {ParseMode.all()}")
setattr(self, '_parse_mode', value)
if value == 'markdown':
warnings.warn("Parse mode `Markdown` is legacy since Telegram Bot API 4.5, "
"retained for backward compatibility. Use `MarkdownV2` instead.\n"
"https://core.telegram.org/bots/api#markdown-style", stacklevel=3)
@parse_mode.deleter
def parse_mode(self):

View file

@ -2,7 +2,6 @@ from __future__ import annotations
import datetime
import functools
import sys
import typing
from . import base
@ -32,6 +31,7 @@ from .video_note import VideoNote
from .voice import Voice
from ..utils import helper
from ..utils import markdown as md
from ..utils.text_decorations import html_decoration, markdown_decoration
class Message(base.TelegramObject):
@ -200,38 +200,10 @@ class Message(base.TelegramObject):
if text is None:
raise TypeError("This message doesn't have any text.")
quote_fn = md.quote_html if as_html else md.escape_md
entities = self.entities or self.caption_entities
if not entities:
return quote_fn(text)
text_decorator = html_decoration if as_html else markdown_decoration
if not sys.maxunicode == 0xffff:
text = text.encode('utf-16-le')
result = ''
offset = 0
for entity in sorted(entities, key=lambda item: item.offset):
entity_text = entity.parse(text, as_html=as_html)
if sys.maxunicode == 0xffff:
part = text[offset:entity.offset]
result += quote_fn(part) + entity_text
else:
part = text[offset * 2:entity.offset * 2]
result += quote_fn(part.decode('utf-16-le')) + entity_text
offset = entity.offset + entity.length
if sys.maxunicode == 0xffff:
part = text[offset:]
result += quote_fn(part)
else:
part = text[offset * 2:]
result += quote_fn(part.decode('utf-16-le'))
return result
return text_decorator.unparse(text, entities)
@property
def md_text(self) -> str:
@ -1798,4 +1770,5 @@ class ParseMode(helper.Helper):
mode = helper.HelperMode.lowercase
MARKDOWN = helper.Item()
MARKDOWN_V2 = helper.Item()
HTML = helper.Item()

View file

@ -4,6 +4,7 @@ from . import base
from . import fields
from .user import User
from ..utils import helper, markdown
from ..utils.deprecated import deprecated
class MessageEntity(base.TelegramObject):
@ -36,6 +37,7 @@ class MessageEntity(base.TelegramObject):
entity_text = entity_text[self.offset * 2:(self.offset + self.length) * 2]
return entity_text.decode('utf-16-le')
@deprecated("This method doesn't work with nested entities and will be removed in aiogram 3.0")
def parse(self, text, as_html=True):
"""
Get entity value with markup
@ -87,6 +89,8 @@ class MessageEntityType(helper.Helper):
:key: ITALIC
:key: CODE
:key: PRE
:key: UNDERLINE
:key: STRIKETHROUGH
:key: TEXT_LINK
:key: TEXT_MENTION
"""
@ -101,7 +105,9 @@ class MessageEntityType(helper.Helper):
PHONE_NUMBER = helper.Item() # phone_number
BOLD = helper.Item() # bold - bold text
ITALIC = helper.Item() # italic - italic text
CODE = helper.Item() # code - monowidth string
PRE = helper.Item() # pre - monowidth block
CODE = helper.Item() # code - monowidth string
PRE = helper.Item() # pre - monowidth block
UNDERLINE = helper.Item() # underline
STRIKETHROUGH = helper.Item() # strikethrough
TEXT_LINK = helper.Item() # text_link - for clickable text URLs
TEXT_MENTION = helper.Item() # text_mention - for users without usernames

View file

@ -1,59 +1,28 @@
LIST_MD_SYMBOLS = '*_`['
from .text_decorations import html_decoration, markdown_decoration
LIST_MD_SYMBOLS = "*_`["
MD_SYMBOLS = (
(LIST_MD_SYMBOLS[0], LIST_MD_SYMBOLS[0]),
(LIST_MD_SYMBOLS[1], LIST_MD_SYMBOLS[1]),
(LIST_MD_SYMBOLS[2], LIST_MD_SYMBOLS[2]),
(LIST_MD_SYMBOLS[2] * 3 + '\n', '\n' + LIST_MD_SYMBOLS[2] * 3),
('<b>', '</b>'),
('<i>', '</i>'),
('<code>', '</code>'),
('<pre>', '</pre>'),
(LIST_MD_SYMBOLS[2] * 3 + "\n", "\n" + LIST_MD_SYMBOLS[2] * 3),
("<b>", "</b>"),
("<i>", "</i>"),
("<code>", "</code>"),
("<pre>", "</pre>"),
)
HTML_QUOTES_MAP = {
'<': '&lt;',
'>': '&gt;',
'&': '&amp;',
'"': '&quot;'
}
HTML_QUOTES_MAP = {"<": "&lt;", ">": "&gt;", "&": "&amp;", '"': "&quot;"}
_HQS = HTML_QUOTES_MAP.keys() # HQS for HTML QUOTES SYMBOLS
def _join(*content, sep=' '):
def _join(*content, sep=" "):
    """
    Convert every item of ``content`` to ``str`` and join them with ``sep``.

    :param content: values to join
    :param sep: separator inserted between the items
    :return: the joined string
    """
    return sep.join(str(item) for item in content)
def _escape(s, symbols=LIST_MD_SYMBOLS):
    """
    Put a backslash in front of every occurrence of the given symbols.

    :param s: text to escape
    :param symbols: iterable of substrings to escape; they are processed one after another
    :return: the escaped text
    """
    escaped = s
    for mark in symbols:
        escaped = escaped.replace(mark, "\\" + mark)
    return escaped
def _md(string, symbols=("", "")):
    """
    Surround ``string`` with an opening and a closing marker.

    :param string: text to wrap
    :param symbols: pair ``(opening, closing)``
    :return: ``opening + string + closing``
    """
    opening, closing = symbols
    wrapped = opening + string
    return wrapped + closing
def quote_html(content):
    """
    Quote HTML symbols

    Every character that is a key of ``HTML_QUOTES_MAP`` is replaced with the
    entity mapped to it; all other characters are copied unchanged.

    :param content: str
    :return: str
    """
    return "".join(HTML_QUOTES_MAP.get(char, char) for char in content)
def text(*content, sep=' '):
def text(*content, sep=" "):
"""
Join all elements with a separator
@ -64,7 +33,7 @@ def text(*content, sep=' '):
return _join(*content, sep=sep)
def bold(*content, sep=' '):
def bold(*content, sep=" "):
"""
Make bold text (Markdown)
@ -72,10 +41,10 @@ def bold(*content, sep=' '):
:param sep:
:return:
"""
return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[0])
return markdown_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep)))
def hbold(*content, sep=' '):
def hbold(*content, sep=" "):
"""
Make bold text (HTML)
@ -83,10 +52,10 @@ def hbold(*content, sep=' '):
:param sep:
:return:
"""
return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[4])
return html_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep)))
def italic(*content, sep=' '):
def italic(*content, sep=" "):
"""
Make italic text (Markdown)
@ -94,10 +63,10 @@ def italic(*content, sep=' '):
:param sep:
:return:
"""
return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[1])
return markdown_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep)))
def hitalic(*content, sep=' '):
def hitalic(*content, sep=" "):
"""
Make italic text (HTML)
@ -105,10 +74,10 @@ def hitalic(*content, sep=' '):
:param sep:
:return:
"""
return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[5])
return html_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep)))
def code(*content, sep=' '):
def code(*content, sep=" "):
"""
Make mono-width text (Markdown)
@ -116,10 +85,10 @@ def code(*content, sep=' '):
:param sep:
:return:
"""
return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[2])
return markdown_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep)))
def hcode(*content, sep=' '):
def hcode(*content, sep=" "):
"""
Make mono-width text (HTML)
@ -127,10 +96,10 @@ def hcode(*content, sep=' '):
:param sep:
:return:
"""
return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[6])
return html_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep)))
def pre(*content, sep='\n'):
def pre(*content, sep="\n"):
"""
Make mono-width text block (Markdown)
@ -138,10 +107,10 @@ def pre(*content, sep='\n'):
:param sep:
:return:
"""
return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[3])
return markdown_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep)))
def hpre(*content, sep='\n'):
def hpre(*content, sep="\n"):
"""
Make mono-width text block (HTML)
@ -149,10 +118,60 @@ def hpre(*content, sep='\n'):
:param sep:
:return:
"""
return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[7])
return html_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep)))
def link(title, url):
def underline(*content, sep=" "):
    """
    Make underlined text (Markdown)

    :param content: pieces of text, joined with ``sep``
    :param sep: separator placed between the pieces
    :return: the escaped text wrapped in the Markdown underline template
    """
    joined = _join(*content, sep=sep)
    escaped = markdown_decoration.quote(joined)
    return markdown_decoration.underline.format(value=escaped)
def hunderline(*content, sep=" "):
    """
    Make underlined text (HTML)

    :param content: pieces of text, joined with ``sep``
    :param sep: separator placed between the pieces
    :return: the escaped text wrapped in the HTML underline template
    """
    joined = _join(*content, sep=sep)
    escaped = html_decoration.quote(joined)
    return html_decoration.underline.format(value=escaped)
def strikethrough(*content, sep=" "):
    """
    Make strikethrough text (Markdown)

    :param content: pieces of text, joined with ``sep``
    :param sep: separator placed between the pieces
    :return: the escaped text wrapped in the Markdown strikethrough template
    """
    joined = _join(*content, sep=sep)
    escaped = markdown_decoration.quote(joined)
    return markdown_decoration.strikethrough.format(value=escaped)
def hstrikethrough(*content, sep=" "):
    """
    Make strikethrough text (HTML)

    :param content: pieces of text, joined with ``sep``
    :param sep: separator placed between the pieces
    :return: the escaped text wrapped in the HTML strikethrough template
    """
    joined = _join(*content, sep=sep)
    escaped = html_decoration.quote(joined)
    return html_decoration.strikethrough.format(value=escaped)
def link(title: str, url: str) -> str:
"""
Format URL (Markdown)
@ -160,10 +179,10 @@ def link(title, url):
:param url:
:return:
"""
return "[{0}]({1})".format(title, url)
return markdown_decoration.link.format(value=html_decoration.quote(title), link=url)
def hlink(title, url):
def hlink(title: str, url: str) -> str:
"""
Format URL (HTML)
@ -171,23 +190,10 @@ def hlink(title, url):
:param url:
:return:
"""
return '<a href="{0}">{1}</a>'.format(url, quote_html(title))
return html_decoration.link.format(value=html_decoration.quote(title), link=url)
def escape_md(*content, sep=" "):
    """
    Escape markdown text

    E.g. for usernames

    :param content: pieces of text, joined with ``sep`` before escaping
    :param sep: separator placed between the pieces
    :return: the joined text passed through ``_escape``
    """
    joined = _join(*content, sep=sep)
    return _escape(joined)
def hide_link(url):
def hide_link(url: str) -> str:
"""
Hide URL (HTML only)
Can be used for adding an image to a text message

View file

@ -0,0 +1,143 @@
from __future__ import annotations
import html
import re
import struct
from dataclasses import dataclass
from typing import TYPE_CHECKING, AnyStr, Callable, Generator, Iterable, List, Optional
if TYPE_CHECKING:
from aiogram.types import MessageEntity
__all__ = (
"TextDecoration",
"html_decoration",
"markdown_decoration",
"add_surrogate",
"remove_surrogate",
)
@dataclass
class TextDecoration:
    """
    Markup templates plus an escaping function for one markup flavour.

    Every template is a ``str.format`` pattern containing ``{value}``; the
    ``link`` template additionally contains ``{link}``.
    """

    link: str
    bold: str
    italic: str
    code: str
    pre: str
    underline: str
    strikethrough: str
    quote: Callable[[AnyStr], AnyStr]  # escapes plain text for this markup

    def apply_entity(self, entity: MessageEntity, text: str) -> str:
        """
        Apply single entity to text

        ``text`` is expected to be escaped already: ``_unparse_entities``
        passes every plain fragment through ``quote`` before calling this
        method, so nothing is escaped here.

        :param entity: entity whose ``type`` selects the markup
        :param text: already escaped content of the entity
        :return: ``text`` wrapped in the matching template, or ``text``
            unchanged for entity types that have no template
        """
        if entity.type in (
            "bold",
            "italic",
            "code",
            "pre",
            "underline",
            "strikethrough",
        ):
            return getattr(self, entity.type).format(value=text)
        if entity.type == "text_mention":
            return self.link.format(value=text, link=f"tg://user?id={entity.user.id}")
        if entity.type == "text_link":
            return self.link.format(value=text, link=entity.url)
        # Entities without a template ("url", "mention", "email", ...) keep
        # their text as is; quoting it again here would double-escape it.
        return text

    def unparse(self, text: str, entities: Optional[List[MessageEntity]] = None) -> str:
        """
        Unparse message entities

        :param text: raw text
        :param entities: Array of MessageEntities
        :return: text with the markup of this decoration applied
        """
        # Entity offsets are applied to the surrogate-expanded text, so that
        # characters outside the BMP occupy two positions.
        text = add_surrogate(text)
        ordered = sorted(entities, key=lambda item: item.offset) if entities else []
        result = "".join(self._unparse_entities(text, ordered))
        return remove_surrogate(result)

    def _unparse_entities(
        self,
        text: str,
        entities: Iterable[MessageEntity],
        offset: Optional[int] = None,
        length: Optional[int] = None,
    ) -> Generator[str, None, None]:
        """
        Yield the fragments of ``text[offset:length]`` with entities applied.

        :param text: whole text the entity offsets refer to
        :param entities: entities ordered by ``offset``
        :param offset: start position of the processed span (default: 0)
        :param length: end position of the processed span (default: ``len(text)``)
        :return: generator of escaped / decorated fragments
        """
        # Slicing below needs a sequence; accept any iterable as annotated.
        entities = list(entities)
        offset = 0 if offset is None else offset
        # ``length`` is an end position; an explicit 0 (empty entity at the very
        # start of the text) must not be replaced by the full text length.
        length = len(text) if length is None else length

        for index, entity in enumerate(entities):
            if entity.offset < offset:
                # Starts before the current position: it lies inside an entity
                # that was already emitted, so it was handled by the recursion.
                continue
            if entity.offset > offset:
                yield self.quote(text[offset : entity.offset])
            start = entity.offset
            offset = entity.offset + entity.length

            # Later entities that start before this one ends are nested in it.
            sub_entities = [e for e in entities[index + 1 :] if e.offset < offset]
            yield self.apply_entity(
                entity,
                "".join(
                    self._unparse_entities(text, sub_entities, offset=start, length=offset)
                ),
            )

        if offset < length:
            yield self.quote(text[offset:length])
# HTML flavour of TextDecoration: every template wraps ``{value}`` in the
# matching HTML tag, and plain text is escaped with the standard ``html.escape``.
html_decoration = TextDecoration(
link='<a href="{link}">{value}</a>',
bold="<b>{value}</b>",
italic="<i>{value}</i>",
code="<code>{value}</code>",
pre="<pre>{value}</pre>",
underline="<u>{value}</u>",
strikethrough="<s>{value}</s>",
quote=html.escape,
)
# Characters that must be prefixed with a backslash in MarkdownV2 plain text.
# "=" is part of Telegram's reserved set, and the backslash itself has to be
# escaped too, otherwise a literal "\" in the text would escape what follows it.
MARKDOWN_QUOTE_PATTERN = re.compile(r"([_*\[\]()~`>#+\-=|{}.!\\])")
# Markdown flavour of TextDecoration; plain text is escaped by putting a
# backslash in front of every character matched by MARKDOWN_QUOTE_PATTERN.
# NOTE(review): the "\r" after the closing italic marker presumably keeps it
# from merging with an adjacent "__" underline marker - confirm against the
# Bot API formatting notes.
markdown_decoration = TextDecoration(
    link="[{value}]({link})",
    bold="*{value}*",
    italic="_{value}_\r",
    code="`{value}`",
    pre="```{value}```",
    underline="__{value}__",
    strikethrough="~{value}~",
    quote=lambda text: MARKDOWN_QUOTE_PATTERN.sub(r"\\\1", text),
)
def add_surrogate(text: str) -> str:
    """
    Expand every character above U+FFFF into its UTF-16 surrogate pair.

    After the expansion each UTF-16 code unit of the text is one item of the
    returned ``str``, so positions counted in UTF-16 code units can be used as
    plain string indexes.

    :param text: source text
    :return: text in which code points 0x10000..0x10FFFF are replaced by two surrogates
    """
    pieces = []
    for char in text:
        code_point = ord(char)
        if code_point < 0x10000:
            pieces.append(char)
            continue
        # Same pair UTF-16 encoding produces: upper and lower 10 bits of the
        # code point minus 0x10000, offset into the surrogate ranges.
        shifted = code_point - 0x10000
        pieces.append(chr(0xD800 + (shifted >> 10)))
        pieces.append(chr(0xDC00 + (shifted & 0x3FF)))
    return "".join(pieces)
def remove_surrogate(text: str) -> str:
    """
    Merge UTF-16 surrogate pairs contained in ``text`` back into single characters.

    :param text: text that may contain surrogate code points
    :return: the text after a UTF-16 encode/decode round trip
    """
    # "surrogatepass" lets the surrogate code points be encoded at all; the
    # plain decode then reads each pair as one character.
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")