Spaces:
Paused
Paused
| # Smarty extension for Python-Markdown | |
| # ==================================== | |
| # Adds conversion of ASCII dashes, quotes and ellipses to their HTML | |
| # entity equivalents. | |
| # See https://Python-Markdown.github.io/extensions/smarty | |
| # for documentation. | |
| # Author: 2013, Dmitry Shachnev <[email protected]> | |
| # All changes Copyright 2013-2014 The Python Markdown Project | |
| # License: [BSD](https://opensource.org/licenses/bsd-license.php) | |
| # SmartyPants license: | |
| # Copyright (c) 2003 John Gruber <https://daringfireball.net/> | |
| # All rights reserved. | |
| # Redistribution and use in source and binary forms, with or without | |
| # modification, are permitted provided that the following conditions are | |
| # met: | |
| # * Redistributions of source code must retain the above copyright | |
| # notice, this list of conditions and the following disclaimer. | |
| # * Redistributions in binary form must reproduce the above copyright | |
| # notice, this list of conditions and the following disclaimer in | |
| # the documentation and/or other materials provided with the | |
| # distribution. | |
| # * Neither the name "SmartyPants" nor the names of its contributors | |
| # may be used to endorse or promote products derived from this | |
| # software without specific prior written permission. | |
| # This software is provided by the copyright holders and contributors "as | |
| # is" and any express or implied warranties, including, but not limited | |
| # to, the implied warranties of merchantability and fitness for a | |
| # particular purpose are disclaimed. In no event shall the copyright | |
| # owner or contributors be liable for any direct, indirect, incidental, | |
| # special, exemplary, or consequential damages (including, but not | |
| # limited to, procurement of substitute goods or services; loss of use, | |
| # data, or profits; or business interruption) however caused and on any | |
| # theory of liability, whether in contract, strict liability, or tort | |
| # (including negligence or otherwise) arising in any way out of the use | |
| # of this software, even if advised of the possibility of such damage. | |
| # `smartypants.py` license: | |
| # `smartypants.py` is a derivative work of SmartyPants. | |
| # Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> | |
| # Redistribution and use in source and binary forms, with or without | |
| # modification, are permitted provided that the following conditions are | |
| # met: | |
| # * Redistributions of source code must retain the above copyright | |
| # notice, this list of conditions and the following disclaimer. | |
| # * Redistributions in binary form must reproduce the above copyright | |
| # notice, this list of conditions and the following disclaimer in | |
| # the documentation and/or other materials provided with the | |
| # distribution. | |
| # This software is provided by the copyright holders and contributors "as | |
| # is" and any express or implied warranties, including, but not limited | |
| # to, the implied warranties of merchantability and fitness for a | |
| # particular purpose are disclaimed. In no event shall the copyright | |
| # owner or contributors be liable for any direct, indirect, incidental, | |
| # special, exemplary, or consequential damages (including, but not | |
| # limited to, procurement of substitute goods or services; loss of use, | |
| # data, or profits; or business interruption) however caused and on any | |
| # theory of liability, whether in contract, strict liability, or tort | |
| # (including negligence or otherwise) arising in any way out of the use | |
| # of this software, even if advised of the possibility of such damage. | |
| """ | |
| Adds conversion of ASCII dashes, quotes and ellipses to their HTML | |
| entity equivalents. | |
| See the [documentation](https://Python-Markdown.github.io/extensions/smarty) | |
| for details. | |
| """ | |
| from __future__ import annotations | |
| from . import Extension | |
| from ..inlinepatterns import HtmlInlineProcessor, HTML_RE | |
| from ..treeprocessors import InlineProcessor | |
| from ..util import Registry | |
| from typing import TYPE_CHECKING, Sequence | |
| if TYPE_CHECKING: # pragma: no cover | |
| from markdown import Markdown | |
| from .. import inlinepatterns | |
| import re | |
| import xml.etree.ElementTree as etree | |
# Constants for quote education.
#
# These are plain regex source strings; they are compiled later when they are
# handed to the inline processors.

# ASCII punctuation characters.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# Whitespace or punctuation that can terminate a word.
endOfWordClass = r"[\s.,;:!?)]"
# Any character that may directly precede a closing quote: everything except
# whitespace, opening brackets, a hyphen and the control characters
# U+0002/U+0003.
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"

# Capturing group for whatever may directly precede an *opening* quote.
# The entity alternatives must be spelled out as entities: the text being
# matched can contain `&nbsp;`, `&ndash;`, `&#8211;` ... literally, and a raw
# space or dash character in the pattern would not match them.
openingQuotesBase = (
    r'(\s'               # a  whitespace char
    r'|&nbsp;'           # or a non-breaking space entity
    r'|--'               # or dashes
    r'|–|—'              # or Unicode
    r'|&[mn]dash;'       # or named dash entities
    r'|&#8211;|&#8212;'  # or decimal entities
    r')'
)

# Default replacement text for every construct the extension educates.
# Values are HTML named entities so the output stays pure ASCII; users can
# override individual keys through the extension's `substitutions` option.
substitutions = {
    'mdash': '&mdash;',
    'ndash': '&ndash;',
    'ellipsis': '&hellip;',
    'left-angle-quote': '&laquo;',
    'right-angle-quote': '&raquo;',
    'left-single-quote': '&lsquo;',
    'right-single-quote': '&rsquo;',
    'left-double-quote': '&ldquo;',
    'right-double-quote': '&rdquo;',
}

# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass

# Special case for double sets of quotes, e.g.:
#   <p>He said, "'Quoted' words in a larger quote."</p>
doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""

# Special case for decade abbreviations (the '80s):
decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)"

# Get most opening double quotes:
openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase

# Double closing quotes:
closingDoubleQuotesRegex = r'"(?=\s)'
closingDoubleQuotesRegex2 = r'(?<=%s)"' % closeClass

# Get most opening single quotes:
openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase

# Single closing quotes:
closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
closingSingleQuotesRegex2 = r"'(\s|s\b)"

# All remaining quotes should be opening ones
remainingSingleQuotesRegex = r"'"
remainingDoubleQuotesRegex = r'"'
# `HTML_RE` with a trailing negative lookahead appended: a match is rejected
# when it is immediately followed by a further `>`.
HTML_STRICT_RE = HTML_RE + r'(?!\>)'
class SubstituteTextPattern(HtmlInlineProcessor):
    """Inline processor that swaps every match of a regex for fixed text.

    The replacement is described by a sequence of parts: an `int` part copies
    the match group with that number, any other part is handed to
    `md.htmlStash.store()` and the value that call returns is inserted.
    """

    def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown):
        """ Replaces matches with some text. """
        super().__init__(pattern)
        self.md = md
        self.replace = replace

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
        """Build the replacement for match `m`.

        Returns the assembled replacement string together with the start and
        end offsets of the whole match. `data` is not used.
        """
        pieces = [
            # Group references are copied verbatim; everything else is stashed.
            m.group(item) if isinstance(item, int) else self.md.htmlStash.store(item)
            for item in self.replace
        ]
        return ''.join(pieces), m.start(0), m.end(0)
class SmartyExtension(Extension):
    """ Add Smarty to Markdown. """

    def __init__(self, **kwargs):
        """Declare the options and merge user substitutions over the defaults."""
        self.config = {
            'smart_quotes': [True, 'Educate quotes'],
            'smart_angled_quotes': [False, 'Educate angled quotes'],
            'smart_dashes': [True, 'Educate dashes'],
            'smart_ellipses': [True, 'Educate ellipses'],
            'substitutions': [{}, 'Overwrite default substitutions'],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # Work on a copy so the module-level defaults are never mutated.
        merged = dict(substitutions)
        merged.update(self.getConfig('substitutions', default={}))
        self.substitutions: dict[str, str] = merged

    def _addPatterns(
        self,
        md: Markdown,
        patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]],
        serie: str,
        priority: int,
    ):
        """Register a series of `(regex, replacement)` pairs.

        Each pair becomes a `SubstituteTextPattern` named
        `smarty-<serie>-<index>`; the first pair is registered at `priority`
        and each following one at a priority one lower than its predecessor.
        """
        for ind, (regex, replacement) in enumerate(patterns):
            processor = SubstituteTextPattern(regex, replacement, md)
            self.inlinePatterns.register(processor, 'smarty-%s-%d' % (serie, ind), priority - ind)

    def educateDashes(self, md: Markdown) -> None:
        """Register the `---` (em dash) and `--` (en dash) substitutions."""
        # The lookarounds keep longer runs of hyphens from matching.
        mdash = SubstituteTextPattern(r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md)
        ndash = SubstituteTextPattern(r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md)
        registry = self.inlinePatterns
        registry.register(mdash, 'smarty-em-dashes', 50)
        registry.register(ndash, 'smarty-en-dashes', 45)

    def educateEllipses(self, md: Markdown) -> None:
        """Register the substitution for exactly three consecutive dots."""
        ellipsis = SubstituteTextPattern(
            r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
        )
        self.inlinePatterns.register(ellipsis, 'smarty-ellipses', 10)

    def educateAngledQuotes(self, md: Markdown) -> None:
        """Register the `<<` and `>>` angled-quote substitutions."""
        opening = SubstituteTextPattern(r'\<\<', (self.substitutions['left-angle-quote'],), md)
        closing = SubstituteTextPattern(r'\>\>', (self.substitutions['right-angle-quote'],), md)
        registry = self.inlinePatterns
        registry.register(opening, 'smarty-left-angle-quotes', 40)
        registry.register(closing, 'smarty-right-angle-quotes', 35)

    def educateQuotes(self, md: Markdown) -> None:
        """Register the full series of single- and double-quote substitutions."""
        subs = self.substitutions
        lsquo, rsquo = subs['left-single-quote'], subs['right-single-quote']
        ldquo, rdquo = subs['left-double-quote'], subs['right-double-quote']
        # Order matters: `_addPatterns` derives each priority from the position
        # in this tuple, and the catch-all "remaining" patterns must come
        # after the more specific ones of the same quote type.
        patterns = (
            (singleQuoteStartRe, (rsquo,)),
            (doubleQuoteStartRe, (rdquo,)),
            (doubleQuoteSetsRe, (ldquo + lsquo,)),
            (singleQuoteSetsRe, (lsquo + ldquo,)),
            (decadeAbbrRe, (rsquo,)),
            (openingSingleQuotesRegex, (1, lsquo)),
            (closingSingleQuotesRegex, (rsquo,)),
            (closingSingleQuotesRegex2, (rsquo, 1)),
            (remainingSingleQuotesRegex, (lsquo,)),
            (openingDoubleQuotesRegex, (1, ldquo)),
            (closingDoubleQuotesRegex, (rdquo,)),
            (closingDoubleQuotesRegex2, (rdquo,)),
            (remainingDoubleQuotesRegex, (ldquo,)),
        )
        self._addPatterns(md, patterns, 'quotes', 30)

    def extendMarkdown(self, md):
        """Install the enabled substitutions on `md`.

        The patterns are collected in a registry private to this extension and
        run by a dedicated `InlineProcessor` tree processor registered under
        the name `smarty`.
        """
        options = self.getConfigs()
        self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry()
        if options['smart_ellipses']:
            self.educateEllipses(md)
        if options['smart_quotes']:
            self.educateQuotes(md)
        if options['smart_angled_quotes']:
            self.educateAngledQuotes(md)
            # Override `HTML_RE` from `inlinepatterns.py` so that it does not
            # process tags with duplicate closing quotes.
            md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90)
        if options['smart_dashes']:
            self.educateDashes(md)
        smartyProcessor = InlineProcessor(md)
        smartyProcessor.inlinePatterns = self.inlinePatterns
        md.treeprocessors.register(smartyProcessor, 'smarty', 6)
        # Add both quote characters to the instance's list of escapable chars.
        md.ESCAPED_CHARS.extend(['"', "'"])
def makeExtension(**kwargs):  # pragma: no cover
    """Create a `SmartyExtension`, forwarding every keyword argument to it."""
    extension = SmartyExtension(**kwargs)
    return extension