Spaces:
Paused
Paused
| # Attribute List Extension for Python-Markdown | |
| # ============================================ | |
| # Adds attribute list syntax. Inspired by | |
| # [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s | |
| # feature of the same name. | |
| # See https://Python-Markdown.github.io/extensions/attr_list | |
| # for documentation. | |
| # Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). | |
| # All changes Copyright 2011-2014 The Python Markdown Project | |
| # License: [BSD](https://opensource.org/licenses/bsd-license.php) | |
| """ | |
| Adds attribute list syntax. Inspired by | |
| [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s | |
| feature of the same name. | |
| See the [documentation](https://Python-Markdown.github.io/extensions/attr_list) | |
| for details. | |
| """ | |
| from __future__ import annotations | |
| from typing import TYPE_CHECKING | |
| from . import Extension | |
| from ..treeprocessors import Treeprocessor | |
| import re | |
| if TYPE_CHECKING: # pragma: no cover | |
| from xml.etree.ElementTree import Element | |
| def _handle_double_quote(s, t): | |
| k, v = t.split('=', 1) | |
| return k, v.strip('"') | |
| def _handle_single_quote(s, t): | |
| k, v = t.split('=', 1) | |
| return k, v.strip("'") | |
| def _handle_key_value(s, t): | |
| return t.split('=', 1) | |
| def _handle_word(s, t): | |
| if t.startswith('.'): | |
| return '.', t[1:] | |
| if t.startswith('#'): | |
| return 'id', t[1:] | |
| return t, t | |
# Tokenizer for the text found between the braces of an attribute list.
# Each entry pairs a pattern with the action called on the matched text; an
# action of `None` makes the scanner skip the match. The quoted forms are
# listed ahead of the bare forms so that a quoted value (which may contain
# spaces) is consumed as one token. Text that no pattern matches is handed
# back by `scan()` as the unparsed remainder.
_scanner = re.Scanner([
    # key="value"
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='value'
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value (no quotes, so no spaces, `=` or `}` in the value)
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word: `.class`, `#id`, or a plain name
    (r'[^ =}]+', _handle_word),
    # single space between tokens
    (r' ', None)
])
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Tokenize an attribute list string into attribute pairs.

    Returns the parsed attributes together with whatever text was left over
    starting at the first unparsed closing curly brace (an empty string when
    there is none). A non-empty remainder usually means the input did not
    really follow the attribute list syntax.
    """
    parsed, unparsed = _scanner.scan(attrs_string)
    # Historic behavior: anything unparsable that precedes the first '}' is
    # silently dropped; only the brace and what follows it are reported.
    _dropped, brace, trailing = unparsed.partition('}')
    return parsed, brace + trailing
def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the parsed attribute pairs; any remainder is discarded.
    """
    # NOTE: the parameter name shadows the builtin `str`; it is kept because
    # it is part of the public signature.
    attrs, _remainder = get_attrs_and_remainder(str)
    return attrs
def isheader(elem: Element) -> bool:
    """ Return `True` when `elem` is a heading element (`h1` through `h6`). """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
class AttrListTreeprocessor(Treeprocessor):
    """ Move `{: ... }` attribute lists out of element text and onto the elements.

    Block-level elements are checked for an attribute list at the end of their
    text; every other element is checked for one at the start of its tail.
    """

    # Raw pattern for one attribute list: `{` or `{:`, optional spaces, the
    # attribute text (group 1), optional spaces, `}`. Group 1 is greedy, so it
    # can run past an inner `}`; `assign_attrs` reports what was over-matched.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list preceded by spaces at the end of the text.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Runs of characters that `sanitize_name` replaces in attribute names.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk every element under `doc` and apply any attribute list found. """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` at the end of the element's text.
                self._process_block(elem)
            elif elem.tail:
                # Inline: check for `attrs` at start of tail.
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    remainder = self.assign_attrs(elem, m.group(1))
                    elem.tail = elem.tail[m.end():] + remainder

    def _attr_text_owner(self, elem: Element) -> tuple[Element, str] | None:
        """ Find where the trailing text of block element `elem` is stored.

        Returns `(node, 'text' | 'tail')` naming the string to search for an
        attribute list, or `None` when there is no candidate text.
        """
        if len(elem) and elem.tag == 'li':
            # Special case list items: children may include a `ul` or `ol`, and
            # the item's own text ends just before the first such child.
            pos = next(
                (i for i, child in enumerate(elem) if child.tag in ('ul', 'ol')),
                None
            )
            if pos is None and elem[-1].tail:
                # No `ul` or `ol`: use tail of last child.
                return elem[-1], 'tail'
            if pos is not None and pos > 0 and elem[pos - 1].tail:
                # Use tail of last child before the `ul` or `ol`.
                return elem[pos - 1], 'tail'
            if elem.text:
                # Fall back to the item's own text (e.g. `ul` is first child).
                return elem, 'text'
            return None
        if len(elem) and elem[-1].tail:
            # Has children: use tail of last child.
            return elem[-1], 'tail'
        if elem.text:
            # No usable child tail: use the element's own text.
            return elem, 'text'
        return None

    def _process_block(self, elem: Element) -> None:
        """ Apply a trailing attribute list to block-level `elem`, if it has one.

        The list is parsed strictly: when a stray `}` is left over, nothing is
        assigned and the text is left untouched.
        """
        if isheader(elem) or elem.tag in ('dt', 'td', 'th'):
            # Header, def-term, or table cell: attributes sit at end of element.
            pattern = self.HEADER_RE
        else:
            # Other blocks: attributes sit on the last line of text.
            pattern = self.BLOCK_RE

        target = self._attr_text_owner(elem)
        if target is None:
            return
        owner, field = target

        m = pattern.search(getattr(owner, field))
        if not m:
            return
        if self.assign_attrs(elem, m.group(1), strict=True):
            # Non-empty remainder: not a valid attribute list, keep the text.
            return

        kept = getattr(owner, field)[:m.start()]
        if isheader(elem):
            # Clean up trailing #s.
            kept = kept.rstrip('#').rstrip()
        setattr(owner, field, kept)

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # Add to class, keeping any classes already present.
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # Assign attribute `k` with `v`.
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` is replaced by one `_`.
        """
        return self.NAME_RE.sub('_', name)
class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown. """

    def extendMarkdown(self, md):
        """ Register the `attr_list` tree processor (priority 8) and this extension on `md`. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension