Import Upstream version 2.1.0
This commit is contained in:
commit
6cb7cfa55b
|
@ -0,0 +1,48 @@
|
|||
name: benchmark
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
benchmark-packages:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
|
||||
- name: install pandoc
|
||||
uses: r-lib/actions/setup-pandoc@v1
|
||||
with:
|
||||
pandoc-version: '2.6'
|
||||
|
||||
- name: Install tox
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox
|
||||
|
||||
- name: Run package benchmarks
|
||||
run: tox -e py38-bench-packages -- --benchmark-min-rounds 20 --benchmark-json bench-packages.json
|
||||
|
||||
# - name: Upload package data
|
||||
# uses: actions/upload-artifact@v2
|
||||
# with:
|
||||
# name: bench-packages
|
||||
# path: bench-packages.json
|
||||
# if-no-files-found: error
|
||||
|
||||
- name: Store benchmark result
|
||||
uses: aiidateam/github-action-benchmark@v2
|
||||
with:
|
||||
name: Parsing Benchmarks
|
||||
output-file-path: bench-packages.json
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
auto-push: true
|
||||
commit-msg-append: "[ci skip]"
|
||||
one-chart-groups: packages,plugins
|
||||
fail-on-alert: false
|
|
@ -0,0 +1,136 @@
|
|||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: continuous-integration
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
tags:
|
||||
- "v[0-9]+.[0-9]+.[0-9]+*"
|
||||
pull_request:
|
||||
schedule:
|
||||
- cron: '0 0 * * 0' # every week
|
||||
|
||||
jobs:
|
||||
|
||||
pre-commit:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- uses: pre-commit/action@v2.0.0
|
||||
|
||||
tests:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['pypy-3.7', '3.7', '3.8', '3.9', '3.10']
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install .[testing,linkify]
|
||||
- name: Run pytest
|
||||
run: |
|
||||
pytest tests/ --cov=markdown_it --cov-report=xml --cov-report=term-missing
|
||||
- name: Upload to Codecov
|
||||
if: matrix.python-version == '3.7' && github.repository == 'executablebooks/markdown-it-py'
|
||||
uses: codecov/codecov-action@v1
|
||||
with:
|
||||
name: markdown-it-py-pytests-py3.7
|
||||
flags: pytests
|
||||
file: ./coverage.xml
|
||||
fail_ci_if_error: true
|
||||
|
||||
test-plugins:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.8']
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install markdown-it-py
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install .[testing]
|
||||
- name: clone and install mdit-py-plugins
|
||||
run: |
|
||||
git clone https://github.com/executablebooks/mdit-py-plugins.git
|
||||
pip install --no-deps -e mdit-py-plugins
|
||||
- name: Run pytest for unit tests of mdit-py-plugins
|
||||
run: cd mdit-py-plugins; pytest
|
||||
|
||||
benchmark:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.8'
|
||||
|
||||
- name: Install tox
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox
|
||||
|
||||
- name: Run benchmark
|
||||
run: tox -e py38-bench-core -- --benchmark-json bench-core.json
|
||||
|
||||
- name: Upload data
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: bench-core
|
||||
path: bench-core.json
|
||||
if-no-files-found: error
|
||||
|
||||
publish:
|
||||
|
||||
name: Publish to PyPi
|
||||
needs: [pre-commit, tests]
|
||||
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout source
|
||||
uses: actions/checkout@v2
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: install flit
|
||||
run: |
|
||||
pip install flit~=3.4
|
||||
- name: Build and publish
|
||||
run: |
|
||||
flit publish
|
||||
env:
|
||||
FLIT_USERNAME: __token__
|
||||
FLIT_PASSWORD: ${{ secrets.PYPI_KEY }}
|
||||
|
||||
allgood:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- pre-commit
|
||||
- tests
|
||||
steps:
|
||||
- run: echo "Great success!"
|
|
@ -0,0 +1,143 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
benchmark/extra/
|
||||
node_modules/
|
||||
coverage/
|
||||
demo/
|
||||
apidoc/
|
||||
*.log
|
||||
__pycache__/
|
||||
.ropeproject/
|
||||
*.egg-info/
|
||||
.vscode/
|
||||
.DS_Store
|
||||
|
||||
docs/api/
|
|
@ -0,0 +1,46 @@
|
|||
# Install pre-commit hooks via
|
||||
# pre-commit install
|
||||
|
||||
exclude: >
|
||||
(?x)^(
|
||||
\.vscode/settings\.json|
|
||||
test.*\.md|
|
||||
test.*\.txt|
|
||||
test.*\.html|
|
||||
test.*\.xml|
|
||||
.*commonmark\.json|
|
||||
benchmark/.*\.md|
|
||||
.*/spec\.md
|
||||
)$
|
||||
|
||||
repos:
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.2.0
|
||||
hooks:
|
||||
- id: check-json
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.10.1
|
||||
hooks:
|
||||
- id: isort
|
||||
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
|
||||
- repo: https://gitlab.com/pycqa/flake8
|
||||
rev: 3.9.2
|
||||
hooks:
|
||||
- id: flake8
|
||||
additional_dependencies: [flake8-bugbear==21.3.1]
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v0.942
|
||||
hooks:
|
||||
- id: mypy
|
||||
additional_dependencies: [attrs]
|
|
@ -0,0 +1,15 @@
|
|||
version: 2
|
||||
|
||||
python:
|
||||
version: 3
|
||||
install:
|
||||
- method: pip
|
||||
path: .
|
||||
extra_requirements:
|
||||
- linkify
|
||||
- plugins
|
||||
- rtd
|
||||
|
||||
sphinx:
|
||||
builder: html
|
||||
fail_on_warning: true
|
|
@ -0,0 +1,266 @@
|
|||
# Change Log
|
||||
|
||||
## 2.1.0 - 2022-04-15
|
||||
|
||||
This release is primarily to replace the `attrs` package dependency,
|
||||
with the built-in Python `dataclasses` package.
|
||||
|
||||
This should not be a breaking change, for most use cases.
|
||||
|
||||
- ⬆️ UPGRADE: Drop support for EOL Python 3.6 (#194)
|
||||
- ♻️ REFACTOR: Move `Rule`/`Delimiter` classes from `attrs` to `dataclass` (#211)
|
||||
- ♻️ REFACTOR: Move `Token` class from `attrs` to `dataclass` (#211)
|
||||
- ‼️ Remove deprecated `NestedTokens` and `nest_tokens`
|
||||
- ✨ NEW: Save ordered list numbering (#192)
|
||||
- 🐛 FIX: Combination of blockquotes, list and newlines causes `IndexError` (#207)
|
||||
|
||||
## 2.0.1 - 2022-01-24
|
||||
|
||||
- 🐛 FIX: Crash when file ends with empty blockquote line.
|
||||
- ✨ NEW: Add `inline_definitions` option.
|
||||
This option allows for `definition` token to be inserted into the token stream, at the point where the definition is located in the source text.
|
||||
  It is useful for cases where one wishes to capture a "lossless" syntax tree of the parsed Markdown (in conjunction with the `store_labels` option).
|
||||
|
||||
## 2.0.0 - 2021-12-03
|
||||
|
||||
- ⬆️ Update: Sync with markdown-it v12.1.0 and CommonMark v0.30
|
||||
- ♻️ REFACTOR: Port `mdurl` and `punycode` for URL normalisation (thanks to @hukkin!).
|
||||
This port fixes the outstanding CommonMark compliance tests.
|
||||
- ♻️ REFACTOR: Remove `AttrDict`.
|
||||
  This is no longer used in core or mdit-py-plugins, instead standard dictionaries are used.
|
||||
- 👌 IMPROVE: Use `__all__` to signal re-exports
|
||||
|
||||
## 1.1.0 - 2021-05-08
|
||||
|
||||
⬆️ UPGRADE: `attrs` -> v21 (#165)
|
||||
|
||||
This release has no breaking changes
|
||||
(see: <https://github.com/python-attrs/attrs/blob/main/CHANGELOG.rst>)
|
||||
|
||||
## 1.0.0 - 2021-05-02
|
||||
|
||||
[Full commit log](https://github.com/executablebooks/markdown-it-py/compare/v0.6.2...v1.0.0)
|
||||
|
||||
The first stable release of markdown-it-py 🎉
|
||||
|
||||
See the changes in the beta releases below,
|
||||
thanks to all the [contributors](https://github.com/executablebooks/markdown-it-py/graphs/contributors?from=2020-03-22&to=2021-05-02&type=c) in the last year!
|
||||
|
||||
## 1.0.0b3 - 2021-05-01
|
||||
|
||||
- 👌 IMPROVE: Add `RendererProtocol` type, for typing renderers (thanks to [@hukkinj1](https://github.com/hukkinj1))
|
||||
- 🔧 MAINTAIN: `None` is no longer allowed as a valid `src` input for `StateBase` subclasses
|
||||
|
||||
## 1.0.0b2 - 2021-04-25
|
||||
|
||||
‼️ BREAKING: Move `mdit-py-plugins` out of the core install requirements and into a `plugins` extra.
|
||||
|
||||
Synchronised code with the upstream Markdown-It `v12.0.6`:
|
||||
|
||||
- 🐛 FIX: Raise HTML blocks priority to resolve conflict with headings
|
||||
- 🐛 FIX: Newline not rendered in image alt attribute
|
||||
|
||||
## 1.0.0b1 - 2021-03-31
|
||||
|
||||
[Full commit log](https://github.com/executablebooks/markdown-it-py/compare/v0.6.2...9ecda04)
|
||||
|
||||
This is the first beta release of the stable v1.x series.
|
||||
|
||||
There are four notable (and breaking) changes:
|
||||
|
||||
1. The code has been synchronised with the upstream Markdown-It `v12.0.4`.
|
||||
In particular, this update alters the parsing of tables to be consistent with the GFM specification: <https://github.github.com/gfm/#tables-extension->
|
||||
A number of parsing performance and validation improvements are also included.
|
||||
2. `Token.attrs` are now stored as dictionaries, rather than a list of lists.
|
||||
   This is a departure from upstream Markdown-It, allowed by Python's guarantee of ordered dictionaries (see [#142](https://github.com/markdown-it/markdown-it/issues/142)), and is the more natural representation.
|
||||
Note `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods remain identical to those upstream,
|
||||
and `Token.as_dict(as_upstream=True)` will convert the token back to a directly comparable dict.
|
||||
3. The use of `AttrDict` has been replaced:
|
||||
For `env` any Python mutable mapping is now allowed, and so attribute access to keys is not (differing from the Javascript dictionary).
|
||||
For `MarkdownIt.options` it is now set as an `OptionsDict`, which is a dictionary sub-class, with attribute access only for core MarkdownIt configuration keys.
|
||||
4. Introduction of the `SyntaxTreeNode`.
|
||||
This is a more comprehensive replacement for `nest_tokens` and `NestedTokens` (which are now deprecated).
|
||||
It allows for the `Token` stream to be converted to/from a nested tree structure, with opening/closing tokens collapsed into a single `SyntaxTreeNode` and the intermediate tokens set as children.
|
||||
See [Creating a syntax tree](https://markdown-it-py.readthedocs.io/en/latest/using.html#creating-a-syntax-tree) documentation for details.
|
||||
|
||||
### Additional Fixes 🐛
|
||||
|
||||
- Fix exception due to empty lines after blockquote+footnote
|
||||
- Fix linkify link nesting levels
|
||||
- Fix the use of `Ruler.at` for plugins
|
||||
- Avoid fenced token mutations during rendering
|
||||
- Fix CLI version info and correct return of exit codes
|
||||
|
||||
## 0.6.2 - 2021-02-07
|
||||
|
||||
This release brings Markdown-It-Py inline with Markdown-It v11.0.1 (2020-09-14), applying two fixes:
|
||||
|
||||
- Fix blockquote lazy newlines, [[#696](https://github.com/markdown-it/markdown-it/issues/696)].
|
||||
- Fix missed mappings for table rows, [[#705](https://github.com/markdown-it/markdown-it/issues/705)].
|
||||
|
||||
Thanks to [@hukkinj1](https://github.com/hukkinj1)!
|
||||
|
||||
## 0.6.1 - 2021-01-01
|
||||
|
||||
This release provides some improvements to the code base:
|
||||
|
||||
- 🐛 FIX: Do not resolve backslash escapes inside auto-links
|
||||
- 🐛 FIX: Add content to image tokens
|
||||
- 👌 IMPROVE: Add more type annotations, thanks to [@hukkinj1](https://github.com/hukkinj1)
|
||||
|
||||
## 0.6.0 - 2020-12-15
|
||||
|
||||
🗑 DEPRECATE: Move plugins to `mdit_py_plugins`
|
||||
|
||||
Plugins (in `markdown_it.extensions`) have now been moved to [executablebooks/mdit-py-plugins](https://github.com/executablebooks/mdit-py-plugins).
|
||||
This will allow for their maintenance to occur on a different cycle to the core code, facilitating the release of a v1.0.0 for this package
|
||||
|
||||
🔧 MAINTAIN: Add [mypy](https://mypy.readthedocs.io) type-checking, thanks to [@hukkinj1](https://github.com/hukkinj1).
|
||||
|
||||
## 0.5.8 - 2020-12-13
|
||||
|
||||
✨ NEW: Add linkify, thanks to [@tsutsu3](https://github.com/tsutsu3).
|
||||
|
||||
This extension uses [linkify-it-py](https://github.com/tsutsu3/linkify-it-py) to identify URL links within text:
|
||||
|
||||
- `github.com` -> `<a href="http://github.com">github.com</a>`
|
||||
|
||||
**Important:** To use this extension you must install linkify-it-py; `pip install markdown-it-py[linkify]`
|
||||
|
||||
It can then be activated by:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
md = MarkdownIt().enable("linkify")
|
||||
md.options["linkify"] = True
|
||||
```
|
||||
|
||||
## 0.5.7 - 2020-12-13
|
||||
|
||||
✨ NEW: Add smartquotes, thanks to [@tsutsu3](https://github.com/tsutsu3).
|
||||
|
||||
This extension will convert basic quote marks to their opening and closing variants:
|
||||
|
||||
- 'single quotes' -> ‘single quotes’
|
||||
- "double quotes" -> “double quotes”
|
||||
|
||||
It can be activated by:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
md = MarkdownIt().enable("smartquotes")
|
||||
md.options["typographer"] = True
|
||||
```
|
||||
|
||||
✨ NEW: Add markdown-it-task-lists plugin, thanks to [@wna-se](https://github.com/wna-se).
|
||||
|
||||
This is a port of the JS [markdown-it-task-lists](https://github.com/revin/markdown-it-task-lists),
|
||||
for building task/todo lists out of markdown lists with items starting with `[ ]` or `[x]`.
|
||||
For example:
|
||||
|
||||
```markdown
|
||||
- [ ] An item that needs doing
|
||||
- [x] An item that is complete
|
||||
```
|
||||
|
||||
This plugin can be activated by:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
from markdown_it.extensions.tasklists import tasklists_plugin
|
||||
md = MarkdownIt().use(tasklists_plugin)
|
||||
```
|
||||
|
||||
🐛 Various bug fixes, thanks to [@hukkinj1](https://github.com/hukkinj1):
|
||||
|
||||
- Do not copy empty `env` arg in `MarkdownIt.render`
|
||||
- `_Entities.__contains__` fix return data
|
||||
- Parsing of unicode ordinals
|
||||
- Handling of final character in `skipSpacesBack` and `skipCharsBack` methods
|
||||
- Avoid exception when document ends in heading/blockquote marker
|
||||
|
||||
🧪 TESTS: Add CI for Python 3.9 and PyPy3
|
||||
|
||||
## 0.5.6 - 2020-10-21
|
||||
|
||||
- ✨ NEW: Add simple typographic replacements, thanks to [@tsutsu3](https://github.com/tsutsu3):
|
||||
This allows you to add the `typographer` option to the parser, to replace particular text constructs:
|
||||
|
||||
- ``(c)``, ``(C)`` → ©
|
||||
- ``(tm)``, ``(TM)`` → ™
|
||||
- ``(r)``, ``(R)`` → ®
|
||||
- ``(p)``, ``(P)`` → §
|
||||
- ``+-`` → ±
|
||||
- ``...`` → …
|
||||
- ``?....`` → ?..
|
||||
- ``!....`` → !..
|
||||
- ``????????`` → ???
|
||||
- ``!!!!!`` → !!!
|
||||
- ``,,,`` → ,
|
||||
  - ``--`` → &ndash;
|
||||
  - ``---`` → &mdash;
|
||||
|
||||
```python
|
||||
md = MarkdownIt().enable("replacements")
|
||||
md.options["typographer"] = True
|
||||
```
|
||||
|
||||
- 📚 DOCS: Improve documentation for CLI, thanks to [@westurner](https://github.com/westurner)
|
||||
- 👌 IMPROVE: Use `re.sub()` instead of `re.subn()[0]`, thanks to [@hukkinj1](https://github.com/hukkinj1)
|
||||
- 🐛 FIX: An exception raised by having multiple blank lines at the end of some files
|
||||
|
||||
## 0.5.5 - 2020-09-27
|
||||
|
||||
👌 IMPROVE: Add `store_labels` option.
|
||||
|
||||
This allows for storage of original reference label in link/image token's metadata,
|
||||
which can be useful for renderers.
|
||||
|
||||
## 0.5.4 - 2020-09-08
|
||||
|
||||
✨ NEW: Add `anchors_plugin` for headers, which can produce:
|
||||
|
||||
```html
|
||||
<h1 id="title-string">Title String <a class="header-anchor" href="#title-string">¶</a></h1>
|
||||
```
|
||||
|
||||
## 0.5.3 - 2020-09-04
|
||||
|
||||
🐛 Fixed an undefined variable in the reference block.
|
||||
|
||||
## 0.5.2 - 2020-08-22
|
||||
|
||||
🐛 Fixed an `IndexError` in `container_plugin`, when there is no newline on the closing tag line.
|
||||
|
||||
## 0.5.1 - 2020-08-21
|
||||
|
||||
⬆️ UPGRADE: attrs -> v20
|
||||
|
||||
This is not breaking, since it only deprecates Python 3.4 (see [CHANGELOG.rst](https://github.com/python-attrs/attrs/blob/master/CHANGELOG.rst))
|
||||
|
||||
## 0.5.0 - 2020-08-18
|
||||
|
||||
### Added ✨
|
||||
|
||||
- `deflist` and `dollarmath` plugins (see [plugins list](https://markdown-it-py.readthedocs.io/en/latest/plugins.html)).
|
||||
|
||||
### Improved 👌
|
||||
|
||||
- Added benchmarking tests and CI (see <https://executablebooks.github.io/markdown-it-py/dev/bench/>)
|
||||
- Improved performance of computing ordinals (=> 10-15% parsing speed increase).
|
||||
Thanks to [@sildar](https://github.com/sildar)!
|
||||
|
||||
### Fixed 🐛
|
||||
|
||||
- Stopped empty lines at the end of the document, after certain list blocks, raising an exception (#36).
|
||||
- Allow myst-role to accept names containing digits (0-9).
|
||||
|
||||
## 0.4.9 - 2020-08-11
|
||||
|
||||
### Added ✨
|
||||
|
||||
- `containers` plugin (see [plugins list](https://markdown-it-py.readthedocs.io/en/latest/plugins.html))
|
||||
|
||||
### Documented 📚
|
||||
|
||||
- Plugins and improved contributing section
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2020 ExecutableBookProject
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,22 @@
|
|||
Copyright (c) 2014 Vitaly Puzrin, Alex Kocharin.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,148 @@
|
|||
# markdown-it-py
|
||||
|
||||
[![Github-CI][github-ci]][github-link]
|
||||
[![Coverage Status][codecov-badge]][codecov-link]
|
||||
[![PyPI][pypi-badge]][pypi-link]
|
||||
[![Conda][conda-badge]][conda-link]
|
||||
[![Code style: black][black-badge]][black-link]
|
||||
[![PyPI - Downloads][install-badge]][install-link]
|
||||
|
||||
> Markdown parser done right.
|
||||
|
||||
- Follows the __[CommonMark spec](http://spec.commonmark.org/)__ for baseline parsing
|
||||
- Configurable syntax: you can add new rules and even replace existing ones.
|
||||
- Pluggable: Adds syntax extensions to extend the parser (see the [plugin list][md-plugins]).
|
||||
- High speed (see our [benchmarking tests][md-performance])
|
||||
- [Safe by default][md-security]
|
||||
|
||||
This is a Python port of [markdown-it], and some of its associated plugins.
|
||||
For more details see: <https://markdown-it-py.readthedocs.io>.
|
||||
|
||||
For details on [markdown-it] itself, see:
|
||||
|
||||
- The __[Live demo](https://markdown-it.github.io)__
|
||||
- [The markdown-it README][markdown-it-readme]
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
conda install -c conda-forge markdown-it-py
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
pip install markdown-it-py[plugins]
|
||||
```
|
||||
|
||||
or with extras
|
||||
|
||||
```bash
|
||||
conda install -c conda-forge markdown-it-py linkify-it-py mdit-py-plugins
|
||||
pip install markdown-it-py[linkify,plugins]
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Python API Usage
|
||||
|
||||
Render markdown to HTML with markdown-it-py and a custom configuration
|
||||
with and without plugins and features:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
from mdit_py_plugins.front_matter import front_matter_plugin
|
||||
from mdit_py_plugins.footnote import footnote_plugin
|
||||
|
||||
md = (
|
||||
MarkdownIt()
|
||||
.use(front_matter_plugin)
|
||||
.use(footnote_plugin)
|
||||
.disable('image')
|
||||
.enable('table')
|
||||
)
|
||||
text = ("""
|
||||
---
|
||||
a: 1
|
||||
---
|
||||
|
||||
a | b
|
||||
- | -
|
||||
1 | 2
|
||||
|
||||
A footnote [^1]
|
||||
|
||||
[^1]: some details
|
||||
""")
|
||||
tokens = md.parse(text)
|
||||
html_text = md.render(text)
|
||||
|
||||
## To export the html to a file, uncomment the lines below:
|
||||
# from pathlib import Path
|
||||
# Path("output.html").write_text(html_text)
|
||||
```
|
||||
|
||||
### Command-line Usage
|
||||
|
||||
Render markdown to HTML with markdown-it-py from the
|
||||
command-line:
|
||||
|
||||
```console
|
||||
usage: markdown-it [-h] [-v] [filenames [filenames ...]]
|
||||
|
||||
Parse one or more markdown files, convert each to HTML, and print to stdout
|
||||
|
||||
positional arguments:
|
||||
filenames specify an optional list of files to convert
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --version show program's version number and exit
|
||||
|
||||
Interactive:
|
||||
|
||||
$ markdown-it
|
||||
markdown-it-py [version 0.0.0] (interactive)
|
||||
Type Ctrl-D to complete input, or Ctrl-C to exit.
|
||||
>>> # Example
|
||||
... > markdown *input*
|
||||
...
|
||||
<h1>Example</h1>
|
||||
<blockquote>
|
||||
<p>markdown <em>input</em></p>
|
||||
</blockquote>
|
||||
|
||||
Batch:
|
||||
|
||||
$ markdown-it README.md README.footer.md > index.html
|
||||
|
||||
```
|
||||
|
||||
## References / Thanks
|
||||
|
||||
Big thanks to the authors of [markdown-it]:
|
||||
|
||||
- Alex Kocharin [github/rlidwka](https://github.com/rlidwka)
|
||||
- Vitaly Puzrin [github/puzrin](https://github.com/puzrin)
|
||||
|
||||
Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark spec and reference implementations.
|
||||
|
||||
[github-ci]: https://github.com/executablebooks/markdown-it-py/workflows/Python%20package/badge.svg?branch=master
|
||||
[github-link]: https://github.com/executablebooks/markdown-it-py
|
||||
[pypi-badge]: https://img.shields.io/pypi/v/markdown-it-py.svg
|
||||
[pypi-link]: https://pypi.org/project/markdown-it-py
|
||||
[conda-badge]: https://anaconda.org/conda-forge/markdown-it-py/badges/version.svg
|
||||
[conda-link]: https://anaconda.org/conda-forge/markdown-it-py
|
||||
[codecov-badge]: https://codecov.io/gh/executablebooks/markdown-it-py/branch/master/graph/badge.svg
|
||||
[codecov-link]: https://codecov.io/gh/executablebooks/markdown-it-py
|
||||
[black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg
|
||||
[black-link]: https://github.com/ambv/black
|
||||
[install-badge]: https://img.shields.io/pypi/dw/markdown-it-py?label=pypi%20installs
|
||||
[install-link]: https://pypistats.org/packages/markdown-it-py
|
||||
|
||||
[CommonMark spec]: http://spec.commonmark.org/
|
||||
[markdown-it]: https://github.com/markdown-it/markdown-it
|
||||
[markdown-it-readme]: https://github.com/markdown-it/markdown-it/blob/master/README.md
|
||||
[md-security]: https://markdown-it-py.readthedocs.io/en/latest/other.html
|
||||
[md-performance]: https://markdown-it-py.readthedocs.io/en/latest/other.html
|
||||
[md-plugins]: https://markdown-it-py.readthedocs.io/en/latest/plugins.html
|
|
@ -0,0 +1,20 @@
|
|||
from pathlib import Path

import pytest

import markdown_it


@pytest.fixture
def spec_text():
    """Load the bundled CommonMark spec document used as benchmark input."""
    return (Path(__file__).parent / "samples" / "spec.md").read_text()


@pytest.fixture
def parser():
    """Provide a ``MarkdownIt`` parser configured for CommonMark."""
    return markdown_it.MarkdownIt("commonmark")


@pytest.mark.benchmark(group="core")
def test_spec(benchmark, parser, spec_text):
    """Benchmark rendering of the full CommonMark spec document."""
    benchmark(parser.render, spec_text)
|
|
@ -0,0 +1,69 @@
|
|||
from pathlib import Path
from shutil import which

import pytest


@pytest.fixture
def spec_text():
    """Load the bundled CommonMark spec document used as benchmark input."""
    return (Path(__file__).parent / "samples" / "spec.md").read_text()


@pytest.mark.benchmark(group="packages")
def test_markdown_it_py(benchmark, spec_text):
    """Benchmark this package's own CommonMark parser."""
    import markdown_it

    md = markdown_it.MarkdownIt("commonmark")
    benchmark.extra_info["version"] = markdown_it.__version__
    benchmark(md.render, spec_text)


@pytest.mark.benchmark(group="packages")
def test_mistune(benchmark, spec_text):
    """Benchmark the mistune package."""
    import mistune

    benchmark.extra_info["version"] = mistune.__version__
    benchmark(mistune.markdown, spec_text)


@pytest.mark.benchmark(group="packages")
def test_commonmark_py(benchmark, spec_text):
    """Benchmark the commonmark.py reference implementation."""
    import commonmark

    # NOTE(review): version is hard-coded; commonmark appears not to expose
    # ``__version__`` — confirm before relying on this value.
    benchmark.extra_info["version"] = "0.9.1"
    benchmark(commonmark.commonmark, spec_text)


@pytest.mark.benchmark(group="packages")
def test_pymarkdown(benchmark, spec_text):
    """Benchmark Python-Markdown with default settings."""
    import markdown

    benchmark.extra_info["version"] = markdown.__version__
    benchmark(markdown.markdown, spec_text)


@pytest.mark.benchmark(group="packages")
def test_pymarkdown_extra(benchmark, spec_text):
    """Benchmark Python-Markdown with the "extra" extension enabled."""
    import markdown

    benchmark.extra_info["version"] = markdown.__version__
    benchmark(markdown.markdown, spec_text, extensions=["extra"])


@pytest.mark.benchmark(group="packages")
def test_mistletoe(benchmark, spec_text):
    """Benchmark the mistletoe package."""
    import mistletoe

    benchmark.extra_info["version"] = mistletoe.__version__
    benchmark(mistletoe.markdown, spec_text)


@pytest.mark.skipif(which("pandoc") is None, reason="pandoc executable not found")
@pytest.mark.benchmark(group="packages")
def test_panflute(benchmark, spec_text):
    """Benchmark panflute, which converts via the pandoc executable."""
    import panflute

    benchmark.extra_info["version"] = panflute.__version__
    benchmark(
        panflute.convert_text, spec_text, input_format="markdown", output_format="html"
    )
|
|
@ -0,0 +1,12 @@
|
|||
def pytest_benchmark_update_machine_info(config, machine_info):
    """pytest-benchmark hook: record CPU details alongside benchmark results.

    Adds physical/logical core counts, current CPU load, and CPU frequency
    under the ``"psutil"`` key of *machine_info*, so stored benchmark runs
    can be compared across machines.
    """
    import psutil

    # psutil.cpu_freq() returns None on platforms where the frequency cannot
    # be determined (per psutil docs); record None values instead of crashing
    # with an AttributeError on ``freq.min``.
    freq = psutil.cpu_freq()
    machine_info["psutil"] = {
        "cpu_count": psutil.cpu_count(logical=False),
        "cpu_count_logical": psutil.cpu_count(logical=True),
        "cpu_percent": psutil.cpu_percent(),
        "cpu_freq_min": freq.min if freq is not None else None,
        "cpu_freq_max": freq.max if freq is not None else None,
        "cpu_freq_current": freq.current if freq is not None else None,
    }
|
|
@ -0,0 +1,15 @@
|
|||
> the simple example of a blockquote
|
||||
> the simple example of a blockquote
|
||||
> the simple example of a blockquote
|
||||
> the simple example of a blockquote
|
||||
... continuation
|
||||
... continuation
|
||||
... continuation
|
||||
... continuation
|
||||
|
||||
empty blockquote:
|
||||
|
||||
>
|
||||
>
|
||||
>
|
||||
>
|
|
@ -0,0 +1,13 @@
|
|||
>>>>>> deeply nested blockquote
|
||||
>>>>> deeply nested blockquote
|
||||
>>>> deeply nested blockquote
|
||||
>>> deeply nested blockquote
|
||||
>> deeply nested blockquote
|
||||
> deeply nested blockquote
|
||||
|
||||
> deeply nested blockquote
|
||||
>> deeply nested blockquote
|
||||
>>> deeply nested blockquote
|
||||
>>>> deeply nested blockquote
|
||||
>>>>> deeply nested blockquote
|
||||
>>>>>> deeply nested blockquote
|
|
@ -0,0 +1,10 @@
|
|||
|
||||
an
|
||||
example
|
||||
|
||||
of
|
||||
|
||||
|
||||
|
||||
a code
|
||||
block
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
``````````text
|
||||
an
|
||||
example
|
||||
```
|
||||
of
|
||||
|
||||
|
||||
a fenced
|
||||
```
|
||||
code
|
||||
block
|
||||
``````````
|
|
@ -0,0 +1,9 @@
|
|||
# heading
|
||||
### heading
|
||||
##### heading
|
||||
|
||||
# heading #
|
||||
### heading ###
|
||||
##### heading \#\#\#\#\######
|
||||
|
||||
############ not a heading
|
|
@ -0,0 +1,9 @@
|
|||
|
||||
* * * * *
|
||||
|
||||
- - - - -
|
||||
|
||||
________
|
||||
|
||||
|
||||
************************* text
|
|
@ -0,0 +1,31 @@
|
|||
<div class="this is an html block">
|
||||
|
||||
blah blah
|
||||
|
||||
</div>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
**test**
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
|
||||
<td>
|
||||
|
||||
test
|
||||
|
||||
</td>
|
||||
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
<![CDATA[
|
||||
[[[[[[[[[[[... *cdata section - this should not be parsed* ...]]]]]]]]]]]
|
||||
]]>
|
|
@ -0,0 +1,8 @@
|
|||
heading
|
||||
---
|
||||
|
||||
heading
|
||||
===================================
|
||||
|
||||
not a heading
|
||||
----------------------------------- text
|
|
@ -0,0 +1,67 @@
|
|||
- tidy
|
||||
- bullet
|
||||
- list
|
||||
|
||||
|
||||
- loose
|
||||
|
||||
- bullet
|
||||
|
||||
- list
|
||||
|
||||
|
||||
0. ordered
|
||||
1. list
|
||||
2. example
|
||||
|
||||
|
||||
-
|
||||
-
|
||||
-
|
||||
-
|
||||
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
|
||||
- an example
|
||||
of a list item
|
||||
with a continuation
|
||||
|
||||
this part is inside the list
|
||||
|
||||
this part is just a paragraph
|
||||
|
||||
|
||||
1. test
|
||||
- test
|
||||
1. test
|
||||
- test
|
||||
|
||||
|
||||
111111111111111111111111111111111111111111. is this a valid bullet?
|
||||
|
||||
- _________________________
|
||||
|
||||
- this
|
||||
- is
|
||||
|
||||
a
|
||||
|
||||
long
|
||||
- loose
|
||||
- list
|
||||
|
||||
- with
|
||||
- some
|
||||
|
||||
tidy
|
||||
|
||||
- list
|
||||
- items
|
||||
- in
|
||||
|
||||
- between
|
||||
- _________________________
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
- this
|
||||
- is
|
||||
- a
|
||||
- deeply
|
||||
- nested
|
||||
- bullet
|
||||
- list
|
||||
|
||||
|
||||
1. this
|
||||
2. is
|
||||
3. a
|
||||
4. deeply
|
||||
5. nested
|
||||
6. unordered
|
||||
7. list
|
||||
|
||||
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
- 5
|
||||
- 6
|
||||
- 7
|
||||
- 6
|
||||
- 5
|
||||
- 4
|
||||
- 3
|
||||
- 2
|
||||
- 1
|
||||
|
||||
|
||||
- - - - - - - - - deeply-nested one-element item
|
|
@ -0,0 +1,15 @@
|
|||
[1] [2] [3] [1] [2] [3]
|
||||
|
||||
[looooooooooooooooooooooooooooooooooooooooooooooooooong label]
|
||||
|
||||
[1]: <http://something.example.com/foo/bar>
|
||||
[2]: http://something.example.com/foo/bar 'test'
|
||||
[3]:
|
||||
http://foo/bar
|
||||
[ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]:
|
||||
111
|
||||
'test'
|
||||
[[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q
|
||||
(as long as it is not referenced anywhere)
|
||||
|
||||
[[[[[[[[[[[[[[[[[[[[]: this is not a valid reference
|
|
@ -0,0 +1,50 @@
|
|||
[item 1]: <1>
|
||||
[item 2]: <2>
|
||||
[item 3]: <3>
|
||||
[item 4]: <4>
|
||||
[item 5]: <5>
|
||||
[item 6]: <6>
|
||||
[item 7]: <7>
|
||||
[item 8]: <8>
|
||||
[item 9]: <9>
|
||||
[item 10]: <10>
|
||||
[item 11]: <11>
|
||||
[item 12]: <12>
|
||||
[item 13]: <13>
|
||||
[item 14]: <14>
|
||||
[item 15]: <15>
|
||||
[item 16]: <16>
|
||||
[item 17]: <17>
|
||||
[item 18]: <18>
|
||||
[item 19]: <19>
|
||||
[item 20]: <20>
|
||||
[item 21]: <21>
|
||||
[item 22]: <22>
|
||||
[item 23]: <23>
|
||||
[item 24]: <24>
|
||||
[item 25]: <25>
|
||||
[item 26]: <26>
|
||||
[item 27]: <27>
|
||||
[item 28]: <28>
|
||||
[item 29]: <29>
|
||||
[item 30]: <30>
|
||||
[item 31]: <31>
|
||||
[item 32]: <32>
|
||||
[item 33]: <33>
|
||||
[item 34]: <34>
|
||||
[item 35]: <35>
|
||||
[item 36]: <36>
|
||||
[item 37]: <37>
|
||||
[item 38]: <38>
|
||||
[item 39]: <39>
|
||||
[item 40]: <40>
|
||||
[item 41]: <41>
|
||||
[item 42]: <42>
|
||||
[item 43]: <43>
|
||||
[item 44]: <44>
|
||||
[item 45]: <45>
|
||||
[item 46]: <46>
|
||||
[item 47]: <47>
|
||||
[item 48]: <48>
|
||||
[item 49]: <49>
|
||||
[item 50]: <50>
|
|
@ -0,0 +1,17 @@
|
|||
[[[[[[[foo]]]]]]]
|
||||
|
||||
[[[[[[[foo]]]]]]]: bar
|
||||
[[[[[[foo]]]]]]: bar
|
||||
[[[[[foo]]]]]: bar
|
||||
[[[[foo]]]]: bar
|
||||
[[[foo]]]: bar
|
||||
[[foo]]: bar
|
||||
[foo]: bar
|
||||
|
||||
[*[*[*[*[foo]*]*]*]*]
|
||||
|
||||
[*[*[*[*[foo]*]*]*]*]: bar
|
||||
[*[*[*[foo]*]*]*]: bar
|
||||
[*[*[foo]*]*]: bar
|
||||
[*[foo]*]: bar
|
||||
[foo]: bar
|
|
@ -0,0 +1,21 @@
|
|||
| Heading 1 | Heading 2
|
||||
| --------- | ---------
|
||||
| Cell 1 | Cell 2
|
||||
| Cell 3 | Cell 4
|
||||
|
||||
| Header 1 | Header 2 | Header 3 | Header 4 |
|
||||
| :------: | -------: | :------- | -------- |
|
||||
| Cell 1 | Cell 2 | Cell 3 | Cell 4 |
|
||||
| Cell 5 | Cell 6 | Cell 7 | Cell 8 |
|
||||
|
||||
Test code
|
||||
|
||||
Header 1 | Header 2
|
||||
-------- | --------
|
||||
Cell 1 | Cell 2
|
||||
Cell 3 | Cell 4
|
||||
|
||||
Header 1|Header 2|Header 3|Header 4
|
||||
:-------|:------:|-------:|--------
|
||||
Cell 1 |Cell 2 |Cell 3 |Cell 4
|
||||
*Cell 5*|Cell 6 |Cell 7 |Cell 8
|
|
@ -0,0 +1,14 @@
|
|||
closed (valid) autolinks:
|
||||
|
||||
<ftp://1.2.3.4:21/path/foo>
|
||||
<http://foo.bar.baz?q=hello&id=22&boolean>
|
||||
<http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink/>
|
||||
<teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com>
|
||||
|
||||
these are not autolinks:
|
||||
|
||||
<ftp://1.2.3.4:21/path/foo
|
||||
<http://foo.bar.baz?q=hello&id=22&boolean
|
||||
<http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink
|
||||
<teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com
|
||||
< http://foo.bar.baz?q=hello&id=22&boolean >
|
|
@ -0,0 +1,3 @@
|
|||
`lots`of`backticks`
|
||||
|
||||
``i``wonder``how``this``will``be``parsed``
|
|
@ -0,0 +1,5 @@
|
|||
*this* *is* *your* *basic* *boring* *emphasis*
|
||||
|
||||
_this_ _is_ _your_ _basic_ _boring_ _emphasis_
|
||||
|
||||
**this** **is** **your** **basic** **boring** **emphasis**
|
|
@ -0,0 +1,5 @@
|
|||
*this *is *a *bunch* of* nested* emphases*
|
||||
|
||||
__this __is __a __bunch__ of__ nested__ emphases__
|
||||
|
||||
***this ***is ***a ***bunch*** of*** nested*** emphases***
|
|
@ -0,0 +1,5 @@
|
|||
*this *is *a *worst *case *for *em *backtracking
|
||||
|
||||
__this __is __a __worst __case __for __em __backtracking
|
||||
|
||||
***this ***is ***a ***worst ***case ***for ***em ***backtracking
|
|
@ -0,0 +1,11 @@
|
|||
entities:
|
||||
|
||||
& © Æ Ď ¾ ℋ ⅆ ∲
|
||||
|
||||
# Ӓ Ϡ �
|
||||
|
||||
non-entities:
|
||||
|
||||
&18900987654321234567890; &1234567890098765432123456789009876543212345678987654;
|
||||
|
||||
&qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri;
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
\t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s
|
||||
|
||||
\!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?
|
||||
|
||||
\@ \[ \] \^ \_ \` \{ \| \} \~ \- \'
|
||||
|
||||
\
|
||||
\\
|
||||
\\\
|
||||
\\\\
|
||||
\\\\\
|
||||
|
||||
\<this\> \<is\> \<not\> \<html\>
|
|
@ -0,0 +1,44 @@
|
|||
Taking commonmark tests from the spec for benchmarking here:
|
||||
|
||||
<a><bab><c2c>
|
||||
|
||||
<a/><b2/>
|
||||
|
||||
<a /><b2
|
||||
data="foo" >
|
||||
|
||||
<a foo="bar" bam = 'baz <em>"</em>'
|
||||
_boolean zoop:33=zoop:33 />
|
||||
|
||||
<33> <__>
|
||||
|
||||
<a h*#ref="hi">
|
||||
|
||||
<a href="hi'> <a href=hi'>
|
||||
|
||||
< a><
|
||||
foo><bar/ >
|
||||
|
||||
<a href='bar'title=title>
|
||||
|
||||
</a>
|
||||
</foo >
|
||||
|
||||
</a href="foo">
|
||||
|
||||
foo <!-- this is a
|
||||
comment - with hyphen -->
|
||||
|
||||
foo <!-- not a comment -- two hyphens -->
|
||||
|
||||
foo <?php echo $a; ?>
|
||||
|
||||
foo <!ELEMENT br EMPTY>
|
||||
|
||||
foo <![CDATA[>&<]]>
|
||||
|
||||
<a href="ö">
|
||||
|
||||
<a href="\*">
|
||||
|
||||
<a href="\"">
|
|
@ -0,0 +1,23 @@
|
|||
Valid links:
|
||||
|
||||
[this is a link]()
|
||||
[this is a link](<http://something.example.com/foo/bar>)
|
||||
[this is a link](http://something.example.com/foo/bar 'test')
|
||||
![this is an image]()
|
||||
![this is an image](<http://something.example.com/foo/bar>)
|
||||
![this is an image](http://something.example.com/foo/bar 'test')
|
||||
|
||||
[escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'')
|
||||
[escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\))
|
||||
|
||||
Invalid links:
|
||||
|
||||
[this is not a link
|
||||
|
||||
[this is not a link](
|
||||
|
||||
[this is not a link](http://something.example.com/foo/bar 'test'
|
||||
|
||||
[this is not a link](((((((((((((((((((((((((((((((((((((((((((((((
|
||||
|
||||
[this is not a link]((((((((((()))))))))) (((((((((()))))))))))
|
|
@ -0,0 +1,13 @@
|
|||
Valid links:
|
||||
|
||||
[[[[[[[[](test)](test)](test)](test)](test)](test)](test)]
|
||||
|
||||
[ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test)
|
||||
|
||||
Invalid links:
|
||||
|
||||
[[[[[[[[[
|
||||
|
||||
[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [
|
||||
|
||||
![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
this\
|
||||
should\
|
||||
be\
|
||||
separated\
|
||||
by\
|
||||
newlines
|
||||
|
||||
this
|
||||
should
|
||||
be
|
||||
separated
|
||||
by
|
||||
newlines
|
||||
too
|
||||
|
||||
this
|
||||
should
|
||||
not
|
||||
be
|
||||
separated
|
||||
by
|
||||
newlines
|
|
@ -0,0 +1,13 @@
|
|||
Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
|
||||
|
||||
Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
|
||||
|
||||
Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
|
||||
|
||||
Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
|
||||
|
||||
Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
|
||||
|
||||
Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
|
||||
|
||||
[1]: https://github.com/markdown-it
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
this is a test for tab expansion, be careful not to replace them with spaces
|
||||
|
||||
1 4444
|
||||
22 333
|
||||
333 22
|
||||
4444 1
|
||||
|
||||
|
||||
tab-indented line
|
||||
space-indented line
|
||||
tab-indented line
|
||||
|
||||
|
||||
a lot of spaces in between here
|
||||
|
||||
a lot of tabs in between here
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,10 @@
|
|||
coverage:
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
target: 95%
|
||||
threshold: 0.2%
|
||||
patch:
|
||||
default:
|
||||
target: 80%
|
||||
threshold: 0.2%
|
|
@ -0,0 +1 @@
|
|||
*.ipynb
|
|
@ -0,0 +1,28 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
# raise warnings to errors
|
||||
html-strict:
|
||||
@$(SPHINXBUILD) -b html -nW --keep-going "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
|
||||
|
||||
# increase logging level to verbose
|
||||
html-verbose:
|
||||
@$(SPHINXBUILD) -b html -v "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
|
|
@ -0,0 +1,5 @@
|
|||
.code-cell > .highlight > pre {
|
||||
border-left-color: green;
|
||||
border-left-width: medium;
|
||||
border-left-style: solid;
|
||||
}
|
|
@ -0,0 +1,176 @@
|
|||
(md/architecture)=
|
||||
|
||||
# markdown-it design principles
|
||||
|
||||
(md/data-flow)=
|
||||
## Data flow
|
||||
|
||||
Input data is parsed via nested chains of rules. There are 3 nested chains -
|
||||
`core`, `block` & `inline`:
|
||||
|
||||
```
|
||||
core
|
||||
core.rule1 (normalize)
|
||||
...
|
||||
core.ruleX
|
||||
|
||||
block
|
||||
block.rule1 (blockquote)
|
||||
...
|
||||
block.ruleX
|
||||
|
||||
core.ruleX1 (intermediate rule that applies on block tokens, nothing yet)
|
||||
...
|
||||
core.ruleXX
|
||||
|
||||
inline (applied to each block token with "inline" type)
|
||||
inline.rule1 (text)
|
||||
...
|
||||
inline.ruleX
|
||||
|
||||
core.ruleYY (applies to all tokens)
|
||||
... (abbreviation, footnote, typographer, linkifier)
|
||||
|
||||
```
|
||||
|
||||
The result of the parsing is a *list of tokens*, that will be passed to the `renderer` to generate the html content.
|
||||
|
||||
These tokens can be themselves parsed again to generate more tokens (ex: a `list token` can be divided into multiple `inline tokens`).
|
||||
|
||||
An `env` sandbox can be used alongside tokens to inject external variables for your parsers and renderers.
|
||||
|
||||
Each chain (core / block / inline) uses an independent `state` object when parsing data, so that each parsing operation is independent and can be disabled on the fly.
|
||||
|
||||
|
||||
## Token stream
|
||||
|
||||
Instead of traditional AST we use more low-level data representation - *tokens*.
|
||||
The difference is simple:
|
||||
|
||||
- Tokens are a simple sequence (Array).
|
||||
- Opening and closing tags are separate.
|
||||
- There are special token objects, "inline containers", having nested tokens.
|
||||
sequences with inline markup (bold, italic, text, ...).
|
||||
|
||||
See [token class](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/token.py)
|
||||
for details about each token content.
|
||||
|
||||
In total, a token stream is:
|
||||
|
||||
- On the top level - array of paired or single "block" tokens:
|
||||
- open/close for headers, lists, blockquotes, paragraphs, ...
|
||||
- codes, fenced blocks, horizontal rules, html blocks, inlines containers
|
||||
- Each inline token has a `.children` property with a nested token stream for inline content:
|
||||
- open/close for strong, em, link, code, ...
|
||||
- text, line breaks
|
||||
|
||||
Why not AST? Because it's not needed for our tasks. We follow KISS principle.
|
||||
If you wish - you can call a parser without a renderer and convert the token stream
|
||||
to an AST.
|
||||
|
||||
More details about tokens:
|
||||
|
||||
- [Renderer source](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/renderer.py)
|
||||
- [Token source](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/token.py)
|
||||
- [Live demo](https://markdown-it.github.io/) - type your text and click `debug` tab.
|
||||
|
||||
|
||||
## Rules
|
||||
|
||||
Rules are functions, doing "magic" with parser `state` objects. A rule is associated with one or more *chains* and is unique. For instance, a `blockquote` token is associated with `blockquote`, `paragraph`, `heading` and `list` chains.
|
||||
|
||||
Rules are managed by names via [Ruler](https://markdown-it.github.io/markdown-it/#Ruler) instances and can be `enabled` / `disabled` from the [MarkdownIt](https://markdown-it.github.io/markdown-it/#MarkdownIt) methods.
|
||||
|
||||
You can note, that some rules have a `validation mode` - in this mode rules do not
|
||||
modify the token stream, and only look ahead for the end of a token. It's one
|
||||
important design principle - a token stream is "write only" on block & inline parse stages.
|
||||
|
||||
Parsers are designed to keep rules independent of each other. You can safely enable/disable them, or
|
||||
add new ones. There are no universal recipes for how to create new rules - design of
|
||||
distributed state machines with good data isolation is a tricky business. But you
|
||||
can investigate existing rules & plugins to see possible approaches.
|
||||
|
||||
Also, in complex cases you can try to ask for help in tracker. Condition is very
|
||||
simple - it should be clear from your ticket, that you studied docs, sources,
|
||||
and tried to do something yourself. We never refuse to help real developers.
|
||||
|
||||
|
||||
## Renderer
|
||||
|
||||
After the token stream is generated, it's passed to a [renderer](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/renderer.py).
|
||||
It then plays all the tokens, passing each to a rule with the same name as token type.
|
||||
|
||||
Renderer rules are located in `md.renderer.rules[name]` and are simple functions
|
||||
with the same signature:
|
||||
|
||||
```python
|
||||
def function(renderer, tokens, idx, options, env):
|
||||
return htmlResult
|
||||
```
|
||||
|
||||
In many cases that allows easy output change even without parser intrusion.
|
||||
For example, let's replace images with vimeo links to player's iframe:
|
||||
|
||||
```python
|
||||
import re
|
||||
md = MarkdownIt("commonmark")
|
||||
|
||||
vimeoRE = re.compile(r'^https?:\/\/(www\.)?vimeo.com\/(\d+)($|\/)')
|
||||
|
||||
def render_vimeo(self, tokens, idx, options, env):
|
||||
token = tokens[idx]
|
||||
|
||||
if vimeoRE.match(token.attrs["src"]):
|
||||
|
||||
ident = vimeoRE.match(token.attrs["src"])[2]
|
||||
|
||||
return ('<div class="embed-responsive embed-responsive-16by9">\n' +
|
||||
' <iframe class="embed-responsive-item" src="//player.vimeo.com/video/' +
|
||||
ident + '"></iframe>\n' +
|
||||
'</div>\n')
|
||||
return self.image(tokens, idx, options, env)
|
||||
|
||||
md = MarkdownIt("commonmark")
|
||||
md.add_render_rule("image", render_vimeo)
|
||||
print(md.render("![](https://www.vimeo.com/123)"))
|
||||
```
|
||||
|
||||
Here is another example, how to add `target="_blank"` to all links:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
def render_blank_link(self, tokens, idx, options, env):
|
||||
tokens[idx].attrSet("target", "_blank")
|
||||
|
||||
# pass token to default renderer.
|
||||
return self.renderToken(tokens, idx, options, env)
|
||||
|
||||
md = MarkdownIt("commonmark")
|
||||
md.add_render_rule("link_open", render_blank_link)
|
||||
print(md.render("[a]\n\n[a]: b"))
|
||||
```
|
||||
|
||||
Note, if you need to add attributes, you can do things without renderer override.
|
||||
For example, you can update tokens in `core` chain. That is slower, than direct
|
||||
renderer override, but can be more simple.
|
||||
|
||||
You also can write your own renderer to generate other formats than HTML, such as
|
||||
JSON/XML... You can even use it to generate AST.
|
||||
|
||||
## Summary
|
||||
|
||||
This was mentioned in [Data flow](md/data-flow), but let's repeat sequence again:
|
||||
|
||||
1. Blocks are parsed, and top level of token stream filled with block tokens.
|
||||
2. Content on inline containers is parsed, filling `.children` properties.
|
||||
3. Rendering happens.
|
||||
|
||||
And somewhere between you can apply additional transformations :) . Full content
|
||||
of each chain can be seen on the top of
|
||||
[parser_core.py](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/parser_core.py),
|
||||
[parser_block.py](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/parser_block.py) and
|
||||
[parser_inline.py](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/parser_inline.py)
|
||||
files.
|
||||
|
||||
Also you can change output directly in [renderer](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/renderer.py) for many simple cases.
|
|
@ -0,0 +1,150 @@
|
|||
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
from glob import glob
import os

# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = "markdown-it-py"
copyright = "2020, executable book project"
author = "executable book project"


# -- General configuration ---------------------------------------------------

# Sphinx extension modules: built-in ('sphinx.ext.*') plus third-party ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "myst_parser",
    "sphinx_copybutton",
    "sphinx_design",
]

# Patterns, relative to the source directory, of files and directories to
# ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# Treat broken cross-references as warnings, except for these targets that
# are known to be unresolvable (private classes, type-alias artefacts, ...).
nitpicky = True
nitpick_ignore = [
    ("py:class", "Match"),
    ("py:class", "Path"),
    ("py:class", "x in the interval [0, 1)."),
    ("py:class", "markdown_it.helpers.parse_link_destination._Result"),
    ("py:class", "markdown_it.helpers.parse_link_title._Result"),
    ("py:class", "MarkdownIt"),
    ("py:class", "RuleFunc"),
    ("py:class", "_NodeType"),
    ("py:class", "typing_extensions.Protocol"),
]


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_title = "markdown-it-py"
html_theme = "sphinx_book_theme"
html_theme_options = {
    "use_edit_page_button": True,
    "repository_url": "https://github.com/executablebooks/markdown-it-py",
    "repository_branch": "master",
    "path_to_docs": "docs",
}
html_static_path = ["_static"]
html_css_files = ["custom.css"]

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ["_static"]


intersphinx_mapping = {
    "python": ("https://docs.python.org/3.7", None),
    "mdit-py-plugins": ("https://mdit-py-plugins.readthedocs.io/en/latest/", None),
}
def run_apidoc(app):
    """Generate the API documentation with ``sphinx.ext.apidoc``.

    See: https://github.com/rtfd/readthedocs.org/issues/1139
    """
    import os
    import shutil

    import sphinx
    from sphinx.ext import apidoc

    logger = sphinx.util.logging.getLogger(__name__)
    logger.info("running apidoc")

    # Resolve the relevant paths relative to this conf.py file.
    docs_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    api_dir = os.path.join(docs_dir, "api")
    package_dir = os.path.normpath(os.path.join(docs_dir, "../"))

    excluded = [
        os.path.normpath(os.path.join(docs_dir, rel))
        for rel in ("../profiler.py", "../conftest.py", "../tests", "../benchmarking")
    ]
    # Functions from these modules are all re-exported via __all__ in their
    # package __init__.py, so skip everything except the inits and the
    # state_* modules.
    for rule in ("block", "core", "inline"):
        pattern = os.path.normpath(
            os.path.join(docs_dir, f"../markdown_it/rules_{rule}/*.py")
        )
        for path in glob(pattern):
            if os.path.basename(path) not in ("__init__.py", f"state_{rule}.py"):
                excluded.append(path)

    # Start from a clean output folder on every build.
    if os.path.exists(api_dir):
        shutil.rmtree(api_dir)
    os.mkdir(api_dir)

    argv = ["-M", "--separate", "-o", api_dir, package_dir] + excluded

    apidoc.OPTIONS.append("ignore-module-all")
    apidoc.main(argv)

    # We don't use the generated modules.rst index.
    modules_rst = os.path.join(api_dir, "modules.rst")
    if os.path.exists(modules_rst):
        os.remove(modules_rst)
def setup(app):
    """Add functions to the Sphinx setup."""
    # Apidoc generation can be skipped by exporting SKIP_APIDOC.
    if "SKIP_APIDOC" not in os.environ:
        app.connect("builder-inited", run_apidoc)

    from sphinx.directives.code import CodeBlock

    class CodeCell(CodeBlock):
        """Custom code block directive."""

        def run(self):
            """Run the directive, tagging the block with the code-cell class."""
            self.options["class"] = ["code-cell"]
            return super().run()

    # note, these could be run by myst-nb,
    # but currently this causes a circular dependency issue
    app.add_directive("code-cell", CodeCell)
|
@ -0,0 +1,108 @@
|
|||
# Contribute to markdown-it-py
|
||||
|
||||
We welcome all contributions! ✨
|
||||
|
||||
See the [EBP Contributing Guide](https://executablebooks.org/en/latest/contributing.html) for general details, and below for guidance specific to markdown-it-py.
|
||||
|
||||
Before continuing, make sure you've read:
|
||||
|
||||
1. [Architecture description](md/architecture)
|
||||
2. [Security considerations](md/security)
|
||||
3. [API documentation](api/markdown_it)
|
||||
|
||||
## Development guidance
|
||||
|
||||
Details of the port can be found in the `markdown_it/port.yaml` and in `port.yaml` files, within the extension folders.
|
||||
|
||||
## Code Style
|
||||
|
||||
Code style is tested using [flake8](http://flake8.pycqa.org), with the configuration set in `.flake8`, and code formatted with [black](https://github.com/ambv/black).
|
||||
|
||||
Installing with `markdown-it-py[code_style]` makes the [pre-commit](https://pre-commit.com/) package available, which will ensure this style is met before commits are submitted, by reformatting the code and testing for lint errors.
|
||||
It can be setup by:
|
||||
|
||||
```shell
|
||||
>> cd markdown-it-py
|
||||
>> pre-commit install
|
||||
```
|
||||
|
||||
Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard.
|
||||
|
||||
All functions and class methods should be annotated with types and include a docstring.
|
||||
|
||||
## Testing
|
||||
|
||||
For code tests, markdown-it-py uses [pytest](https://docs.pytest.org):
|
||||
|
||||
```shell
|
||||
>> cd markdown-it-py
|
||||
>> pytest
|
||||
```
|
||||
|
||||
You can also use [tox](https://tox.readthedocs.io), to run the tests in multiple isolated environments (see the `tox.ini` file for available test environments):
|
||||
|
||||
```shell
|
||||
>> cd markdown-it-py
|
||||
>> tox -p
|
||||
```
|
||||
|
||||
This can also be used to run benchmarking tests using [pytest-benchmark](https://pytest-benchmark.readthedocs.io):
|
||||
|
||||
```shell
|
||||
>> cd markdown-it-py
|
||||
>> tox -e py38-bench-packages -- --benchmark-min-rounds 50
|
||||
```
|
||||
|
||||
For documentation build tests:
|
||||
|
||||
```shell
|
||||
>> cd markdown-it-py/docs
|
||||
>> make clean
|
||||
>> make html-strict
|
||||
```
|
||||
|
||||
## Contributing a plugin
|
||||
|
||||
1. Does it already exist as JavaScript implementation ([see npm](https://www.npmjs.com/search?q=keywords:markdown-it-plugin))?
|
||||
Where possible try to port directly from that.
|
||||
It is usually better to modify existing code, instead of writing all from scratch.
|
||||
2. Try to find the right place for your plugin rule:
|
||||
- Will it conflict with existing markup (by priority)?
|
||||
- If yes - you need to write an inline or block rule.
|
||||
- If no - you can morph tokens within core chains.
|
||||
- Remember that token morphing in core chains is always more simple than writing
|
||||
block or inline rules, if you don't copy existing ones. However,
|
||||
block and inline rules are usually faster.
|
||||
- Sometimes, it's enough to only modify the renderer, for example, to add
|
||||
header IDs or `target="_blank"` for the links.
|
||||
|
||||
## FAQ
|
||||
|
||||
### I need async rule, how to do it?
|
||||
|
||||
Sorry. You can't do it directly. All complex parsers are sync by nature. But you
|
||||
can use workarounds:
|
||||
|
||||
1. On parse phase, replace content by random number and store it in `env`.
|
||||
2. Do async processing over collected data.
|
||||
3. Render content and replace those random numbers with text; or replace first, then render.
|
||||
|
||||
Alternatively, you can render HTML, then parse it to DOM, or
|
||||
[cheerio](https://github.com/cheeriojs/cheerio) AST, and apply transformations
|
||||
in a more convenient way.
|
||||
|
||||
### How to replace part of text token with link?
|
||||
|
||||
The right sequence is to split text to several tokens and add link tokens in between.
|
||||
The result will be: `text` + `link_open` + `text` + `link_close` + `text`.
|
||||
|
||||
See implementations of [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js) and [emoji](https://github.com/markdown-it/markdown-it-emoji/blob/master/lib/replace.js) - those do text token splits.
|
||||
|
||||
__Note:__ Don't try to replace text with HTML markup! That's not secure.
|
||||
|
||||
### Why is my inline rule not executed?
|
||||
|
||||
The inline parser skips pieces of text to optimize speed. It stops only on [a small set of chars](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_inline/text.js), which can be tokens. We did not make this list extensible, also for performance reasons.
|
||||
|
||||
If you are absolutely sure that something important is missing there - create a
|
||||
ticket and we will consider adding it as a new charcode.
|
|
@ -0,0 +1,41 @@
|
|||
# markdown-it-py
|
||||
|
||||
> Markdown parser done right.
|
||||
|
||||
- {fa}`check,text-success mr-1` Follows the __[CommonMark spec](http://spec.commonmark.org/)__ for baseline parsing
|
||||
- {fa}`check,text-success mr-1` Configurable syntax: you can add new rules and even replace existing ones.
|
||||
- {fa}`check,text-success mr-1` Pluggable: Adds syntax extensions to extend the parser (see the [plugin list](md/plugins))
|
||||
- {fa}`check,text-success mr-1` High speed (see our [benchmarking tests](md/performance))
|
||||
- {fa}`check,text-success mr-1` [Safe by default](md/security)
|
||||
|
||||
For a good introduction to [markdown-it] see the __[Live demo](https://markdown-it.github.io)__.
|
||||
This is a Python port of the well used [markdown-it], and some of its associated plugins.
|
||||
The driving design philosophy of the port has been to change as little of the fundamental code structure (file names, function name, etc) as possible, just sprinkling in a little Python syntactical sugar ✨.
|
||||
It is very simple to write complementary extensions for both language implementations!
|
||||
|
||||
## References & Thanks
|
||||
|
||||
Big thanks to the authors of [markdown-it]
|
||||
|
||||
- Alex Kocharin [github/rlidwka](https://github.com/rlidwka)
|
||||
- Vitaly Puzrin [github/puzrin](https://github.com/puzrin)
|
||||
|
||||
Also [John MacFarlane](https://github.com/jgm) for his work on the CommonMark spec and reference implementations.
|
||||
|
||||
## Related Links
|
||||
|
||||
- <https://github.com/jgm/CommonMark> - reference CommonMark implementations in C & JS, also contains latest spec & online demo.
|
||||
- <http://talk.commonmark.org> - CommonMark forum, good place to collaborate developers' efforts.
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
|
||||
using
|
||||
architecture
|
||||
other
|
||||
plugins
|
||||
contributing
|
||||
api/markdown_it
|
||||
```
|
||||
|
||||
[markdown-it]: https://github.com/markdown-it/markdown-it
|
|
@ -0,0 +1,66 @@
|
|||
(md/security)=
|
||||
|
||||
# Security
|
||||
|
||||
Many people don't understand that markdown format does not care much about security.
|
||||
In many cases you have to pass output to sanitizers.
|
||||
`markdown-it` provides 2 possible strategies to produce safe output:
|
||||
|
||||
1. Don't enable HTML. Extend markup features with [plugins](md/plugins).
|
||||
We think it's the best choice and use it by default.
|
||||
- That's ok for 99% of user needs.
|
||||
- Output will be safe without sanitizer.
|
||||
2. Enable HTML and use external sanitizer package(s).
|
||||
|
||||
Also by default `markdown-it` prohibits some kind of links, which could be used
|
||||
for XSS:
|
||||
|
||||
- `javascript:`, `vbscript:`
|
||||
- `file:`
|
||||
- `data:`, except some images (gif/png/jpeg/webp).
|
||||
|
||||
So, by default `markdown-it` should be safe. We care about it.
|
||||
|
||||
If you find a security problem - contact us via tracker or email.
|
||||
Such reports are fixed with top priority.
|
||||
|
||||
## Plugins
|
||||
|
||||
Usually, plugins operate with tokenized content, and that's enough to provide safe output.
|
||||
|
||||
But there is one non-evident case you should know - don't allow plugins to generate arbitrary element `id` and `name`.
|
||||
If those depend on user input - always add prefixes to avoid DOM clobbering.
|
||||
See [discussion](https://github.com/markdown-it/markdown-it/issues/28) for details.
|
||||
|
||||
So, if you decide to use plugins that add extended class syntax or autogenerating header anchors - be careful.
|
||||
|
||||
(md/performance)=
|
||||
|
||||
# Performance
|
||||
|
||||
You can view our continuous integration benchmarking analysis at: <https://executablebooks.github.io/markdown-it-py/dev/bench/>,
|
||||
or you can run it for yourself within the repository:
|
||||
|
||||
```console
|
||||
$ tox -e py38-bench-packages -- --benchmark-columns mean,stddev
|
||||
|
||||
Name (time in ms) Mean StdDev
|
||||
---------------------------------------------------------------
|
||||
test_mistune 70.3272 (1.0) 0.7978 (1.0)
|
||||
test_mistletoe 116.0919 (1.65) 6.2870 (7.88)
|
||||
test_markdown_it_py 152.9022 (2.17) 4.2988 (5.39)
|
||||
test_commonmark_py 326.9506 (4.65) 15.8084 (19.81)
|
||||
test_pymarkdown 368.2712 (5.24) 7.5906 (9.51)
|
||||
test_pymarkdown_extra 640.4913 (9.11) 15.1769 (19.02)
|
||||
test_panflute 678.3547 (9.65) 9.4622 (11.86)
|
||||
---------------------------------------------------------------
|
||||
```
|
||||
|
||||
As you can see, `markdown-it-py` doesn't pay with speed for its flexibility.
|
||||
|
||||
```{note}
|
||||
`mistune` is not CommonMark compliant, which is what allows for its
|
||||
faster parsing, at the expense of issues, for example, with nested inline parsing.
|
||||
See [mistletoes's explanation](https://github.com/miyuchina/mistletoe/blob/master/performance.md)
|
||||
for further details.
|
||||
```
|
|
@ -0,0 +1,50 @@
|
|||
(md/plugins)=
|
||||
|
||||
# Plugin extensions
|
||||
|
||||
The following plugins are embedded within the core package:
|
||||
|
||||
- [tables](https://help.github.com/articles/organizing-information-with-tables/) (GFM)
|
||||
- [strikethrough](https://help.github.com/articles/basic-writing-and-formatting-syntax/#styling-text) (GFM)
|
||||
|
||||
These can be enabled individually:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
md = MarkdownIt("commonmark").enable('table')
|
||||
```
|
||||
|
||||
or as part of a configuration:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
md = MarkdownIt("gfm-like")
|
||||
```
|
||||
|
||||
```{seealso}
|
||||
See [](using.md)
|
||||
```
|
||||
|
||||
Many other plugins are then available *via* the `mdit-py-plugins` package, including:
|
||||
|
||||
- Front-matter
|
||||
- Footnotes
|
||||
- Definition lists
|
||||
- Task lists
|
||||
- Heading anchors
|
||||
- LaTeX math
|
||||
- Containers
|
||||
- Word count
|
||||
|
||||
For full information see: <https://mdit-py-plugins.readthedocs.io>
|
||||
|
||||
Or you can write them yourself!
|
||||
|
||||
They can be chained and loaded *via*:
|
||||
|
||||
```python
|
||||
from markdown_it import MarkdownIt
|
||||
from mdit_py_plugins import plugin1, plugin2
|
||||
md = MarkdownIt().use(plugin1, keyword=value).use(plugin2, keyword=value)
|
||||
html_string = md.render("some *Markdown*")
|
||||
```
|
|
@ -0,0 +1,399 @@
|
|||
---
|
||||
jupytext:
|
||||
formats: ipynb,md:myst
|
||||
text_representation:
|
||||
extension: .md
|
||||
format_name: myst
|
||||
format_version: '0.8'
|
||||
jupytext_version: 1.4.2
|
||||
kernelspec:
|
||||
display_name: Python 3
|
||||
language: python
|
||||
name: python3
|
||||
---
|
||||
|
||||
# Using `markdown_it`
|
||||
|
||||
> This document can be opened to execute with [Jupytext](https://jupytext.readthedocs.io)!
|
||||
|
||||
markdown-it-py may be used as an API *via* the [`markdown-it-py`](https://pypi.org/project/markdown-it-py/) package.
|
||||
|
||||
The raw text is first parsed to syntax 'tokens',
|
||||
then these are converted to other formats using 'renderers'.
|
||||
|
||||
+++
|
||||
|
||||
## Quick-Start
|
||||
|
||||
The simplest way to understand how text will be parsed is using:
|
||||
|
||||
```{code-cell} python
|
||||
from pprint import pprint
|
||||
from markdown_it import MarkdownIt
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt()
|
||||
md.render("some *text*")
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
for token in md.parse("some *text*"):
|
||||
print(token)
|
||||
print()
|
||||
```
|
||||
|
||||
## The Parser
|
||||
|
||||
+++
|
||||
|
||||
The `MarkdownIt` class is instantiated with parsing configuration options,
|
||||
dictating the syntax rules and additional options for the parser and renderer.
|
||||
You can define this configuration *via* directly supplying a dictionary or a preset name:
|
||||
|
||||
- `zero`: This configures the minimum components to parse text (i.e. just paragraphs and text)
|
||||
- `commonmark` (default): This configures the parser to strictly comply with the [CommonMark specification](http://spec.commonmark.org/).
|
||||
- `js-default`: This is the default in the JavaScript version.
|
||||
Compared to `commonmark`, it disables HTML parsing and enables the table and strikethrough components.
|
||||
- `gfm-like`: This configures the parser to approximately comply with the [GitHub Flavored Markdown specification](https://github.github.com/gfm/).
|
||||
Compared to `commonmark`, it enables the table, strikethrough and linkify components.
|
||||
**Important**, to use this configuration you must have `linkify-it-py` installed.
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it.presets import zero
|
||||
zero.make()
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("zero")
|
||||
md.options
|
||||
```
|
||||
|
||||
You can also override specific options:
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("zero", {"maxNesting": 99})
|
||||
md.options
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
pprint(md.get_active_rules())
|
||||
```
|
||||
|
||||
You can find all the parsing rules in the source code:
|
||||
`parser_core.py`, `parser_block.py`,
|
||||
`parser_inline.py`.
|
||||
|
||||
```{code-cell} python
|
||||
pprint(md.get_all_rules())
|
||||
```
|
||||
|
||||
Any of the parsing rules can be enabled/disabled, and these methods are "chainable":
|
||||
|
||||
```{code-cell} python
|
||||
md.render("- __*emphasise this*__")
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
md.enable(["list", "emphasis"]).render("- __*emphasise this*__")
|
||||
```
|
||||
|
||||
You can temporarily modify rules with the `reset_rules` context manager.
|
||||
|
||||
```{code-cell} python
|
||||
with md.reset_rules():
|
||||
md.disable("emphasis")
|
||||
print(md.render("__*emphasise this*__"))
|
||||
md.render("__*emphasise this*__")
|
||||
```
|
||||
|
||||
Additionally `renderInline` runs the parser with all block syntax rules disabled.
|
||||
|
||||
```{code-cell} python
|
||||
md.renderInline("__*emphasise this*__")
|
||||
```
|
||||
|
||||
### Typographic components
|
||||
|
||||
The `smartquotes` and `replacements` components are intended to improve typography:
|
||||
|
||||
`smartquotes` will convert basic quote marks to their opening and closing variants:
|
||||
|
||||
- 'single quotes' -> ‘single quotes’
|
||||
- "double quotes" -> “double quotes”
|
||||
|
||||
`replacements` will replace particular text constructs:
|
||||
|
||||
- ``(c)``, ``(C)`` → ©
|
||||
- ``(tm)``, ``(TM)`` → ™
|
||||
- ``(r)``, ``(R)`` → ®
|
||||
- ``(p)``, ``(P)`` → §
|
||||
- ``+-`` → ±
|
||||
- ``...`` → …
|
||||
- ``?....`` → ?..
|
||||
- ``!....`` → !..
|
||||
- ``????????`` → ???
|
||||
- ``!!!!!`` → !!!
|
||||
- ``,,,`` → ,
|
||||
- ``--`` → &ndash;
|
||||
- ``---`` → &mdash;
|
||||
|
||||
Both of these components require typography to be turned on, as well as the components enabled:
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("commonmark", {"typographer": True})
|
||||
md.enable(["replacements", "smartquotes"])
|
||||
md.render("'single quotes' (c)")
|
||||
```
|
||||
|
||||
### Linkify
|
||||
|
||||
The `linkify` component requires that [linkify-it-py](https://github.com/tsutsu3/linkify-it-py) be installed (e.g. *via* `pip install markdown-it-py[linkify]`).
|
||||
This allows URI autolinks to be identified, without the need for enclosing in `<>` brackets:
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("commonmark", {"linkify": True})
|
||||
md.enable(["linkify"])
|
||||
md.render("github.com")
|
||||
```
|
||||
|
||||
### Plugins load
|
||||
|
||||
Plugins load collections of additional syntax rules and render methods into the parser.
|
||||
A number of useful plugins are available in [`mdit_py_plugins`](https://github.com/executablebooks/mdit-py-plugins) (see [the plugin list](./plugins.md)),
|
||||
or you can create your own (following the [markdown-it design principles](./architecture.md)).
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it import MarkdownIt
|
||||
import mdit_py_plugins
|
||||
from mdit_py_plugins.front_matter import front_matter_plugin
|
||||
from mdit_py_plugins.footnote import footnote_plugin
|
||||
|
||||
md = (
|
||||
MarkdownIt()
|
||||
.use(front_matter_plugin)
|
||||
.use(footnote_plugin)
|
||||
.enable('table')
|
||||
)
|
||||
text = ("""
|
||||
---
|
||||
a: 1
|
||||
---
|
||||
|
||||
a | b
|
||||
- | -
|
||||
1 | 2
|
||||
|
||||
A footnote [^1]
|
||||
|
||||
[^1]: some details
|
||||
""")
|
||||
md.render(text)
|
||||
```
|
||||
|
||||
## The Token Stream
|
||||
|
||||
+++
|
||||
|
||||
Before rendering, the text is parsed to a flat token stream of block level syntax elements, with nesting defined by opening (1) and closing (-1) attributes:
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("commonmark")
|
||||
tokens = md.parse("""
|
||||
Here's some *text*
|
||||
|
||||
1. a list
|
||||
|
||||
> a *quote*""")
|
||||
[(t.type, t.nesting) for t in tokens]
|
||||
```
|
||||
|
||||
Naturally all openings should eventually be closed,
|
||||
such that:
|
||||
|
||||
```{code-cell} python
|
||||
sum([t.nesting for t in tokens]) == 0
|
||||
```
|
||||
|
||||
All tokens are the same class, which can also be created outside the parser:
|
||||
|
||||
```{code-cell} python
|
||||
tokens[0]
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it.token import Token
|
||||
token = Token("paragraph_open", "p", 1, block=True, map=[1, 2])
|
||||
token == tokens[0]
|
||||
```
|
||||
|
||||
The `'inline'` type token contains the inline tokens as children:
|
||||
|
||||
```{code-cell} python
|
||||
tokens[1]
|
||||
```
|
||||
|
||||
You can serialize a token (and its children) to a JSONable dictionary using:
|
||||
|
||||
```{code-cell} python
|
||||
print(tokens[1].as_dict())
|
||||
```
|
||||
|
||||
This dictionary can also be deserialized:
|
||||
|
||||
```{code-cell} python
|
||||
Token.from_dict(tokens[1].as_dict())
|
||||
```
|
||||
|
||||
### Creating a syntax tree
|
||||
|
||||
```{versionchanged} 0.7.0
|
||||
`nest_tokens` and `NestedTokens` are deprecated and replaced by `SyntaxTreeNode`.
|
||||
```
|
||||
|
||||
In some use cases it may be useful to convert the token stream into a syntax tree,
|
||||
with opening/closing tokens collapsed into a single token that contains children.
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it.tree import SyntaxTreeNode
|
||||
|
||||
md = MarkdownIt("commonmark")
|
||||
tokens = md.parse("""
|
||||
# Header
|
||||
|
||||
Here's some text and an image ![title](image.png)
|
||||
|
||||
1. a **list**
|
||||
|
||||
> a *quote*
|
||||
""")
|
||||
|
||||
node = SyntaxTreeNode(tokens)
|
||||
print(node.pretty(indent=2, show_text=True))
|
||||
```
|
||||
|
||||
You can then use methods to traverse the tree
|
||||
|
||||
```{code-cell} python
|
||||
node.children
|
||||
```
|
||||
|
||||
```{code-cell} python
|
||||
print(node[0])
|
||||
node[0].next_sibling
|
||||
```
|
||||
|
||||
## Renderers
|
||||
|
||||
+++
|
||||
|
||||
After the token stream is generated, it's passed to a [renderer](https://github.com/executablebooks/markdown-it-py/tree/master/markdown_it/renderer.py).
|
||||
It then plays all the tokens, passing each to a rule with the same name as token type.
|
||||
|
||||
Renderer rules are located in `md.renderer.rules` and are simple functions
|
||||
with the same signature:
|
||||
|
||||
```python
|
||||
def function(renderer, tokens, idx, options, env):
|
||||
return htmlResult
|
||||
```
|
||||
|
||||
+++
|
||||
|
||||
You can inject render methods into the instantiated render class.
|
||||
|
||||
```{code-cell} python
|
||||
md = MarkdownIt("commonmark")
|
||||
|
||||
def render_em_open(self, tokens, idx, options, env):
|
||||
return '<em class="myclass">'
|
||||
|
||||
md.add_render_rule("em_open", render_em_open)
|
||||
md.render("*a*")
|
||||
```
|
||||
|
||||
This is a slight change to the JS version, where the renderer argument is at the end.
|
||||
Also, the `add_render_rule` method is specific to Python: rather than adding directly to `md.renderer.rules`, it ensures the method is bound to the renderer.
|
||||
|
||||
+++
|
||||
|
||||
You can also subclass a render and add the method there:
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it.renderer import RendererHTML
|
||||
|
||||
class MyRenderer(RendererHTML):
|
||||
def em_open(self, tokens, idx, options, env):
|
||||
return '<em class="myclass">'
|
||||
|
||||
md = MarkdownIt("commonmark", renderer_cls=MyRenderer)
|
||||
md.render("*a*")
|
||||
```
|
||||
|
||||
Plugins can support multiple render types, using the `__output__` attribute (this is currently a Python only feature).
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it.renderer import RendererHTML
|
||||
|
||||
class MyRenderer1(RendererHTML):
|
||||
__output__ = "html1"
|
||||
|
||||
class MyRenderer2(RendererHTML):
|
||||
__output__ = "html2"
|
||||
|
||||
def plugin(md):
|
||||
def render_em_open1(self, tokens, idx, options, env):
|
||||
return '<em class="myclass1">'
|
||||
def render_em_open2(self, tokens, idx, options, env):
|
||||
return '<em class="myclass2">'
|
||||
md.add_render_rule("em_open", render_em_open1, fmt="html1")
|
||||
md.add_render_rule("em_open", render_em_open2, fmt="html2")
|
||||
|
||||
md = MarkdownIt("commonmark", renderer_cls=MyRenderer1).use(plugin)
|
||||
print(md.render("*a*"))
|
||||
|
||||
md = MarkdownIt("commonmark", renderer_cls=MyRenderer2).use(plugin)
|
||||
print(md.render("*a*"))
|
||||
```
|
||||
|
||||
Here's a more concrete example; let's replace images with vimeo links to player's iframe:
|
||||
|
||||
```{code-cell} python
|
||||
import re
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
vimeoRE = re.compile(r'^https?:\/\/(www\.)?vimeo.com\/(\d+)($|\/)')
|
||||
|
||||
def render_vimeo(self, tokens, idx, options, env):
|
||||
token = tokens[idx]
|
||||
|
||||
if vimeoRE.match(token.attrs["src"]):
|
||||
|
||||
ident = vimeoRE.match(token.attrs["src"])[2]
|
||||
|
||||
return ('<div class="embed-responsive embed-responsive-16by9">\n' +
|
||||
' <iframe class="embed-responsive-item" src="//player.vimeo.com/video/' +
|
||||
ident + '"></iframe>\n' +
|
||||
'</div>\n')
|
||||
return self.image(tokens, idx, options, env)
|
||||
|
||||
md = MarkdownIt("commonmark")
|
||||
md.add_render_rule("image", render_vimeo)
|
||||
print(md.render("![](https://www.vimeo.com/123)"))
|
||||
```
|
||||
|
||||
Here is another example, how to add `target="_blank"` to all links:
|
||||
|
||||
```{code-cell} python
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
def render_blank_link(self, tokens, idx, options, env):
|
||||
tokens[idx].attrSet("target", "_blank")
|
||||
|
||||
# pass token to default renderer.
|
||||
return self.renderToken(tokens, idx, options, env)
|
||||
|
||||
md = MarkdownIt("commonmark")
|
||||
md.add_render_rule("link_open", render_blank_link)
|
||||
print(md.render("[a]\n\n[a]: b"))
|
||||
```
|
|
@ -0,0 +1,5 @@
|
|||
"""A Python port of Markdown-It"""
|
||||
__all__ = ("MarkdownIt",)
|
||||
__version__ = "2.1.0"
|
||||
|
||||
from .main import MarkdownIt
|
|
@ -0,0 +1,10 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
# Extra keyword arguments for ``dataclasses.dataclass``: ``slots=True`` is
# only accepted from Python 3.10 onwards, so it is applied conditionally.
DATACLASS_KWARGS: Mapping[str, Any] = (
    {"slots": True} if sys.version_info >= (3, 10) else {}
)
|
|
@ -0,0 +1,66 @@
|
|||
# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
|
||||
# Copyright 2021 Taneli Hukkinen
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining
|
||||
# a copy of this software and associated documentation files (the
|
||||
# "Software"), to deal in the Software without restriction, including
|
||||
# without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to
|
||||
# permit persons to whom the Software is furnished to do so, subject to
|
||||
# the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import codecs
|
||||
import re
|
||||
|
||||
REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
|
||||
REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")
|
||||
|
||||
|
||||
def encode(uni: str) -> str:
    """Punycode-encode *uni* and return the ASCII result as ``str``."""
    encoded_bytes = codecs.encode(uni, encoding="punycode")
    return encoded_bytes.decode("ascii")
|
||||
|
||||
|
||||
def decode(ascii: str) -> str:
    """Decode the Punycode string *ascii* back to Unicode."""
    decoded: str = codecs.decode(ascii, encoding="punycode")  # type: ignore[call-overload]
    return decoded
|
||||
|
||||
|
||||
def map_domain(string: str, fn) -> str:
    """Apply *fn* to each dot-separated label of a domain name.

    For email addresses only the domain part (after ``@``) is mapped; the
    local part is left untouched.

    :param string: a domain name or email address
    :param fn: callable mapping one label (``str``) to a ``str``
    :return: the rebuilt string with every domain label transformed
    """
    parts = string.split("@")
    result = ""
    if len(parts) > 1:
        # In email addresses, only the domain name should be punycoded. Leave
        # the local part (i.e. everything up to `@`) intact.
        # NOTE: if there is more than one '@', everything after the second
        # '@' is dropped (mirrors the JS punycode implementation).
        result = parts[0] + "@"
        string = parts[1]
    labels = REGEX_SEPARATORS.split(string)
    encoded = ".".join(fn(label) for label in labels)
    return result + encoded
|
||||
|
||||
|
||||
def to_unicode(obj: str) -> str:
    """Convert any ``xn--`` (Punycode) labels of *obj* back to Unicode."""

    def mapping(label: str) -> str:
        # Only labels carrying the "xn--" ACE prefix are punycode-encoded.
        if not label.startswith("xn--"):
            return label
        return decode(label[4:].lower())

    return map_domain(obj, mapping)
|
||||
|
||||
|
||||
def to_ascii(obj: str) -> str:
    """Punycode-encode every non-ASCII label of *obj*, adding the ``xn--`` prefix."""

    def mapping(label: str) -> str:
        # ASCII-only labels pass through unchanged.
        if not REGEX_NON_ASCII.search(label):
            return label
        return "xn--" + encode(label)

    return map_domain(obj, mapping)
|
|
@ -0,0 +1,109 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
CLI interface to markdown-it-py
|
||||
|
||||
Parse one or more markdown files, convert each to HTML, and print to stdout.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from collections.abc import Iterable, Sequence
|
||||
import sys
|
||||
|
||||
from markdown_it import __version__
|
||||
from markdown_it.main import MarkdownIt
|
||||
|
||||
version_str = "markdown-it-py [version {}]".format(__version__)
|
||||
|
||||
|
||||
def main(args: Sequence[str] | None = None) -> int:
    """CLI entry point: convert the given files, or start the interactive REPL.

    :param args: argument strings, or ``None`` to use ``sys.argv[1:]``
    :return: process exit code (always 0; errors exit elsewhere)
    """
    namespace = parse_args(args)
    if not namespace.filenames:
        interactive()
    else:
        convert(namespace.filenames)
    return 0
|
||||
|
||||
|
||||
def convert(filenames: Iterable[str]) -> None:
    """Render each of *filenames* to stdout, in order."""
    for name in filenames:
        convert_file(name)
|
||||
|
||||
|
||||
def convert_file(filename: str) -> None:
    """
    Parse a Markdown file and dump the output to stdout.

    Exits the process with status 1 if the file cannot be read.
    """
    try:
        with open(filename, "r") as fin:
            rendered = MarkdownIt().render(fin.read())
            print(rendered, end="")
    except OSError:
        # BUG FIX: the f-string had no placeholder, so every failure reported
        # a literal placeholder instead of the offending path.
        sys.stderr.write(f'Cannot open file "{filename}".\n')
        sys.exit(1)
|
||||
|
||||
|
||||
def interactive() -> None:
    """
    Parse user input, dump to stdout, rinse and repeat.
    Python REPL style.
    """
    print_heading()
    contents = []
    more = False
    while True:
        try:
            # First line of a document gets the ">>> " prompt; continuation
            # lines get "... ", like the Python REPL.
            prompt = "... " if more else ">>> "
            more = True
            contents.append(input(prompt) + "\n")
        except EOFError:
            # Ctrl-D renders everything gathered so far and resets the buffer.
            print("\n" + MarkdownIt().render("\n".join(contents)), end="")
            more = False
            contents = []
        except KeyboardInterrupt:
            print("\nExiting.")
            break
|
||||
|
||||
|
||||
def parse_args(args: Sequence[str] | None) -> argparse.Namespace:
    """Parse input CLI arguments.

    :param args: argument strings to parse, or ``None`` for ``sys.argv[1:]``
    :return: namespace whose ``filenames`` attribute holds zero or more paths
    """
    parser = argparse.ArgumentParser(
        description="Parse one or more markdown files, "
        "convert each to HTML, and print to stdout",
        # NOTE: Remember to update README.md w/ the output of `markdown-it -h`
        epilog=(
            f"""
Interactive:

  $ markdown-it
  markdown-it-py [version {__version__}] (interactive)
  Type Ctrl-D to complete input, or Ctrl-C to exit.
  >>> # Example
  ... > markdown *input*
  ...
  <h1>Example</h1>
  <blockquote>
  <p>markdown <em>input</em></p>
  </blockquote>

Batch:

  $ markdown-it README.md README.footer.md > index.html
"""
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # -v prints the version string and exits immediately (argparse built-in).
    parser.add_argument("-v", "--version", action="version", version=version_str)
    parser.add_argument(
        "filenames", nargs="*", help="specify an optional list of files to convert"
    )
    return parser.parse_args(args)
|
||||
|
||||
|
||||
def print_heading() -> None:
    """Print the interactive-mode banner and usage hint."""
    print(f"{version_str} (interactive)")
    print("Type Ctrl-D to complete input, or Ctrl-C to exit.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = main(sys.argv[1:])
|
||||
sys.exit(exit_code)
|
|
@ -0,0 +1,4 @@
|
|||
"""HTML5 entities map: { name -> characters }."""
|
||||
import html.entities
|
||||
|
||||
entities = {name.rstrip(";"): chars for name, chars in html.entities.html5.items()}
|
|
@ -0,0 +1,68 @@
|
|||
"""List of valid html blocks names, according to commonmark spec
|
||||
http://jgm.github.io/CommonMark/spec.html#html-blocks
|
||||
"""
|
||||
|
||||
block_names = [
|
||||
"address",
|
||||
"article",
|
||||
"aside",
|
||||
"base",
|
||||
"basefont",
|
||||
"blockquote",
|
||||
"body",
|
||||
"caption",
|
||||
"center",
|
||||
"col",
|
||||
"colgroup",
|
||||
"dd",
|
||||
"details",
|
||||
"dialog",
|
||||
"dir",
|
||||
"div",
|
||||
"dl",
|
||||
"dt",
|
||||
"fieldset",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"footer",
|
||||
"form",
|
||||
"frame",
|
||||
"frameset",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"head",
|
||||
"header",
|
||||
"hr",
|
||||
"html",
|
||||
"iframe",
|
||||
"legend",
|
||||
"li",
|
||||
"link",
|
||||
"main",
|
||||
"menu",
|
||||
"menuitem",
|
||||
"nav",
|
||||
"noframes",
|
||||
"ol",
|
||||
"optgroup",
|
||||
"option",
|
||||
"p",
|
||||
"param",
|
||||
"section",
|
||||
"source",
|
||||
"summary",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"tfoot",
|
||||
"th",
|
||||
"thead",
|
||||
"title",
|
||||
"tr",
|
||||
"track",
|
||||
"ul",
|
||||
]
|
|
@ -0,0 +1,40 @@
|
|||
"""Regexps to match html elements
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Attribute name: a letter/underscore/colon, then letters, digits, ":. _-"
attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"

# The three permitted attribute-value forms from the CommonMark spec.
unquoted = "[^\"'=<>`\\x00-\\x20]+"
single_quoted = "'[^']*'"
double_quoted = '"[^"]*"'

# Any one of the three value forms.
attr_value = "(?:" + unquoted + "|" + single_quoted + "|" + double_quoted + ")"

# Whitespace-prefixed attribute, with an optional "=value" part.
attribute = "(?:\\s+" + attr_name + "(?:\\s*=\\s*" + attr_value + ")?)"

# Open tag, including self-closing "/>" variants.
open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>"

# Closing tag, HTML comment, processing instruction, declaration, CDATA.
close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"
comment = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
processing = "<[?][\\s\\S]*?[?]>"
declaration = "<![A-Z]+\\s+[^>]*>"
cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"

# Matches any single HTML construct anchored at the start of the string.
HTML_TAG_RE = re.compile(
    "^(?:"
    + open_tag
    + "|"
    + close_tag
    + "|"
    + comment
    + "|"
    + processing
    + "|"
    + declaration
    + "|"
    + cdata
    + ")"
)
# Restricted variant: only open/close tags (used for inline HTML, type 7 blocks).
HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)
|
|
@ -0,0 +1,82 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
import re
|
||||
from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401
|
||||
|
||||
import mdurl
|
||||
|
||||
from .. import _punycode
|
||||
|
||||
RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")
|
||||
|
||||
|
||||
def normalizeLink(url: str) -> str:
    """Normalize destination URLs in links

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^

    :param url: raw destination text from the markdown source
    :returns: percent-encoded URL; the hostname is punycode-encoded for the
        protocols listed in ``RECODE_HOSTNAME_FOR``
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    if parsed.hostname:
        # Encode hostnames in urls like:
        # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
        #
        # We don't encode unknown schemas, because it's likely that we encode
        # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
        #
        if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR:
            try:
                parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname))
            except Exception:
                # best-effort: an IDNA-invalid hostname is left unchanged
                pass

    return mdurl.encode(mdurl.format(parsed))
|
||||
|
||||
|
||||
def normalizeLinkText(url: str) -> str:
    """Normalize autolink content

    ::

        <destination>
         ~~~~~~~~~~~

    Inverse of :func:`normalizeLink`: decodes percent-escapes and converts a
    punycode hostname back to unicode for display purposes.
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    if parsed.hostname:
        # Encode hostnames in urls like:
        # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
        #
        # We don't encode unknown schemas, because it's likely that we encode
        # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
        #
        if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR:
            try:
                parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname))
            except Exception:
                # best-effort: leave an undecodable hostname as-is
                pass

    # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
    return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%")
|
||||
|
||||
|
||||
# Schemes that are rejected outright, and the data: image types allowed back in.
BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")


def validateLink(url: str, validator: Callable | None = None) -> bool:
    """Validate URL link is allowed in output.

    This validator can prohibit more than really needed to prevent XSS.
    It's a tradeoff to keep code simple and to be secure by default.

    Note: url should be normalized at this point, and existing entities decoded.

    :param validator: optional override; when given, it alone decides.
    """
    if validator is not None:
        return validator(url)

    normalized = url.strip().lower()
    if not BAD_PROTO_RE.search(normalized):
        return True
    # dangerous scheme: only whitelisted data-image URLs survive
    return GOOD_DATA_RE.search(normalized) is not None
|
|
@ -0,0 +1,334 @@
|
|||
"""Utilities for parsing source text
|
||||
"""
|
||||
import html
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from .entities import entities
|
||||
|
||||
|
||||
def charCodeAt(src: str, pos: int) -> Any:
    """Return the Unicode code point of ``src[pos]``, or ``None`` if absent.

    Python port of JavaScript's ``String.charCodeAt`` (which yields ``NaN``
    past the end of the string); callers here test for ``None`` instead.
    Negative ``pos`` follows normal Python indexing, as in the original port.
    """
    try:
        ch = src[pos]
    except IndexError:
        return None
    return ord(ch)
|
||||
|
||||
|
||||
# Merge objects
#
def assign(obj):
    """Merge objects /*from1, from2, from3, ...*/)

    Unimplemented JavaScript port kept for parity with upstream markdown-it;
    Python callers should use ``dict.update`` / ``{**a, **b}`` instead.
    """
    raise NotImplementedError
    # Original JavaScript, retained for reference:
    # sources = Array.prototype.slice.call(arguments, 1)

    # sources.forEach(function (source) {
    #   if (!source) { return; }

    #   if (typeof source !== 'object') {
    #     throw new TypeError(source + 'must be object')
    #   }

    #   Object.keys(source).forEach(function (key) {
    #     obj[key] = source[key]
    #   })
    # })

    # return obj
|
||||
|
||||
|
||||
def arrayReplaceAt(src: list, pos: int, newElements: list) -> list:
    """Return a new list with ``src[pos]`` replaced by ``newElements``.

    Useful for some operations with tokens; ``src`` is not mutated.
    """
    head = src[:pos]
    tail = src[pos + 1 :]
    return head + newElements + tail
|
||||
|
||||
|
||||
######################################################################
|
||||
|
||||
|
||||
def isValidEntityCode(c: int) -> bool:
    """Return True if ``c`` is a code point a numeric entity may resolve to."""
    # surrogate halves (broken sequence)
    if 0xD800 <= c <= 0xDFFF:
        return False
    # unicode noncharacters (never used)
    if 0xFDD0 <= c <= 0xFDEF:
        return False
    if (c & 0xFFFF) in (0xFFFE, 0xFFFF):
        return False
    # C0 control codes (tab/newline/formfeed/cr excluded) and DEL/C1 range
    if 0x00 <= c <= 0x08 or c == 0x0B or 0x0E <= c <= 0x1F:
        return False
    if 0x7F <= c <= 0x9F:
        return False
    # anything past the Unicode range is out
    return c <= 0x10FFFF
|
||||
|
||||
|
||||
def fromCodePoint(c: int) -> str:
    """Return the one-character string for code point ``c``.

    Port of JavaScript's ``String.fromCodePoint``; unlike JS (which needs a
    surrogate pair above ``0xFFFF``), Python represents every code point as
    a single character, so this is simply ``chr``.
    """
    return chr(c)
|
||||
|
||||
|
||||
# Backslash-escaped ASCII punctuation (group 1 = the escaped character).
UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)
# Either a backslash escape (group 1) or an HTML entity body (group 2).
UNESCAPE_ALL_RE = re.compile(
    r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
    re.IGNORECASE,
)
# Numeric entity body: "#" then decimal digits, or "#x" then hex digits.
DIGITAL_ENTITY_TEST_RE = re.compile(r"^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))", re.IGNORECASE)
|
||||
|
||||
|
||||
def replaceEntityPattern(match: str, name: str) -> str:
    """Resolve the HTML entity ``name`` to its replacement character(s).

    ``name`` is either a named entity (e.g. ``amp``) or a numeric one
    (e.g. ``#38`` or ``#x26``). ``match`` (the full matched text) is returned
    unchanged when the entity is unknown or maps to an invalid code point.
    """
    code = 0

    if name in entities:
        return entities[name]

    # numeric entity: leading '#', then decimal digits or 'x' + hex digits
    if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name):
        code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10)
        if isValidEntityCode(code):
            return fromCodePoint(code)

    return match
|
||||
|
||||
|
||||
# def replaceEntities(string):
|
||||
# if (string.indexOf('&') < 0):
|
||||
# return string
|
||||
# return string.replace(ENTITY_RE, replaceEntityPattern)
|
||||
|
||||
|
||||
def unescapeMd(string: str) -> str:
    """Strip markdown backslash escapes (unimplemented JS port; unused)."""
    raise NotImplementedError
    # Original (dead) port, kept for reference:
    # if "\\" in string:
    #     return string
    # return string.replace(UNESCAPE_MD_RE, "$1")
|
||||
|
||||
|
||||
def unescapeAll(string: str) -> str:
    """Replace backslash escapes and HTML entities with their characters."""

    def replacer_func(match):
        # group 1: backslash-escaped punctuation char; group 2: entity name
        escaped = match.group(1)
        if escaped:
            return escaped
        entity = match.group(2)
        return replaceEntityPattern(match.group(), entity)

    # fast path: nothing that could need unescaping
    if "\\" not in string and "&" not in string:
        return string
    return UNESCAPE_ALL_RE.sub(replacer_func, string)
|
||||
|
||||
|
||||
# Characters whose backslash escape is recognised by CommonMark.
ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")


def stripEscape(string: str) -> str:
    """Drop the backslash from every ``\\<punct>`` escape in ``string``."""
    stripped = ESCAPE_CHAR.sub(r"\1", string)
    return stripped
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
# TODO This section changed quite a lot, should re-check
|
||||
|
||||
# UNESCAPE_HTML_RE = re.compile(r"\\&(?=(amp\;|lt\;|gt\;|quot\;))")
|
||||
# ESCAPE_AND_HTML = re.compile(r"&(?!(amp\;|lt\;|gt\;|quot\;))")
|
||||
# HTML_ESCAPE_REPLACE_RE = re.compile(r'[&<>"]')
|
||||
|
||||
|
||||
# def escapeHtml(string: str):
|
||||
|
||||
# if HTML_ESCAPE_REPLACE_RE.search(string):
|
||||
|
||||
# string = UNESCAPE_HTML_RE.sub("&", string)
|
||||
# string = ESCAPE_AND_HTML.sub("&", string)
|
||||
# for k, v in {"<": "<", ">": ">", '"': """}.items():
|
||||
# string = string.replace(k, v)
|
||||
|
||||
# return string
|
||||
|
||||
|
||||
def escapeHtml(raw: str) -> str:
    """Escape ``&``, ``<``, ``>`` and ``"`` for HTML output.

    ``html.escape`` additionally converts ``'`` to ``&#x27;``; upstream
    markdown-it leaves single quotes untouched, so that conversion is
    reverted here. (The previous ``.replace("'", "'")`` was a no-op produced
    by entity-mangling of the original ``"&#x27;"`` argument.)
    """
    return html.escape(raw).replace("&#x27;", "'")
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
# Regexp metacharacters that need backslash-escaping.
REGEXP_ESCAPE_RE = re.compile(r"[.?*+^$[\]\\(){}|-]")


def escapeRE(string: str) -> str:
    """Backslash-escape every regexp special character in ``string``.

    The original JS used ``str.replace(re, "\\$&")``; ``$&`` is JavaScript's
    whole-match backreference, so the literal ``"\\$&"`` template inserted
    the text ``$&`` in Python instead of escaping the matched character.
    ``\\g<0>`` is the Python equivalent.
    """
    string = REGEXP_ESCAPE_RE.sub(r"\\\g<0>", string)
    return string
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def isSpace(code: object) -> bool:
    """True when ``code`` is an ASCII tab (0x09) or space (0x20) code point."""
    return code == 0x09 or code == 0x20
|
||||
|
||||
|
||||
# Code points markdown treats as whitespace, besides the general-punctuation
# spaces U+2000..U+200A handled by the range check in ``isWhiteSpace``.
MD_WHITESPACE = {
    0x09,  # \t
    0x0A,  # \n
    0x0B,  # \v
    0x0C,  # \f
    0x0D,  # \r
    0x20,  # space
    0xA0,  # no-break space
    0x1680,  # ogham space mark
    0x202F,  # narrow no-break space
    0x205F,  # medium mathematical space
    0x3000,  # ideographic space
}


def isWhiteSpace(code: int) -> bool:
    r"""Zs (unicode class) || [\t\f\v\r\n]"""
    return 0x2000 <= code <= 0x200A or code in MD_WHITESPACE
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UNICODE_PUNCT_RE = re.compile(
|
||||
r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" # noqa: E501
|
||||
)
|
||||
|
||||
|
||||
# Currently without astral characters support.
def isPunctChar(ch: str) -> bool:
    """Return True if ``ch`` matches the unicode punctuation class above."""
    return UNICODE_PUNCT_RE.search(ch) is not None
|
||||
|
||||
|
||||
# Code points of the 32 ASCII punctuation characters recognised by CommonMark
# (the same characters as ``string.punctuation``).
MD_ASCII_PUNCT = set(map(ord, "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"))


def isMdAsciiPunct(ch: int) -> bool:
    """Markdown ASCII punctuation characters.

    ::

        !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \\, ], ^, _, `, {, |, }, or ~

    See http://spec.commonmark.org/0.15/#ascii-punctuation-character

    Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.

    """  # noqa: E501
    return ch in MD_ASCII_PUNCT
|
||||
|
||||
|
||||
def normalizeReference(string: str) -> str:
    """Helper to unify [reference labels]."""
    # Trim, then collapse every internal whitespace run to a single space.
    label = re.sub(r"\s+", " ", string.strip())

    # Apply .lower() then .upper(): a single casing pass leaves some letter
    # variants distinct (e.g. the Greek theta forms U+0398/U+03F4/U+03B8/U+03D1
    # — lowercasing doesn't touch U+03D1, uppercasing doesn't touch U+03F4),
    # while applying both maps them all to one representative. This mirrors
    # upstream markdown-it, which additionally uppercases the final result so
    # labels can't collide with JS ``Object.prototype`` member names.
    return label.lower().upper()
|
|
@ -0,0 +1,6 @@
|
|||
"""Functions for parsing Links
|
||||
"""
|
||||
__all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle")
|
||||
from .parse_link_destination import parseLinkDestination
|
||||
from .parse_link_label import parseLinkLabel
|
||||
from .parse_link_title import parseLinkTitle
|
|
@ -0,0 +1,86 @@
|
|||
"""
|
||||
Parse link destination
|
||||
"""
|
||||
|
||||
from ..common.utils import charCodeAt, unescapeAll
|
||||
|
||||
|
||||
class _Result:
|
||||
__slots__ = ("ok", "pos", "lines", "str")
|
||||
|
||||
def __init__(self):
|
||||
self.ok = False
|
||||
self.pos = 0
|
||||
self.lines = 0
|
||||
self.str = ""
|
||||
|
||||
|
||||
def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link destination from ``string[pos:maximum]``.

    Supports both the pointy-bracket form ``<...>`` and the bare form with
    balanced parentheses. ``result.ok`` stays False when no valid
    destination is found.
    """
    lines = 0
    start = pos
    result = _Result()

    if charCodeAt(string, pos) == 0x3C:  # /* < */
        # pointy-bracket destination: everything up to the matching '>'
        pos += 1
        while pos < maximum:
            code = charCodeAt(string, pos)
            if code == 0x0A:  # newline is not allowed inside <...>
                return result
            if code == 0x3C:  # a second '<' is invalid
                return result
            if code == 0x3E:  # /* > */ - success
                result.pos = pos + 1
                result.str = unescapeAll(string[start + 1 : pos])
                result.ok = True
                return result

            if code == 0x5C and pos + 1 < maximum:  # backslash escape
                pos += 2
                continue

            pos += 1

        # no closing '>'
        return result

    # this should be ... } else { ... branch

    # bare destination: ends at whitespace/control char, parens must balance
    level = 0
    while pos < maximum:
        code = charCodeAt(string, pos)

        if code == 0x20:
            break

        # ascii control characters
        if code < 0x20 or code == 0x7F:
            break

        if code == 0x5C and pos + 1 < maximum:
            # a backslash before a space ends the destination
            if charCodeAt(string, pos + 1) == 0x20:
                break
            pos += 2
            continue

        if code == 0x28:  # /* ( */
            level += 1
            if level > 32:  # parenthesis nesting limit (matches upstream)
                return result

        if code == 0x29:  # /* ) */
            if level == 0:
                break
            level -= 1

        pos += 1

    if start == pos:
        return result
    if level != 0:
        return result

    result.str = unescapeAll(string[start:pos])
    result.lines = lines
    result.pos = pos
    result.ok = True
    return result
|
|
@ -0,0 +1,44 @@
|
|||
"""
|
||||
Parse link label
|
||||
|
||||
this function assumes that first character ("[") already matches
|
||||
returns the end of the label
|
||||
|
||||
"""
|
||||
from markdown_it.rules_inline import StateInline
|
||||
|
||||
|
||||
def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
    """Return the position of the ``]`` closing the label that opens at ``start``.

    Assumes ``state.src[start]`` is ``[``. Returns -1 when no closing bracket
    is found, or when a nested label is encountered and ``disableNested`` is
    set. ``state.pos`` is restored before returning.
    """

    labelEnd = -1
    oldPos = state.pos
    found = False

    state.pos = start + 1
    level = 1  # bracket nesting depth; 0 means the label is closed

    while state.pos < state.posMax:
        marker = state.srcCharCode[state.pos]
        if marker == 0x5D:  # /* ] */
            level -= 1
            if level == 0:
                found = True
                break

        prevPos = state.pos
        # skip over a whole inline token (code span, autolink, ...)
        state.md.inline.skipToken(state)
        if marker == 0x5B:  # /* [ */
            if prevPos == state.pos - 1:
                # increase level if we find text `[`,
                # which is not a part of any token
                level += 1
            elif disableNested:
                state.pos = oldPos
                return -1
    if found:
        labelEnd = state.pos

    # restore old state
    state.pos = oldPos

    return labelEnd
|
|
@ -0,0 +1,60 @@
|
|||
"""Parse link title
|
||||
"""
|
||||
from ..common.utils import charCodeAt, unescapeAll
|
||||
|
||||
|
||||
class _Result:
|
||||
__slots__ = ("ok", "pos", "lines", "str")
|
||||
|
||||
def __init__(self):
|
||||
self.ok = False
|
||||
self.pos = 0
|
||||
self.lines = 0
|
||||
self.str = ""
|
||||
|
||||
def __str__(self):
|
||||
return self.str
|
||||
|
||||
|
||||
def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link title delimited by ``"``, ``'`` or ``(...)``.

    ``result.lines`` counts newlines inside the title; ``result.ok`` stays
    False when the opening marker is missing or never closed.
    """
    lines = 0
    start = pos
    result = _Result()

    if pos >= maximum:
        return result

    marker = charCodeAt(string, pos)

    # /* " */ /* ' */ /* ( */
    if marker != 0x22 and marker != 0x27 and marker != 0x28:
        return result

    pos += 1

    # if opening marker is "(", switch it to closing marker ")"
    if marker == 0x28:
        marker = 0x29

    while pos < maximum:
        code = charCodeAt(string, pos)
        if code == marker:
            # closing delimiter found - success
            title = string[start + 1 : pos]
            title = unescapeAll(title)
            result.pos = pos + 1
            result.lines = lines
            result.str = title
            result.ok = True
            return result
        elif code == 0x28 and marker == 0x29:  # unescaped '(' inside (...)
            return result
        elif code == 0x0A:
            lines += 1
        elif code == 0x5C and pos + 1 < maximum:  # /* \ */ skip escaped char
            pos += 1
            if charCodeAt(string, pos) == 0x0A:
                lines += 1

        pos += 1

    return result
|
|
@ -0,0 +1,331 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
|
||||
from . import helpers, presets # noqa F401
|
||||
from .common import normalize_url, utils # noqa F401
|
||||
from .parser_block import ParserBlock # noqa F401
|
||||
from .parser_core import ParserCore # noqa F401
|
||||
from .parser_inline import ParserInline # noqa F401
|
||||
from .renderer import RendererHTML, RendererProtocol
|
||||
from .rules_core.state_core import StateCore
|
||||
from .token import Token
|
||||
from .utils import OptionsDict
|
||||
|
||||
try:
|
||||
import linkify_it
|
||||
except ModuleNotFoundError:
|
||||
linkify_it = None
|
||||
|
||||
|
||||
# Pre-built configuration presets, resolved once at import time;
# ``MarkdownIt.configure`` resolves string config names against this table.
_PRESETS = {
    "default": presets.default.make(),
    "js-default": presets.js_default.make(),
    "zero": presets.zero.make(),
    "commonmark": presets.commonmark.make(),
    "gfm-like": presets.gfm_like.make(),
}
|
||||
|
||||
|
||||
class MarkdownIt:
|
||||
    def __init__(
        self,
        config: str | Mapping = "commonmark",
        options_update: Mapping | None = None,
        *,
        renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
    ):
        """Main parser class

        :param config: name of configuration to load or a pre-defined dictionary
        :param options_update: dictionary that will be merged into ``config["options"]``
        :param renderer_cls: the class to load as the renderer:
            ``self.renderer = renderer_cls(self)``
        """
        # add modules
        self.utils = utils
        self.helpers: Any = helpers

        # initialise classes
        self.inline = ParserInline()
        self.block = ParserBlock()
        self.core = ParserCore()
        self.renderer = renderer_cls(self)
        # linkify-it-py is an optional dependency; ``None`` when not installed
        self.linkify = linkify_it.LinkifyIt() if linkify_it else None

        # set the configuration
        if options_update and not isinstance(options_update, Mapping):
            # catch signature change where renderer_cls was not used as a key-word
            raise TypeError(
                f"options_update should be a mapping: {options_update}"
                "\n(Perhaps you intended this to be the renderer_cls?)"
            )
        self.configure(config, options_update=options_update)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"{self.__class__.__module__}.{self.__class__.__name__}()"
|
||||
|
||||
def __getitem__(self, name: str) -> Any:
|
||||
return {
|
||||
"inline": self.inline,
|
||||
"block": self.block,
|
||||
"core": self.core,
|
||||
"renderer": self.renderer,
|
||||
}[name]
|
||||
|
||||
    def set(self, options: MutableMapping) -> None:
        """Set parser options (in the same format as in constructor).
        Probably, you will never need it, but you can change options after constructor call.

        __Note:__ To achieve the best possible performance, don't modify a
        `markdown-it` instance options on the fly. If you need multiple configurations
        it's best to create multiple instances and initialize each with separate config.
        """
        # OptionsDict wraps the mapping to allow attribute-style access
        self.options = OptionsDict(options)
|
||||
|
||||
    def configure(
        self, presets: str | Mapping, options_update: Mapping | None = None
    ) -> MarkdownIt:
        """Batch load of all options and component settings.
        This is an internal method, and you probably will not need it.
        But if you will - see available presets and data structure
        [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

        We strongly recommend to use presets instead of direct config loads.
        That will give better compatibility with next versions.
        """
        if isinstance(presets, str):
            # resolve a named preset from the module-level table
            if presets not in _PRESETS:
                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
            config = _PRESETS[presets]
        else:
            config = presets

        if not config:
            raise ValueError("Wrong `markdown-it` config, can't be empty")

        options = config.get("options", {}) or {}
        if options_update:
            # user-supplied overrides win over the preset's options
            options = {**options, **options_update}

        self.set(options)

        if "components" in config:
            # restrict each chain's ruler(s) to the rules the preset names
            for name, component in config["components"].items():
                rules = component.get("rules", None)
                if rules:
                    self[name].ruler.enableOnly(rules)
                rules2 = component.get("rules2", None)
                if rules2:
                    self[name].ruler2.enableOnly(rules2)

        return self
|
||||
|
||||
    def get_all_rules(self) -> dict[str, list[str]]:
        """Return the names of all rules (enabled or not), keyed by chain."""
        rules = {
            chain: self[chain].ruler.get_all_rules()
            for chain in ["core", "block", "inline"]
        }
        # the second inline ruler (post-processing rules) is tracked separately
        rules["inline2"] = self.inline.ruler2.get_all_rules()
        return rules
|
||||
|
||||
def get_active_rules(self) -> dict[str, list[str]]:
|
||||
"""Return the names of all active rules."""
|
||||
rules = {
|
||||
chain: self[chain].ruler.get_active_rules()
|
||||
for chain in ["core", "block", "inline"]
|
||||
}
|
||||
rules["inline2"] = self.inline.ruler2.get_active_rules()
|
||||
return rules
|
||||
|
||||
def enable(
|
||||
self, names: str | Iterable[str], ignoreInvalid: bool = False
|
||||
) -> MarkdownIt:
|
||||
"""Enable list or rules. (chainable)
|
||||
|
||||
:param names: rule name or list of rule names to enable.
|
||||
:param ignoreInvalid: set `true` to ignore errors when rule not found.
|
||||
|
||||
It will automatically find appropriate components,
|
||||
containing rules with given names. If rule not found, and `ignoreInvalid`
|
||||
not set - throws exception.
|
||||
|
||||
Example::
|
||||
|
||||
md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')
|
||||
|
||||
"""
|
||||
result = []
|
||||
|
||||
if isinstance(names, str):
|
||||
names = [names]
|
||||
|
||||
for chain in ["core", "block", "inline"]:
|
||||
result.extend(self[chain].ruler.enable(names, True))
|
||||
result.extend(self.inline.ruler2.enable(names, True))
|
||||
|
||||
missed = [name for name in names if name not in result]
|
||||
if missed and not ignoreInvalid:
|
||||
raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")
|
||||
|
||||
return self
|
||||
|
||||
def disable(
|
||||
self, names: str | Iterable[str], ignoreInvalid: bool = False
|
||||
) -> MarkdownIt:
|
||||
"""The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)
|
||||
|
||||
:param names: rule name or list of rule names to disable.
|
||||
:param ignoreInvalid: set `true` to ignore errors when rule not found.
|
||||
|
||||
"""
|
||||
result = []
|
||||
|
||||
if isinstance(names, str):
|
||||
names = [names]
|
||||
|
||||
for chain in ["core", "block", "inline"]:
|
||||
result.extend(self[chain].ruler.disable(names, True))
|
||||
result.extend(self.inline.ruler2.disable(names, True))
|
||||
|
||||
missed = [name for name in names if name not in result]
|
||||
if missed and not ignoreInvalid:
|
||||
raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
|
||||
return self
|
||||
|
||||
    @contextmanager
    def reset_rules(self) -> Generator[None, None, None]:
        """A context manager, that will reset the current enabled rules on exit."""
        # snapshot active rules before the caller mutates them
        chain_rules = self.get_active_rules()
        yield
        # restore each chain; "inline2" lives on a separate ruler
        for chain, rules in chain_rules.items():
            if chain != "inline2":
                self[chain].ruler.enableOnly(rules)
        self.inline.ruler2.enableOnly(chain_rules["inline2"])
|
||||
|
||||
    def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> None:
        """Add a rule for rendering a particular Token type.

        Only applied when ``renderer.__output__ == fmt``
        """
        if self.renderer.__output__ == fmt:
            # bind the function to the renderer instance so it receives ``self``
            self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore
|
||||
|
||||
    def use(self, plugin: Callable, *params, **options) -> MarkdownIt:
        """Load specified plugin with given params into current parser instance. (chainable)

        It's just a sugar to call `plugin(md, params)` with curring.

        Example::

            def func(tokens, idx):
                tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
            md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)

        """
        # the plugin mutates this instance; its return value is ignored
        plugin(self, *params, **options)
        return self
|
||||
|
||||
    def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]:
        """Parse the source string to a token stream

        :param src: source string
        :param env: environment sandbox

        Parse input string and return list of block tokens (special token type
        "inline" will contain list of inline tokens).

        `env` is used to pass data between "distributed" rules and return additional
        metadata like reference info, needed for the renderer. It also can be used to
        inject data in specific cases. Usually, you will be ok to pass `{}`,
        and then pass updated object to renderer.
        """
        env = {} if env is None else env
        # validate inputs eagerly to fail with a clear message
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        self.core.process(state)
        return state.tokens
|
||||
|
||||
    def render(self, src: str, env: MutableMapping | None = None) -> Any:
        """Render markdown string into html. It does all magic for you :).

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        `env` can be used to inject additional metadata (`{}` by default).
        But you will not need it with high probability. See also comment
        in [[MarkdownIt.parse]].
        """
        env = {} if env is None else env
        # parse to tokens, then hand them to the configured renderer
        return self.renderer.render(self.parse(src, env), self.options, env)
|
||||
|
||||
def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token]:
|
||||
"""The same as [[MarkdownIt.parse]] but skip all block rules.
|
||||
|
||||
:param src: source string
|
||||
:param env: environment sandbox
|
||||
|
||||
It returns the
|
||||
block tokens list with the single `inline` element, containing parsed inline
|
||||
tokens in `children` property. Also updates `env` object.
|
||||
"""
|
||||
env = {} if env is None else env
|
||||
if not isinstance(env, MutableMapping):
|
||||
raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
|
||||
if not isinstance(src, str):
|
||||
raise TypeError(f"Input data should be a string, not {type(src)}")
|
||||
state = StateCore(src, self, env)
|
||||
state.inlineMode = True
|
||||
self.core.process(state)
|
||||
return state.tokens
|
||||
|
||||
def renderInline(self, src: str, env: MutableMapping | None = None) -> Any:
    """Render single-paragraph markdown content.

    :param src: source string
    :param env: environment sandbox; a fresh ``{}`` is used when ``None``
    :returns: the output of the loaded renderer

    Similar to [[MarkdownIt.render]] but for single paragraph content:
    the result will NOT be wrapped into `<p>` tags.
    """
    if env is None:
        env = {}
    tokens = self.parseInline(src, env)
    return self.renderer.render(tokens, self.options, env)
|
||||
|
||||
# link methods
|
||||
|
||||
def validateLink(self, url: str) -> bool:
    """Return whether the URL link is allowed in output.

    This validator can prohibit more than really needed, to prevent XSS;
    it is a tradeoff between keeping the code simple and being secure by
    default.

    Note: the url should be normalized at this point, and existing
    entities decoded.
    """
    is_allowed = normalize_url.validateLink(url)
    return is_allowed
|
||||
|
||||
def normalizeLink(self, url: str) -> str:
    """Normalize a destination URL in a link or reference definition.

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^
    """
    normalized = normalize_url.normalizeLink(url)
    return normalized
|
||||
|
||||
def normalizeLinkText(self, link: str) -> str:
    """Normalize the visible content of an autolink.

    ::

        <destination>
         ~~~~~~~~~~~
    """
    normalized = normalize_url.normalizeLinkText(link)
    return normalized
|
|
@ -0,0 +1,109 @@
|
|||
"""Block-level tokenizer."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from . import rules_block
|
||||
from .ruler import Ruler
|
||||
from .rules_block.state_block import StateBlock
|
||||
from .token import Token
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Block rules, tried in this priority order. Each entry is
# (rule name, rule function[, list of rule names this rule can interrupt]).
# Note: the "list" rule maps to the ``list_block`` function (renamed to
# avoid shadowing the ``list`` builtin).
_rules: list[tuple] = [
    # First 2 params - rule name & source. Secondary array - list of rules,
    # which can be terminated by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading),
    ("paragraph", rules_block.paragraph),
]
|
||||
|
||||
|
||||
class ParserBlock:
    """Block-level tokenizer.

    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """

    def __init__(self) -> None:
        # Register the default block rules; the optional third tuple
        # element lists the "alt" chains (rules this rule may terminate).
        self.ruler = Ruler()
        for data in _rules:
            name = data[0]
            rule = data[1]
            self.ruler.push(name, rule, {"alt": data[2] if len(data) > 2 else []})

    def tokenize(
        self, state: StateBlock, startLine: int, endLine: int, silent: bool = False
    ) -> None:
        """Generate tokens for input range.

        :param state: block state to mutate (tokens are appended to it)
        :param startLine: first line to process (inclusive)
        :param endLine: line to stop before (exclusive)
        :param silent: accepted for signature compatibility; rules are
            always invoked with silent=False here
        """
        rules = self.ruler.getRules("")
        line = startLine
        maxNesting = state.md.options.maxNesting
        hasEmptyLines = False

        while line < endLine:
            state.line = line = state.skipEmptyLines(line)
            if line >= endLine:
                break
            if state.sCount[line] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break
            if state.level >= maxNesting:
                # If nesting level exceeded - skip tail to the end.
                # That's not ordinary situation and we should not care about content.
                state.line = endLine
                break

            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, line, endLine, False):
                    break

            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not hasEmptyLines

            line = state.line

            # paragraph might "eat" one newline after it in nested lists
            if (line - 1) < endLine and state.isEmpty(line - 1):
                hasEmptyLines = True

            if line < endLine and state.isEmpty(line):
                hasEmptyLines = True
                line += 1
                state.line = line

    def parse(
        self,
        src: str,
        md,
        env,
        outTokens: list[Token],
        ords: tuple[int, ...] | None = None,
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`.

        :param src: source text
        :param md: the MarkdownIt instance (forwarded to StateBlock)
        :param env: environment sandbox (forwarded to StateBlock)
        :param outTokens: list that receives the generated tokens
        :param ords: presumably pre-computed character codes for ``src``,
            shared to avoid recomputation — confirm against StateBlock
        :returns: the state's token list, or ``None`` for empty input
        """
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens, ords)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens
|
|
@ -0,0 +1,32 @@
|
|||
"""
|
||||
* class Core
|
||||
*
|
||||
* Top-level rules executor. Glues block/inline parsers and does intermediate
|
||||
* transformations.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from .ruler import RuleFunc, Ruler
|
||||
from .rules_core import block, inline, linkify, normalize, replace, smartquotes
|
||||
from .rules_core.state_core import StateCore
|
||||
|
||||
# Core rule chain, executed in this order by ParserCore.process.
# Note: the "replacements" rule is implemented by the ``replace`` function.
_rules: list[tuple[str, RuleFunc]] = [
    ("normalize", normalize),
    ("block", block),
    ("inline", inline),
    ("linkify", linkify),
    ("replacements", replace),
    ("smartquotes", smartquotes),
]
|
||||
|
||||
|
||||
class ParserCore:
    """Top-level rules executor: applies the core rule chain to a state."""

    def __init__(self):
        # Register the default core rules in their declared order.
        self.ruler = Ruler()
        for rule_name, rule_fn in _rules:
            self.ruler.push(rule_name, rule_fn)

    def process(self, state: StateCore) -> None:
        """Executes core chain rules."""
        rules = self.ruler.getRules("")
        for rule in rules:
            rule(state)
|
|
@ -0,0 +1,124 @@
|
|||
"""Tokenizes paragraph content.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from . import rules_inline
|
||||
from .ruler import RuleFunc, Ruler
|
||||
from .rules_inline.state_inline import StateInline
|
||||
from .token import Token
|
||||
|
||||
# Parser rules
|
||||
# Inline tokenizer rules, tried in this order at each source position.
# Note: the "backticks" rule is implemented by the ``backtick`` function.
_rules: list[tuple[str, RuleFunc]] = [
    ("text", rules_inline.text),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]
|
||||
|
||||
# Post-processing rules (second pass), run by ParserInline.parse via ruler2
# after tokenization (e.g. to resolve emphasis/strikethrough delimiter pairs).
_rules2: list[tuple[str, RuleFunc]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    ("text_collapse", rules_inline.text_collapse),
]
|
||||
|
||||
|
||||
class ParserInline:
    """Inline-level tokenizer: runs the inline rule chains over a state."""

    def __init__(self) -> None:
        # Primary ruler: tokenization rules.
        self.ruler = Ruler()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        Advances ``state.pos`` past one token and caches the resulting
        position in ``state.cache``; returns ``None`` (rules report their
        success internally via their own return values).
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        # Reuse a previously computed skip for this position.
        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            # when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            # [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            # (we can replace it by preventing links from being parsed in
            # validation mode)
            #
            state.pos = state.posMax

        if not ok:
            # No rule matched: consume a single character.
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true

            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            # No rule matched: accumulate the character as pending text.
            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]:
        """Process input string and push inline tokens into `tokens`.

        Runs the tokenization pass first, then every post-processing rule
        from ``ruler2`` over the resulting state.
        """
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
|
|
@ -0,0 +1,49 @@
|
|||
- package: markdown-it/markdown-it
|
||||
version: 12.2.0
|
||||
commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283
|
||||
date: Aug 2, 2021
|
||||
notes:
|
||||
- Rename variables that use python built-in names, e.g.
|
||||
- `max` -> `maximum`
|
||||
- `len` -> `length`
|
||||
- `str` -> `string`
|
||||
- |
|
||||
Convert JS `for` loops to `while` loops
|
||||
this is generally the main difference between the codes,
|
||||
because in python you can't do e.g. `for {i=1;i<x;i++} {}`
|
||||
- |
|
||||
  `env` is a plain Python dictionary, so its keys are not accessible as attributes,
  unlike JavaScript objects.
  `options` has attribute access only to core markdown-it configuration options
|
||||
- |
|
||||
`Token.attrs` is a dictionary, instead of a list of lists.
|
||||
Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
|
||||
but in Python 3.7+ order of dictionaries is guaranteed.
|
||||
One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
|
||||
to manipulate `Token.attrs`, which have an identical signature to those upstream.
|
||||
- Use python version of `charCodeAt`
|
||||
- |
|
||||
Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
|
||||
objects and sharing those whenever possible
|
||||
This provides a significant performance boost
|
||||
- |
|
||||
In markdown_it/rules_block/reference.py,
|
||||
record line range in state.env["references"] and add state.env["duplicate_refs"]
|
||||
This is to allow renderers to report on issues regarding references
|
||||
- |
|
||||
The `MarkdownIt.__init__` signature is slightly different for updating options,
|
||||
since you must always specify the config first, e.g.
|
||||
use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
|
||||
- The default configuration preset for `MarkdownIt` is "commonmark" not "default"
|
||||
- Allow custom renderer to be passed to `MarkdownIt`
|
||||
- |
|
||||
change render method signatures
|
||||
`func(tokens, idx, options, env, slf)` to
|
||||
`func(self, tokens, idx, options, env)`
|
||||
- |
|
||||
Extensions add render methods by format
|
||||
`MarkdownIt.add_render_rule(name, function, fmt="html")`,
|
||||
rather than `MarkdownIt.renderer.rules[name] = function`
|
||||
and renderers should declare a class property `__output__ = "html"`.
|
||||
This allows for extensibility to more than just HTML renderers
|
||||
- inline tokens in tables are assigned a map (this is helpful for propagation to children)
|
|
@ -0,0 +1,27 @@
|
|||
__all__ = ("commonmark", "default", "zero", "js_default", "gfm_like")
|
||||
|
||||
from . import commonmark, default, zero
|
||||
|
||||
js_default = default
|
||||
|
||||
|
||||
class gfm_like:
    """GitHub Flavoured Markdown (GFM) like.

    This adds the linkify, table and strikethrough components to CommonMark.

    Note, it lacks task-list items and raw HTML filtering,
    to meet the full GFM specification
    (see https://github.github.com/gfm/#autolinks-extension-).
    """

    @staticmethod
    def make():
        # Start from the CommonMark preset and layer the GFM extras on top.
        config = commonmark.make()
        config["components"]["core"]["rules"].append("linkify")
        config["components"]["block"]["rules"].append("table")
        config["components"]["inline"]["rules"].append("strikethrough")
        config["components"]["inline"]["rules2"].append("strikethrough")
        config["options"]["linkify"] = True
        config["options"]["html"] = True
        return config
|
|
@ -0,0 +1,73 @@
|
|||
"""Commonmark default options.
|
||||
|
||||
This differs to presets.default,
|
||||
primarily in that it allows HTML and does not enable components:
|
||||
|
||||
- block: table
|
||||
- inline: strikethrough
|
||||
"""
|
||||
|
||||
|
||||
def make():
    """Build the CommonMark preset configuration.

    This differs from presets.default primarily in that it allows HTML and
    does not enable the table (block) or strikethrough (inline) components.
    """
    options = {
        "maxNesting": 20,  # Internal protection, recursion limit
        # Enable HTML tags in source; this is just a shorthand for
        # .enable(["html_inline", "html_block"])
        "html": True,
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on. Could be either a String or an Array.
        # For example, '«»„“' for Russian, '„“‚‘' for German, and
        # ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # “”‘’
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": True,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre..., the internal wrapper is skipped.
        "highlight": None,
    }
    block_rules = [
        "blockquote",
        "code",
        "fence",
        "heading",
        "hr",
        "html_block",
        "lheading",
        "list",
        "reference",
        "paragraph",
    ]
    inline_rules = [
        "autolink",
        "backticks",
        "emphasis",
        "entity",
        "escape",
        "html_inline",
        "image",
        "link",
        "newline",
        "text",
    ]
    components = {
        "core": {"rules": ["normalize", "block", "inline"]},
        "block": {"rules": block_rules},
        "inline": {
            "rules": inline_rules,
            "rules2": ["balance_pairs", "emphasis", "text_collapse"],
        },
    }
    return {"options": options, "components": components}
|
|
@ -0,0 +1,34 @@
|
|||
"""markdown-it default options."""
|
||||
|
||||
|
||||
def make():
    """Build the upstream markdown-it "default" preset configuration."""
    options = {
        "maxNesting": 100,  # Internal protection, recursion limit
        # Enable HTML tags in source; this is just a shorthand for
        # .disable(["html_inline", "html_block"])
        "html": False,
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on. Could be either a String or an Array.
        # For example, '«»„“' for Russian, '„“‚‘' for German, and
        # ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # “”‘’
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre..., the internal wrapper is skipped.
        "highlight": None,
    }
    # No rule overrides: every chain keeps its built-in rule set.
    components = {"core": {}, "block": {}, "inline": {}}
    return {"options": options, "components": components}
|
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
"Zero" preset, with nothing enabled. Useful for manual configuring of simple
|
||||
modes. For example, to parse bold/italic only.
|
||||
"""
|
||||
|
||||
|
||||
def make():
    """Build the "zero" preset: nothing enabled beyond plain paragraphs/text.

    Useful for manually configuring simple modes, e.g. bold/italic only.
    """
    options = {
        "maxNesting": 20,  # Internal protection, recursion limit
        # Enable HTML tags in source; this is just a shorthand for
        # .disable(["html_inline", "html_block"])
        "html": False,
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on. Could be either a String or an Array.
        # For example, '«»„“' for Russian, '„“‚‘' for German, and
        # ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # “”‘’
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # Use '/' to close single tags (<br />)
        "breaks": False,  # Convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function. Should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre..., the internal wrapper is skipped.
        "highlight": None,
    }
    components = {
        "core": {"rules": ["normalize", "block", "inline"]},
        "block": {"rules": ["paragraph"]},
        "inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]},
    }
    return {"options": options, "components": components}
|
|
@ -0,0 +1 @@
|
|||
# Marker file for PEP 561
|
|
@ -0,0 +1,339 @@
|
|||
"""
|
||||
class Renderer
|
||||
|
||||
Generates HTML from parsed token stream. Each instance has independent
|
||||
copy of rules. Those can be rewritten with ease. Also, you can add new
|
||||
rules if you create plugin and adds new token types.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import MutableMapping, Sequence
|
||||
import inspect
|
||||
from typing import Any, ClassVar
|
||||
|
||||
from .common.utils import escapeHtml, unescapeAll
|
||||
from .token import Token
|
||||
from .utils import OptionsDict
|
||||
|
||||
try:
|
||||
from typing import Protocol
|
||||
except ImportError: # Python <3.8 doesn't have `Protocol` in the stdlib
|
||||
from typing_extensions import Protocol # type: ignore[misc]
|
||||
|
||||
|
||||
class RendererProtocol(Protocol):
    """Structural interface that renderers must satisfy.

    A renderer declares its output format via ``__output__`` (e.g. ``"html"``
    for ``RendererHTML``) and exposes a ``render`` method taking the token
    stream, the parser options, and the environment sandbox.
    """

    # Output format identifier for the renderer.
    __output__: ClassVar[str]

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
    ) -> Any:
        ...
|
||||
|
||||
|
||||
class RendererHTML(RendererProtocol):
    """Contains render rules for tokens. Can be updated and extended.

    Each rule is called as an independent method with the fixed signature
    ``(self, tokens, idx, options, env)``::

        class CustomRenderer(RendererHTML):
            def strong_open(self, tokens, idx, options, env):
                return '<b>'
            def strong_close(self, tokens, idx, options, env):
                return '</b>'

        md = MarkdownIt(renderer_cls=CustomRenderer)

        result = md.render(...)

    See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
    for more details and examples.
    """

    __output__ = "html"

    def __init__(self, parser=None):
        # Collect every public method that is not part of the rendering
        # machinery itself (names starting with "render" or "_") as the
        # token-type -> render-rule mapping.
        self.rules = {
            k: v
            for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
            if not (k.startswith("render") or k.startswith("_"))
        }

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
    ) -> str:
        """Takes token stream and generates HTML.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input

        """
        result = ""

        for i, token in enumerate(tokens):

            if token.type == "inline":
                # inline containers delegate to their children
                assert token.children is not None
                result += self.renderInline(token.children, options, env)
            elif token.type in self.rules:
                result += self.rules[token.type](tokens, i, options, env)
            else:
                result += self.renderToken(tokens, i, options, env)

        return result

    def renderInline(
        self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
    ) -> str:
        """The same as ``render``, but for single token of `inline` type.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input (references, for example)
        """
        result = ""

        for i, token in enumerate(tokens):
            if token.type in self.rules:
                result += self.rules[token.type](tokens, i, options, env)
            else:
                result += self.renderToken(tokens, i, options, env)

        return result

    def renderToken(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: MutableMapping,
    ) -> str:
        """Default token renderer.

        Can be overridden by custom function

        :param tokens: list of tokens being rendered
        :param idx: token index to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        """
        result = ""
        needLf = False
        token = tokens[idx]

        # Tight list paragraphs
        if token.hidden:
            return ""

        # Insert a newline between hidden paragraph and subsequent opening
        # block-level tag.
        #
        # For example, here we should insert a newline before blockquote:
        #  - a
        #  >
        #
        if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
            result += "\n"

        # Add token name, e.g. `<img`
        result += ("</" if token.nesting == -1 else "<") + token.tag

        # Encode attributes, e.g. `<img src="foo"`
        result += self.renderAttrs(token)

        # Add a slash for self-closing tags, e.g. `<img src="foo" /`
        if token.nesting == 0 and options["xhtmlOut"]:
            result += " /"

        # Check if we need to add a newline after this tag
        if token.block:
            needLf = True

            if token.nesting == 1:
                if idx + 1 < len(tokens):
                    nextToken = tokens[idx + 1]

                    if nextToken.type == "inline" or nextToken.hidden:
                        # Block-level tag containing an inline tag.
                        #
                        needLf = False

                    elif nextToken.nesting == -1 and nextToken.tag == token.tag:
                        # Opening tag + closing tag of the same type. E.g. `<li></li>`.
                        #
                        needLf = False

        result += ">\n" if needLf else ">"

        return result

    @staticmethod
    def renderAttrs(token: Token) -> str:
        """Render token attributes to string."""
        result = ""

        for key, value in token.attrItems():
            # both key and value are HTML-escaped before being emitted
            result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'

        return result

    def renderInlineAsText(
        self,
        tokens: Sequence[Token] | None,
        options: OptionsDict,
        env: MutableMapping,
    ) -> str:
        """Special kludge for image `alt` attributes to conform CommonMark spec.

        Don't try to use it! Spec requires to show `alt` content with stripped markup,
        instead of simple escaping.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        """
        result = ""

        for token in tokens or []:
            if token.type == "text":
                result += token.content
            elif token.type == "image":
                # nested images contribute their own stripped-down alt text
                assert token.children is not None
                result += self.renderInlineAsText(token.children, options, env)
            elif token.type == "softbreak":
                result += "\n"

        return result

    ###################################################

    def code_inline(self, tokens: Sequence[Token], idx: int, options, env) -> str:
        """Render an inline code span, escaping its content."""
        token = tokens[idx]
        return (
            "<code"
            + self.renderAttrs(token)
            + ">"
            + escapeHtml(tokens[idx].content)
            + "</code>"
        )

    def code_block(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: MutableMapping,
    ) -> str:
        """Render an indented code block (no language/highlighting)."""
        token = tokens[idx]

        return (
            "<pre"
            + self.renderAttrs(token)
            + "><code>"
            + escapeHtml(tokens[idx].content)
            + "</code></pre>\n"
        )

    def fence(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: MutableMapping,
    ) -> str:
        """Render a fenced code block, optionally via options.highlight."""
        token = tokens[idx]
        # info string, e.g. "python title=x" -> language name + extra attrs
        info = unescapeAll(token.info).strip() if token.info else ""
        langName = ""
        langAttrs = ""

        if info:
            arr = info.split(maxsplit=1)
            langName = arr[0]
            if len(arr) == 2:
                langAttrs = arr[1]

        if options.highlight:
            # a falsy highlighter result falls back to plain escaping
            highlighted = options.highlight(
                token.content, langName, langAttrs
            ) or escapeHtml(token.content)
        else:
            highlighted = escapeHtml(token.content)

        if highlighted.startswith("<pre"):
            # highlighter supplied its own wrapper; emit it as-is
            return highlighted + "\n"

        # If language exists, inject class gently, without modifying original token.
        # May be, one day we will add .deepClone() for token and simplify this part, but
        # now we prefer to keep things local.
        if info:
            # Fake token just to render attributes
            tmpToken = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
            tmpToken.attrJoin("class", options.langPrefix + langName)

            return (
                "<pre><code"
                + self.renderAttrs(tmpToken)
                + ">"
                + highlighted
                + "</code></pre>\n"
            )

        return (
            "<pre><code"
            + self.renderAttrs(token)
            + ">"
            + highlighted
            + "</code></pre>\n"
        )

    def image(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: MutableMapping,
    ) -> str:
        """Render an image tag, deriving `alt` from the token's children."""
        token = tokens[idx]

        # "alt" attr MUST be set, even if empty. Because it's mandatory and
        # should be placed on proper position for tests.

        assert (
            token.attrs and "alt" in token.attrs
        ), '"image" token\'s attrs must contain `alt`'

        # Replace content with actual value

        token.attrSet("alt", self.renderInlineAsText(token.children, options, env))

        return self.renderToken(tokens, idx, options, env)

    def hardbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args
    ) -> str:
        """Render a hard line break (`<br />` in XHTML mode)."""
        return "<br />\n" if options.xhtmlOut else "<br>\n"

    def softbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args
    ) -> str:
        """Render a soft break: `<br>` only when options.breaks is set."""
        return (
            ("<br />\n" if options.xhtmlOut else "<br>\n") if options.breaks else "\n"
        )

    def text(self, tokens: Sequence[Token], idx: int, *args) -> str:
        """Render plain text, HTML-escaped."""
        return escapeHtml(tokens[idx].content)

    def html_block(self, tokens: Sequence[Token], idx: int, *args) -> str:
        """Emit raw block-level HTML content unmodified."""
        return tokens[idx].content

    def html_inline(self, tokens: Sequence[Token], idx: int, *args) -> str:
        """Emit raw inline HTML content unmodified."""
        return tokens[idx].content
|
|
@ -0,0 +1,237 @@
|
|||
"""
|
||||
class Ruler
|
||||
|
||||
Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
|
||||
[[MarkdownIt#inline]] to manage sequences of functions (rules):
|
||||
|
||||
- keep rules in defined order
|
||||
- assign the name to each rule
|
||||
- enable/disable rules
|
||||
- add/replace rules
|
||||
- allow assign rules to additional named chains (in the same)
|
||||
- caching lists of active rules
|
||||
|
||||
You will not need use this class directly until write plugins. For simple
|
||||
rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
|
||||
[[MarkdownIt.use]].
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable, Iterable, MutableMapping
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from markdown_it._compat import DATACLASS_KWARGS
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
|
||||
class StateBase:
    """Common base for parser state objects.

    Keeps the source text together with a cached tuple of its character
    codes (``srcCharCode``), recomputed whenever ``src`` is assigned.
    """

    # Character codes of ``src``; kept in sync by the ``src`` setter.
    srcCharCode: tuple[int, ...]

    def __init__(self, src: str, md: MarkdownIt, env: MutableMapping):
        self.src = src
        self.env = env
        self.md = md

    @property
    def src(self) -> str:
        """The source text currently being parsed."""
        return self._src

    @src.setter
    def src(self, value: str) -> None:
        self._src = value
        # Cache ord() of every character so rules can compare char codes
        # without repeated calls.
        self.srcCharCode = tuple(map(ord, value))
|
||||
|
||||
|
||||
# The first positional arg is always a subtype of `StateBase`. Other
# arguments may or may not exist, based on the rule's type (block,
# core, inline). Return type is either `None` or `bool` based on the
# rule's type.
# NOTE: kept as a bare ``Callable`` alias because the per-chain
# signatures differ and cannot be expressed as a single Callable[...].
RuleFunc = Callable
|
||||
|
||||
|
||||
@dataclass(**DATACLASS_KWARGS)
class Rule:
    """A single named rule entry managed by :class:`Ruler`."""

    # Rule name within the chain.
    name: str
    # Whether the rule participates in compiled chains (see Ruler.__compile__).
    enabled: bool
    # The rule implementation; excluded from repr to keep output readable.
    fn: RuleFunc = field(repr=False)
    # Names of alternate chains this rule also belongs to.
    alt: list[str]
|
||||
|
||||
|
||||
class Ruler:
    """Helper class to manage ordered sequences of named functions (rules).

    Each rule has a name, an enabled flag, a callback, and a list of
    alternate chain names it also belongs to. Lists of currently active
    rule functions per chain are compiled lazily and cached; any mutation
    of the rule set invalidates the cache.
    """

    def __init__(self):
        # List of added rules.
        self.__rules__: list[Rule] = []
        # Cached rule chains.
        # First level - chain name, '' for default.
        # Second level - digital anchor for fast filtering by charcodes.
        self.__cache__: dict[str, list[RuleFunc]] | None = None

    def __find__(self, name: str) -> int:
        """Find rule index by name; return -1 when no rule has that name."""
        for i, rule in enumerate(self.__rules__):
            if rule.name == name:
                return i
        return -1

    def __compile__(self) -> None:
        """Build rules lookup cache."""
        chains = {""}
        # collect unique names of all alternate chains used by enabled rules
        for rule in self.__rules__:
            if not rule.enabled:
                continue
            for name in rule.alt:
                chains.add(name)
        self.__cache__ = {}
        for chain in chains:
            self.__cache__[chain] = []
            for rule in self.__rules__:
                if not rule.enabled:
                    continue
                # the default chain ('') takes every enabled rule; a named
                # chain takes only the rules that list it in `alt`
                if chain and (chain not in rule.alt):
                    continue
                self.__cache__[chain].append(rule.fn)

    def at(self, ruleName: str, fn: RuleFunc, options=None):
        """Replace rule by name with new function & options.

        :param ruleName: rule name to replace.
        :param fn: new rule function.
        :param options: new rule options (not mandatory); only the "alt"
            key (list of alternate chain names) is recognised.
        :raises: KeyError if name not found
        """
        index = self.__find__(ruleName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {ruleName}")
        self.__rules__[index].fn = fn
        self.__rules__[index].alt = options.get("alt", [])
        self.__cache__ = None

    def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None):
        """Add new rule to chain before one with given name.

        :param beforeName: new rule will be added before this one.
        :param ruleName: name of the added rule.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        :raises: KeyError if name not found
        """
        index = self.__find__(beforeName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {beforeName}")
        self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", [])))
        self.__cache__ = None

    def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None):
        """Add new rule to chain after one with given name.

        :param afterName: new rule will be added after this one.
        :param ruleName: name of the added rule.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        :raises: KeyError if name not found
        """
        index = self.__find__(afterName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {afterName}")
        self.__rules__.insert(
            index + 1, Rule(ruleName, True, fn, options.get("alt", []))
        )
        self.__cache__ = None

    def push(self, ruleName: str, fn: RuleFunc, options=None):
        """Push new rule to the end of chain.

        :param ruleName: name of the added rule.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        """
        self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", [])))
        self.__cache__ = None

    def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False):
        """Enable rules with given names.

        :param names: name or list of rule names to enable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        result = []
        for name in names:
            idx = self.__find__(name)
            if (idx < 0) and ignoreInvalid:
                continue
            if (idx < 0) and not ignoreInvalid:
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = True
            result.append(name)
        self.__cache__ = None
        return result

    def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False):
        """Enable rules with given names, and disable everything else.

        :param names: name or list of rule names to enable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        for rule in self.__rules__:
            rule.enabled = False
        # Fix: return the found rule names as the docstring promises
        # (previously this method implicitly returned None).
        return self.enable(names, ignoreInvalid)

    def disable(self, names: str | Iterable[str], ignoreInvalid: bool = False):
        """Disable rules with given names.

        :param names: name or list of rule names to disable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        result = []
        for name in names:
            idx = self.__find__(name)
            if (idx < 0) and ignoreInvalid:
                continue
            if (idx < 0) and not ignoreInvalid:
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = False
            result.append(name)
        self.__cache__ = None
        return result

    def getRules(self, chainName: str) -> list[RuleFunc]:
        """Return array of active functions (rules) for given chain name.

        It analyzes rules configuration, compiles caches if not exists and
        returns result.

        Default chain name is `''` (empty string). It can't be skipped.
        That's done intentionally, to keep signature monomorphic for high speed.
        """
        if self.__cache__ is None:
            self.__compile__()
            assert self.__cache__ is not None
        # Chain can be empty, if rules disabled. But we still have to return Array.
        return self.__cache__.get(chainName, []) or []

    def get_all_rules(self) -> list[str]:
        """Return all available rule names."""
        return [r.name for r in self.__rules__]

    def get_active_rules(self) -> list[str]:
        """Return the active rule names."""
        return [r.name for r in self.__rules__ if r.enabled]
|
|
@ -0,0 +1,27 @@
|
|||
# Explicit public API of the block-rules package: the block parser state
# class plus every built-in block-level rule function.
__all__ = (
    "StateBlock",
    "paragraph",
    "heading",
    "lheading",
    "code",
    "fence",
    "hr",
    "list_block",
    "reference",
    "blockquote",
    "html_block",
    "table",
)
|
||||
|
||||
from .blockquote import blockquote
|
||||
from .code import code
|
||||
from .fence import fence
|
||||
from .heading import heading
|
||||
from .hr import hr
|
||||
from .html_block import html_block
|
||||
from .lheading import lheading
|
||||
from .list import list_block
|
||||
from .paragraph import paragraph
|
||||
from .reference import reference
|
||||
from .state_block import StateBlock
|
||||
from .table import table
|
|
@ -0,0 +1,299 @@
|
|||
# Block quotes
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a blockquote (``> ...``) starting at ``startLine``.

    Consumes the ``>`` marker line by line, temporarily re-pointing the
    per-line offset arrays (``bMarks``, ``tShift``, ``sCount``, ``bsCount``)
    past the marker, tokenizes the inner content, then restores the original
    offsets.

    :param state: block parser state (mutated in place).
    :param startLine: first candidate line.
    :param endLine: last line available to the parser.
    :param silent: validation mode - only report whether a blockquote starts
        here, without producing tokens.
    :return: True if a blockquote was recognised, False otherwise.
    """

    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]
    # NOTE(review): local name shadows the builtin max()
    max = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if (state.sCount[startLine] - state.blkIndent) >= 4:
        return False

    # check the block quote marker
    if state.srcCharCode[pos] != 0x3E:  # /* > */
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    # set offset past spaces and ">"
    initial = offset = state.sCount[startLine] + 1

    # bounds-safe peek at the character after '>'
    try:
        second_char_code: int | None = state.srcCharCode[pos]
    except IndexError:
        second_char_code = None

    # skip one optional space after '>'
    if second_char_code == 0x20:  # /* space */
        # ' > test '
        #     ^ -- position start of line here:
        pos += 1
        initial += 1
        offset += 1
        adjustTab = False
        spaceAfterMarker = True
    elif second_char_code == 0x09:  # /* tab */
        spaceAfterMarker = True

        if (state.bsCount[startLine] + offset) % 4 == 3:
            # '  >\t  test '
            #       ^ -- position start of line here (tab has width==1)
            pos += 1
            initial += 1
            offset += 1
            adjustTab = False
        else:
            # ' >\t  test '
            #    ^ -- position start of line here + shift bsCount slightly
            #         to make extra space appear
            adjustTab = True

    else:
        spaceAfterMarker = False

    # remember the original line-begin offset so it can be restored later
    oldBMarks = [state.bMarks[startLine]]
    state.bMarks[startLine] = pos

    # consume leading whitespace, expanding tabs to a width-4 grid
    while pos < max:
        ch = state.srcCharCode[pos]

        if isSpace(ch):
            if ch == 0x09:  # / tab /
                offset += (
                    4
                    - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
                )
            else:
                offset += 1

        else:
            break

        pos += 1

    oldBSCount = [state.bsCount[startLine]]
    state.bsCount[startLine] = (
        state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
    )

    lastLineEmpty = pos >= max

    oldSCount = [state.sCount[startLine]]
    state.sCount[startLine] = offset - initial

    oldTShift = [state.tShift[startLine]]
    state.tShift[startLine] = pos - state.bMarks[startLine]

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    #  1. an empty line outside:
    #     ```
    #     > test
    #
    #     ```
    #  2. an empty line inside:
    #     ```
    #     >
    #     test
    #     ```
    #  3. another tag:
    #     ```
    #     > test
    #      - - -
    #     ```

    # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
    nextLine = startLine + 1
    while nextLine < endLine:

        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        #    > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        max = state.eMarks[nextLine]

        if pos >= max:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        # mirrors the JS `state.srcCharCode[pos++] === 0x3E` - the condition
        # is evaluated first, then pos is advanced unconditionally
        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented  # /* > */
        pos += 1
        if evaluatesTrue:
            # This line is inside the blockquote.

            # set offset past spaces and ">"
            initial = offset = state.sCount[nextLine] + 1

            # bounds-safe peek at the character after '>'
            try:
                next_char: int | None = state.srcCharCode[pos]
            except IndexError:
                next_char = None

            # skip one optional space after '>'
            if next_char == 0x20:  # /* space */
                # ' > test '
                #     ^ -- position start of line here:
                pos += 1
                initial += 1
                offset += 1
                adjustTab = False
                spaceAfterMarker = True
            elif next_char == 0x09:  # /* tab */
                spaceAfterMarker = True

                if (state.bsCount[nextLine] + offset) % 4 == 3:
                    # '  >\t  test '
                    #       ^ -- position start of line here (tab has width==1)
                    pos += 1
                    initial += 1
                    offset += 1
                    adjustTab = False
                else:
                    # ' >\t  test '
                    #    ^ -- position start of line here + shift bsCount slightly
                    #         to make extra space appear
                    adjustTab = True

            else:
                spaceAfterMarker = False

            oldBMarks.append(state.bMarks[nextLine])
            state.bMarks[nextLine] = pos

            # consume leading whitespace, expanding tabs as above
            while pos < max:
                ch = state.srcCharCode[pos]

                if isSpace(ch):
                    if ch == 0x09:
                        offset += (
                            4
                            - (
                                offset
                                + state.bsCount[nextLine]
                                + (1 if adjustTab else 0)
                            )
                            % 4
                        )
                    else:
                        offset += 1
                else:
                    break

                pos += 1

            lastLineEmpty = pos >= max

            oldBSCount.append(state.bsCount[nextLine])
            state.bsCount[nextLine] = (
                state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
            )

            oldSCount.append(state.sCount[nextLine])
            state.sCount[nextLine] = offset - initial

            oldTShift.append(state.tShift[nextLine])
            state.tShift[nextLine] = pos - state.bMarks[nextLine]

            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = False

        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                oldBMarks.append(state.bMarks[nextLine])
                oldBSCount.append(state.bsCount[nextLine])
                oldTShift.append(state.tShift[nextLine])
                oldSCount.append(state.sCount[nextLine])
                state.sCount[nextLine] -= state.blkIndent

            break

        oldBMarks.append(state.bMarks[nextLine])
        oldBSCount.append(state.bsCount[nextLine])
        oldTShift.append(state.tShift[nextLine])
        oldSCount.append(state.sCount[nextLine])

        # A negative indentation means that this is a paragraph continuation
        #
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    # `lines` aliases token.map; lines[1] is patched after tokenizing below
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore original tShift; this might not be necessary since the parser
    # has already been here, but just to make sure we can do that.
    for i, item in enumerate(oldTShift):
        state.bMarks[i + startLine] = oldBMarks[i]
        state.tShift[i + startLine] = item
        state.sCount[i + startLine] = oldSCount[i]
        state.bsCount[i + startLine] = oldBSCount[i]

    state.blkIndent = oldIndent

    return True
|
|
@ -0,0 +1,36 @@
|
|||
"""Code block (4 spaces padded)."""
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
    """Parse an indented (4-space) code block."""

    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    # the opening line must be indented at least 4 columns past blkIndent
    if state.sCount[startLine] - state.blkIndent < 4:
        return False

    # Scan forward: empty lines are tentatively skipped over, sufficiently
    # indented lines extend the block, anything else ends it. `last` tracks
    # the line just past the final indented line, so trailing blank lines
    # are excluded from the block.
    scan = startLine + 1
    last = scan
    while scan < endLine:
        if state.isEmpty(scan):
            scan += 1
        elif state.sCount[scan] - state.blkIndent >= 4:
            scan += 1
            last = scan
        else:
            break

    state.line = last

    token = state.push("code_block", "code", 0)
    token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n"
    token.map = [startLine, state.line]

    return True
|
|
@ -0,0 +1,104 @@
|
|||
# fences (``` lang, ~~~ lang)
|
||||
import logging
|
||||
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a fenced code block (``` or ~~~, with optional info string).

    :param state: block parser state (mutated in place).
    :param startLine: first candidate line.
    :param endLine: last line available to the parser.
    :param silent: validation mode - only report whether a fence starts here.
    :return: True if a fence was recognised, False otherwise.
    """

    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    haveEndMarker = False
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # an opening fence needs at least 3 marker characters on the line
    if pos + 3 > maximum:
        return False

    marker = state.srcCharCode[pos]

    # /* ~ */ /* ` */
    if marker != 0x7E and marker != 0x60:
        return False

    # scan marker length
    mem = pos
    pos = state.skipChars(pos, marker)

    length = pos - mem

    if length < 3:
        return False

    markup = state.src[mem:pos]
    params = state.src[pos:maximum]

    # /* ` */
    # the info string of a backtick fence must not contain backticks
    if marker == 0x60:
        if chr(marker) in params:
            return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        # NOTE(review): assumes pos is a valid index into srcCharCode here -
        # confirm that eMarks never points past the end of the char array
        if state.srcCharCode[pos] != marker:
            continue

        if state.sCount[nextLine] - state.blkIndent >= 4:
            # closing fence should be indented less than 4 spaces
            continue

        pos = state.skipChars(pos, marker)

        # closing code fence must be at least as long as the opening one
        if pos - mem < length:
            continue

        # make sure tail has spaces only
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        haveEndMarker = True
        # found!
        break

    # If a fence has heading spaces, they should be removed from its inner block
    length = state.sCount[startLine]

    # without a closing marker the block ends on the line before `nextLine`
    state.line = nextLine + (1 if haveEndMarker else 0)

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, length, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True
|
|
@ -0,0 +1,72 @@
|
|||
""" Atex heading (#, ##, ...) """
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..common.utils import isSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse an ATX heading: 1-6 ``#`` characters followed by whitespace."""

    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    codes = state.srcCharCode

    def char_after(index: int) -> int | None:
        # Bounds-safe character-code lookup (None past end of input).
        return codes[index] if index < len(codes) else None

    ch: int | None = codes[pos]

    # the line must start with '#' and be non-empty
    if ch != 0x23 or pos >= maximum:  # /* # */
        return False

    # count consecutive '#' characters to determine the heading level
    level = 1
    pos += 1
    ch = char_after(pos)
    while ch == 0x23 and pos < maximum and level <= 6:  # /* # */
        level += 1
        pos += 1
        ch = char_after(pos)

    # more than 6 '#', or marker not followed by whitespace -> not a heading
    if level > 6 or (pos < maximum and not isSpace(ch)):
        return False

    if silent:
        return True

    # Let's cut tails like '    ###  ' from the end of string
    maximum = state.skipSpacesBack(maximum, pos)
    tmp = state.skipCharsBack(maximum, 0x23, pos)  # '#'
    if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
        maximum = tmp

    state.line = startLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = "#" * level
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[pos:maximum].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = "#" * level

    return True
|
|
@ -0,0 +1,54 @@
|
|||
"""Horizontal rule
|
||||
|
||||
At least 3 of these characters on a line * - _
|
||||
"""
|
||||
import logging
|
||||
|
||||
from ..common.utils import isSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a thematic break: at least three '*', '-' or '_' characters,
    optionally interleaved with spaces, on a line of their own."""

    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    marker = state.srcCharCode[pos]
    pos += 1

    # valid hr markers: '*' (0x2A), '-' (0x2D), '_' (0x5F)
    if marker not in (0x2A, 0x2D, 0x5F):
        return False

    # markers can be mixed with spaces, but there should be at least 3 of them
    count = 1
    while pos < maximum:
        current = state.srcCharCode[pos]
        pos += 1
        if current == marker:
            count += 1
        elif not isSpace(current):
            # any other character disqualifies the line
            return False

    if count < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    token.markup = chr(marker) * (count + 1)

    return True
|
|
@ -0,0 +1,91 @@
|
|||
# HTML block
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..common.html_blocks import block_names
|
||||
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
#
# Each entry is (open-pattern, close-pattern, can-terminate-paragraph).
# Entries are tried in order against the first line of the candidate block;
# the first open-pattern that matches selects the closing condition.
HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [
    (
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    (
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
|
||||
|
||||
|
||||
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a raw HTML block using the HTML_SEQUENCES open/close patterns."""
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # raw HTML is only recognised when the `html` option is enabled
    if not state.md.options.get("html", None):
        return False

    if state.srcCharCode[pos] != 0x3C:  # /* < */
        return False

    lineText = state.src[pos:maximum]

    # pick the first sequence whose opening pattern matches this line
    html_seq = next(
        (candidate for candidate in HTML_SEQUENCES if candidate[0].search(lineText)),
        None,
    )

    if not html_seq:
        return False

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return html_seq[2]

    nextLine = startLine + 1

    # If we are here - we detected HTML block.
    # Let's roll down till block end.
    if not html_seq[1].search(lineText):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]
            lineText = state.src[pos:maximum]

            if html_seq[1].search(lineText):
                # a non-empty terminating line is part of the block
                if len(lineText) != 0:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True
|
|
@ -0,0 +1,90 @@
|
|||
# lheading (---, ==)
|
||||
import logging
|
||||
|
||||
from ..ruler import Ruler
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a setext ("lheading") heading: text underlined with ``=`` or ``-``.

    :param state: block parser state (mutated in place).
    :param startLine: first candidate line (the heading text).
    :param endLine: last line available to the parser.
    :param silent: unused here; the rule never enters validation mode early.
    :return: True if a setext heading was recognised, False otherwise.
    """

    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    level = None
    nextLine = startLine + 1
    ruler: Ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    # jump line-by-line until empty one or EOF
    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # Check for underline in setext header
        if state.sCount[nextLine] >= state.blkIndent:
            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]

            if pos < maximum:
                marker = state.srcCharCode[pos]

                # /* - */ /* = */
                if marker == 0x2D or marker == 0x3D:
                    pos = state.skipChars(pos, marker)
                    pos = state.skipSpaces(pos)

                    # /* = */
                    # a line of only marker chars (plus trailing spaces)
                    # is a valid underline; '=' -> h1, '-' -> h2
                    if pos >= maximum:
                        level = 1 if marker == 0x3D else 2
                        break

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break

        nextLine += 1

    if not level:
        # Didn't find valid underline
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine + 1

    # NOTE: `marker` is only read below when `level` was set, and level is
    # only set on the path where `marker` was assigned, so it is always bound.
    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = chr(marker)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = chr(marker)

    state.parentType = oldParentType

    return True
|
|
@ -0,0 +1,344 @@
|
|||
# Lists
|
||||
import logging
|
||||
|
||||
from ..common.utils import isSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def skipBulletListMarker(state: StateBlock, startLine: int):
    """Search ``[-+*][\\n ]``; return the position just after the marker on
    success, or -1 when the line does not start with a bullet list marker."""

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    marker = state.srcCharCode[pos]
    pos += 1

    # valid bullets: '*' (0x2A), '-' (0x2D), '+' (0x2B)
    if marker not in (0x2A, 0x2D, 0x2B):
        return -1

    # the marker must be followed by whitespace (or end of line)
    if pos < maximum and not isSpace(state.srcCharCode[pos]):
        # " -test " - is not a list item
        return -1

    return pos
|
||||
|
||||
|
||||
def skipOrderedListMarker(state: StateBlock, startLine: int):
    """Search ``\\d+[.)][\\n ]``; return the position just after the marker
    on success, or -1 when the line does not start with an ordered-list
    marker (up to 9 digits followed by '.' or ')')."""

    start = state.bMarks[startLine] + state.tShift[startLine]
    pos = start
    maximum = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if pos + 1 >= maximum:
        return -1

    ch = state.srcCharCode[pos]
    pos += 1

    # first character must be a digit '0'..'9'
    if not (0x30 <= ch <= 0x39):
        return -1

    while True:
        # EOL -> fail
        if pos >= maximum:
            return -1

        ch = state.srcCharCode[pos]
        pos += 1

        if 0x30 <= ch <= 0x39:
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if pos - start >= 10:
                return -1
            continue

        # found valid marker terminator: ')' (0x29) or '.' (0x2E)
        if ch in (0x29, 0x2E):
            break

        return -1

    # the marker must be followed by whitespace (or end of line)
    if pos < maximum and not isSpace(state.srcCharCode[pos]):
        # " 1.test " - is not a list item
        return -1

    return pos
|
||||
|
||||
|
||||
def markTightParagraphs(state: StateBlock, idx: int):
    """Hide paragraph_open/paragraph_close token pairs inside a tight list.

    Scans the token stream starting just after ``idx`` and marks matching
    paragraph tokens at the list-content nesting level as hidden.
    """
    target_level = state.level + 2
    tokens = state.tokens

    i = idx + 2
    end = len(tokens) - 2
    while i < end:
        tok = tokens[i]
        if tok.level == target_level and tok.type == "paragraph_open":
            # hide both the opener and its matching closer (two tokens ahead)
            tokens[i + 2].hidden = True
            tok.hidden = True
            i += 2
        i += 1
|
||||
|
||||
|
||||
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for ordered and bullet lists.

    Detects a list marker at ``startLine``, then repeatedly tokenizes list
    items (via a recursive subparser call) until the list terminates.
    In ``silent`` (validation) mode only detection is performed and no
    tokens are pushed.  Returns True when a list was consumed.
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    isTerminatingParagraph = False
    tight = True

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    if silent and state.parentType == "paragraph":
        # Next list item should still terminate previous list item
        #
        # This code can fail if plugins use blkIndent as well as lists,
        # but I hope the spec gets fixed long before that happens.
        #
        if state.tShift[startLine] >= state.blkIndent:
            isTerminatingParagraph = True

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    if posAfterMarker >= 0:
        isOrdered = True
        start = state.bMarks[startLine] + state.tShift[startLine]
        # posAfterMarker - 1 excludes the '.'/')' terminator from the number.
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker >= 0:
            isOrdered = False
        else:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if isTerminatingParagraph:
        if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]:
            return False

    # We should terminate list on style change. Remember first one to compare.
    markerCharCode = state.srcCharCode[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list: remember token index so tight paragraphs can be marked later.
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}

    else:
        token = state.push("bullet_list_open", "ul", 1)

    # listLines aliases token.map so the end line can be patched in later.
    token.map = listLines = [startLine, 0]
    token.markup = chr(markerCharCode)

    #
    # Iterate list items
    #

    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        # Offset of content start relative to line begin, in visual columns.
        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # Skip spaces/tabs after the marker, expanding tabs to 4-column stops.
        while pos < maximum:
            ch = state.srcCharCode[pos]

            if ch == 0x09:  # \t
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == 0x20:  # \s
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        if contentStart >= maximum:
            # trimming space in "-    \n  3" case, indent is 1 here
            indentAfterMarker = 1
        else:
            indentAfterMarker = offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = chr(markerCharCode)
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokeniz does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        # Restore the state mutated before the subparser call (exact
        # reverse order of the save above).
        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = chr(markerCharCode)

        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        contentStart = state.bMarks[startLine]

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        # if it's indented more than 3 spaces, it should be a code block
        if state.sCount[startLine] - state.blkIndent >= 4:
            break

        # fail if terminating block found
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        # marker style changed ('-' vs '*' vs '+', or '.' vs ')') -> new list
        if markerCharCode != state.srcCharCode[posAfterMarker - 1]:
            break

    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = chr(markerCharCode)

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
|
|
@ -0,0 +1,67 @@
|
|||
"""Paragraph."""
|
||||
import logging
|
||||
|
||||
from ..ruler import Ruler
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False) -> bool:
    """Block rule for paragraphs (the fallback rule, always succeeds).

    Consumes lines from ``startLine`` until an empty line, EOF, or a
    terminator rule fires, then pushes a ``paragraph_open`` / ``inline`` /
    ``paragraph_close`` token triple.

    NOTE: the ``endLine`` argument is immediately overridden with
    ``state.lineMax`` below (mirrors upstream markdown-it behavior).
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    nextLine = startLine + 1
    ruler: Ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")
    endLine = state.lineMax

    oldParentType = state.parentType
    state.parentType = "paragraph"

    # jump line-by-line until empty one or EOF
    while nextLine < endLine:
        if state.isEmpty(nextLine):
            break
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        nextLine += 1

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    state.line = nextLine

    token = state.push("paragraph_open", "p", 1)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("paragraph_close", "p", -1)

    state.parentType = oldParentType

    return True
|
|
@ -0,0 +1,218 @@
|
|||
import logging
|
||||
|
||||
from ..common.utils import charCodeAt, isSpace, normalizeReference
|
||||
from .state_block import StateBlock
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    """Block rule for link reference definitions: ``[label]: dest 'title'``.

    On success stores the definition in ``state.env["references"]``
    (duplicates go to ``state.env["duplicate_refs"]``) and returns True.
    Multi-line definitions are supported; ``lines`` counts the newlines
    consumed so ``state.line`` can be advanced correctly.
    """
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    lines = 0
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if state.srcCharCode[pos] != 0x5B:  # /* [ */
        return False

    # Simple check to quickly interrupt scan on [link](url) at the start of line.
    # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54
    while pos < maximum:
        # /* ] */  /* \ */  /* : */
        if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C:
            if pos + 1 == maximum:
                return False
            if state.srcCharCode[pos + 1] != 0x3A:
                return False
            break
        pos += 1

    endLine = state.lineMax

    # jump line-by-line until empty one or EOF
    terminatorRules = state.md.block.ruler.getRules("reference")

    oldParentType = state.parentType
    state.parentType = "reference"

    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        nextLine += 1

    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    # Scan the label: pos 0 is the '[', so start at 1.
    labelEnd = None
    pos = 1
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            lines += 1
        elif ch == 0x5C:  # /* \ */
            # skip the escaped character (count it if it is a newline)
            pos += 1
            if pos < maximum and charCodeAt(string, pos) == 0x0A:
                lines += 1
        pos += 1

    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not res.ok:
        return False

    href = state.md.normalizeLink(res.str)
    if not state.md.validateLink(href):
        return False

    pos = res.pos
    lines += res.lines

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = lines

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
    # A title must be separated from the destination by whitespace
    # (start != pos), otherwise roll back to the saved cursor.
    if pos < maximum and start != pos and res.ok:
        title = res.str
        pos = res.pos
        lines += res.lines
    else:
        title = ""
        pos = destEndPos
        lines = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        if title:
            # garbage at the end of the line after title,
            # but it could still be a valid reference if we roll back
            title = ""
            pos = destEndPos
            lines = destEndLineNo
            while pos < maximum:
                ch = charCodeAt(string, pos)
                if not isSpace(ch):
                    break
                pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = startLine + lines + 1

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    # First definition of a label wins; later ones are recorded separately.
    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    state.parentType = oldParentType

    return True
|
|
@ -0,0 +1,230 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..common.utils import isSpace
|
||||
from ..ruler import StateBase
|
||||
from ..token import Token
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it.main import MarkdownIt
|
||||
|
||||
|
||||
class StateBlock(StateBase):
    """State of the block-level parser.

    Holds the token stream under construction plus per-line offset caches
    (``bMarks``/``eMarks``/``tShift``/``sCount``/``bsCount``) computed once
    from the source in ``__init__``.
    """

    def __init__(
        self,
        src: str,
        md: MarkdownIt,
        env,
        tokens: list[Token],
        srcCharCode: tuple[int, ...] | None = None,
    ):
        """Initialise state and build the per-line offset caches.

        :param src: the markdown source text
        :param md: parser instance
        :param env: sandbox dict shared across rules
        :param tokens: output token list (mutated in place)
        :param srcCharCode: pre-computed char codes for ``src``, to avoid
            recomputing them when the caller already has them
        """
        if srcCharCode is not None:
            self._src = src
            self.srcCharCode = srcCharCode
        else:
            # NOTE(review): assigning ``self.src`` presumably goes through a
            # property on StateBase that also derives srcCharCode — confirm.
            self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks = []  # line begin offsets for fast jumps
        self.eMarks = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift = []
        self.sCount = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        indent_found = False

        start = pos = indent = offset = 0
        length = len(self.src)

        # Single pass over the source: record, for every line, its begin/end
        # offsets and its indent both in characters and in visual columns.
        for pos, character in enumerate(self.srcCharCode):
            if not indent_found:
                if isSpace(character):
                    indent += 1

                    if character == 0x09:  # tab expands to next 4-column stop
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                else:
                    indent_found = True

            # end of line, or last character of a source with no trailing \n
            if character == 0x0A or pos == length - 1:
                if character != 0x0A:
                    pos += 1
                self.bMarks.append(start)
                self.eMarks.append(pos)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)

                indent_found = False
                indent = 0
                offset = 0
                start = pos + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype: str, tag: str, nesting: int) -> Token:
        """Push new token to "stream"."""
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True when ``line`` contains only whitespace."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first non-empty line at or after ``from_pos``."""
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                pass
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position."""
        while pos < len(self.src):
            if not isSpace(self.srcCharCode[pos]):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if not isSpace(self.srcCharCode[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip char codes from given position."""
        while pos < len(self.src):
            if self.srcCharCode[pos] != code:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip char codes reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source, removing up to ``indent`` columns
        of leading indentation from each line."""
        line = begin
        if begin >= end:
            return ""

        queue = [""] * (end - begin)

        i = 1
        while line < end:
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            # include the trailing newline except (optionally) on the last line
            if line + 1 < end or keepLastLF:
                last = self.eMarks[line] + 1
            else:
                last = self.eMarks[line]

            # advance ``first`` past up to ``indent`` columns of whitespace
            while (first < last) and (lineIndent < indent):
                ch = self.srcCharCode[first]
                if isSpace(ch):
                    if ch == 0x09:
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # virtual indent (blockquote hack) counts as one column
                    lineIndent += 1
                else:
                    break
                first += 1

            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
            else:
                queue[i - 1] = self.src[first:last]

            line += 1
            i += 1

        return "".join(queue)
|
|
@ -0,0 +1,238 @@
|
|||
# GFM table, https://github.github.com/gfm/#tables-extension-
|
||||
import re
|
||||
|
||||
from ..common.utils import charCodeAt, isSpace
|
||||
from .state_block import StateBlock
|
||||
|
||||
# A delimiter-row cell: optional ':' alignment markers around one or more '-'.
headerLineRe = re.compile(r"^:?-+:?$")
# Leading or trailing pipe of a row (kept for parity with upstream markdown-it).
enclosingPipesRe = re.compile(r"^\||\|$")
|
||||
|
||||
|
||||
def getLine(state: StateBlock, line: int):
    """Return the text of ``line``, stripped of its leading indent."""
    begin = state.bMarks[line] + state.tShift[line]
    end = state.eMarks[line]
    return state.src[begin:end]
|
||||
|
||||
|
||||
def escapedSplit(string):
    """Split a table row on unescaped ``|`` characters.

    A pipe preceded by a backslash (``\\|``) does not split the row; the
    backslash is dropped from the cell text, mirroring upstream markdown-it.
    Returns the list of raw (unstripped) cell strings; an empty input
    yields ``[""]``.
    """
    result = []
    pos = 0
    # renamed from ``max``, which shadowed the builtin
    maximum = len(string)
    isEscaped = False
    lastPos = 0
    current = ""

    while pos < maximum:
        # ord() inlined instead of calling the charCodeAt helper: pos is
        # always in range here, which also removes the one guaranteed
        # out-of-range helper call the old loop made on exit.
        ch = ord(string[pos])
        if ch == 0x7C:  # /* | */
            if not isEscaped:
                # pipe separating cells, '|'
                result.append(current + string[lastPos:pos])
                current = ""
                lastPos = pos + 1
            else:
                # escaped pipe, '\|': drop the backslash, keep the pipe
                current += string[lastPos : pos - 1]
                lastPos = pos

        isEscaped = ch == 0x5C  # /* \ */
        pos += 1

    # flush the final cell (everything after the last unescaped pipe)
    result.append(current + string[lastPos:])

    return result
|
||||
|
||||
|
||||
def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for GFM tables.

    ``startLine`` must be the header row and ``startLine + 1`` the
    delimiter row; body rows are consumed until an empty/outdented line or
    a terminator rule fires.  Returns True when a table was consumed.
    """
    tbodyLines = None

    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    nextLine = startLine + 1

    if state.sCount[nextLine] < state.blkIndent:
        return False

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[nextLine] - state.blkIndent >= 4:
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp

    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    if pos >= state.eMarks[nextLine]:
        return False
    first_ch = state.srcCharCode[pos]
    pos += 1
    if first_ch not in {0x7C, 0x2D, 0x3A}:  # not in {"|", "-", ":"}
        return False

    if pos >= state.eMarks[nextLine]:
        return False
    second_ch = state.srcCharCode[pos]
    pos += 1
    # not in {"|", "-", ":"} and not space
    if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == 0x2D and isSpace(second_ch):
        return False

    while pos < state.eMarks[nextLine]:
        ch = state.srcCharCode[pos]

        # /* | */ /* - */ /* : */
        if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch):
            return False

        pos += 1

    # Parse the delimiter row into per-column alignment strings.
    lineText = getLine(state, startLine + 1)

    columns = lineText.split("|")
    aligns = []
    for i in range(len(columns)):
        t = columns[i].strip()
        if not t:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if i == 0 or i == len(columns) - 1:
                continue
            else:
                return False

        if not headerLineRe.search(t):
            return False
        if charCodeAt(t, len(t) - 1) == 0x3A:  # /* : */
            # trailing ':' => right; ':' on both ends => center
            aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right")
        elif charCodeAt(t, 0) == 0x3A:  # /* : */
            aligns.append("left")
        else:
            aligns.append("")

    lineText = getLine(state, startLine).strip()
    if "|" not in lineText:
        return False
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False
    columns = escapedSplit(lineText)
    # drop empty leading/trailing cells produced by enclosing pipes
    if columns and columns[0] == "":
        columns.pop(0)
    if columns and columns[-1] == "":
        columns.pop()

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(columns)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    token = state.push("table_open", "table", 1)
    # tableLines aliases token.map so the end line can be patched in later
    token.map = tableLines = [startLine, 0]

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for i in range(len(columns)):
        token = state.push("th_open", "th", 1)
        if aligns[i]:
            token.attrs = {"style": "text-align:" + aligns[i]}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = columns[i].strip()
        token.children = []

        token = state.push("th_close", "th", -1)

    token = state.push("tr_close", "tr", -1)
    token = state.push("thead_close", "thead", -1)

    # Consume body rows.
    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        terminate = False
        for i in range(len(terminatorRules)):
            if terminatorRules[i](state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break
        lineText = getLine(state, nextLine).strip()
        if not lineText:
            break
        if state.sCount[nextLine] - state.blkIndent >= 4:
            break
        columns = escapedSplit(lineText)
        if columns and columns[0] == "":
            columns.pop(0)
        if columns and columns[-1] == "":
            columns.pop()

        # open tbody lazily, on the first body row only
        if nextLine == startLine + 2:
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        for i in range(columnCount):
            token = state.push("td_open", "td", 1)
            if aligns[i]:
                token.attrs = {"style": "text-align:" + aligns[i]}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            # rows may have fewer cells than the header: pad with ""
            try:
                token.content = columns[i].strip() if columns[i] else ""
            except IndexError:
                token.content = ""
            token.children = []

            token = state.push("td_close", "td", -1)

        token = state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines:
        token = state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    token = state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True
|
|
@ -0,0 +1,17 @@
|
|||
__all__ = (
|
||||
"StateCore",
|
||||
"normalize",
|
||||
"block",
|
||||
"inline",
|
||||
"replace",
|
||||
"smartquotes",
|
||||
"linkify",
|
||||
)
|
||||
|
||||
from .block import block
|
||||
from .inline import inline
|
||||
from .linkify import linkify
|
||||
from .normalize import normalize
|
||||
from .replacements import replace
|
||||
from .smartquotes import smartquotes
|
||||
from .state_core import StateCore
|
|
@ -0,0 +1,16 @@
|
|||
from ..token import Token
|
||||
from .state_core import StateCore
|
||||
|
||||
|
||||
def block(state: StateCore) -> None:
    """Core rule: tokenize the source at block level.

    In inline mode the whole source is wrapped in a single ``inline``
    token instead of running the block parser.
    """
    if not state.inlineMode:
        state.md.block.parse(
            state.src, state.md, state.env, state.tokens, state.srcCharCode
        )
        return

    inline_token = Token("inline", "", 0)
    inline_token.content = state.src
    inline_token.map = [0, 1]
    inline_token.children = []
    state.tokens.append(inline_token)
|
|
@ -0,0 +1,10 @@
|
|||
from .state_core import StateCore
|
||||
|
||||
|
||||
def inline(state: StateCore) -> None:
    """Core rule: run the inline parser over every ``inline`` token."""
    md = state.md
    for tok in state.tokens:
        if tok.type != "inline":
            continue
        if tok.children is None:
            tok.children = []
        md.inline.parse(tok.content, md, state.env, tok.children)
|
|
@ -0,0 +1,141 @@
|
|||
import re
|
||||
|
||||
from ..common.utils import arrayReplaceAt
|
||||
from ..token import Token
|
||||
from .state_core import StateCore
|
||||
|
||||
# Raw HTML anchor tags — text inside them must not be linkified again.
LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)

# Schemes stripped from (or prepended to) the display text of bare links.
HTTP_RE = re.compile(r"^http://")
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)


def isLinkOpen(string: str) -> bool:
    """Return True if *string* begins with an opening ``<a ...>`` tag."""
    return LINK_OPEN_RE.search(string) is not None


def isLinkClose(string: str) -> bool:
    """Return True if *string* begins with a closing ``</a>`` tag."""
    return LINK_CLOSE_RE.search(string) is not None
|
||||
|
||||
|
||||
def linkify(state: StateCore) -> None:
    """Core rule: turn bare URLs/hostnames in text tokens into link tokens.

    Requires ``state.md.options.linkify`` to be enabled and the linkify-it
    backend to be installed (``state.md.linkify``).  For every inline block
    token whose content pretests positive, each matching "text" child is
    split into text / link_open / text / link_close sequences.
    """
    blockTokens = state.tokens

    if not state.md.options.linkify:
        return

    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    for j in range(len(blockTokens)):
        # Cheap pretest on the flat content skips blocks with no candidates.
        if blockTokens[j].type != "inline" or not state.md.linkify.pretest(
            blockTokens[j].content
        ):
            continue

        tokens = blockTokens[j].children

        htmlLinkLevel = 0

        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]

            # Skip content of markdown links: rewind past the matching
            # link_open at the same nesting level.
            if currentToken.type == "link_close":
                i -= 1
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue

            # Skip content of html tag links.  Because the scan runs
            # backwards, a close tag *increments* and an open tag
            # *decrements* the depth counter.
            if currentToken.type == "html_inline":
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue

            if currentToken.type == "text" and state.md.linkify.test(
                currentToken.content
            ):
                text = currentToken.content
                links = state.md.linkify.match(text)

                # Now split string to nodes
                nodes = []
                level = currentToken.level
                lastPos = 0

                for ln in range(len(links)):
                    url = links[ln].url
                    fullUrl = state.md.normalizeLink(url)
                    # Drop matches whose normalized URL fails validation.
                    if not state.md.validateLink(fullUrl):
                        continue

                    urlText = links[ln].text

                    # Linkifier might send raw hostnames like "example.com", where url
                    # starts with domain name. So we prepend http:// in those cases,
                    # and remove it afterwards.
                    if not links[ln].schema:
                        urlText = HTTP_RE.sub(
                            "", state.md.normalizeLinkText("http://" + urlText)
                        )
                    elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search(
                        urlText
                    ):
                        urlText = MAILTO_RE.sub(
                            "", state.md.normalizeLinkText("mailto:" + urlText)
                        )
                    else:
                        urlText = state.md.normalizeLinkText(urlText)

                    pos = links[ln].index

                    # Plain text between the previous match and this one.
                    if pos > lastPos:
                        token = Token("text", "", 0)
                        token.content = text[lastPos:pos]
                        token.level = level
                        nodes.append(token)

                    token = Token("link_open", "a", 1)
                    token.attrs = {"href": fullUrl}
                    token.level = level
                    level += 1
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    token = Token("text", "", 0)
                    token.content = urlText
                    token.level = level
                    nodes.append(token)

                    token = Token("link_close", "a", -1)
                    level -= 1
                    token.level = level
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    lastPos = links[ln].last_index

                # Trailing text after the final match.
                if lastPos < len(text):
                    token = Token("text", "", 0)
                    token.content = text[lastPos:]
                    token.level = level
                    nodes.append(token)

                # Splice the new nodes in place of the single text token.
                blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes)
|
|
@ -0,0 +1,19 @@
|
|||
"""Normalize input string."""
|
||||
import re
|
||||
|
||||
from .state_core import StateCore
|
||||
|
||||
# https://spec.commonmark.org/0.29/#line-ending
NEWLINES_RE = re.compile(r"\r\n?|\n")
NULL_RE = re.compile(r"\0")


def normalize(state: StateCore) -> None:
    """Core rule: canonicalise the raw source before any parsing.

    CRLF and lone-CR line endings are collapsed to ``\\n`` and NUL bytes
    become U+FFFD, as required by the CommonMark spec.
    """
    unified = NEWLINES_RE.sub("\n", state.src)
    state.src = NULL_RE.sub("\uFFFD", unified)
|
|
@ -0,0 +1,125 @@
|
|||
"""Simple typographic replacements
|
||||
|
||||
* ``(c)``, ``(C)`` → ©
|
||||
* ``(tm)``, ``(TM)`` → ™
|
||||
* ``(r)``, ``(R)`` → ®
|
||||
* ``(p)``, ``(P)`` → §
|
||||
* ``+-`` → ±
|
||||
* ``...`` → …
|
||||
* ``?....`` → ?..
|
||||
* ``!....`` → !..
|
||||
* ``????????`` → ???
|
||||
* ``!!!!!`` → !!!
|
||||
* ``,,,`` → ,
|
||||
* ``--`` → &ndash
|
||||
* ``---`` → &mdash
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..token import Token
|
||||
from .state_core import StateCore
|
||||
|
||||
LOGGER = logging.getLogger(__name__)

# TODO:
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4

# Quick pretest: matches any sequence that *might* need a rare replacement.
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"

# (c), (tm), (r), (p) abbreviations, case-insensitive.
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)

# "+-" -> "±"
PLUS_MINUS_RE = re.compile(r"\+-")

# Two or more dots -> ellipsis.
ELLIPSIS_RE = re.compile(r"\.{2,}")

# "?…" / "!…" produced by the ellipsis pass -> "?.." / "!..".
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

# Four or more ?/! collapse to three of the same character.
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

# Two or more commas collapse to one.
COMMA_RE = re.compile(r",{2,}")

# "---" flanked by non-dashes -> em dash.
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)

# "--" surrounded by whitespace -> en dash.
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)

# "--" tightly surrounded by non-space, non-dash characters -> en dash.
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
|
||||
|
||||
|
||||
# Replacement symbols for the abbreviations matched by SCOPED_ABBR_RE.
SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}


def replaceFn(match: re.Match[str]) -> str:
    """``re.sub`` callback: map a matched ``(c)``/``(tm)``/``(r)``/``(p)``
    abbreviation (any case) to its typographic symbol."""
    return SCOPED_ABBR[match.group(1).lower()]
|
||||
|
||||
|
||||
def replace_scoped(inlineTokens: list[Token]) -> None:
    """Substitute ``(c)``/``(tm)``/``(r)``/``(p)`` in text tokens.

    Text inside autolinks is left untouched.
    """
    autolink_depth = 0

    for tok in inlineTokens:
        if tok.type == "text" and not autolink_depth:
            tok.content = SCOPED_ABBR_RE.sub(replaceFn, tok.content)

        # NOTE(review): open decrements / close increments — presumably
        # mirrors the upstream JS, which walks some streams in reverse.
        if tok.type == "link_open" and tok.info == "auto":
            autolink_depth -= 1

        if tok.type == "link_close" and tok.info == "auto":
            autolink_depth += 1
|
||||
|
||||
|
||||
def replace_rare(inlineTokens: list[Token]) -> None:
    """Apply the "rare" typographic substitutions to text tokens.

    Covers ``+-``, ellipses, repeated ``?``/``!``, repeated commas and
    em/en dashes.  The substitution order matters: the ellipsis pass runs
    before the ``?..``/``!..`` fix-up, which rewrites what it produced.
    Text inside autolinks is skipped.
    """
    inside_autolink = 0

    for token in inlineTokens:
        # Pretest with RARE_RE so untouched tokens pay only one scan.
        if token.type == "text" and not inside_autolink:
            if RARE_RE.search(token.content):
                # +- -> ±
                token.content = PLUS_MINUS_RE.sub("±", token.content)

                # .., ..., ....... -> …
                token.content = ELLIPSIS_RE.sub("…", token.content)

                # but ?..... & !..... -> ?.. & !..
                token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub(
                    "\\1..", token.content
                )
                token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)

                # ,, ,,, ,,,, -> ,
                token.content = COMMA_RE.sub(",", token.content)

                # em-dash
                token.content = EM_DASH_RE.sub("\\1\u2014", token.content)

                # en-dash
                token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
                token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)

        # Autolink depth bookkeeping: open decrements, close increments
        # (same convention as replace_scoped).
        if token.type == "link_open" and token.info == "auto":
            inside_autolink -= 1

        if token.type == "link_close" and token.info == "auto":
            inside_autolink += 1
|
||||
|
||||
|
||||
def replace(state: StateCore) -> None:
    """Core rule: apply typographic replacements to inline tokens.

    No-op unless the ``typographer`` option is enabled.
    """
    if not state.md.options.typographer:
        return

    for tok in state.tokens:
        if tok.type != "inline":
            continue
        assert tok.children is not None

        # Cheap pretests on the flat content avoid walking children
        # when nothing can match.
        if SCOPED_ABBR_RE.search(tok.content):
            replace_scoped(tok.children)
        if RARE_RE.search(tok.content):
            replace_rare(tok.children)
|
|
@ -0,0 +1,202 @@
|
|||
"""Convert straight quotation marks to typographic ones
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
|
||||
from ..token import Token
|
||||
from .state_core import StateCore
|
||||
|
||||
# Matches any straight single or double quote.  Two identical patterns are
# kept to mirror the upstream JS, where the /g-flagged variant differs.
QUOTE_TEST_RE = re.compile(r"['\"]")
QUOTE_RE = re.compile(r"['\"]")
APOSTROPHE = "\u2019"  # ’
|
||||
|
||||
|
||||
def replaceAt(string: str, index: int, ch: str) -> str:
    """Return *string* with the single character at *index* replaced by *ch*.

    *ch* may be longer than one character (e.g. a multi-character quote
    string), in which case the result grows accordingly.

    Raises:
        ValueError: if *index* is negative.  A negative index would slice
            differently from the JS original, so it is rejected explicitly
            (previously an ``assert``, which disappears under ``python -O``).
    """
    if index < 0:
        raise ValueError(f"replaceAt index must be non-negative, got {index}")
    return string[:index] + ch + string[index + 1 :]
|
||||
|
||||
|
||||
def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes with typographic quotes in a token stream.

    Maintains a stack of potential opening quotes (token index, position,
    single/double, nesting level) and, on each candidate closing quote,
    rewinds the stack for a match at the same level.  Unmatched single
    quotes in word-internal or close-only positions become apostrophes.
    The ``goto_outer`` flag emulates the labelled ``continue`` of the
    upstream JS implementation.
    """
    stack: list[dict[str, Any]] = []

    for i in range(len(tokens)):
        token = tokens[i]

        thisLevel = token.level

        # Drop stack entries opened at a deeper nesting level than this token.
        j = 0
        for j in range(len(stack))[::-1]:
            if stack[j]["level"] <= thisLevel:
                break
        else:
            # When the loop is terminated without a "break".
            # Subtract 1 to get the same index as the js version.
            j -= 1

        stack = stack[: j + 1]

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            goto_outer = False
            lastIndex = pos
            t = QUOTE_RE.search(text[lastIndex:])
            if not t:
                break

            canOpen = canClose = True
            pos = t.start(0) + lastIndex + 1
            isSingle = t.group(0) == "'"

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar = 0x20

            if t.start(0) + lastIndex - 1 >= 0:
                lastChar = charCodeAt(text, t.start(0) + lastIndex - 1)
            else:
                # Quote is at the start of this token: look back through
                # earlier tokens for the preceding character.
                for j in range(i)[::-1]:
                    # lastChar defaults to 0x20
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
                    break

            # Find next character,
            # default to space if it's the end of the line
            nextChar = 0x20

            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                # Quote is at the end of this token: look forward through
                # later tokens for the following character.
                for j in range(i + 1, len(tokens)):
                    # nextChar defaults to 0x20
                    if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    nextChar = charCodeAt(tokens[j].content, 0)
                    break

            isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

            isLastWhiteSpace = isWhiteSpace(lastChar)
            isNextWhiteSpace = isWhiteSpace(nextChar)

            # A quote can open only if not followed by whitespace, and can
            # close only if not preceded by whitespace; punctuation on
            # either side narrows this further.
            if isNextWhiteSpace:
                canOpen = False
            elif isNextPunctChar:
                if not (isLastWhiteSpace or isLastPunctChar):
                    canOpen = False

            if isLastWhiteSpace:
                canClose = False
            elif isLastPunctChar:
                if not (isNextWhiteSpace or isNextPunctChar):
                    canClose = False

            if nextChar == 0x22 and t.group(0) == '"':  # 0x22: "
                if lastChar >= 0x30 and lastChar <= 0x39:  # 0x30: 0, 0x39: 9
                    # special case: 1"" - count first quote as an inch
                    canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(
                        token.content, t.start(0) + lastIndex, APOSTROPHE
                    )
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                for j in range(len(stack))[::-1]:
                    item = stack[j]
                    if stack[j]["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and stack[j]["level"] == thisLevel:
                        item = stack[j]

                        # Option quotes layout: [0]/[1] double open/close,
                        # [2]/[3] single open/close.
                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]

                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(
                            token.content, t.start(0) + lastIndex, closeQuote
                        )
                        tokens[item["token"]].content = replaceAt(
                            tokens[item["token"]].content, item["pos"], openQuote
                        )

                        # Account for multi-character quotes shifting positions.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        stack = stack[:j]
                        goto_outer = True
                        break
                if goto_outer:
                    goto_outer = False
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": t.start(0) + lastIndex,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                # Close-only single quote with no open on the stack:
                # treat it as an apostrophe.
                token.content = replaceAt(
                    token.content, t.start(0) + lastIndex, APOSTROPHE
                )
|
||||
|
||||
|
||||
def smartquotes(state: StateCore) -> None:
    """Core rule: convert straight quotes to typographic quotes.

    Skipped entirely unless the ``typographer`` option is enabled.
    """
    if not state.md.options.typographer:
        return

    for tok in state.tokens:
        # Only inline tokens whose flat content contains a quote
        # character are worth processing.
        if tok.type != "inline":
            continue
        if not QUOTE_RE.search(tok.content):
            continue
        assert tok.children is not None
        process_inlines(tok.children, state)
|
|
@ -0,0 +1,25 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import MutableMapping
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..ruler import StateBase
|
||||
from ..token import Token
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from markdown_it import MarkdownIt
|
||||
|
||||
|
||||
class StateCore(StateBase):
    """State object threaded through the core rule chain."""

    def __init__(
        self,
        src: str,
        md: MarkdownIt,
        env: MutableMapping,
        tokens: list[Token] | None = None,
    ):
        # Raw source; rewritten in place by the `normalize` rule.
        self.src = src
        self.md = md  # link to parser instance
        # Sandbox-like object for cross-rule data (references, plugins, …).
        self.env = env
        # Output token stream; note a falsy (empty) `tokens` argument is
        # replaced by a fresh list rather than reused.
        self.tokens: list[Token] = tokens or []
        # When True, the `block` rule wraps the source in one inline token.
        self.inlineMode = False
|
|
@ -0,0 +1,29 @@
|
|||
# Public API of the inline-rule package: the inline state plus every
# inline parsing rule.
__all__ = (
    "StateInline",
    "text",
    "text_collapse",
    "link_pairs",
    "escape",
    "newline",
    "backtick",
    "emphasis",
    "image",
    "link",
    "autolink",
    "entity",
    "html_inline",
    "strikethrough",
)
from . import emphasis, strikethrough
from .autolink import autolink
from .backticks import backtick
from .balance_pairs import link_pairs
from .entity import entity
from .escape import escape
from .html_inline import html_inline
from .image import image
from .link import link
from .newline import newline
from .state_inline import StateInline
from .text import text
from .text_collapse import text_collapse
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue