Import Upstream version 1.4.0

su-fang 2023-02-10 13:58:37 +08:00
commit f531b4c05c
591 changed files with 75001 additions and 0 deletions

28
.flake8 Normal file

@@ -0,0 +1,28 @@
[flake8]
min_python_version = 3.7.0
max-line-length = 88
ban-relative-imports = True
# flake8-use-fstring: https://github.com/MichaelKim0407/flake8-use-fstring#--percent-greedy-and---format-greedy
format-greedy = 1
inline-quotes = double
enable-extensions = TC, TC1
type-checking-strict = true
eradicate-whitelist-extend = ^-.*;
extend-ignore =
# E203: Whitespace before ':' (pycqa/pycodestyle#373)
E203,
# E501: Line too long
E501,
# SIM106: Handle error-cases first
SIM106,
# ANN101: Missing type annotation for self in method
ANN101,
# ANN102: Missing type annotation for cls in classmethod
ANN102,
per-file-ignores =
tests/test_*:ANN201
tests/**/test_*:ANN201
extend-exclude =
src/poetry/core/_vendor/*
tests/fixtures/*
tests/**/fixtures/*

4
.gitattributes vendored Normal file

@@ -0,0 +1,4 @@
poetry.lock linguist-generated=true
vendors/poetry.lock linguist-generated=true
poetry/core/_vendor/** linguist-generated=true
poetry/core/_vendor/vendor.txt linguist-generated=false

12
.github/ISSUE_TEMPLATE/config.yml vendored Normal file

@@ -0,0 +1,12 @@
# Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
blank_issues_enabled: false
contact_links:
- name: '✏️ Poetry Issue Tracker'
url: https://github.com/python-poetry/poetry/issues/new/choose
about: |
Submit your issues to the Poetry issue tracker. Bug reports and feature requests
will be tracked there.
- name: '💬 Discord Server'
url: https://discordapp.com/invite/awxPgve
about: |
Chat with the community, ask questions and learn about best practices.

13
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file

@@ -0,0 +1,13 @@
Resolves: python-poetry#<!-- add issue number/link here -->
<!-- This is just a reminder about the most common mistakes. Please make sure that you tick all *appropriate* boxes. But please read our [contribution guide](https://python-poetry.org/docs/contributing/) at least once, it will save you unnecessary review cycles! -->
- [ ] Added **tests** for changed code.
- [ ] Updated **documentation** for changed code.
<!--
**Note**: If your Pull Request introduces a new feature or changes the current behavior, it should be based
on the `develop` branch. If it's a bug fix or only a documentation update, it should be based on the `master` branch.
If you have *any* questions to *any* of the points above, just **submit and ask**! This checklist is here to *help* you, not to deter you from contributing!
-->

76
.github/workflows/downstream.yml vendored Normal file

@@ -0,0 +1,76 @@
name: Poetry Downstream Tests
on:
pull_request: {}
push:
branches: [main]
jobs:
Tests:
name: ${{ matrix.ref }}
runs-on: ubuntu-latest
strategy:
matrix:
ref: ["master", "1.2"]
fail-fast: false
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v3
with:
path: poetry-core
- uses: actions/checkout@v3
with:
path: poetry
repository: python-poetry/poetry
ref: ${{ matrix.ref }}
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: Get full python version
id: full-python-version
run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
- name: Set up Poetry
run: |
pip install poetry
poetry config virtualenvs.in-project true
- name: Set up cache
uses: actions/cache@v3
id: cache
with:
path: ./poetry/.venv
key: venv-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
working-directory: ./poetry
run: timeout 10s poetry run pip --version >/dev/null 2>&1 || rm -rf .venv
- name: Switch downstream to development poetry-core
working-directory: ./poetry
run: |
# remove poetry-core from main group to avoid version conflicts
# with a potential entry in the test group
poetry remove poetry-core
# add to test group to overwrite a potential entry in that group
poetry add --lock --group test ../poetry-core
- name: Install downstream dependencies
working-directory: ./poetry
run: |
# force update of directory dependency in cached venv
# (even if directory dependency with same version is already installed)
poetry run pip uninstall -y poetry-core
poetry install
# TODO: mark run as success even when this fails and add comment to PR instead
- name: Run downstream test suite
working-directory: ./poetry
run: poetry run pytest

32
.github/workflows/integration.yml vendored Normal file

@@ -0,0 +1,32 @@
name: Integration
on:
pull_request: {}
push:
branches: [main]
jobs:
Tests:
name: ${{ matrix.os }} / ${{ matrix.python-version }}
runs-on: "${{ matrix.os }}-latest"
strategy:
matrix:
os: [Ubuntu, MacOS, Windows]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
fail-fast: false
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install tox
run: pip install --upgrade tox
- name: Execute integration tests
run: tox -e integration

52
.github/workflows/release.yml vendored Normal file

@@ -0,0 +1,52 @@
name: Release
on:
push:
tags:
- '*.*.*'
jobs:
Release:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Get tag
id: tag
run: echo ::set-output name=tag::${GITHUB_REF#refs/tags/}
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.9"
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python - -y
- name: Update PATH
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Build project for distribution
run: poetry build
- name: Check Version
id: check-version
run: |
[[ "$(poetry version --short)" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] \
|| echo ::set-output name=prerelease::true
- name: Create Release
uses: ncipollo/release-action@v1
with:
artifacts: "dist/*"
token: ${{ secrets.GITHUB_TOKEN }}
draft: false
prerelease: steps.check-version.outputs.prerelease == 'true'
- name: Publish to PyPI
env:
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
run: poetry publish

72
.github/workflows/tests.yml vendored Normal file

@@ -0,0 +1,72 @@
name: Tests
on:
pull_request: {}
push:
branches: [main]
jobs:
tests:
name: ${{ matrix.os }} / ${{ matrix.python-version }}
runs-on: "${{ matrix.os }}-latest"
strategy:
matrix:
os: [Ubuntu, MacOS, Windows]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
include:
- os: Ubuntu
python-version: pypy-3.8
fail-fast: false
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Get full Python version
id: full-python-version
run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
- name: Bootstrap poetry
run: |
curl -sSL https://install.python-poetry.org | python - -y
- name: Update PATH
if: ${{ matrix.os != 'Windows' }}
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Update Path for Windows
if: ${{ matrix.os == 'Windows' }}
run: echo "$APPDATA\Python\Scripts" >> $GITHUB_PATH
- name: Configure poetry
run: poetry config virtualenvs.in-project true
- name: Set up cache
uses: actions/cache@v3
id: cache
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
run: |
# `timeout` is not available on macOS, so we define a custom function.
[ "$(command -v timeout)" ] || function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
# Using `timeout` is a safeguard against the Poetry command hanging for some reason.
timeout 10s poetry run pip --version || rm -rf .venv
- name: Install dependencies
run: poetry install
- name: Run pytest
run: poetry run python -m pytest -p no:sugar -q tests/
- name: Run mypy
run: poetry run mypy

41
.gitignore vendored Normal file

@@ -0,0 +1,41 @@
*.pyc
# Packages
*.egg
!/tests/**/*.egg
/*.egg-info
/tests/fixtures/**/*.egg-info
/dist/*
build
_build
.cache
*.so
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
.pytest_cache
.DS_Store
.idea/*
.python-version
.vscode/*
/test.py
/test_*.*
/setup.cfg
MANIFEST.in
/setup.py
/docs/site/*
/tests/fixtures/simple_project/setup.py
/tests/fixtures/project_with_extras/setup.py
.mypy_cache
.venv
/releases/*
pip-wheel-metadata
/poetry.toml

87
.pre-commit-config.yaml Normal file

@@ -0,0 +1,87 @@
exclude: |
(?x)(
^tests/.*/fixtures/.*
| ^src/poetry/core/_vendor
)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
exclude: "vendors/patches/jsonschema.patch"
- id: end-of-file-fixer
- id: debug-statements
- id: check-merge-conflict
- id: check-case-conflict
- id: check-json
- id: check-toml
- id: check-yaml
- id: pretty-format-json
args:
- --autofix
- --no-ensure-ascii
- --no-sort-keys
- id: check-ast
- id: debug-statements
- id: check-docstring-first
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.9.0
hooks:
- id: python-check-mock-methods
- id: python-use-type-annotations
- id: python-check-blanket-noqa
- repo: https://github.com/asottile/yesqa
rev: v1.4.0
hooks:
- id: yesqa
additional_dependencies: &flake8_deps
- flake8-annotations==2.9.0
- flake8-broken-line==0.5.0
- flake8-bugbear==22.7.1
- flake8-comprehensions==3.10.0
- flake8-eradicate==1.3.0
- flake8-quotes==3.3.1
- flake8-simplify==0.19.3
- flake8-tidy-imports==4.8.0
- flake8-type-checking==2.2.0
- flake8-typing-imports==1.12.0
- flake8-use-fstring==1.4
- pep8-naming==0.13.1
- repo: https://github.com/asottile/pyupgrade
rev: v3.2.2
hooks:
- id: pyupgrade
args:
- --py37-plus
- repo: https://github.com/hadialqattan/pycln
rev: v2.1.2
hooks:
- id: pycln
args: [--all]
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
args: [--add-import, from __future__ import annotations]
exclude: |
(?x)(
^.*/?setup\.py$
| tests/.*\.pyi$
)
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
additional_dependencies: *flake8_deps

476
CHANGELOG.md Normal file

@@ -0,0 +1,476 @@
# Change Log
## [1.4.0] - 2022-11-22
### Added
- The PEP 517 `metadata_directory` is now respected as an input to the `build_wheel` hook ([#487](https://github.com/python-poetry/poetry-core/pull/487)).
### Changed
- Sources are now considered more carefully when dealing with dependencies with environment markers ([#497](https://github.com/python-poetry/poetry-core/pull/497)).
- `EmptyConstraint` is now hashable ([#513](https://github.com/python-poetry/poetry-core/pull/513)).
- `ParseConstraintError` is now raised on version and constraint parsing errors, and includes information on the package that caused the error ([#514](https://github.com/python-poetry/poetry-core/pull/514)).
### Fixed
- Fix an issue where invalid PEP 508 requirements were generated due to a missing space before semicolons ([#510](https://github.com/python-poetry/poetry-core/pull/510)).
- Fix an issue where relative paths were encoded into package requirements, instead of a file:// URL as required by PEP 508 ([#512](https://github.com/python-poetry/poetry-core/pull/512)).
### Vendoring
- [`jsonschema==4.17.0`](https://github.com/python-jsonschema/jsonschema/blob/main/CHANGELOG.rst)
- [`lark==1.1.4`](https://github.com/lark-parser/lark/releases/tag/1.1.4)
- [`pyrsistent==0.19.2`](https://github.com/tobgu/pyrsistent/blob/master/CHANGES.txt)
- [`tomlkit==0.11.6`](https://github.com/sdispater/tomlkit/blob/master/CHANGELOG.md)
- [`typing-extensions==4.4.0`](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md)
## [1.3.2] - 2022-10-07
### Fixed
- Fix an issue where the normalization was not applied to the path of an sdist built using a PEP 517 frontend ([#495](https://github.com/python-poetry/poetry-core/pull/495)).
## [1.3.1] - 2022-10-05
### Fixed
- Fix an issue where a typing-driven assertion could be false at runtime, causing a failure during prepare_metadata_for_build_wheel ([#492](https://github.com/python-poetry/poetry-core/pull/492)).
## [1.3.0] - 2022-10-05
### Added
- Add `3.11` to the list of available Python versions ([#477](https://github.com/python-poetry/poetry-core/pull/477)).
### Changed
- Deprecate `poetry.core.constraints.generic`, which is replaced by `poetry.core.packages.constraints` ([#482](https://github.com/python-poetry/poetry-core/pull/482)).
- Deprecate `poetry.core.semver`, which is replaced by `poetry.core.constraints.version` ([#482](https://github.com/python-poetry/poetry-core/pull/482)).
### Fixed
- Fix an issue where versions were escaped wrongly when building the wheel name ([#469](https://github.com/python-poetry/poetry-core/pull/469)).
- Fix an issue where caret constraints of pre-releases with a major version of 0 resulted in an empty version range ([#475](https://github.com/python-poetry/poetry-core/pull/475)).
- Fix an issue where the names of extras were not normalized according to PEP 685 ([#476](https://github.com/python-poetry/poetry-core/pull/476)).
- Fix an issue where sdist names were not normalized ([#484](https://github.com/python-poetry/poetry-core/pull/484)).
## [1.2.0] - 2022-09-13
### Added
- Added support for subdirectories in `url` dependencies ([#398](https://github.com/python-poetry/poetry-core/pull/398)).
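A rough sketch of the `pyproject.toml` syntax for the `url` subdirectory support above, assuming the key is named `subdirectory` as for git dependencies (URL and package names are placeholders):
```toml
[tool.poetry.dependencies]
# Hypothetical archive whose package code lives in a subdirectory
my-lib = { url = "https://example.com/my-lib-1.0.zip", subdirectory = "src/my-lib" }
```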
### Changed
- When setting an invalid version constraint, an error is now raised instead of silently setting "any version" ([#461](https://github.com/python-poetry/poetry-core/pull/461)).
- Allow more characters in author name ([#411](https://github.com/python-poetry/poetry-core/pull/411)).
### Fixed
- Fixed an issue where incorrect `Requires-Dist` information was generated when environment markers were used for optional packages ([#462](https://github.com/python-poetry/poetry-core/pull/462)).
- Fixed an issue where incorrect python constraints were parsed from environment markers ([#457](https://github.com/python-poetry/poetry-core/pull/457)).
- Fixed the hashing of markers and constraints ([#466](https://github.com/python-poetry/poetry-core/pull/466)).
- Fixed an issue where the PEP 508 name of directory dependencies used platform paths ([#463](https://github.com/python-poetry/poetry-core/pull/463)).
## [1.1.0] - 2022-08-31
- No functional changes.
## [1.1.0rc3] - 2022-08-26
### Fixed
- Fixed an issue where a malformed URL was passed to pip when installing from a git subdirectory ([#451](https://github.com/python-poetry/poetry-core/pull/451)).
## [1.1.0rc2] - 2022-08-26
### Changed
- Enabled setting `version` of `ProjectPackage` to support dynamically setting the project's package version (e.g. from a plugin) ([#447](https://github.com/python-poetry/poetry-core/pull/447)).
### Fixed
- Fixed an issue where `authors` property was not detected ([#437](https://github.com/python-poetry/poetry-core/pull/437)).
- Fixed an issue where submodules of git dependencies were not checked out ([#439](https://github.com/python-poetry/poetry-core/pull/439)).
- Fixed an issue with Python constraints from markers ([#448](https://github.com/python-poetry/poetry-core/pull/448)).
- Fixed an issue where the latest version of a git dependency was selected instead of the locked one ([#449](https://github.com/python-poetry/poetry-core/pull/449)).
## [1.1.0rc1] - 2022-08-17
### Changed
- Replaced Poetry's helper method `canonicalize_name()` by `packaging.utils.canonicalize_name()` ([#418](https://github.com/python-poetry/poetry-core/pull/418)).
- Removed unused code ([#419](https://github.com/python-poetry/poetry-core/pull/419)).
### Fixed
- Fixed an issue with markers that resulted in incorrectly resolved extra dependencies ([#415](https://github.com/python-poetry/poetry-core/pull/415)).
- Fixed an issue where equal markers did not have the same hash ([#417](https://github.com/python-poetry/poetry-core/pull/417)).
- Fixed `allows_any()` for local versions ([#433](https://github.com/python-poetry/poetry-core/pull/433)).
- Fixed special cases of `next_major()`, `next_minor()`, etc. and deprecated ambiguous usage ([#434](https://github.com/python-poetry/poetry-core/pull/434)).
- Fixed an issue with Python constraints from markers ([#436](https://github.com/python-poetry/poetry-core/pull/436)).
## [1.1.0b3] - 2022-07-09
### Added
- Added support for valid PEP 517 projects with a build system other than poetry-core as directory dependencies ([#368](https://github.com/python-poetry/poetry-core/pull/368), [#377](https://github.com/python-poetry/poetry-core/pull/377)).
- Added support for yanked files and releases according to PEP 592 ([#400](https://github.com/python-poetry/poetry-core/pull/400)).
### Changed
- Relaxed schema validation to allow additional properties ([#369](https://github.com/python-poetry/poetry-core/pull/369)).
- Harmonized string representation of dependencies ([#393](https://github.com/python-poetry/poetry-core/pull/393)).
- Changed wheel name normalization to follow most recent packaging specification ([#394](https://github.com/python-poetry/poetry-core/pull/394)).
- Changed equality check of direct origin dependencies, so that constraints are not considered anymore ([#405](https://github.com/python-poetry/poetry-core/pull/405)).
- Deprecated `Dependency.set_constraint()` and replaced it by a `constraint` property for consistency ([#370](https://github.com/python-poetry/poetry-core/pull/370)).
- Removed `Package.requires_extras` ([#374](https://github.com/python-poetry/poetry-core/pull/374)).
- Improved marker handling ([#380](https://github.com/python-poetry/poetry-core/pull/380),
[#383](https://github.com/python-poetry/poetry-core/pull/383),
[#384](https://github.com/python-poetry/poetry-core/pull/384),
[#390](https://github.com/python-poetry/poetry-core/pull/390),
[#395](https://github.com/python-poetry/poetry-core/pull/395)).
### Fixed
- Fixed the hash method for `PackageSpecification`, `Package`, `Dependency` and their subclasses ([#370](https://github.com/python-poetry/poetry-core/pull/370)).
- Fixed merging of markers `python_version` and `python_full_version` ([#382](https://github.com/python-poetry/poetry-core/pull/382), [#388](https://github.com/python-poetry/poetry-core/pull/388)).
- Fixed python version normalization ([#385](https://github.com/python-poetry/poetry-core/pull/385), [#407](https://github.com/python-poetry/poetry-core/pull/407)).
- Fixed an issue where version identifiers with a local version segment allowed non local versions ([#396](https://github.com/python-poetry/poetry-core/pull/396)).
- Fixed an issue where version identifiers without a post release segment allowed post releases ([#396](https://github.com/python-poetry/poetry-core/pull/396)).
- Fixed script definitions that didn't work when extras were not explicitly defined ([#404](https://github.com/python-poetry/poetry-core/pull/404)).
## [1.1.0b2] - 2022-05-24
### Fixed
- Fixed a regression where `poetry-core` no longer handled improper Python version constraints from package metadata ([#371](https://github.com/python-poetry/poetry-core/pull/371))
- Fixed missing version bump in `poetry.core.__version__` ([#367](https://github.com/python-poetry/poetry-core/pull/367))
### Improvements
- Wheels generated by `poetry-core` now correctly identify their `Generator` metadata as `poetry-core` instead of `poetry` ([#367](https://github.com/python-poetry/poetry-core/pull/367))
## [1.1.0b1] - 2022-05-23
### Fixed
- Fixed an issue where canonicalizing package names led to infinite loops ([#328](https://github.com/python-poetry/poetry-core/pull/328)).
- Fixed an issue where versions weren't correctly normalized to PEP-440 ([#344](https://github.com/python-poetry/poetry-core/pull/344)).
- Fixed an issue with union of multi markers if one marker is a subset of the other marker ([#352](https://github.com/python-poetry/poetry-core/pull/352)).
- Fixed an issue with markers which are not in disjunctive normal form (DNF) ([#347](https://github.com/python-poetry/poetry-core/pull/347)).
- Fixed an issue where stub-only partial namespace packages were not recognized as packages ([#221](https://github.com/python-poetry/poetry-core/pull/221)).
- Fixed an issue where PEP-508 url requirements with extras were not parsed correctly ([#345](https://github.com/python-poetry/poetry-core/pull/345)).
- Fixed an issue where PEP-508 strings with wildcard exclusion constraints were incorrectly exported ([#343](https://github.com/python-poetry/poetry-core/pull/343)).
- Allow hidden directories on Windows bare repos ([#341](https://github.com/python-poetry/poetry-core/pull/341)).
- Fixed an issue where dependencies with an epoch are parsed as empty ([#316](https://github.com/python-poetry/poetry-core/pull/316)).
- Fixed an issue where a package consisting of multiple packages wasn't built correctly ([#292](https://github.com/python-poetry/poetry-core/pull/292)).
### Added
- Added support for handling git urls with subdirectory ([#288](https://github.com/python-poetry/poetry-core/pull/288)).
- Added support for metadata files as described in PEP-658 for PEP-503 "simple" API repositories ([#333](https://github.com/python-poetry/poetry-core/pull/333)).
### Changed
- Renamed the dependency group of runtime dependencies from `default` to `main` ([#326](https://github.com/python-poetry/poetry-core/pull/326)).
### Improvements
- `poetry-core` is now completely type checked.
- Improved the SemVer constraint parsing ([#327](https://github.com/python-poetry/poetry-core/pull/327)).
- Improved the speed when cloning git repositories ([#290](https://github.com/python-poetry/poetry-core/pull/290)).
## [1.1.0a7] - 2022-03-05
### Fixed
- Fixed an issue when evaluating `in/not in` markers ([#188](https://github.com/python-poetry/poetry-core/pull/188)).
- Fixed an issue when parsing caret constraints with a leading zero ([#201](https://github.com/python-poetry/poetry-core/pull/201)).
- Respect format for explicitly included files when finding excluded files ([#228](https://github.com/python-poetry/poetry-core/pull/228)).
- Fixed an issue where only the last location was used when multiple packages should be included ([#108](https://github.com/python-poetry/poetry-core/pull/108)).
- Ensure that package `description` contains no new line ([#219](https://github.com/python-poetry/poetry-core/pull/219)).
- Fixed an issue where all default dependencies were removed instead of just the selected one ([#220](https://github.com/python-poetry/poetry-core/pull/220)).
- Ensure that authors and maintainers are normalized ([#276](https://github.com/python-poetry/poetry-core/pull/276)).
### Added
- Add support for most of the guaranteed hashes ([#207](https://github.com/python-poetry/poetry-core/pull/207)).
- Add support to declare multiple README files ([#248](https://github.com/python-poetry/poetry-core/pull/248)).
- Add support for git sub directories ([#192](https://github.com/python-poetry/poetry-core/pull/192)).
- Add hooks according to PEP-660 for editable installs ([#182](https://github.com/python-poetry/poetry-core/pull/182)).
- Add support for version epochs ([#264](https://github.com/python-poetry/poetry-core/pull/264)).
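The multiple-README and git-subdirectory entries above map to `pyproject.toml` roughly as follows (repository URL, file names and package names are placeholders):
```toml
[tool.poetry]
# Several README files, concatenated into the package's long description
readme = ["README.md", "CHANGELOG.md"]

[tool.poetry.dependencies]
# Git dependency whose package lives in a subdirectory of the repository
my-lib = { git = "https://github.com/example/monorepo.git", subdirectory = "my-lib" }
```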
### Changed
- Drop python3.6 support ([#263](https://github.com/python-poetry/poetry-core/pull/263)).
- Loosened the strictness when parsing version constraints to support invalid use of wildcards, e.g. `>=3.*` ([#186](https://github.com/python-poetry/poetry-core/pull/186)).
- No longer assume a default git branch name ([#192](https://github.com/python-poetry/poetry-core/pull/192)).
- Sort package name in extras to make it reproducible ([#280](https://github.com/python-poetry/poetry-core/pull/280)).
### Improvements
- Improve marker handling ([#208](https://github.com/python-poetry/poetry-core/pull/208),
[#282](https://github.com/python-poetry/poetry-core/pull/282),
[#283](https://github.com/python-poetry/poetry-core/pull/283),
[#284](https://github.com/python-poetry/poetry-core/pull/284),
[#286](https://github.com/python-poetry/poetry-core/pull/286),
[#291](https://github.com/python-poetry/poetry-core/pull/291),
[#293](https://github.com/python-poetry/poetry-core/pull/293),
[#294](https://github.com/python-poetry/poetry-core/pull/294),
[#297](https://github.com/python-poetry/poetry-core/pull/297)).
## [1.1.0a6] - 2021-07-30
### Added
- Added support for dependency groups. ([#183](https://github.com/python-poetry/poetry-core/pull/183))
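Dependency groups are declared under `[tool.poetry.group.<name>.dependencies]` in `pyproject.toml`; a minimal sketch of the released syntax (group and package names are illustrative):
```toml
[tool.poetry.dependencies]
python = "^3.7"

# A named dependency group, kept separate from the main dependencies
[tool.poetry.group.test.dependencies]
pytest = "^7.0"
```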
## [1.1.0a5] - 2021-05-21
### Added
- Added support for script files in addition to standard entry points. ([#40](https://github.com/python-poetry/poetry-core/pull/40))
### Fixed
- Fixed an error in the way python markers with a precision >= 3 were handled. ([#178](https://github.com/python-poetry/poetry-core/pull/178))
## [1.1.0a4] - 2021-04-30
### Changed
- Files in source distributions now have a deterministic time to improve reproducibility. ([#142](https://github.com/python-poetry/poetry-core/pull/142))
### Fixed
- Fixed an error where leading zeros in the local build part of version specifications were discarded. ([#167](https://github.com/python-poetry/poetry-core/pull/167))
- Fixed the PEP 508 representation of file dependencies. ([#153](https://github.com/python-poetry/poetry-core/pull/153))
- Fixed the copy of `Package` instances which led to file hashes not being available. ([#159](https://github.com/python-poetry/poetry-core/pull/159))
- Fixed an error in the parsing of caret requirements with a pre-release lower bound. ([#171](https://github.com/python-poetry/poetry-core/pull/171))
- Fixed an error where some pre-release versions were not flagged as pre-releases. ([#170](https://github.com/python-poetry/poetry-core/pull/170))
## [1.1.0a3] - 2021-04-09
### Fixed
- Fixed dependency markers not being properly copied when changing the constraint ([#162](https://github.com/python-poetry/poetry-core/pull/162)).
## [1.1.0a2] - 2021-04-08
### Fixed
- Fixed performance regressions when parsing version constraints ([#152](https://github.com/python-poetry/poetry-core/pull/152)).
- Fixed how local build versions are handled and compared ([#157](https://github.com/python-poetry/poetry-core/pull/157), [#158](https://github.com/python-poetry/poetry-core/pull/158)).
- Fixed errors when parsing some environment markers ([#155](https://github.com/python-poetry/poetry-core/pull/155)).
## [1.1.0a1] - 2021-03-30
This version is the first to drop support for Python 2.7 and 3.5.
If you are still using these versions you should update the `requires` property of the `build-system` section
to restrict the version of `poetry-core`:
```toml
[build-system]
requires = ["poetry-core<1.1.0"]
build-backend = "poetry.core.masonry.api"
```
### Changed
- Dropped support for Python 2.7 and 3.5 ([#131](https://github.com/python-poetry/poetry-core/pull/131)).
- Reorganized imports internally to improve performance ([#131](https://github.com/python-poetry/poetry-core/pull/131)).
- Directory dependencies are now in non-develop mode by default ([#98](https://github.com/python-poetry/poetry-core/pull/98)).
- Improved support for PEP 440 specific versions that do not abide by semantic versioning ([#140](https://github.com/python-poetry/poetry-core/pull/140)).
### Fixed
- Fixed path dependencies PEP 508 representation ([#141](https://github.com/python-poetry/poetry-core/pull/141)).
## [1.0.2] - 2021-02-05
### Fixed
- Fixed a missing import causing an error in Poetry ([#134](https://github.com/python-poetry/poetry-core/pull/134)).
## [1.0.1] - 2021-02-05
### Fixed
- Fixed PEP 508 representation of dependency without extras ([#102](https://github.com/python-poetry/poetry-core/pull/102)).
- Fixed an error where development dependencies were being resolved when invoking the PEP-517 backend ([#101](https://github.com/python-poetry/poetry-core/pull/101)).
- Fixed source distribution not being deterministic ([#105](https://github.com/python-poetry/poetry-core/pull/105)).
- Fixed an error where zip files were left open when building wheels ([#122](https://github.com/python-poetry/poetry-core/pull/122)).
- Fixed an error where explicitly included files were still not present in final distributions ([#124](https://github.com/python-poetry/poetry-core/pull/124)).
- Fixed wheel filename matching for recent architecture ([#125](https://github.com/python-poetry/poetry-core/pull/125), [#129](https://github.com/python-poetry/poetry-core/pull/129)).
- Fixed an error where the `&` character was not accepted for author names ([#120](https://github.com/python-poetry/poetry-core/pull/120)).
- Fixed the PEP-508 representation of some dependencies ([#103](https://github.com/python-poetry/poetry-core/pull/103)).
- Fixed the `Requires-Python` metadata generation ([#127](https://github.com/python-poetry/poetry-core/pull/127)).
- Fixed an error where pre-release versions were accepted in version constraints ([#128](https://github.com/python-poetry/poetry-core/pull/128)).
## [1.0.0] - 2020-09-30
No changes.
## [1.0.0rc3] - 2020-09-30
### Changed
- Removed `intreehooks` build backend in favor of the `backend-path` mechanism ([#90](https://github.com/python-poetry/poetry-core/pull/90)).
- Directory dependencies will now always use a posix path for their representation ([#91](https://github.com/python-poetry/poetry-core/pull/91)).
- Dependency constraints can now be set directly via a proper setter ([#90](https://github.com/python-poetry/poetry-core/pull/90)).
## [1.0.0rc2] - 2020-09-25
### Fixed
- Fixed `python_full_version` markers conversion to version constraints ([#86](https://github.com/python-poetry/core/pull/86)).
## [1.0.0rc1] - 2020-09-25
### Fixed
- Fixed Python constraint propagation when converting a package to a dependency ([#84](https://github.com/python-poetry/core/pull/84)).
- Fixed VCS ignored files being included in wheel distributions for projects using the `src` layout ([#81](https://github.com/python-poetry/core/pull/81))
## [1.0.0b1] - 2020-09-18
### Added
- Added support for build executable for wheels ([#72](https://github.com/python-poetry/core/pull/72)).
### Changed
- Improved the equality comparison of packages with sources ([#53](https://github.com/python-poetry/core/pull/53)).
- Improved licenses handling and packaging in builders ([#57](https://github.com/python-poetry/core/pull/57)).
- Refactored packages and dependencies classes to improve comparison between bare packages and packages with extras ([#78](https://github.com/python-poetry/core/pull/78)).
### Fixed
- Fixed PEP-508 representation of URL dependencies ([#60](https://github.com/python-poetry/core/pull/60)).
- Fixed generated `RECORD` files in some cases by ensuring it's a valid CSV file ([#61](https://github.com/python-poetry/core/pull/61)).
- Fixed an error when parsing some version constraints if they contained wildcard elements ([#56](https://github.com/python-poetry/core/pull/56)).
- Fixed errors when using the `exclude` property ([#62](https://github.com/python-poetry/core/pull/62)).
- Fixed the way git revisions are retrieved ([#69](https://github.com/python-poetry/core/pull/69)).
- Fixed dependency constraint PEP-508 compatibility when generating metadata ([#79](https://github.com/python-poetry/core/pull/79)).
- Fixed potential errors on Python 3.5 when building with the `include` property set ([#75](https://github.com/python-poetry/core/pull/75)).
## [1.0.0a9] - 2020-07-24
### Added
- Added support for build scripts without `setup.py` generation ([#45](https://github.com/python-poetry/core/pull/45)).
### Changed
- Improved the parsing of requirements and environment markers ([#44](https://github.com/python-poetry/core/pull/44)).
### Fixed
- Fixed the default value used for the `build.generate-setup-file` settings ([#43](https://github.com/python-poetry/core/pull/43)).
- Fixed error messages when the authors specified in the pyproject.toml file are invalid ([#49](https://github.com/python-poetry/core/pull/49)).
- Fixed distributions build when using the PEP-517 backend for packages with includes ([#47](https://github.com/python-poetry/core/pull/47)).
## [1.0.0a8] - 2020-06-26
### Fixed
- Fixed errors in the way Python environment markers were parsed and generated ([#36](https://github.com/python-poetry/core/pull/36)).
## [1.0.0a7] - 2020-05-06
### Added
- Added support for format-specific includes via the `include` property ([#6](https://github.com/python-poetry/core/pull/6)).
### Changed
- Allow url dependencies in multiple constraints dependencies ([#32](https://github.com/python-poetry/core/pull/32)).
### Fixed
- Fixed PEP 508 representation and parsing of VCS dependencies ([#30](https://github.com/python-poetry/core/pull/30)).
## [1.0.0a6] - 2020-04-24
### Added
- Added support for markers inverse ([#21](https://github.com/python-poetry/core/pull/21)).
- Added support for specifying that `git` dependencies should be installed in develop mode ([#23](https://github.com/python-poetry/core/pull/23)).
- Added the ability to specify build settings from the Poetry main configuration file ([#26](https://github.com/python-poetry/core/pull/26)).
- Added the ability to disable the generation of the `setup.py` file when building ([#26](https://github.com/python-poetry/core/pull/26)).
### Changed
- Relaxed licence restrictions to support custom licences ([#5](https://github.com/python-poetry/core/pull/5)).
- Improved support for PEP-440 direct references ([#22](https://github.com/python-poetry/core/pull/22)).
- Improved dependency vendoring ([#25](https://github.com/python-poetry/core/pull/25)).
### Fixed
- Fixed the inability to make the url dependencies optional ([#13](https://github.com/python-poetry/core/pull/13)).
- Fixed whitespaces in PEP-440 constraints causing an error ([#16](https://github.com/python-poetry/core/pull/16)).
- Fixed subpackage check when generating the `setup.py` file ([#17](https://github.com/python-poetry/core/pull/17)).
- Fix PEP-517 issues for projects using build scripts ([#12](https://github.com/python-poetry/core/pull/12)).
- Fixed support for stub-only packages ([#28](https://github.com/python-poetry/core/pull/28)).
[Unreleased]: https://github.com/python-poetry/poetry-core/compare/1.4.0...main
[1.4.0]: https://github.com/python-poetry/poetry-core/releases/tag/1.4.0
[1.3.2]: https://github.com/python-poetry/poetry-core/releases/tag/1.3.2
[1.3.1]: https://github.com/python-poetry/poetry-core/releases/tag/1.3.1
[1.3.0]: https://github.com/python-poetry/poetry-core/releases/tag/1.3.0
[1.2.0]: https://github.com/python-poetry/poetry-core/releases/tag/1.2.0
[1.1.0]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0
[1.1.0rc3]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0rc3
[1.1.0rc2]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0rc2
[1.1.0rc1]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0rc1
[1.1.0b3]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0b3
[1.1.0b2]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0b2
[1.1.0b1]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0b1
[1.1.0a7]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a7
[1.1.0a6]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a6
[1.1.0a5]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a5
[1.1.0a4]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a4
[1.1.0a3]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a3
[1.1.0a2]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a2
[1.1.0a1]: https://github.com/python-poetry/poetry-core/releases/tag/1.1.0a1
[1.0.2]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.2
[1.0.1]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.1
[1.0.0]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0
[1.0.0rc3]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0rc3
[1.0.0rc2]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0rc2
[1.0.0rc1]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0rc1
[1.0.0b1]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0b1
[1.0.0a9]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0a9
[1.0.0a8]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0a8
[1.0.0a7]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0a7
[1.0.0a6]: https://github.com/python-poetry/poetry-core/releases/tag/1.0.0a6

20
LICENSE Normal file

@@ -0,0 +1,20 @@
Copyright (c) 2020 Sébastien Eustace
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

30
Makefile Normal file

@@ -0,0 +1,30 @@
SHELL := $(shell which bash) -e
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
ROOT_DIR := $(patsubst %/,%,$(dir $(MAKEFILE_PATH)))
VENDOR_SRC := $(ROOT_DIR)/vendors
VENDOR_DIR := $(ROOT_DIR)/src/poetry/core/_vendor
VENDOR_TXT := $(VENDOR_DIR)/vendor.txt
POETRY_BIN ?= $(shell which poetry)
.PHONY: vendor/lock
vendor/lock: $(VENDOR_LOCK)
# regenerate lock file
@pushd $(VENDOR_SRC) && $(POETRY_BIN) lock --no-update
.PHONY: vendor/sync
vendor/sync:
# regenerate vendor.txt file (exported from lockfile)
@pushd $(VENDOR_SRC) && $(POETRY_BIN) export --without-hashes 2> /dev/null \
| grep -E -v "(importlib|zipp)" \
| sort > $(VENDOR_TXT)
# vendor packages
@$(POETRY_BIN) run vendoring sync
# strip out *.pyi stubs
@find "$(VENDOR_DIR)" -type f -name "*.pyi" -exec rm {} \;
.PHONY: vendor/update
vendor/update: | vendor/lock vendor/sync
@:

45
README.md Normal file

@@ -0,0 +1,45 @@
# Poetry Core
[![PyPI version](https://img.shields.io/pypi/v/poetry-core)](https://pypi.org/project/poetry-core/)
[![Python Versions](https://img.shields.io/pypi/pyversions/poetry-core)](https://pypi.org/project/poetry-core/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![](https://github.com/python-poetry/poetry-core/workflows/Tests/badge.svg)](https://github.com/python-poetry/poetry-core/actions?query=workflow%3ATests)
A [PEP 517](https://www.python.org/dev/peps/pep-0517/) build backend implementation developed for
[Poetry](https://github.com/python-poetry/poetry). This project is intended to be a lightweight, fully compliant,
self-contained package allowing PEP 517-compatible build frontends to build Poetry-managed projects.
## Usage
In most cases, the usage of this package is transparent to the end user, as it is used either by Poetry itself
or by a PEP 517 frontend (e.g. `pip`).
In order to enable the use of `poetry-core` as your build backend, the following snippet must be present in your
project's `pyproject.toml` file.
```toml
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
```
Once this is present, a PEP 517 frontend like `pip` can build and install your project from source without the need
for Poetry or any of its dependencies.
```shell
# install to current environment
pip install /path/to/poetry/managed/project
# build a wheel package
pip wheel /path/to/poetry/managed/project
```
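For reference, a complete `pyproject.toml` for a Poetry-managed project pairs the `build-system` table with a
`[tool.poetry]` section describing the package; a minimal sketch (project name, author and dependencies are placeholders):
```toml
[tool.poetry]
name = "my-package"
version = "0.1.0"
description = "An example Poetry-managed project."
authors = ["Jane Doe <jane@example.com>"]

[tool.poetry.dependencies]
python = "^3.7"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
```
With this in place, the `pip` commands above delegate the build to `poetry-core` without installing Poetry itself.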
## Why is this required?
Prior to the release of version `1.1.0`, Poetry was a project management tool that included a PEP 517
build backend. This was inefficient and time-consuming when a PEP 517 build was required. For example,
both `pip` and `tox` (with isolated builds) would install Poetry and all of the dependencies it required. Most of these
dependencies are not required when the objective is to simply build either a source or binary distribution of your
project.
In order to improve this situation, `poetry-core` was created. Shared functionality pertaining to PEP 517 build
backends, including reading the lock file and `pyproject.toml` and building wheels/sdists, was implemented in this
package. This makes PEP 517 builds extremely fast for Poetry-managed packages.

917
poetry.lock generated Normal file

@@ -0,0 +1,917 @@
[[package]]
name = "atomicwrites"
version = "1.4.0"
description = "Atomic file writes."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "attrs"
version = "21.4.0"
description = "Classes Without Boilerplate"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.extras]
dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
[[package]]
name = "build"
version = "0.7.0"
description = "A simple, correct PEP517 package builder"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
colorama = {version = "*", markers = "os_name == \"nt\""}
importlib-metadata = {version = ">=0.22", markers = "python_version < \"3.8\""}
packaging = ">=19.0"
pep517 = ">=0.9.1"
tomli = ">=1.0.0"
[package.extras]
docs = ["furo (>=2020.11.19b18)", "sphinx (>=3.0,<4.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)"]
test = ["filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "toml (>=0.10.0)", "wheel (>=0.36.0)"]
typing = ["importlib-metadata (>=4.6.4)", "mypy (==0.910)", "typing-extensions (>=3.7.4.3)"]
virtualenv = ["virtualenv (>=20.0.35)"]
[[package]]
name = "certifi"
version = "2021.10.8"
description = "Python package for providing Mozilla's CA Bundle."
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "cfgv"
version = "3.3.1"
description = "Validate configuration and produce human readable error messages."
category = "dev"
optional = false
python-versions = ">=3.6.1"
[[package]]
name = "charset-normalizer"
version = "2.0.12"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "dev"
optional = false
python-versions = ">=3.5.0"
[package.extras]
unicode_backport = ["unicodedata2"]
[[package]]
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "commonmark"
version = "0.9.1"
description = "Python parser for the CommonMark Markdown spec"
category = "dev"
optional = false
python-versions = "*"
[package.extras]
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
[[package]]
name = "coverage"
version = "6.4"
description = "Code coverage measurement for Python"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
tomli = {version = "*", optional = true, markers = "python_version < \"3.11\" and extra == \"toml\""}
[package.extras]
toml = ["tomli"]
[[package]]
name = "distlib"
version = "0.3.4"
description = "Distribution utilities"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "filelock"
version = "3.7.0"
description = "A platform independent file lock."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.extras]
docs = ["furo (>=2021.8.17b43)", "sphinx (>=4.1)", "sphinx-autodoc-typehints (>=1.12)"]
testing = ["covdefaults (>=1.2.0)", "coverage (>=4)", "pytest (>=4)", "pytest-cov", "pytest-timeout (>=1.4.2)"]
[[package]]
name = "identify"
version = "2.5.0"
description = "File identification library for Python"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.extras]
license = ["ukkonen"]
[[package]]
name = "idna"
version = "3.3"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "dev"
optional = false
python-versions = ">=3.5"
[[package]]
name = "importlib-metadata"
version = "4.11.3"
description = "Read metadata from Python packages"
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
zipp = ">=0.5"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
perf = ["ipython"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"]
[[package]]
name = "importlib-resources"
version = "5.7.1"
description = "Read resources from Python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
[[package]]
name = "iniconfig"
version = "1.1.1"
description = "iniconfig: brain-dead simple config-ini parsing"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "jsonschema"
version = "4.5.1"
description = "An implementation of JSON Schema validation for Python"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
attrs = ">=17.4.0"
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""}
pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
[package.extras]
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
format_nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
[[package]]
name = "mypy"
version = "0.960"
description = "Optional static typing for Python"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
mypy-extensions = ">=0.4.3"
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""}
typing-extensions = ">=3.10"
[package.extras]
dmypy = ["psutil (>=4.0)"]
python2 = ["typed-ast (>=1.4.0,<2)"]
reports = ["lxml"]
[[package]]
name = "mypy-extensions"
version = "0.4.3"
description = "Experimental type system extensions for programs checked with the mypy typechecker."
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "nodeenv"
version = "1.6.0"
description = "Node.js virtual environment builder"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "packaging"
version = "21.3"
description = "Core utilities for Python packages"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
[[package]]
name = "pep517"
version = "0.12.0"
description = "Wrappers to build Python packages using PEP 517 hooks"
category = "dev"
optional = false
python-versions = "*"
[package.dependencies]
importlib_metadata = {version = "*", markers = "python_version < \"3.8\""}
tomli = {version = ">=1.1.0", markers = "python_version >= \"3.6\""}
zipp = {version = "*", markers = "python_version < \"3.8\""}
[[package]]
name = "platformdirs"
version = "2.5.2"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.extras]
docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"]
test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"]
[[package]]
name = "pluggy"
version = "1.0.0"
description = "plugin and hook calling mechanisms for python"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "pre-commit"
version = "2.19.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
cfgv = ">=2.0.0"
identify = ">=1.0.0"
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
toml = "*"
virtualenv = ">=20.0.8"
[[package]]
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "pygments"
version = "2.12.0"
description = "Pygments is a syntax highlighting package written in Python."
category = "dev"
optional = false
python-versions = ">=3.6"
[[package]]
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "dev"
optional = false
python-versions = ">=3.6.8"
[package.extras]
diagrams = ["railroad-diagrams", "jinja2"]
[[package]]
name = "pyrsistent"
version = "0.18.1"
description = "Persistent/Functional/Immutable data structures"
category = "dev"
optional = false
python-versions = ">=3.7"
[[package]]
name = "pytest"
version = "7.1.2"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
py = ">=1.8.2"
tomli = ">=1.0.0"
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "pytest-cov"
version = "3.0.0"
description = "Pytest plugin for measuring coverage."
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
coverage = {version = ">=5.2.1", extras = ["toml"]}
pytest = ">=4.6"
[package.extras]
testing = ["fields", "hunter", "process-tests", "six", "pytest-xdist", "virtualenv"]
[[package]]
name = "pytest-mock"
version = "3.7.0"
description = "Thin-wrapper around the mock package for easier use with pytest"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
pytest = ">=5.0"
[package.extras]
dev = ["pre-commit", "tox", "pytest-asyncio"]
[[package]]
name = "pyyaml"
version = "6.0"
description = "YAML parser and emitter for Python"
category = "dev"
optional = false
python-versions = ">=3.6"
[[package]]
name = "requests"
version = "2.27.1"
description = "Python HTTP for Humans."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
urllib3 = ">=1.21.1,<1.27"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "rich"
version = "12.4.1"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "dev"
optional = false
python-versions = ">=3.6.3,<4.0.0"
[package.dependencies]
commonmark = ">=0.9.0,<0.10.0"
pygments = ">=2.6.0,<3.0.0"
typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""}
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
[[package]]
name = "tox"
version = "3.25.0"
description = "tox is a generic virtualenv management and test command line tool"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
[package.dependencies]
colorama = {version = ">=0.4.1", markers = "platform_system == \"Windows\""}
filelock = ">=3.0.0"
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
packaging = ">=14"
pluggy = ">=0.12.0"
py = ">=1.4.17"
six = ">=1.14.0"
toml = ">=0.9.4"
virtualenv = ">=16.0.0,<20.0.0 || >20.0.0,<20.0.1 || >20.0.1,<20.0.2 || >20.0.2,<20.0.3 || >20.0.3,<20.0.4 || >20.0.4,<20.0.5 || >20.0.5,<20.0.6 || >20.0.6,<20.0.7 || >20.0.7"
[package.extras]
docs = ["pygments-github-lexers (>=0.0.5)", "sphinx (>=2.0.0)", "sphinxcontrib-autoprogram (>=0.1.5)", "towncrier (>=18.5.0)"]
testing = ["flaky (>=3.4.0)", "freezegun (>=0.3.11)", "pytest (>=4.0.0)", "pytest-cov (>=2.5.1)", "pytest-mock (>=1.10.0)", "pytest-randomly (>=1.0.0)", "psutil (>=5.6.1)", "pathlib2 (>=2.3.3)"]
[[package]]
name = "typed-ast"
version = "1.5.4"
description = "a fork of Python 2 and 3 ast modules with type comment support"
category = "dev"
optional = false
python-versions = ">=3.6"
[[package]]
name = "types-jsonschema"
version = "4.4.4"
description = "Typing stubs for jsonschema"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "types-setuptools"
version = "57.4.14"
description = "Typing stubs for setuptools"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "typing-extensions"
version = "4.2.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
category = "main"
optional = false
python-versions = ">=3.7"
[[package]]
name = "urllib3"
version = "1.26.9"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
[package.extras]
brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "vendoring"
version = "1.2.0"
description = "A command line tool, to simplify vendoring pure Python dependencies."
category = "dev"
optional = false
python-versions = "~= 3.8"
[package.dependencies]
click = "*"
jsonschema = "*"
packaging = "*"
requests = "*"
rich = "*"
toml = "*"
[package.extras]
doc = ["sphinx"]
test = ["pytest", "pytest-cov", "pytest-mock"]
[[package]]
name = "virtualenv"
version = "20.14.1"
description = "Virtual Python Environment builder"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
[package.dependencies]
distlib = ">=0.3.1,<1"
filelock = ">=3.2,<4"
importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""}
platformdirs = ">=2,<3"
six = ">=1.9.0,<2"
[package.extras]
docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"]
testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"]
[[package]]
name = "zipp"
version = "3.8.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
category = "main"
optional = false
python-versions = ">=3.7"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.7"
content-hash = "942983e12963ee3294081a5f38b6a66034dc7cd350b48a65f21e706a77f160d7"
[metadata.files]
atomicwrites = [
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
]
attrs = [
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
]
build = [
{file = "build-0.7.0-py3-none-any.whl", hash = "sha256:21b7ebbd1b22499c4dac536abc7606696ea4d909fd755e00f09f3c0f2c05e3c8"},
{file = "build-0.7.0.tar.gz", hash = "sha256:1aaadcd69338252ade4f7ec1265e1a19184bf916d84c9b7df095f423948cb89f"},
]
certifi = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
]
cfgv = [
{file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"},
{file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"},
]
charset-normalizer = [
{file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
{file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
]
click = [
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
commonmark = [
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
]
coverage = [
{file = "coverage-6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50ed480b798febce113709846b11f5d5ed1e529c88d8ae92f707806c50297abf"},
{file = "coverage-6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:26f8f92699756cb7af2b30720de0c5bb8d028e923a95b6d0c891088025a1ac8f"},
{file = "coverage-6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60c2147921da7f4d2d04f570e1838db32b95c5509d248f3fe6417e91437eaf41"},
{file = "coverage-6.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:750e13834b597eeb8ae6e72aa58d1d831b96beec5ad1d04479ae3772373a8088"},
{file = "coverage-6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af5b9ee0fc146e907aa0f5fb858c3b3da9199d78b7bb2c9973d95550bd40f701"},
{file = "coverage-6.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a022394996419142b33a0cf7274cb444c01d2bb123727c4bb0b9acabcb515dea"},
{file = "coverage-6.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5a78cf2c43b13aa6b56003707c5203f28585944c277c1f3f109c7b041b16bd39"},
{file = "coverage-6.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9229d074e097f21dfe0643d9d0140ee7433814b3f0fc3706b4abffd1e3038632"},
{file = "coverage-6.4-cp310-cp310-win32.whl", hash = "sha256:fb45fe08e1abc64eb836d187b20a59172053999823f7f6ef4f18a819c44ba16f"},
{file = "coverage-6.4-cp310-cp310-win_amd64.whl", hash = "sha256:3cfd07c5889ddb96a401449109a8b97a165be9d67077df6802f59708bfb07720"},
{file = "coverage-6.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:03014a74023abaf5a591eeeaf1ac66a73d54eba178ff4cb1fa0c0a44aae70383"},
{file = "coverage-6.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c82f2cd69c71698152e943f4a5a6b83a3ab1db73b88f6e769fabc86074c3b08"},
{file = "coverage-6.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b546cf2b1974ddc2cb222a109b37c6ed1778b9be7e6b0c0bc0cf0438d9e45a6"},
{file = "coverage-6.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc173f1ce9ffb16b299f51c9ce53f66a62f4d975abe5640e976904066f3c835d"},
{file = "coverage-6.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c53ad261dfc8695062fc8811ac7c162bd6096a05a19f26097f411bdf5747aee7"},
{file = "coverage-6.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:eef5292b60b6de753d6e7f2d128d5841c7915fb1e3321c3a1fe6acfe76c38052"},
{file = "coverage-6.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:543e172ce4c0de533fa892034cce260467b213c0ea8e39da2f65f9a477425211"},
{file = "coverage-6.4-cp37-cp37m-win32.whl", hash = "sha256:00c8544510f3c98476bbd58201ac2b150ffbcce46a8c3e4fb89ebf01998f806a"},
{file = "coverage-6.4-cp37-cp37m-win_amd64.whl", hash = "sha256:b84ab65444dcc68d761e95d4d70f3cfd347ceca5a029f2ffec37d4f124f61311"},
{file = "coverage-6.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d548edacbf16a8276af13063a2b0669d58bbcfca7c55a255f84aac2870786a61"},
{file = "coverage-6.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:033ebec282793bd9eb988d0271c211e58442c31077976c19c442e24d827d356f"},
{file = "coverage-6.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:742fb8b43835078dd7496c3c25a1ec8d15351df49fb0037bffb4754291ef30ce"},
{file = "coverage-6.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d55fae115ef9f67934e9f1103c9ba826b4c690e4c5bcf94482b8b2398311bf9c"},
{file = "coverage-6.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd698341626f3c77784858427bad0cdd54a713115b423d22ac83a28303d1d95"},
{file = "coverage-6.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:62d382f7d77eeeaff14b30516b17bcbe80f645f5cf02bb755baac376591c653c"},
{file = "coverage-6.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:016d7f5cf1c8c84f533a3c1f8f36126fbe00b2ec0ccca47cc5731c3723d327c6"},
{file = "coverage-6.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:69432946f154c6add0e9ede03cc43b96e2ef2733110a77444823c053b1ff5166"},
{file = "coverage-6.4-cp38-cp38-win32.whl", hash = "sha256:83bd142cdec5e4a5c4ca1d4ff6fa807d28460f9db919f9f6a31babaaa8b88426"},
{file = "coverage-6.4-cp38-cp38-win_amd64.whl", hash = "sha256:4002f9e8c1f286e986fe96ec58742b93484195defc01d5cc7809b8f7acb5ece3"},
{file = "coverage-6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e4f52c272fdc82e7c65ff3f17a7179bc5f710ebc8ce8a5cadac81215e8326740"},
{file = "coverage-6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b5578efe4038be02d76c344007b13119b2b20acd009a88dde8adec2de4f630b5"},
{file = "coverage-6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8099ea680201c2221f8468c372198ceba9338a5fec0e940111962b03b3f716a"},
{file = "coverage-6.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a00441f5ea4504f5abbc047589d09e0dc33eb447dc45a1a527c8b74bfdd32c65"},
{file = "coverage-6.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e76bd16f0e31bc2b07e0fb1379551fcd40daf8cdf7e24f31a29e442878a827c"},
{file = "coverage-6.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8d2e80dd3438e93b19e1223a9850fa65425e77f2607a364b6fd134fcd52dc9df"},
{file = "coverage-6.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:341e9c2008c481c5c72d0e0dbf64980a4b2238631a7f9780b0fe2e95755fb018"},
{file = "coverage-6.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:21e6686a95025927775ac501e74f5940cdf6fe052292f3a3f7349b0abae6d00f"},
{file = "coverage-6.4-cp39-cp39-win32.whl", hash = "sha256:968ed5407f9460bd5a591cefd1388cc00a8f5099de9e76234655ae48cfdbe2c3"},
{file = "coverage-6.4-cp39-cp39-win_amd64.whl", hash = "sha256:e35217031e4b534b09f9b9a5841b9344a30a6357627761d4218818b865d45055"},
{file = "coverage-6.4-pp36.pp37.pp38-none-any.whl", hash = "sha256:e637ae0b7b481905358624ef2e81d7fb0b1af55f5ff99f9ba05442a444b11e45"},
{file = "coverage-6.4.tar.gz", hash = "sha256:727dafd7f67a6e1cad808dc884bd9c5a2f6ef1f8f6d2f22b37b96cb0080d4f49"},
]
distlib = [
{file = "distlib-0.3.4-py2.py3-none-any.whl", hash = "sha256:6564fe0a8f51e734df6333d08b8b94d4ea8ee6b99b5ed50613f731fd4089f34b"},
{file = "distlib-0.3.4.zip", hash = "sha256:e4b58818180336dc9c529bfb9a0b58728ffc09ad92027a3f30b7cd91e3458579"},
]
filelock = [
{file = "filelock-3.7.0-py3-none-any.whl", hash = "sha256:c7b5fdb219b398a5b28c8e4c1893ef5f98ece6a38c6ab2c22e26ec161556fed6"},
{file = "filelock-3.7.0.tar.gz", hash = "sha256:b795f1b42a61bbf8ec7113c341dad679d772567b936fbd1bf43c9a238e673e20"},
]
identify = [
{file = "identify-2.5.0-py2.py3-none-any.whl", hash = "sha256:3acfe15a96e4272b4ec5662ee3e231ceba976ef63fd9980ed2ce9cc415df393f"},
{file = "identify-2.5.0.tar.gz", hash = "sha256:c83af514ea50bf2be2c4a3f2fb349442b59dc87284558ae9ff54191bff3541d2"},
]
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
]
importlib-metadata = [
{file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"},
{file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"},
]
importlib-resources = [
{file = "importlib_resources-5.7.1-py3-none-any.whl", hash = "sha256:e447dc01619b1e951286f3929be820029d48c75eb25d265c28b92a16548212b8"},
{file = "importlib_resources-5.7.1.tar.gz", hash = "sha256:b6062987dfc51f0fcb809187cffbd60f35df7acb4589091f154214af6d0d49d3"},
]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
jsonschema = [
{file = "jsonschema-4.5.1-py3-none-any.whl", hash = "sha256:71b5e39324422543546572954ce71c67728922c104902cb7ce252e522235b33f"},
{file = "jsonschema-4.5.1.tar.gz", hash = "sha256:7c6d882619340c3347a1bf7315e147e6d3dae439033ae6383d6acb908c101dfc"},
]
mypy = [
{file = "mypy-0.960-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3a3e525cd76c2c4f90f1449fd034ba21fcca68050ff7c8397bb7dd25dd8b8248"},
{file = "mypy-0.960-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7a76dc4f91e92db119b1be293892df8379b08fd31795bb44e0ff84256d34c251"},
{file = "mypy-0.960-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffdad80a92c100d1b0fe3d3cf1a4724136029a29afe8566404c0146747114382"},
{file = "mypy-0.960-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7d390248ec07fa344b9f365e6ed9d205bd0205e485c555bed37c4235c868e9d5"},
{file = "mypy-0.960-cp310-cp310-win_amd64.whl", hash = "sha256:925aa84369a07846b7f3b8556ccade1f371aa554f2bd4fb31cb97a24b73b036e"},
{file = "mypy-0.960-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:239d6b2242d6c7f5822163ee082ef7a28ee02e7ac86c35593ef923796826a385"},
{file = "mypy-0.960-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f1ba54d440d4feee49d8768ea952137316d454b15301c44403db3f2cb51af024"},
{file = "mypy-0.960-cp36-cp36m-win_amd64.whl", hash = "sha256:cb7752b24528c118a7403ee955b6a578bfcf5879d5ee91790667c8ea511d2085"},
{file = "mypy-0.960-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:826a2917c275e2ee05b7c7b736c1e6549a35b7ea5a198ca457f8c2ebea2cbecf"},
{file = "mypy-0.960-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3eabcbd2525f295da322dff8175258f3fc4c3eb53f6d1929644ef4d99b92e72d"},
{file = "mypy-0.960-cp37-cp37m-win_amd64.whl", hash = "sha256:f47322796c412271f5aea48381a528a613f33e0a115452d03ae35d673e6064f8"},
{file = "mypy-0.960-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2c7f8bb9619290836a4e167e2ef1f2cf14d70e0bc36c04441e41487456561409"},
{file = "mypy-0.960-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fbfb873cf2b8d8c3c513367febde932e061a5f73f762896826ba06391d932b2a"},
{file = "mypy-0.960-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc537885891382e08129d9862553b3d00d4be3eb15b8cae9e2466452f52b0117"},
{file = "mypy-0.960-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:481f98c6b24383188c928f33dd2f0776690807e12e9989dd0419edd5c74aa53b"},
{file = "mypy-0.960-cp38-cp38-win_amd64.whl", hash = "sha256:29dc94d9215c3eb80ac3c2ad29d0c22628accfb060348fd23d73abe3ace6c10d"},
{file = "mypy-0.960-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:33d53a232bb79057f33332dbbb6393e68acbcb776d2f571ba4b1d50a2c8ba873"},
{file = "mypy-0.960-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8d645e9e7f7a5da3ec3bbcc314ebb9bb22c7ce39e70367830eb3c08d0140b9ce"},
{file = "mypy-0.960-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:85cf2b14d32b61db24ade8ac9ae7691bdfc572a403e3cb8537da936e74713275"},
{file = "mypy-0.960-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a85a20b43fa69efc0b955eba1db435e2ffecb1ca695fe359768e0503b91ea89f"},
{file = "mypy-0.960-cp39-cp39-win_amd64.whl", hash = "sha256:0ebfb3f414204b98c06791af37a3a96772203da60636e2897408517fcfeee7a8"},
{file = "mypy-0.960-py3-none-any.whl", hash = "sha256:bfd4f6536bd384c27c392a8b8f790fd0ed5c0cf2f63fc2fed7bce56751d53026"},
{file = "mypy-0.960.tar.gz", hash = "sha256:d4fccf04c1acf750babd74252e0f2db6bd2ac3aa8fe960797d9f3ef41cf2bfd4"},
]
mypy-extensions = [
{file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
{file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
]
nodeenv = [
{file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"},
{file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"},
]
packaging = [
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
]
pep517 = [
{file = "pep517-0.12.0-py2.py3-none-any.whl", hash = "sha256:dd884c326898e2c6e11f9e0b64940606a93eb10ea022a2e067959f3a110cf161"},
{file = "pep517-0.12.0.tar.gz", hash = "sha256:931378d93d11b298cf511dd634cf5ea4cb249a28ef84160b3247ee9afb4e8ab0"},
]
platformdirs = [
{file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"},
{file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"},
]
pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
pre-commit = [
{file = "pre_commit-2.19.0-py2.py3-none-any.whl", hash = "sha256:10c62741aa5704faea2ad69cb550ca78082efe5697d6f04e5710c3c229afdd10"},
{file = "pre_commit-2.19.0.tar.gz", hash = "sha256:4233a1e38621c87d9dda9808c6606d7e7ba0e087cd56d3fe03202a01d2919615"},
]
py = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
pygments = [
{file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"},
{file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"},
]
pyparsing = [
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
]
pyrsistent = [
{file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
{file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"},
{file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e"},
{file = "pyrsistent-0.18.1-cp310-cp310-win32.whl", hash = "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6"},
{file = "pyrsistent-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec"},
{file = "pyrsistent-0.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b"},
{file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc"},
{file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22"},
{file = "pyrsistent-0.18.1-cp37-cp37m-win32.whl", hash = "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8"},
{file = "pyrsistent-0.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286"},
{file = "pyrsistent-0.18.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6"},
{file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec"},
{file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c"},
{file = "pyrsistent-0.18.1-cp38-cp38-win32.whl", hash = "sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca"},
{file = "pyrsistent-0.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a"},
{file = "pyrsistent-0.18.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5"},
{file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045"},
{file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c"},
{file = "pyrsistent-0.18.1-cp39-cp39-win32.whl", hash = "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc"},
{file = "pyrsistent-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07"},
{file = "pyrsistent-0.18.1.tar.gz", hash = "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96"},
]
pytest = [
{file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
{file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
]
pytest-cov = [
{file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"},
{file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"},
]
pytest-mock = [
{file = "pytest-mock-3.7.0.tar.gz", hash = "sha256:5112bd92cc9f186ee96e1a92efc84969ea494939c3aead39c50f421c4cc69534"},
{file = "pytest_mock-3.7.0-py3-none-any.whl", hash = "sha256:6cff27cec936bf81dc5ee87f07132b807bcda51106b5ec4b90a04331cba76231"},
]
pyyaml = [
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
{file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
{file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
{file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
{file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
{file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
{file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
{file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
{file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
{file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
{file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
{file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
]
requests = [
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
]
rich = [
{file = "rich-12.4.1-py3-none-any.whl", hash = "sha256:d13c6c90c42e24eb7ce660db397e8c398edd58acb7f92a2a88a95572b838aaa4"},
{file = "rich-12.4.1.tar.gz", hash = "sha256:d239001c0fb7de985e21ec9a4bb542b5150350330bbc1849f835b9cbc8923b91"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
tox = [
{file = "tox-3.25.0-py2.py3-none-any.whl", hash = "sha256:0805727eb4d6b049de304977dfc9ce315a1938e6619c3ab9f38682bb04662a5a"},
{file = "tox-3.25.0.tar.gz", hash = "sha256:37888f3092aa4e9f835fc8cc6dadbaaa0782651c41ef359e3a5743fcb0308160"},
]
typed-ast = [
{file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"},
{file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"},
{file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"},
{file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"},
{file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"},
{file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"},
{file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"},
{file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"},
{file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"},
{file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"},
{file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"},
{file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"},
{file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"},
{file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"},
{file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"},
{file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"},
{file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"},
{file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"},
{file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"},
{file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"},
{file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"},
{file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"},
{file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"},
{file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"},
]
types-jsonschema = [
{file = "types-jsonschema-4.4.4.tar.gz", hash = "sha256:d03f0c1a97ff06dda9535dfa51916a98f38bf40d6828ef4d93bc40708effe507"},
{file = "types_jsonschema-4.4.4-py3-none-any.whl", hash = "sha256:294d2de9ea3564fbec6c56153e84d1f3f7d9b2ada36e183d88a63c126da7bc3d"},
]
types-setuptools = [
{file = "types-setuptools-57.4.14.tar.gz", hash = "sha256:df02fe1dd244f58cf4e67cfc3d0a97930a2d61a72dd89f21d81c71017cd83f9a"},
{file = "types_setuptools-57.4.14-py3-none-any.whl", hash = "sha256:828f7e7e51e157876f47c80518b23ba0c3c36aa8081efd39d5d39f393938aec9"},
]
typing-extensions = [
{file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"},
{file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"},
]
urllib3 = [
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
]
vendoring = [
{file = "vendoring-1.2.0-py2.py3-none-any.whl", hash = "sha256:35b5fca683264e69e851a7580bb6a6f9848af024ffc8382ed5491bcfa55750c6"},
{file = "vendoring-1.2.0.tar.gz", hash = "sha256:6340a84bf542222c96f22ebc3cb87e4d86932dc04bc8d446e38285594702c00e"},
]
virtualenv = [
{file = "virtualenv-20.14.1-py2.py3-none-any.whl", hash = "sha256:e617f16e25b42eb4f6e74096b9c9e37713cf10bf30168fb4a739f3fa8f898a3a"},
{file = "virtualenv-20.14.1.tar.gz", hash = "sha256:ef589a79795589aada0c1c5b319486797c03b67ac3984c48c669c0e4f50df3a5"},
]
zipp = [
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
]

121
pyproject.toml Normal file
View File

@ -0,0 +1,121 @@
[tool.poetry]
name = "poetry-core"
version = "1.4.0"
description = "Poetry PEP 517 Build Backend"
authors = ["Sébastien Eustace <sebastien@eustace.io>"]
license = "MIT"
readme = "README.md"
homepage = "https://github.com/python-poetry/poetry-core"
repository = "https://github.com/python-poetry/poetry-core"
keywords = ["packaging", "dependency", "poetry"]
classifiers = [
"Topic :: Software Development :: Build Tools",
"Topic :: Software Development :: Libraries :: Python Modules"
]
packages = [
{ include = "poetry", from = "src" },
]
include = [
{ path = "tests", format = "sdist" },
]
exclude = [
"**/*.pyc",
"**/*.pyi",
]
[tool.poetry.build]
generate-setup-file = false
[tool.poetry.urls]
"Bug Tracker" = "https://github.com/python-poetry/poetry/issues"
[tool.poetry.dependencies]
python = "^3.7"
# required for compatibility
importlib-metadata = {version = ">=1.7.0", python = "<3.8"}
[tool.poetry.dev-dependencies]
pre-commit = "^2.15.0"
pyrsistent = "^0.18.0"
pytest = "^7.1.2"
pytest-cov = "^3.0.0"
pytest-mock = "^3.5"
tox = "^3.0"
vendoring = {version = "^1.0", python = "^3.8"}
build = "^0.7.0"
mypy = ">=0.960"
types-jsonschema = ">=4.4.4"
types-setuptools = ">=57.4.14"
[tool.black]
line-length = 88
preview = true
include = '\.pyi?$'
extend-exclude = "src/poetry/core/_vendor/*"
[tool.isort]
profile = "black"
force_single_line = true
atomic = true
include_trailing_comma = true
lines_after_imports = 2
lines_between_types = 1
use_parentheses = true
skip_glob = ["*/setup.py", "*/poetry/core/_vendor/*"]
filter_files = true
known_first_party = "poetry.core"
known_third_party = ["poetry.core._vendor"]
[tool.mypy]
strict = true
explicit_package_bases = true
namespace_packages = true
show_error_codes = true
enable_error_code = [
"ignore-without-code",
"redundant-expr",
"truthy-bool",
]
mypy_path = "src"
files = "src, tests"
exclude = "(?x)(^tests/.*/fixtures | ^src/poetry/core/_vendor)"
[[tool.mypy.overrides]]
module = [
'lark.*',
'tomlkit.*',
'virtualenv.*',
]
ignore_missing_imports = true
[tool.vendoring]
destination = "src/poetry/core/_vendor/"
requirements = "src/poetry/core/_vendor/vendor.txt"
namespace = ""
protected-files = ["vendor.txt"]
patches-dir = "vendors/patches"
[tool.vendoring.transformations]
drop = [
"bin/",
"*.so",
"typing.*",
"*/tests/"
]
[tool.vendoring.license.fallback-urls]
pyrsistent = "https://raw.githubusercontent.com/tobgu/pyrsistent/master/LICENSE.mit"
[build-system]
requires = []
build-backend = "poetry.core.masonry.api"
backend-path = ["src"]

View File

@ -0,0 +1,15 @@
from __future__ import annotations
import sys
from pathlib import Path
# this cannot presently be replaced with importlib.metadata.version because, when
# poetry-core is building itself, it is not yet available as an installed distribution.
__version__ = "1.4.0"
__vendor_site__ = (Path(__file__).parent / "_vendor").as_posix()
if __vendor_site__ not in sys.path:
sys.path.insert(0, __vendor_site__)
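# Illustrative note (not part of the upstream file): once poetry-core is installed
# as a distribution, the same string could in principle be read at runtime, e.g.:
#
#     from importlib.metadata import version  # Python 3.8+, else the importlib_metadata backport
#     assert version("poetry-core") == __version__
#
# This is only a hypothetical sketch; it cannot be used while poetry-core is
# building itself, which is why the constant above stays hard-coded.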

View File

@ -0,0 +1 @@
__version__ = '0.19.2'

View File

@ -0,0 +1,79 @@
# SPDX-License-Identifier: MIT
import sys
from functools import partial
from . import converters, exceptions, filters, setters, validators
from ._cmp import cmp_using
from ._config import get_run_validators, set_run_validators
from ._funcs import asdict, assoc, astuple, evolve, has, resolve_types
from ._make import (
NOTHING,
Attribute,
Factory,
attrib,
attrs,
fields,
fields_dict,
make_class,
validate,
)
from ._version_info import VersionInfo
__version__ = "22.1.0"
__version_info__ = VersionInfo._from_version_string(__version__)
__title__ = "attrs"
__description__ = "Classes Without Boilerplate"
__url__ = "https://www.attrs.org/"
__uri__ = __url__
__doc__ = __description__ + " <" + __uri__ + ">"
__author__ = "Hynek Schlawack"
__email__ = "hs@ox.cx"
__license__ = "MIT"
__copyright__ = "Copyright (c) 2015 Hynek Schlawack"
s = attributes = attrs
ib = attr = attrib
dataclass = partial(attrs, auto_attribs=True) # happy Easter ;)
__all__ = [
"Attribute",
"Factory",
"NOTHING",
"asdict",
"assoc",
"astuple",
"attr",
"attrib",
"attributes",
"attrs",
"cmp_using",
"converters",
"evolve",
"exceptions",
"fields",
"fields_dict",
"filters",
"get_run_validators",
"has",
"ib",
"make_class",
"resolve_types",
"s",
"set_run_validators",
"setters",
"validate",
"validators",
]
if sys.version_info[:2] >= (3, 6):
from ._next_gen import define, field, frozen, mutable # noqa: F401
__all__.extend(("define", "field", "frozen", "mutable"))

View File

@ -0,0 +1,155 @@
# SPDX-License-Identifier: MIT
import functools
import types
from ._make import _make_ne
_operation_names = {"eq": "==", "lt": "<", "le": "<=", "gt": ">", "ge": ">="}
def cmp_using(
eq=None,
lt=None,
le=None,
gt=None,
ge=None,
require_same_type=True,
class_name="Comparable",
):
"""
Create a class that can be passed into `attr.ib`'s ``eq``, ``order``, and
``cmp`` arguments to customize field comparison.
The resulting class will have a full set of ordering methods if
at least one of ``{lt, le, gt, ge}`` and ``eq`` are provided.
:param Optional[callable] eq: `callable` used to evaluate equality
of two objects.
:param Optional[callable] lt: `callable` used to evaluate whether
one object is less than another object.
:param Optional[callable] le: `callable` used to evaluate whether
one object is less than or equal to another object.
:param Optional[callable] gt: `callable` used to evaluate whether
one object is greater than another object.
:param Optional[callable] ge: `callable` used to evaluate whether
one object is greater than or equal to another object.
:param bool require_same_type: When `True`, equality and ordering methods
will return `NotImplemented` if objects are not of the same type.
:param Optional[str] class_name: Name of class. Defaults to 'Comparable'.
See `comparison` for more details.
.. versionadded:: 21.1.0
"""
body = {
"__slots__": ["value"],
"__init__": _make_init(),
"_requirements": [],
"_is_comparable_to": _is_comparable_to,
}
# Add operations.
num_order_functions = 0
has_eq_function = False
if eq is not None:
has_eq_function = True
body["__eq__"] = _make_operator("eq", eq)
body["__ne__"] = _make_ne()
if lt is not None:
num_order_functions += 1
body["__lt__"] = _make_operator("lt", lt)
if le is not None:
num_order_functions += 1
body["__le__"] = _make_operator("le", le)
if gt is not None:
num_order_functions += 1
body["__gt__"] = _make_operator("gt", gt)
if ge is not None:
num_order_functions += 1
body["__ge__"] = _make_operator("ge", ge)
type_ = types.new_class(
class_name, (object,), {}, lambda ns: ns.update(body)
)
# Add same type requirement.
if require_same_type:
type_._requirements.append(_check_same_type)
# Add total ordering if at least one operation was defined.
if 0 < num_order_functions < 4:
if not has_eq_function:
# functools.total_ordering requires __eq__ to be defined,
# so raise an early error here to keep a nice stack.
raise ValueError(
"eq must be defined in order to complete ordering from "
"lt, le, gt, ge."
)
type_ = functools.total_ordering(type_)
return type_
def _make_init():
"""
Create __init__ method.
"""
def __init__(self, value):
"""
Initialize object with *value*.
"""
self.value = value
return __init__
def _make_operator(name, func):
"""
Create operator method.
"""
def method(self, other):
if not self._is_comparable_to(other):
return NotImplemented
result = func(self.value, other.value)
if result is NotImplemented:
return NotImplemented
return result
method.__name__ = "__%s__" % (name,)
method.__doc__ = "Return a %s b. Computed by attrs." % (
_operation_names[name],
)
return method
def _is_comparable_to(self, other):
"""
Check whether `other` is comparable to `self`.
"""
for func in self._requirements:
if not func(self, other):
return False
return True
def _check_same_type(self, other):
"""
Return True if *self* and *other* are of the same type, False otherwise.
"""
return other.value.__class__ is self.value.__class__
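# Illustrative usage sketch (not part of the vendored module): cmp_using builds a
# wrapper type whose instances compare via the supplied callables, which is what
# attr.ib()/attr.field() accept for their eq/order arguments. Hypothetical example:
#
#     import attr
#
#     FloatIsh = cmp_using(eq=lambda a, b: abs(a - b) < 1e-9, class_name="FloatIsh")
#
#     @attr.s
#     class Point:
#         x = attr.ib(eq=FloatIsh)
#
#     assert Point(1.0) == Point(1.0 + 1e-12)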

View File

@ -0,0 +1,185 @@
# SPDX-License-Identifier: MIT
import inspect
import platform
import sys
import threading
import types
import warnings
from collections.abc import Mapping, Sequence # noqa
PYPY = platform.python_implementation() == "PyPy"
PY36 = sys.version_info[:2] >= (3, 6)
HAS_F_STRINGS = PY36
PY310 = sys.version_info[:2] >= (3, 10)
if PYPY or PY36:
ordered_dict = dict
else:
from collections import OrderedDict
ordered_dict = OrderedDict
def just_warn(*args, **kw):
warnings.warn(
"Running interpreter doesn't sufficiently support code object "
"introspection. Some features like bare super() or accessing "
"__class__ will not work with slotted classes.",
RuntimeWarning,
stacklevel=2,
)
class _AnnotationExtractor:
"""
Extract type annotations from a callable, returning None whenever there
is none.
"""
__slots__ = ["sig"]
def __init__(self, callable):
try:
self.sig = inspect.signature(callable)
except (ValueError, TypeError): # inspect failed
self.sig = None
def get_first_param_type(self):
"""
Return the type annotation of the first argument if it's not empty.
"""
if not self.sig:
return None
params = list(self.sig.parameters.values())
if params and params[0].annotation is not inspect.Parameter.empty:
return params[0].annotation
return None
def get_return_type(self):
"""
Return the return type if it's not empty.
"""
if (
self.sig
and self.sig.return_annotation is not inspect.Signature.empty
):
return self.sig.return_annotation
return None
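# Illustrative sketch (not part of the vendored module): _AnnotationExtractor is an
# internal helper used to read type hints off converters and validators. A
# hypothetical probe of its behaviour:
#
#     def _to_int(value: str) -> int:
#         return int(value)
#
#     _extractor = _AnnotationExtractor(_to_int)
#     assert _extractor.get_first_param_type() is str
#     assert _extractor.get_return_type() is int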
def make_set_closure_cell():
"""Return a function of two arguments (cell, value) which sets
the value stored in the closure cell `cell` to `value`.
"""
# pypy makes this easy. (It also supports the logic below, but
# why not do the easy/fast thing?)
if PYPY:
def set_closure_cell(cell, value):
cell.__setstate__((value,))
return set_closure_cell
# Otherwise gotta do it the hard way.
# Create a function that will set its first cellvar to `value`.
def set_first_cellvar_to(value):
x = value
return
# This function will be eliminated as dead code, but
# not before its reference to `x` forces `x` to be
# represented as a closure cell rather than a local.
def force_x_to_be_a_cell(): # pragma: no cover
return x
try:
# Extract the code object and make sure our assumptions about
# the closure behavior are correct.
co = set_first_cellvar_to.__code__
if co.co_cellvars != ("x",) or co.co_freevars != ():
raise AssertionError # pragma: no cover
# Convert this code object to a code object that sets the
# function's first _freevar_ (not cellvar) to the argument.
if sys.version_info >= (3, 8):
def set_closure_cell(cell, value):
cell.cell_contents = value
else:
args = [co.co_argcount]
args.append(co.co_kwonlyargcount)
args.extend(
[
co.co_nlocals,
co.co_stacksize,
co.co_flags,
co.co_code,
co.co_consts,
co.co_names,
co.co_varnames,
co.co_filename,
co.co_name,
co.co_firstlineno,
co.co_lnotab,
# These two arguments are reversed:
co.co_cellvars,
co.co_freevars,
]
)
set_first_freevar_code = types.CodeType(*args)
def set_closure_cell(cell, value):
# Create a function using the set_first_freevar_code,
# whose first closure cell is `cell`. Calling it will
# change the value of that cell.
setter = types.FunctionType(
set_first_freevar_code, {}, "setter", (), (cell,)
)
# And call it to set the cell.
setter(value)
# Make sure it works on this interpreter:
def make_func_with_cell():
x = None
def func():
return x # pragma: no cover
return func
cell = make_func_with_cell().__closure__[0]
set_closure_cell(cell, 100)
if cell.cell_contents != 100:
raise AssertionError # pragma: no cover
except Exception:
return just_warn
else:
return set_closure_cell
set_closure_cell = make_set_closure_cell()
# Thread-local global to track attrs instances which are already being repr'd.
# This is needed because there is no other (thread-safe) way to pass info
# about the instances that are already being repr'd through the call stack
# in order to ensure we don't perform infinite recursion.
#
# For instance, if an instance contains a dict which contains that instance,
# we need to know that we're already repr'ing the outside instance from within
# the dict's repr() call.
#
# This lives here rather than in _make.py so that the functions in _make.py
# don't have a direct reference to the thread-local in their globals dict.
# If they have such a reference, it breaks cloudpickle.
repr_context = threading.local()

View File

@ -0,0 +1,31 @@
# SPDX-License-Identifier: MIT
__all__ = ["set_run_validators", "get_run_validators"]
_run_validators = True
def set_run_validators(run):
"""
Set whether or not validators are run. By default, they are run.
.. deprecated:: 21.3.0 It will not be removed, but it also will not be
moved to the new ``attrs`` namespace. Use `attrs.validators.set_disabled()`
instead.
"""
if not isinstance(run, bool):
raise TypeError("'run' must be bool.")
global _run_validators
_run_validators = run
def get_run_validators():
"""
Return whether or not validators are run.
.. deprecated:: 21.3.0 It will not be removed, but it also will not be
moved to the new ``attrs`` namespace. Use `attrs.validators.get_disabled()`
instead.
"""
return _run_validators
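# Illustrative sketch (not part of the vendored module): the deprecated pair above
# is kept for backwards compatibility; the replacement helpers live in the
# validators module. Hypothetical usage:
#
#     import attr
#
#     attr.validators.set_disabled(True)    # e.g. to speed up a test suite
#     assert attr.validators.get_disabled() is True
#     attr.validators.set_disabled(False)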

View File

@ -0,0 +1,420 @@
# SPDX-License-Identifier: MIT
import copy
from ._make import NOTHING, _obj_setattr, fields
from .exceptions import AttrsAttributeNotFoundError
def asdict(
inst,
recurse=True,
filter=None,
dict_factory=dict,
retain_collection_types=False,
value_serializer=None,
):
"""
Return the ``attrs`` attribute values of *inst* as a dict.
Optionally recurse into other ``attrs``-decorated classes.
:param inst: Instance of an ``attrs``-decorated class.
:param bool recurse: Recurse into classes that are also
``attrs``-decorated.
:param callable filter: A callable whose return code determines whether an
attribute or element is included (``True``) or dropped (``False``). Is
called with the `attrs.Attribute` as the first argument and the
value as the second argument.
:param callable dict_factory: A callable to produce dictionaries from. For
example, to produce ordered dictionaries instead of normal Python
dictionaries, pass in ``collections.OrderedDict``.
:param bool retain_collection_types: Do not convert to ``list`` when
encountering an attribute whose type is ``tuple`` or ``set``. Only
meaningful if ``recurse`` is ``True``.
:param Optional[callable] value_serializer: A hook that is called for every
attribute or dict key/value. It receives the current instance, field
and value and must return the (updated) value. The hook is run *after*
the optional *filter* has been applied.
:rtype: return type of *dict_factory*
:raise attr.exceptions.NotAnAttrsClassError: If *cls* is not an ``attrs``
class.
.. versionadded:: 16.0.0 *dict_factory*
.. versionadded:: 16.1.0 *retain_collection_types*
.. versionadded:: 20.3.0 *value_serializer*
.. versionadded:: 21.3.0 If a dict has a collection for a key, it is
serialized as a tuple.
"""
attrs = fields(inst.__class__)
rv = dict_factory()
for a in attrs:
v = getattr(inst, a.name)
if filter is not None and not filter(a, v):
continue
if value_serializer is not None:
v = value_serializer(inst, a, v)
if recurse is True:
if has(v.__class__):
rv[a.name] = asdict(
v,
recurse=True,
filter=filter,
dict_factory=dict_factory,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
)
elif isinstance(v, (tuple, list, set, frozenset)):
cf = v.__class__ if retain_collection_types is True else list
rv[a.name] = cf(
[
_asdict_anything(
i,
is_key=False,
filter=filter,
dict_factory=dict_factory,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
)
for i in v
]
)
elif isinstance(v, dict):
df = dict_factory
rv[a.name] = df(
(
_asdict_anything(
kk,
is_key=True,
filter=filter,
dict_factory=df,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
),
_asdict_anything(
vv,
is_key=False,
filter=filter,
dict_factory=df,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
),
)
for kk, vv in v.items()
)
else:
rv[a.name] = v
else:
rv[a.name] = v
return rv
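# Illustrative usage sketch (not part of the vendored module), assuming a small
# hypothetical attrs class:
#
#     import attr
#
#     @attr.s(auto_attribs=True)
#     class User:
#         name: str
#         tags: list
#
#     assert attr.asdict(User("bob", ["a", "b"])) == {"name": "bob", "tags": ["a", "b"]}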
def _asdict_anything(
val,
is_key,
filter,
dict_factory,
retain_collection_types,
value_serializer,
):
"""
``asdict`` only works on attrs instances, this works on anything.
"""
if getattr(val.__class__, "__attrs_attrs__", None) is not None:
# Attrs class.
rv = asdict(
val,
recurse=True,
filter=filter,
dict_factory=dict_factory,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
)
elif isinstance(val, (tuple, list, set, frozenset)):
if retain_collection_types is True:
cf = val.__class__
elif is_key:
cf = tuple
else:
cf = list
rv = cf(
[
_asdict_anything(
i,
is_key=False,
filter=filter,
dict_factory=dict_factory,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
)
for i in val
]
)
elif isinstance(val, dict):
df = dict_factory
rv = df(
(
_asdict_anything(
kk,
is_key=True,
filter=filter,
dict_factory=df,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
),
_asdict_anything(
vv,
is_key=False,
filter=filter,
dict_factory=df,
retain_collection_types=retain_collection_types,
value_serializer=value_serializer,
),
)
for kk, vv in val.items()
)
else:
rv = val
if value_serializer is not None:
rv = value_serializer(None, None, rv)
return rv
def astuple(
inst,
recurse=True,
filter=None,
tuple_factory=tuple,
retain_collection_types=False,
):
"""
Return the ``attrs`` attribute values of *inst* as a tuple.
Optionally recurse into other ``attrs``-decorated classes.
:param inst: Instance of an ``attrs``-decorated class.
:param bool recurse: Recurse into classes that are also
``attrs``-decorated.
:param callable filter: A callable whose return code determines whether an
attribute or element is included (``True``) or dropped (``False``). Is
called with the `attrs.Attribute` as the first argument and the
value as the second argument.
:param callable tuple_factory: A callable to produce tuples from. For
example, to produce lists instead of tuples.
:param bool retain_collection_types: Do not convert to ``list``
or ``dict`` when encountering an attribute whose type is
``tuple``, ``dict`` or ``set``. Only meaningful if ``recurse`` is
``True``.
:rtype: return type of *tuple_factory*
:raise attr.exceptions.NotAnAttrsClassError: If *cls* is not an ``attrs``
class.
.. versionadded:: 16.2.0
"""
attrs = fields(inst.__class__)
rv = []
retain = retain_collection_types # Very long. :/
for a in attrs:
v = getattr(inst, a.name)
if filter is not None and not filter(a, v):
continue
if recurse is True:
if has(v.__class__):
rv.append(
astuple(
v,
recurse=True,
filter=filter,
tuple_factory=tuple_factory,
retain_collection_types=retain,
)
)
elif isinstance(v, (tuple, list, set, frozenset)):
cf = v.__class__ if retain is True else list
rv.append(
cf(
[
astuple(
j,
recurse=True,
filter=filter,
tuple_factory=tuple_factory,
retain_collection_types=retain,
)
if has(j.__class__)
else j
for j in v
]
)
)
elif isinstance(v, dict):
df = v.__class__ if retain is True else dict
rv.append(
df(
(
astuple(
kk,
tuple_factory=tuple_factory,
retain_collection_types=retain,
)
if has(kk.__class__)
else kk,
astuple(
vv,
tuple_factory=tuple_factory,
retain_collection_types=retain,
)
if has(vv.__class__)
else vv,
)
for kk, vv in v.items()
)
)
else:
rv.append(v)
else:
rv.append(v)
return rv if tuple_factory is list else tuple_factory(rv)
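# Illustrative sketch (not part of the vendored module): astuple mirrors asdict but
# returns positional values. Reusing the hypothetical User class from the asdict
# sketch above:
#
#     assert attr.astuple(User("bob", ["a", "b"])) == ("bob", ["a", "b"])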
def has(cls):
"""
Check whether *cls* is a class with ``attrs`` attributes.
:param type cls: Class to introspect.
:raise TypeError: If *cls* is not a class.
:rtype: bool
"""
return getattr(cls, "__attrs_attrs__", None) is not None
def assoc(inst, **changes):
"""
Copy *inst* and apply *changes*.
:param inst: Instance of a class with ``attrs`` attributes.
:param changes: Keyword changes in the new copy.
:return: A copy of inst with *changes* incorporated.
:raise attr.exceptions.AttrsAttributeNotFoundError: If *attr_name* couldn't
be found on *cls*.
:raise attr.exceptions.NotAnAttrsClassError: If *cls* is not an ``attrs``
class.
.. deprecated:: 17.1.0
Use `attrs.evolve` instead if you can.
This function will not be removed due to the slightly different approach
compared to `attrs.evolve`.
"""
import warnings
warnings.warn(
"assoc is deprecated and will be removed after 2018/01.",
DeprecationWarning,
stacklevel=2,
)
new = copy.copy(inst)
attrs = fields(inst.__class__)
for k, v in changes.items():
a = getattr(attrs, k, NOTHING)
if a is NOTHING:
raise AttrsAttributeNotFoundError(
"{k} is not an attrs attribute on {cl}.".format(
k=k, cl=new.__class__
)
)
_obj_setattr(new, k, v)
return new
def evolve(inst, **changes):
"""
Create a new instance, based on *inst* with *changes* applied.
:param inst: Instance of a class with ``attrs`` attributes.
:param changes: Keyword changes in the new copy.
:return: A copy of inst with *changes* incorporated.
:raise TypeError: If *attr_name* couldn't be found in the class
``__init__``.
:raise attr.exceptions.NotAnAttrsClassError: If *cls* is not an ``attrs``
class.
.. versionadded:: 17.1.0
"""
cls = inst.__class__
attrs = fields(cls)
for a in attrs:
if not a.init:
continue
attr_name = a.name # To deal with private attributes.
init_name = attr_name if attr_name[0] != "_" else attr_name[1:]
if init_name not in changes:
changes[init_name] = getattr(inst, attr_name)
return cls(**changes)
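# Illustrative sketch (not part of the vendored module), again reusing the
# hypothetical User class from the asdict sketch above:
#
#     u = User("bob", ["a"])
#     assert attr.evolve(u, name="alice") == User("alice", ["a"])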
def resolve_types(cls, globalns=None, localns=None, attribs=None):
"""
Resolve any strings and forward annotations in type annotations.
This is only required if you need concrete types in `Attribute`'s *type*
field. In other words, you don't need to resolve your types if you only
use them for static type checking.
With no arguments, names will be looked up in the module in which the class
was created. If this is not what you want, e.g. if the name only exists
inside a method, you may pass *globalns* or *localns* to specify other
dictionaries in which to look up these names. See the docs of
`typing.get_type_hints` for more details.
:param type cls: Class to resolve.
:param Optional[dict] globalns: Dictionary containing global variables.
:param Optional[dict] localns: Dictionary containing local variables.
:param Optional[list] attribs: List of attribs for the given class.
This is necessary when calling from inside a ``field_transformer``
since *cls* is not an ``attrs`` class yet.
:raise TypeError: If *cls* is not a class.
:raise attr.exceptions.NotAnAttrsClassError: If *cls* is not an ``attrs``
class and you didn't pass any attribs.
:raise NameError: If types cannot be resolved because of missing variables.
:returns: *cls* so you can use this function also as a class decorator.
Please note that you have to apply it **after** `attrs.define`. That
means the decorator has to come in the line **before** `attrs.define`.
.. versionadded:: 20.1.0
.. versionadded:: 21.1.0 *attribs*
"""
# Since calling get_type_hints is expensive we cache whether we've
# done it already.
if getattr(cls, "__attrs_types_resolved__", None) != cls:
import typing
hints = typing.get_type_hints(cls, globalns=globalns, localns=localns)
for field in fields(cls) if attribs is None else attribs:
if field.name in hints:
# Since fields have been frozen we must work around it.
_obj_setattr(field, "type", hints[field.name])
# We store the class we resolved so that subclasses know they haven't
# been resolved.
cls.__attrs_types_resolved__ = cls
# Return the class so you can use it as a decorator too.
return cls
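# Illustrative usage sketch for `resolve_types` (hypothetical class, assumes
# the public ``attr`` package is importable). Note the ordering: the decorator
# sits *above* the attrs decorator so that it runs after it.
import attr

@attr.resolve_types
@attr.define
class _Measurement:
    value: "int"
    unit: "str"

# The string annotations have been replaced by concrete types.
assert attr.fields(_Measurement).value.type is int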

File diff suppressed because it is too large

View File

@ -0,0 +1,220 @@
# SPDX-License-Identifier: MIT
"""
These are Python 3.6+-only and keyword-only APIs that call `attr.s` and
`attr.ib` with different default values.
"""
from functools import partial
from . import setters
from ._funcs import asdict as _asdict
from ._funcs import astuple as _astuple
from ._make import (
NOTHING,
_frozen_setattrs,
_ng_default_on_setattr,
attrib,
attrs,
)
from .exceptions import UnannotatedAttributeError
def define(
maybe_cls=None,
*,
these=None,
repr=None,
hash=None,
init=None,
slots=True,
frozen=False,
weakref_slot=True,
str=False,
auto_attribs=None,
kw_only=False,
cache_hash=False,
auto_exc=True,
eq=None,
order=False,
auto_detect=True,
getstate_setstate=None,
on_setattr=None,
field_transformer=None,
match_args=True,
):
r"""
Define an ``attrs`` class.
    Differences from the classic `attr.s` that it uses underneath:
- Automatically detect whether or not *auto_attribs* should be `True` (c.f.
*auto_attribs* parameter).
- If *frozen* is `False`, run converters and validators when setting an
attribute by default.
- *slots=True*
.. caution::
Usually this has only upsides and few visible effects in everyday
        programming. But it *can* lead to some surprising behaviors, so please
make sure to read :term:`slotted classes`.
- *auto_exc=True*
- *auto_detect=True*
- *order=False*
- Some options that were only relevant on Python 2 or were kept around for
backwards-compatibility have been removed.
Please note that these are all defaults and you can change them as you
wish.
:param Optional[bool] auto_attribs: If set to `True` or `False`, it behaves
exactly like `attr.s`. If left `None`, `attr.s` will try to guess:
1. If any attributes are annotated and no unannotated `attrs.fields`\ s
are found, it assumes *auto_attribs=True*.
2. Otherwise it assumes *auto_attribs=False* and tries to collect
`attrs.fields`\ s.
For now, please refer to `attr.s` for the rest of the parameters.
.. versionadded:: 20.1.0
.. versionchanged:: 21.3.0 Converters are also run ``on_setattr``.
"""
def do_it(cls, auto_attribs):
return attrs(
maybe_cls=cls,
these=these,
repr=repr,
hash=hash,
init=init,
slots=slots,
frozen=frozen,
weakref_slot=weakref_slot,
str=str,
auto_attribs=auto_attribs,
kw_only=kw_only,
cache_hash=cache_hash,
auto_exc=auto_exc,
eq=eq,
order=order,
auto_detect=auto_detect,
collect_by_mro=True,
getstate_setstate=getstate_setstate,
on_setattr=on_setattr,
field_transformer=field_transformer,
match_args=match_args,
)
def wrap(cls):
"""
Making this a wrapper ensures this code runs during class creation.
We also ensure that frozen-ness of classes is inherited.
"""
nonlocal frozen, on_setattr
had_on_setattr = on_setattr not in (None, setters.NO_OP)
# By default, mutable classes convert & validate on setattr.
if frozen is False and on_setattr is None:
on_setattr = _ng_default_on_setattr
# However, if we subclass a frozen class, we inherit the immutability
# and disable on_setattr.
for base_cls in cls.__bases__:
if base_cls.__setattr__ is _frozen_setattrs:
if had_on_setattr:
raise ValueError(
"Frozen classes can't use on_setattr "
"(frozen-ness was inherited)."
)
on_setattr = setters.NO_OP
break
if auto_attribs is not None:
return do_it(cls, auto_attribs)
try:
return do_it(cls, True)
except UnannotatedAttributeError:
return do_it(cls, False)
# maybe_cls's type depends on the usage of the decorator. It's a class
# if it's used as `@attrs` but ``None`` if used as `@attrs()`.
if maybe_cls is None:
return wrap
else:
return wrap(maybe_cls)
mutable = define
frozen = partial(define, frozen=True, on_setattr=None)
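# Illustrative usage sketch for `define` (hypothetical class, assumes the
# public ``attrs`` package is importable). Annotated attributes are collected
# automatically, and because the class is mutable, converters and validators
# also run on attribute assignment.
import attrs

@attrs.define
class _Account:
    owner: str
    balance: int = attrs.field(converter=int, default=0)

acct = _Account("alice", "10")
acct.balance = "25"          # the converter runs on setattr as well
assert acct.balance == 25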
def field(
*,
default=NOTHING,
validator=None,
repr=True,
hash=None,
init=True,
metadata=None,
converter=None,
factory=None,
kw_only=False,
eq=None,
order=None,
on_setattr=None,
):
"""
Identical to `attr.ib`, except keyword-only and with some arguments
removed.
.. versionadded:: 20.1.0
"""
return attrib(
default=default,
validator=validator,
repr=repr,
hash=hash,
init=init,
metadata=metadata,
converter=converter,
factory=factory,
kw_only=kw_only,
eq=eq,
order=order,
on_setattr=on_setattr,
)
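# Illustrative usage sketch for `field` (hypothetical class, assumes the
# public ``attrs`` package is importable): ``factory`` avoids sharing one
# mutable default between instances.
import attrs

@attrs.define
class _Basket:
    items: list = attrs.field(factory=list)

assert _Basket().items is not _Basket().items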
def asdict(inst, *, recurse=True, filter=None, value_serializer=None):
"""
    Same as `attr.asdict`, except that collection types are always retained
and dict is always used as *dict_factory*.
.. versionadded:: 21.3.0
"""
return _asdict(
inst=inst,
recurse=recurse,
filter=filter,
value_serializer=value_serializer,
retain_collection_types=True,
)
def astuple(inst, *, recurse=True, filter=None):
"""
    Same as `attr.astuple`, except that collection types are always retained
and `tuple` is always used as the *tuple_factory*.
.. versionadded:: 21.3.0
"""
return _astuple(
inst=inst, recurse=recurse, filter=filter, retain_collection_types=True
)
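# Illustrative usage sketch for the next-gen `asdict`/`astuple` (hypothetical
# classes, assumes the public ``attrs`` package is importable); collection
# types such as the tuple below are retained.
import attrs

@attrs.define
class _Point:
    x: int
    y: int

@attrs.define
class _Path:
    points: tuple

path = _Path((_Point(0, 0), _Point(1, 2)))
assert attrs.asdict(path) == {"points": ({"x": 0, "y": 0}, {"x": 1, "y": 2})}
assert attrs.astuple(path) == (((0, 0), (1, 2)),)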

View File

@ -0,0 +1,86 @@
# SPDX-License-Identifier: MIT
from functools import total_ordering
from ._funcs import astuple
from ._make import attrib, attrs
@total_ordering
@attrs(eq=False, order=False, slots=True, frozen=True)
class VersionInfo:
"""
    A version object that can be compared to tuples of length 1--4:
>>> attr.VersionInfo(19, 1, 0, "final") <= (19, 2)
True
>>> attr.VersionInfo(19, 1, 0, "final") < (19, 1, 1)
True
>>> vi = attr.VersionInfo(19, 2, 0, "final")
>>> vi < (19, 1, 1)
False
>>> vi < (19,)
False
>>> vi == (19, 2,)
True
>>> vi == (19, 2, 1)
False
.. versionadded:: 19.2
"""
year = attrib(type=int)
minor = attrib(type=int)
micro = attrib(type=int)
releaselevel = attrib(type=str)
@classmethod
def _from_version_string(cls, s):
"""
Parse *s* and return a _VersionInfo.
"""
v = s.split(".")
if len(v) == 3:
v.append("final")
return cls(
year=int(v[0]), minor=int(v[1]), micro=int(v[2]), releaselevel=v[3]
)
def _ensure_tuple(self, other):
"""
Ensure *other* is a tuple of a valid length.
Returns a possibly transformed *other* and ourselves as a tuple of
the same length as *other*.
"""
if self.__class__ is other.__class__:
other = astuple(other)
if not isinstance(other, tuple):
raise NotImplementedError
if not (1 <= len(other) <= 4):
raise NotImplementedError
return astuple(self)[: len(other)], other
def __eq__(self, other):
try:
us, them = self._ensure_tuple(other)
except NotImplementedError:
return NotImplemented
return us == them
def __lt__(self, other):
try:
us, them = self._ensure_tuple(other)
except NotImplementedError:
return NotImplemented
# Since alphabetically "dev0" < "final" < "post1" < "post2", we don't
# have to do anything special with releaselevel for now.
return us < them

View File

@ -0,0 +1,144 @@
# SPDX-License-Identifier: MIT
"""
Commonly useful converters.
"""
import typing
from ._compat import _AnnotationExtractor
from ._make import NOTHING, Factory, pipe
__all__ = [
"default_if_none",
"optional",
"pipe",
"to_bool",
]
def optional(converter):
"""
A converter that allows an attribute to be optional. An optional attribute
is one which can be set to ``None``.
Type annotations will be inferred from the wrapped converter's, if it
has any.
:param callable converter: the converter that is used for non-``None``
values.
.. versionadded:: 17.1.0
"""
def optional_converter(val):
if val is None:
return None
return converter(val)
xtr = _AnnotationExtractor(converter)
t = xtr.get_first_param_type()
if t:
optional_converter.__annotations__["val"] = typing.Optional[t]
rt = xtr.get_return_type()
if rt:
optional_converter.__annotations__["return"] = typing.Optional[rt]
return optional_converter
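# Illustrative usage sketch for `optional` (hypothetical class, assumes the
# public ``attr`` package is importable): ``None`` passes through untouched,
# anything else goes through the wrapped converter.
import attr
from attr import converters

@attr.s
class _Server:
    port = attr.ib(converter=converters.optional(int))

assert _Server("8080").port == 8080
assert _Server(None).port is None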
def default_if_none(default=NOTHING, factory=None):
"""
    A converter that replaces ``None`` values with *default* or the result of
    *factory*.
:param default: Value to be used if ``None`` is passed. Passing an instance
of `attrs.Factory` is supported, however the ``takes_self`` option
is *not*.
:param callable factory: A callable that takes no parameters whose result
is used if ``None`` is passed.
    :raises TypeError: If **neither** *default* **nor** *factory* is passed.
:raises TypeError: If **both** *default* and *factory* are passed.
:raises ValueError: If an instance of `attrs.Factory` is passed with
``takes_self=True``.
.. versionadded:: 18.2.0
"""
if default is NOTHING and factory is None:
raise TypeError("Must pass either `default` or `factory`.")
if default is not NOTHING and factory is not None:
raise TypeError(
"Must pass either `default` or `factory` but not both."
)
if factory is not None:
default = Factory(factory)
if isinstance(default, Factory):
if default.takes_self:
raise ValueError(
"`takes_self` is not supported by default_if_none."
)
def default_if_none_converter(val):
if val is not None:
return val
return default.factory()
else:
def default_if_none_converter(val):
if val is not None:
return val
return default
return default_if_none_converter
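# Illustrative usage sketch for `default_if_none` (hypothetical class, assumes
# the public ``attr`` package is importable).
import attr
from attr.converters import default_if_none

@attr.s
class _Config:
    name = attr.ib(converter=default_if_none("unnamed"))
    tags = attr.ib(converter=default_if_none(factory=list))

cfg = _Config(None, None)
assert cfg.name == "unnamed"
assert cfg.tags == []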
def to_bool(val):
"""
Convert "boolean" strings (e.g., from env. vars.) to real booleans.
Values mapping to :code:`True`:
- :code:`True`
- :code:`"true"` / :code:`"t"`
- :code:`"yes"` / :code:`"y"`
- :code:`"on"`
- :code:`"1"`
- :code:`1`
Values mapping to :code:`False`:
- :code:`False`
- :code:`"false"` / :code:`"f"`
- :code:`"no"` / :code:`"n"`
- :code:`"off"`
- :code:`"0"`
- :code:`0`
:raises ValueError: for any other value.
.. versionadded:: 21.3.0
"""
if isinstance(val, str):
val = val.lower()
truthy = {True, "true", "t", "yes", "y", "on", "1", 1}
falsy = {False, "false", "f", "no", "n", "off", "0", 0}
try:
if val in truthy:
return True
if val in falsy:
return False
except TypeError:
# Raised when "val" is not hashable (e.g., lists)
pass
raise ValueError("Cannot convert value to bool: {}".format(val))

View File

@ -0,0 +1,92 @@
# SPDX-License-Identifier: MIT
class FrozenError(AttributeError):
"""
    An attempt has been made to modify a frozen/immutable instance or
    attribute.
It mirrors the behavior of ``namedtuples`` by using the same error message
and subclassing `AttributeError`.
.. versionadded:: 20.1.0
"""
msg = "can't set attribute"
args = [msg]
class FrozenInstanceError(FrozenError):
"""
    An attempt has been made to modify a frozen instance.
.. versionadded:: 16.1.0
"""
class FrozenAttributeError(FrozenError):
"""
    An attempt has been made to modify a frozen attribute.
.. versionadded:: 20.1.0
"""
class AttrsAttributeNotFoundError(ValueError):
"""
An ``attrs`` function couldn't find an attribute that the user asked for.
.. versionadded:: 16.2.0
"""
class NotAnAttrsClassError(ValueError):
"""
A non-``attrs`` class has been passed into an ``attrs`` function.
.. versionadded:: 16.2.0
"""
class DefaultAlreadySetError(RuntimeError):
"""
    A default has been set using ``attr.ib()`` and an attempt is made to reset
    it using the decorator.
.. versionadded:: 17.1.0
"""
class UnannotatedAttributeError(RuntimeError):
"""
A class with ``auto_attribs=True`` has an ``attr.ib()`` without a type
annotation.
.. versionadded:: 17.3.0
"""
class PythonTooOldError(RuntimeError):
"""
    An attempt was made to use an ``attrs`` feature that requires a newer
    Python version.
.. versionadded:: 18.2.0
"""
class NotCallableError(TypeError):
"""
    An ``attr.ib()`` requiring a callable has been set with a value
that is not callable.
.. versionadded:: 19.2.0
"""
def __init__(self, msg, value):
super(TypeError, self).__init__(msg, value)
self.msg = msg
self.value = value
def __str__(self):
return str(self.msg)

View File

@ -0,0 +1,51 @@
# SPDX-License-Identifier: MIT
"""
Commonly useful filters for `attr.asdict`.
"""
from ._make import Attribute
def _split_what(what):
"""
Returns a tuple of `frozenset`s of classes and attributes.
"""
return (
frozenset(cls for cls in what if isinstance(cls, type)),
frozenset(cls for cls in what if isinstance(cls, Attribute)),
)
def include(*what):
"""
Include *what*.
:param what: What to include.
:type what: `list` of `type` or `attrs.Attribute`\\ s
:rtype: `callable`
"""
cls, attrs = _split_what(what)
def include_(attribute, value):
return value.__class__ in cls or attribute in attrs
return include_
def exclude(*what):
"""
Exclude *what*.
:param what: What to exclude.
:type what: `list` of classes or `attrs.Attribute`\\ s.
:rtype: `callable`
"""
cls, attrs = _split_what(what)
def exclude_(attribute, value):
return value.__class__ not in cls and attribute not in attrs
return exclude_
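# Illustrative usage sketch for `include`/`exclude` (hypothetical class,
# assumes the public ``attr`` package is importable); the filters plug into
# `attr.asdict`/`attr.astuple`.
import attr
from attr import filters

@attr.s
class _User:
    name = attr.ib()
    password = attr.ib()

u = _User("alice", "hunter2")
assert attr.asdict(u, filter=filters.exclude(attr.fields(_User).password)) == {
    "name": "alice"
}
assert attr.asdict(u, filter=filters.include(str)) == {
    "name": "alice",
    "password": "hunter2",
}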

View File

View File

@ -0,0 +1,73 @@
# SPDX-License-Identifier: MIT
"""
Commonly used hooks for on_setattr.
"""
from . import _config
from .exceptions import FrozenAttributeError
def pipe(*setters):
"""
Run all *setters* and return the return value of the last one.
.. versionadded:: 20.1.0
"""
def wrapped_pipe(instance, attrib, new_value):
rv = new_value
for setter in setters:
rv = setter(instance, attrib, rv)
return rv
return wrapped_pipe
def frozen(_, __, ___):
"""
    Prevent an attribute from being modified.
.. versionadded:: 20.1.0
"""
raise FrozenAttributeError()
def validate(instance, attrib, new_value):
"""
Run *attrib*'s validator on *new_value* if it has one.
.. versionadded:: 20.1.0
"""
if _config._run_validators is False:
return new_value
v = attrib.validator
if not v:
return new_value
v(instance, attrib, new_value)
return new_value
def convert(instance, attrib, new_value):
"""
Run *attrib*'s converter -- if it has one -- on *new_value* and return the
result.
.. versionadded:: 20.1.0
"""
c = attrib.converter
if c:
return c(new_value)
return new_value
# Sentinel for disabling class-wide *on_setattr* hooks for certain attributes.
# autodata stopped working, so the docstring is inlined in the API docs.
NO_OP = object()
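# Illustrative usage sketch for the setters hooks (hypothetical class, assumes
# the public ``attr`` package is importable): convert, then validate, on every
# attribute assignment.
import attr
from attr import setters
from attr.validators import instance_of

@attr.s(on_setattr=setters.pipe(setters.convert, setters.validate))
class _Box:
    size = attr.ib(converter=int, validator=instance_of(int))

box = _Box("3")
box.size = "7"               # converted to 7, then validated
assert box.size == 7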

View File

@ -0,0 +1,594 @@
# SPDX-License-Identifier: MIT
"""
Commonly useful validators.
"""
import operator
import re
from contextlib import contextmanager
from ._config import get_run_validators, set_run_validators
from ._make import _AndValidator, and_, attrib, attrs
from .exceptions import NotCallableError
try:
Pattern = re.Pattern
except AttributeError: # Python <3.7 lacks a Pattern type.
Pattern = type(re.compile(""))
__all__ = [
"and_",
"deep_iterable",
"deep_mapping",
"disabled",
"ge",
"get_disabled",
"gt",
"in_",
"instance_of",
"is_callable",
"le",
"lt",
"matches_re",
"max_len",
"min_len",
"optional",
"provides",
"set_disabled",
]
def set_disabled(disabled):
"""
Globally disable or enable running validators.
By default, they are run.
:param disabled: If ``True``, disable running all validators.
:type disabled: bool
.. warning::
This function is not thread-safe!
.. versionadded:: 21.3.0
"""
set_run_validators(not disabled)
def get_disabled():
"""
Return a bool indicating whether validators are currently disabled or not.
:return: ``True`` if validators are currently disabled.
:rtype: bool
.. versionadded:: 21.3.0
"""
return not get_run_validators()
@contextmanager
def disabled():
"""
Context manager that disables running validators within its context.
.. warning::
This context manager is not thread-safe!
.. versionadded:: 21.3.0
"""
set_run_validators(False)
try:
yield
finally:
set_run_validators(True)
@attrs(repr=False, slots=True, hash=True)
class _InstanceOfValidator:
type = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if not isinstance(value, self.type):
raise TypeError(
"'{name}' must be {type!r} (got {value!r} that is a "
"{actual!r}).".format(
name=attr.name,
type=self.type,
actual=value.__class__,
value=value,
),
attr,
self.type,
value,
)
def __repr__(self):
return "<instance_of validator for type {type!r}>".format(
type=self.type
)
def instance_of(type):
"""
A validator that raises a `TypeError` if the initializer is called
with a wrong type for this particular attribute (checks are performed using
`isinstance` therefore it's also valid to pass a tuple of types).
:param type: The type to check for.
:type type: type or tuple of types
:raises TypeError: With a human readable error message, the attribute
(of type `attrs.Attribute`), the expected type, and the value it
got.
"""
return _InstanceOfValidator(type)
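# Illustrative usage sketch for `instance_of` (hypothetical class, assumes the
# public ``attr`` package is importable); a tuple of types is allowed.
import attr
from attr.validators import instance_of

@attr.s
class _Temperature:
    value = attr.ib(validator=instance_of((int, float)))

_Temperature(21.5)           # fine
try:
    _Temperature("21.5")
except TypeError:
    pass                     # the wrong type is rejected at __init__ time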
@attrs(repr=False, frozen=True, slots=True)
class _MatchesReValidator:
pattern = attrib()
match_func = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if not self.match_func(value):
raise ValueError(
"'{name}' must match regex {pattern!r}"
" ({value!r} doesn't)".format(
name=attr.name, pattern=self.pattern.pattern, value=value
),
attr,
self.pattern,
value,
)
def __repr__(self):
return "<matches_re validator for pattern {pattern!r}>".format(
pattern=self.pattern
)
def matches_re(regex, flags=0, func=None):
r"""
A validator that raises `ValueError` if the initializer is called
with a string that doesn't match *regex*.
:param regex: a regex string or precompiled pattern to match against
:param int flags: flags that will be passed to the underlying re function
(default 0)
:param callable func: which underlying `re` function to call. Valid options
are `re.fullmatch`, `re.search`, and `re.match`; the default ``None``
means `re.fullmatch`. For performance reasons, the pattern is always
precompiled using `re.compile`.
.. versionadded:: 19.2.0
.. versionchanged:: 21.3.0 *regex* can be a pre-compiled pattern.
"""
valid_funcs = (re.fullmatch, None, re.search, re.match)
if func not in valid_funcs:
raise ValueError(
"'func' must be one of {}.".format(
", ".join(
sorted(
e and e.__name__ or "None" for e in set(valid_funcs)
)
)
)
)
if isinstance(regex, Pattern):
if flags:
raise TypeError(
"'flags' can only be used with a string pattern; "
"pass flags to re.compile() instead"
)
pattern = regex
else:
pattern = re.compile(regex, flags)
if func is re.match:
match_func = pattern.match
elif func is re.search:
match_func = pattern.search
else:
match_func = pattern.fullmatch
return _MatchesReValidator(pattern, match_func)
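# Illustrative usage sketch for `matches_re` (hypothetical class, assumes the
# public ``attr`` package is importable); the default matching function is
# `re.fullmatch`.
import attr
from attr.validators import matches_re

@attr.s
class _Color:
    hex_code = attr.ib(validator=matches_re(r"#[0-9a-fA-F]{6}"))

_Color("#a1b2c3")            # fine
try:
    _Color("red")
except ValueError:
    pass                     # no full match -> ValueError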
@attrs(repr=False, slots=True, hash=True)
class _ProvidesValidator:
interface = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if not self.interface.providedBy(value):
raise TypeError(
"'{name}' must provide {interface!r} which {value!r} "
"doesn't.".format(
name=attr.name, interface=self.interface, value=value
),
attr,
self.interface,
value,
)
def __repr__(self):
return "<provides validator for interface {interface!r}>".format(
interface=self.interface
)
def provides(interface):
"""
A validator that raises a `TypeError` if the initializer is called
    with an object that does not provide the requested *interface* (checks are
    performed using ``interface.providedBy(value)``; see `zope.interface
    <https://zopeinterface.readthedocs.io/en/latest/>`_).
:param interface: The interface to check for.
:type interface: ``zope.interface.Interface``
:raises TypeError: With a human readable error message, the attribute
(of type `attrs.Attribute`), the expected interface, and the
value it got.
"""
return _ProvidesValidator(interface)
@attrs(repr=False, slots=True, hash=True)
class _OptionalValidator:
validator = attrib()
def __call__(self, inst, attr, value):
if value is None:
return
self.validator(inst, attr, value)
def __repr__(self):
return "<optional validator for {what} or None>".format(
what=repr(self.validator)
)
def optional(validator):
"""
A validator that makes an attribute optional. An optional attribute is one
which can be set to ``None`` in addition to satisfying the requirements of
the sub-validator.
:param validator: A validator (or a list of validators) that is used for
non-``None`` values.
:type validator: callable or `list` of callables.
.. versionadded:: 15.1.0
.. versionchanged:: 17.1.0 *validator* can be a list of validators.
"""
if isinstance(validator, list):
return _OptionalValidator(_AndValidator(validator))
return _OptionalValidator(validator)
@attrs(repr=False, slots=True, hash=True)
class _InValidator:
options = attrib()
def __call__(self, inst, attr, value):
try:
in_options = value in self.options
except TypeError: # e.g. `1 in "abc"`
in_options = False
if not in_options:
raise ValueError(
"'{name}' must be in {options!r} (got {value!r})".format(
name=attr.name, options=self.options, value=value
),
attr,
self.options,
value,
)
def __repr__(self):
return "<in_ validator with options {options!r}>".format(
options=self.options
)
def in_(options):
"""
A validator that raises a `ValueError` if the initializer is called
with a value that does not belong in the options provided. The check is
performed using ``value in options``.
:param options: Allowed options.
:type options: list, tuple, `enum.Enum`, ...
:raises ValueError: With a human readable error message, the attribute (of
type `attrs.Attribute`), the expected options, and the value it
got.
.. versionadded:: 17.1.0
.. versionchanged:: 22.1.0
The ValueError was incomplete until now and only contained the human
readable error message. Now it contains all the information that has
been promised since 17.1.0.
"""
return _InValidator(options)
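# Illustrative usage sketch for `in_` (hypothetical class, assumes the public
# ``attr`` package is importable).
import attr
from attr.validators import in_

@attr.s
class _Die:
    side = attr.ib(validator=in_(range(1, 7)))

_Die(3)                      # fine
try:
    _Die(9)
except ValueError:
    pass                     # 9 is not in range(1, 7)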
@attrs(repr=False, slots=False, hash=True)
class _IsCallableValidator:
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if not callable(value):
message = (
"'{name}' must be callable "
"(got {value!r} that is a {actual!r})."
)
raise NotCallableError(
msg=message.format(
name=attr.name, value=value, actual=value.__class__
),
value=value,
)
def __repr__(self):
return "<is_callable validator>"
def is_callable():
"""
    A validator that raises an `attr.exceptions.NotCallableError` if the
initializer is called with a value for this particular attribute
that is not callable.
.. versionadded:: 19.1.0
:raises `attr.exceptions.NotCallableError`: With a human readable error
message containing the attribute (`attrs.Attribute`) name,
and the value it got.
"""
return _IsCallableValidator()
@attrs(repr=False, slots=True, hash=True)
class _DeepIterable:
member_validator = attrib(validator=is_callable())
iterable_validator = attrib(
default=None, validator=optional(is_callable())
)
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if self.iterable_validator is not None:
self.iterable_validator(inst, attr, value)
for member in value:
self.member_validator(inst, attr, member)
def __repr__(self):
iterable_identifier = (
""
if self.iterable_validator is None
else " {iterable!r}".format(iterable=self.iterable_validator)
)
return (
"<deep_iterable validator for{iterable_identifier}"
" iterables of {member!r}>"
).format(
iterable_identifier=iterable_identifier,
member=self.member_validator,
)
def deep_iterable(member_validator, iterable_validator=None):
"""
A validator that performs deep validation of an iterable.
:param member_validator: Validator(s) to apply to iterable members
:param iterable_validator: Validator to apply to iterable itself
(optional)
.. versionadded:: 19.1.0
:raises TypeError: if any sub-validators fail
"""
if isinstance(member_validator, (list, tuple)):
member_validator = and_(*member_validator)
return _DeepIterable(member_validator, iterable_validator)
@attrs(repr=False, slots=True, hash=True)
class _DeepMapping:
key_validator = attrib(validator=is_callable())
value_validator = attrib(validator=is_callable())
mapping_validator = attrib(default=None, validator=optional(is_callable()))
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if self.mapping_validator is not None:
self.mapping_validator(inst, attr, value)
for key in value:
self.key_validator(inst, attr, key)
self.value_validator(inst, attr, value[key])
def __repr__(self):
return (
"<deep_mapping validator for objects mapping {key!r} to {value!r}>"
).format(key=self.key_validator, value=self.value_validator)
def deep_mapping(key_validator, value_validator, mapping_validator=None):
"""
A validator that performs deep validation of a dictionary.
:param key_validator: Validator to apply to dictionary keys
:param value_validator: Validator to apply to dictionary values
:param mapping_validator: Validator to apply to top-level mapping
attribute (optional)
.. versionadded:: 19.1.0
:raises TypeError: if any sub-validators fail
"""
return _DeepMapping(key_validator, value_validator, mapping_validator)
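# Illustrative usage sketch for `deep_iterable`/`deep_mapping` (hypothetical
# class, assumes the public ``attr`` package is importable).
import attr
from attr.validators import deep_iterable, deep_mapping, instance_of

@attr.s
class _Inventory:
    names = attr.ib(
        validator=deep_iterable(instance_of(str), instance_of(list))
    )
    counts = attr.ib(validator=deep_mapping(instance_of(str), instance_of(int)))

_Inventory(["apple"], {"apple": 3})          # fine
try:
    _Inventory(["apple", 42], {"apple": 3})
except TypeError:
    pass                                     # 42 fails the member validator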
@attrs(repr=False, frozen=True, slots=True)
class _NumberValidator:
bound = attrib()
compare_op = attrib()
compare_func = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if not self.compare_func(value, self.bound):
raise ValueError(
"'{name}' must be {op} {bound}: {value}".format(
name=attr.name,
op=self.compare_op,
bound=self.bound,
value=value,
)
)
def __repr__(self):
return "<Validator for x {op} {bound}>".format(
op=self.compare_op, bound=self.bound
)
def lt(val):
"""
A validator that raises `ValueError` if the initializer is called
    with a number larger than or equal to *val*.
:param val: Exclusive upper bound for values
.. versionadded:: 21.3.0
"""
return _NumberValidator(val, "<", operator.lt)
def le(val):
"""
A validator that raises `ValueError` if the initializer is called
with a number greater than *val*.
:param val: Inclusive upper bound for values
.. versionadded:: 21.3.0
"""
return _NumberValidator(val, "<=", operator.le)
def ge(val):
"""
A validator that raises `ValueError` if the initializer is called
with a number smaller than *val*.
:param val: Inclusive lower bound for values
.. versionadded:: 21.3.0
"""
return _NumberValidator(val, ">=", operator.ge)
def gt(val):
"""
A validator that raises `ValueError` if the initializer is called
    with a number smaller than or equal to *val*.
:param val: Exclusive lower bound for values
.. versionadded:: 21.3.0
"""
return _NumberValidator(val, ">", operator.gt)
@attrs(repr=False, frozen=True, slots=True)
class _MaxLengthValidator:
max_length = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if len(value) > self.max_length:
raise ValueError(
"Length of '{name}' must be <= {max}: {len}".format(
name=attr.name, max=self.max_length, len=len(value)
)
)
def __repr__(self):
return "<max_len validator for {max}>".format(max=self.max_length)
def max_len(length):
"""
A validator that raises `ValueError` if the initializer is called
with a string or iterable that is longer than *length*.
:param int length: Maximum length of the string or iterable
.. versionadded:: 21.3.0
"""
return _MaxLengthValidator(length)
@attrs(repr=False, frozen=True, slots=True)
class _MinLengthValidator:
min_length = attrib()
def __call__(self, inst, attr, value):
"""
We use a callable class to be able to change the ``__repr__``.
"""
if len(value) < self.min_length:
raise ValueError(
"Length of '{name}' must be => {min}: {len}".format(
name=attr.name, min=self.min_length, len=len(value)
)
)
def __repr__(self):
return "<min_len validator for {min}>".format(min=self.min_length)
def min_len(length):
"""
A validator that raises `ValueError` if the initializer is called
with a string or iterable that is shorter than *length*.
:param int length: Minimum length of the string or iterable
.. versionadded:: 22.1.0
"""
return _MinLengthValidator(length)
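# Illustrative usage sketch for the bound and length validators (hypothetical
# class, assumes the public ``attr`` package is importable); a list of
# validators is combined with `and_`.
import attr
from attr.validators import ge, le, max_len

@attr.s
class _Player:
    level = attr.ib(validator=[ge(1), le(99)])
    name = attr.ib(validator=max_len(16))

_Player(10, "slayer")        # fine
try:
    _Player(120, "slayer")
except ValueError:
    pass                     # 120 violates le(99)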

View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015 Hynek Schlawack and the attrs contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,70 @@
# SPDX-License-Identifier: MIT
from attr import (
NOTHING,
Attribute,
Factory,
__author__,
__copyright__,
__description__,
__doc__,
__email__,
__license__,
__title__,
__url__,
__version__,
__version_info__,
assoc,
cmp_using,
define,
evolve,
field,
fields,
fields_dict,
frozen,
has,
make_class,
mutable,
resolve_types,
validate,
)
from attr._next_gen import asdict, astuple
from . import converters, exceptions, filters, setters, validators
__all__ = [
"__author__",
"__copyright__",
"__description__",
"__doc__",
"__email__",
"__license__",
"__title__",
"__url__",
"__version__",
"__version_info__",
"asdict",
"assoc",
"astuple",
"Attribute",
"cmp_using",
"converters",
"define",
"evolve",
"exceptions",
"Factory",
"field",
"fields_dict",
"fields",
"filters",
"frozen",
"has",
"make_class",
"mutable",
"NOTHING",
"resolve_types",
"setters",
"validate",
"validators",
]

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: MIT
from attr.converters import * # noqa

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: MIT
from attr.exceptions import * # noqa

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: MIT
from attr.filters import * # noqa

View File

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: MIT
from attr.setters import * # noqa

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: MIT
from attr.validators import * # noqa

View File

@ -0,0 +1,19 @@
Copyright (c) 2013 Julian Berman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,71 @@
"""
An implementation of JSON Schema for Python
The main functionality is provided by the validator classes for each of the
supported JSON Schema versions.
Most commonly, `jsonschema.validators.validate` is the quickest way to simply
validate a given instance under a schema, and will create a validator
for you.
"""
import warnings
from jsonschema._format import FormatChecker
from jsonschema._types import TypeChecker
from jsonschema.exceptions import (
ErrorTree,
FormatError,
RefResolutionError,
SchemaError,
ValidationError,
)
from jsonschema.protocols import Validator
from jsonschema.validators import (
Draft3Validator,
Draft4Validator,
Draft6Validator,
Draft7Validator,
Draft201909Validator,
Draft202012Validator,
RefResolver,
validate,
)
def __getattr__(name):
if name == "__version__":
warnings.warn(
"Accessing jsonschema.__version__ is deprecated and will be "
"removed in a future release. Use importlib.metadata directly "
"to query for jsonschema's version.",
DeprecationWarning,
stacklevel=2,
)
try:
from importlib import metadata
except ImportError:
import importlib_metadata as metadata
return metadata.version("jsonschema")
format_checkers = {
"draft3_format_checker": Draft3Validator,
"draft4_format_checker": Draft4Validator,
"draft6_format_checker": Draft6Validator,
"draft7_format_checker": Draft7Validator,
"draft201909_format_checker": Draft201909Validator,
"draft202012_format_checker": Draft202012Validator,
}
ValidatorForFormat = format_checkers.get(name)
if ValidatorForFormat is not None:
warnings.warn(
f"Accessing jsonschema.{name} is deprecated and will be "
"removed in a future release. Instead, use the FORMAT_CHECKER "
"attribute on the corresponding Validator.",
DeprecationWarning,
stacklevel=2,
)
return ValidatorForFormat.FORMAT_CHECKER
raise AttributeError(f"module {__name__} has no attribute {name}")

View File

@ -0,0 +1,3 @@
from jsonschema.cli import main
main()

View File

@ -0,0 +1,513 @@
from __future__ import annotations
from contextlib import suppress
from uuid import UUID
import datetime
import ipaddress
import re
import typing
import warnings
from jsonschema.exceptions import FormatError
_FormatCheckCallable = typing.Callable[[object], bool]
_F = typing.TypeVar("_F", bound=_FormatCheckCallable)
_RaisesType = typing.Union[
typing.Type[Exception], typing.Tuple[typing.Type[Exception], ...],
]
class FormatChecker:
"""
A ``format`` property checker.
JSON Schema does not mandate that the ``format`` property actually do any
validation. If validation is desired however, instances of this class can
be hooked into validators to enable format validation.
`FormatChecker` objects always return ``True`` when asked about
formats that they do not know how to validate.
To add a check for a custom format use the `FormatChecker.checks`
decorator.
Arguments:
formats:
The known formats to validate. This argument can be used to
limit which formats will be used during validation.
"""
checkers: dict[
str,
tuple[_FormatCheckCallable, _RaisesType],
] = {}
def __init__(self, formats: typing.Iterable[str] = None):
if formats is None:
formats = self.checkers.keys()
self.checkers = {k: self.checkers[k] for k in formats}
def __repr__(self):
return "<FormatChecker checkers={}>".format(sorted(self.checkers))
def checks(
self, format: str, raises: _RaisesType = (),
) -> typing.Callable[[_F], _F]:
"""
Register a decorated function as validating a new format.
Arguments:
format:
The format that the decorated function will check.
raises:
The exception(s) raised by the decorated function when an
invalid instance is found.
The exception object will be accessible as the
`jsonschema.exceptions.ValidationError.cause` attribute of the
resulting validation error.
"""
def _checks(func: _F) -> _F:
self.checkers[format] = (func, raises)
return func
return _checks
@classmethod
def cls_checks(
cls, format: str, raises: _RaisesType = (),
) -> typing.Callable[[_F], _F]:
warnings.warn(
(
"FormatChecker.cls_checks is deprecated. Call "
"FormatChecker.checks on a specific FormatChecker instance "
"instead."
),
DeprecationWarning,
stacklevel=2,
)
return cls._cls_checks(format=format, raises=raises)
@classmethod
def _cls_checks(
cls, format: str, raises: _RaisesType = (),
) -> typing.Callable[[_F], _F]:
def _checks(func: _F) -> _F:
cls.checkers[format] = (func, raises)
return func
return _checks
def check(self, instance: object, format: str) -> None:
"""
Check whether the instance conforms to the given format.
Arguments:
instance (*any primitive type*, i.e. str, number, bool):
The instance to check
format:
The format that instance should conform to
Raises:
FormatError:
if the instance does not conform to ``format``
"""
if format not in self.checkers:
return
func, raises = self.checkers[format]
result, cause = None, None
try:
result = func(instance)
except raises as e:
cause = e
if not result:
raise FormatError(f"{instance!r} is not a {format!r}", cause=cause)
def conforms(self, instance: object, format: str) -> bool:
"""
Check whether the instance conforms to the given format.
Arguments:
instance (*any primitive type*, i.e. str, number, bool):
The instance to check
format:
The format that instance should conform to
Returns:
bool: whether it conformed
"""
try:
self.check(instance, format)
except FormatError:
return False
else:
return True
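# Illustrative usage sketch for `FormatChecker` (the "even" format is made up
# for illustration; assumes the ``jsonschema`` package is importable).
from jsonschema import FormatChecker

_checker = FormatChecker()

@_checker.checks("even", raises=ValueError)
def _is_even(value):
    return int(value) % 2 == 0

assert _checker.conforms(4, "even")
assert not _checker.conforms(3, "even")
assert _checker.conforms(3, "unknown-format")   # unknown formats always pass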
draft3_format_checker = FormatChecker()
draft4_format_checker = FormatChecker()
draft6_format_checker = FormatChecker()
draft7_format_checker = FormatChecker()
draft201909_format_checker = FormatChecker()
draft202012_format_checker = FormatChecker()
_draft_checkers: dict[str, FormatChecker] = dict(
draft3=draft3_format_checker,
draft4=draft4_format_checker,
draft6=draft6_format_checker,
draft7=draft7_format_checker,
draft201909=draft201909_format_checker,
draft202012=draft202012_format_checker,
)
def _checks_drafts(
name=None,
draft3=None,
draft4=None,
draft6=None,
draft7=None,
draft201909=None,
draft202012=None,
raises=(),
) -> typing.Callable[[_F], _F]:
draft3 = draft3 or name
draft4 = draft4 or name
draft6 = draft6 or name
draft7 = draft7 or name
draft201909 = draft201909 or name
draft202012 = draft202012 or name
def wrap(func: _F) -> _F:
if draft3:
func = _draft_checkers["draft3"].checks(draft3, raises)(func)
if draft4:
func = _draft_checkers["draft4"].checks(draft4, raises)(func)
if draft6:
func = _draft_checkers["draft6"].checks(draft6, raises)(func)
if draft7:
func = _draft_checkers["draft7"].checks(draft7, raises)(func)
if draft201909:
func = _draft_checkers["draft201909"].checks(draft201909, raises)(
func,
)
if draft202012:
func = _draft_checkers["draft202012"].checks(draft202012, raises)(
func,
)
# Oy. This is bad global state, but relied upon for now, until
# deprecation. See #519 and test_format_checkers_come_with_defaults
FormatChecker._cls_checks(
draft202012 or draft201909 or draft7 or draft6 or draft4 or draft3,
raises,
)(func)
return func
return wrap
@_checks_drafts(name="idn-email")
@_checks_drafts(name="email")
def is_email(instance: object) -> bool:
if not isinstance(instance, str):
return True
return "@" in instance
@_checks_drafts(
draft3="ip-address",
draft4="ipv4",
draft6="ipv4",
draft7="ipv4",
draft201909="ipv4",
draft202012="ipv4",
raises=ipaddress.AddressValueError,
)
def is_ipv4(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(ipaddress.IPv4Address(instance))
@_checks_drafts(name="ipv6", raises=ipaddress.AddressValueError)
def is_ipv6(instance: object) -> bool:
if not isinstance(instance, str):
return True
address = ipaddress.IPv6Address(instance)
return not getattr(address, "scope_id", "")
with suppress(ImportError):
from fqdn import FQDN
@_checks_drafts(
draft3="host-name",
draft4="hostname",
draft6="hostname",
draft7="hostname",
draft201909="hostname",
draft202012="hostname",
)
def is_host_name(instance: object) -> bool:
if not isinstance(instance, str):
return True
return FQDN(instance).is_valid
with suppress(ImportError):
    # The built-in `idna` codec only implements RFC 3490, so we go elsewhere.
import idna
@_checks_drafts(
draft7="idn-hostname",
draft201909="idn-hostname",
draft202012="idn-hostname",
raises=(idna.IDNAError, UnicodeError),
)
def is_idn_host_name(instance: object) -> bool:
if not isinstance(instance, str):
return True
idna.encode(instance)
return True
try:
import rfc3987
except ImportError:
with suppress(ImportError):
from rfc3986_validator import validate_rfc3986
@_checks_drafts(name="uri")
def is_uri(instance: object) -> bool:
if not isinstance(instance, str):
return True
return validate_rfc3986(instance, rule="URI")
@_checks_drafts(
draft6="uri-reference",
draft7="uri-reference",
draft201909="uri-reference",
draft202012="uri-reference",
raises=ValueError,
)
def is_uri_reference(instance: object) -> bool:
if not isinstance(instance, str):
return True
return validate_rfc3986(instance, rule="URI_reference")
else:
@_checks_drafts(
draft7="iri",
draft201909="iri",
draft202012="iri",
raises=ValueError,
)
def is_iri(instance: object) -> bool:
if not isinstance(instance, str):
return True
return rfc3987.parse(instance, rule="IRI")
@_checks_drafts(
draft7="iri-reference",
draft201909="iri-reference",
draft202012="iri-reference",
raises=ValueError,
)
def is_iri_reference(instance: object) -> bool:
if not isinstance(instance, str):
return True
return rfc3987.parse(instance, rule="IRI_reference")
@_checks_drafts(name="uri", raises=ValueError)
def is_uri(instance: object) -> bool:
if not isinstance(instance, str):
return True
return rfc3987.parse(instance, rule="URI")
@_checks_drafts(
draft6="uri-reference",
draft7="uri-reference",
draft201909="uri-reference",
draft202012="uri-reference",
raises=ValueError,
)
def is_uri_reference(instance: object) -> bool:
if not isinstance(instance, str):
return True
return rfc3987.parse(instance, rule="URI_reference")
with suppress(ImportError):
from rfc3339_validator import validate_rfc3339
@_checks_drafts(name="date-time")
def is_datetime(instance: object) -> bool:
if not isinstance(instance, str):
return True
return validate_rfc3339(instance.upper())
@_checks_drafts(
draft7="time",
draft201909="time",
draft202012="time",
)
def is_time(instance: object) -> bool:
if not isinstance(instance, str):
return True
return is_datetime("1970-01-01T" + instance)
@_checks_drafts(name="regex", raises=re.error)
def is_regex(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(re.compile(instance))
@_checks_drafts(
draft3="date",
draft7="date",
draft201909="date",
draft202012="date",
raises=ValueError,
)
def is_date(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(instance.isascii() and datetime.date.fromisoformat(instance))
@_checks_drafts(draft3="time", raises=ValueError)
def is_draft3_time(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(datetime.datetime.strptime(instance, "%H:%M:%S"))
with suppress(ImportError):
from webcolors import CSS21_NAMES_TO_HEX
import webcolors
def is_css_color_code(instance: object) -> bool:
return webcolors.normalize_hex(instance)
@_checks_drafts(draft3="color", raises=(ValueError, TypeError))
def is_css21_color(instance: object) -> bool:
if (
not isinstance(instance, str)
or instance.lower() in CSS21_NAMES_TO_HEX
):
return True
return is_css_color_code(instance)
with suppress(ImportError):
import jsonpointer
@_checks_drafts(
draft6="json-pointer",
draft7="json-pointer",
draft201909="json-pointer",
draft202012="json-pointer",
raises=jsonpointer.JsonPointerException,
)
def is_json_pointer(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(jsonpointer.JsonPointer(instance))
# TODO: I don't want to maintain this, so it
# needs to go either into jsonpointer (pending
# https://github.com/stefankoegl/python-json-pointer/issues/34) or
# into a new external library.
@_checks_drafts(
draft7="relative-json-pointer",
draft201909="relative-json-pointer",
draft202012="relative-json-pointer",
raises=jsonpointer.JsonPointerException,
)
def is_relative_json_pointer(instance: object) -> bool:
# Definition taken from:
# https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
if not isinstance(instance, str):
return True
non_negative_integer, rest = [], ""
for i, character in enumerate(instance):
if character.isdigit():
# digits with a leading "0" are not allowed
if i > 0 and int(instance[i - 1]) == 0:
return False
non_negative_integer.append(character)
continue
if not non_negative_integer:
return False
rest = instance[i:]
break
return (rest == "#") or bool(jsonpointer.JsonPointer(rest))
with suppress(ImportError):
import uri_template
@_checks_drafts(
draft6="uri-template",
draft7="uri-template",
draft201909="uri-template",
draft202012="uri-template",
)
def is_uri_template(instance: object) -> bool:
if not isinstance(instance, str):
return True
return uri_template.validate(instance)
with suppress(ImportError):
import isoduration
@_checks_drafts(
draft201909="duration",
draft202012="duration",
raises=isoduration.DurationParsingException,
)
def is_duration(instance: object) -> bool:
if not isinstance(instance, str):
return True
return bool(isoduration.parse_duration(instance))
@_checks_drafts(
draft201909="uuid",
draft202012="uuid",
raises=ValueError,
)
def is_uuid(instance: object) -> bool:
if not isinstance(instance, str):
return True
UUID(instance)
return all(instance[position] == "-" for position in (8, 13, 18, 23))

View File

@ -0,0 +1,319 @@
from jsonschema import _utils
from jsonschema.exceptions import ValidationError
def id_of_ignore_ref(property="$id"):
def id_of(schema):
"""
Ignore an ``$id`` sibling of ``$ref`` if it is present.
Otherwise, return the ID of the given schema.
"""
if schema is True or schema is False or "$ref" in schema:
return ""
return schema.get(property, "")
return id_of
def ignore_ref_siblings(schema):
"""
Ignore siblings of ``$ref`` if it is present.
Otherwise, return all keywords.
Suitable for use with `create`'s ``applicable_validators`` argument.
"""
ref = schema.get("$ref")
if ref is not None:
return [("$ref", ref)]
else:
return schema.items()
def dependencies_draft3(validator, dependencies, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, dependency in dependencies.items():
if property not in instance:
continue
if validator.is_type(dependency, "object"):
yield from validator.descend(
instance, dependency, schema_path=property,
)
elif validator.is_type(dependency, "string"):
if dependency not in instance:
message = f"{dependency!r} is a dependency of {property!r}"
yield ValidationError(message)
else:
for each in dependency:
if each not in instance:
message = f"{each!r} is a dependency of {property!r}"
yield ValidationError(message)
def dependencies_draft4_draft6_draft7(
validator,
dependencies,
instance,
schema,
):
"""
Support for the ``dependencies`` keyword from pre-draft 2019-09.
In later drafts, the keyword was split into separate
``dependentRequired`` and ``dependentSchemas`` validators.
"""
if not validator.is_type(instance, "object"):
return
for property, dependency in dependencies.items():
if property not in instance:
continue
if validator.is_type(dependency, "array"):
for each in dependency:
if each not in instance:
message = f"{each!r} is a dependency of {property!r}"
yield ValidationError(message)
else:
yield from validator.descend(
instance, dependency, schema_path=property,
)
def disallow_draft3(validator, disallow, instance, schema):
for disallowed in _utils.ensure_list(disallow):
if validator.evolve(schema={"type": [disallowed]}).is_valid(instance):
message = f"{disallowed!r} is disallowed for {instance!r}"
yield ValidationError(message)
def extends_draft3(validator, extends, instance, schema):
if validator.is_type(extends, "object"):
yield from validator.descend(instance, extends)
return
for index, subschema in enumerate(extends):
yield from validator.descend(instance, subschema, schema_path=index)
def items_draft3_draft4(validator, items, instance, schema):
if not validator.is_type(instance, "array"):
return
if validator.is_type(items, "object"):
for index, item in enumerate(instance):
yield from validator.descend(item, items, path=index)
else:
for (index, item), subschema in zip(enumerate(instance), items):
yield from validator.descend(
item, subschema, path=index, schema_path=index,
)
def items_draft6_draft7_draft201909(validator, items, instance, schema):
if not validator.is_type(instance, "array"):
return
if validator.is_type(items, "array"):
for (index, item), subschema in zip(enumerate(instance), items):
yield from validator.descend(
item, subschema, path=index, schema_path=index,
)
else:
for index, item in enumerate(instance):
yield from validator.descend(item, items, path=index)
def minimum_draft3_draft4(validator, minimum, instance, schema):
if not validator.is_type(instance, "number"):
return
if schema.get("exclusiveMinimum", False):
failed = instance <= minimum
cmp = "less than or equal to"
else:
failed = instance < minimum
cmp = "less than"
if failed:
message = f"{instance!r} is {cmp} the minimum of {minimum!r}"
yield ValidationError(message)
def maximum_draft3_draft4(validator, maximum, instance, schema):
if not validator.is_type(instance, "number"):
return
if schema.get("exclusiveMaximum", False):
failed = instance >= maximum
cmp = "greater than or equal to"
else:
failed = instance > maximum
cmp = "greater than"
if failed:
message = f"{instance!r} is {cmp} the maximum of {maximum!r}"
yield ValidationError(message)
def properties_draft3(validator, properties, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, subschema in properties.items():
if property in instance:
yield from validator.descend(
instance[property],
subschema,
path=property,
schema_path=property,
)
elif subschema.get("required", False):
error = ValidationError(f"{property!r} is a required property")
error._set(
validator="required",
validator_value=subschema["required"],
instance=instance,
schema=schema,
)
error.path.appendleft(property)
error.schema_path.extend([property, "required"])
yield error
def type_draft3(validator, types, instance, schema):
types = _utils.ensure_list(types)
all_errors = []
for index, type in enumerate(types):
if validator.is_type(type, "object"):
errors = list(validator.descend(instance, type, schema_path=index))
if not errors:
return
all_errors.extend(errors)
else:
if validator.is_type(instance, type):
return
else:
reprs = []
for type in types:
try:
reprs.append(repr(type["name"]))
except Exception:
reprs.append(repr(type))
yield ValidationError(
f"{instance!r} is not of type {', '.join(reprs)}",
context=all_errors,
)
def contains_draft6_draft7(validator, contains, instance, schema):
if not validator.is_type(instance, "array"):
return
if not any(
validator.evolve(schema=contains).is_valid(element)
for element in instance
):
yield ValidationError(
f"None of {instance!r} are valid under the given schema",
)
def recursiveRef(validator, recursiveRef, instance, schema):
lookup_url, target = validator.resolver.resolution_scope, validator.schema
for each in reversed(validator.resolver._scopes_stack[1:]):
lookup_url, next_target = validator.resolver.resolve(each)
if next_target.get("$recursiveAnchor"):
target = next_target
else:
break
fragment = recursiveRef.lstrip("#")
subschema = validator.resolver.resolve_fragment(target, fragment)
# FIXME: This is gutted (and not calling .descend) because it can trigger
# recursion errors, so there's a bug here. Re-enable the tests to
# see it.
subschema
return []
def find_evaluated_item_indexes_by_schema(validator, instance, schema):
"""
Get all indexes of items that get evaluated under the current schema
Covers all keywords related to unevaluatedItems: items, prefixItems, if,
then, else, contains, unevaluatedItems, allOf, oneOf, anyOf
"""
if validator.is_type(schema, "boolean"):
return []
evaluated_indexes = []
if "additionalItems" in schema:
return list(range(0, len(instance)))
if "$ref" in schema:
scope, resolved = validator.resolver.resolve(schema["$ref"])
validator.resolver.push_scope(scope)
try:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, resolved,
)
finally:
validator.resolver.pop_scope()
if "items" in schema:
if validator.is_type(schema["items"], "object"):
return list(range(0, len(instance)))
evaluated_indexes += list(range(0, len(schema["items"])))
if "if" in schema:
if validator.evolve(schema=schema["if"]).is_valid(instance):
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["if"],
)
if "then" in schema:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["then"],
)
else:
if "else" in schema:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["else"],
)
for keyword in ["contains", "unevaluatedItems"]:
if keyword in schema:
for k, v in enumerate(instance):
if validator.evolve(schema=schema[keyword]).is_valid(v):
evaluated_indexes.append(k)
for keyword in ["allOf", "oneOf", "anyOf"]:
if keyword in schema:
for subschema in schema[keyword]:
errs = list(validator.descend(instance, subschema))
if not errs:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, subschema,
)
return evaluated_indexes
def unevaluatedItems_draft2019(validator, unevaluatedItems, instance, schema):
if not validator.is_type(instance, "array"):
return
evaluated_item_indexes = find_evaluated_item_indexes_by_schema(
validator, instance, schema,
)
unevaluated_items = [
item for index, item in enumerate(instance)
if index not in evaluated_item_indexes
]
if unevaluated_items:
error = "Unevaluated items are not allowed (%s %s unexpected)"
yield ValidationError(error % _utils.extras_msg(unevaluated_items))

View File

@ -0,0 +1,203 @@
from __future__ import annotations
import numbers
import typing
from pyrsistent import pmap
from pyrsistent.typing import PMap
import attr
from jsonschema.exceptions import UndefinedTypeCheck
# unfortunately, the type of pmap is generic, and if used as the attr.ib
# converter, the generic type is presented to mypy, which then fails to match
# the concrete type of a type checker mapping
# this "do nothing" wrapper presents the correct information to mypy
def _typed_pmap_converter(
init_val: typing.Mapping[
str,
typing.Callable[["TypeChecker", typing.Any], bool],
],
) -> PMap[str, typing.Callable[["TypeChecker", typing.Any], bool]]:
return pmap(init_val)
def is_array(checker, instance):
return isinstance(instance, list)
def is_bool(checker, instance):
return isinstance(instance, bool)
def is_integer(checker, instance):
# bool inherits from int, so ensure bools aren't reported as ints
if isinstance(instance, bool):
return False
return isinstance(instance, int)
def is_null(checker, instance):
return instance is None
def is_number(checker, instance):
# bool inherits from int, so ensure bools aren't reported as ints
if isinstance(instance, bool):
return False
return isinstance(instance, numbers.Number)
def is_object(checker, instance):
return isinstance(instance, dict)
def is_string(checker, instance):
return isinstance(instance, str)
def is_any(checker, instance):
return True
@attr.s(frozen=True, repr=False)
class TypeChecker:
"""
A :kw:`type` property checker.
A `TypeChecker` performs type checking for a `Validator`, converting
between the defined JSON Schema types and some associated Python types or
objects.
Modifying the behavior just mentioned by redefining which Python objects
are considered to be of which JSON Schema types can be done using
`TypeChecker.redefine` or `TypeChecker.redefine_many`, and types can be
removed via `TypeChecker.remove`. Each of these return a new `TypeChecker`.
Arguments:
type_checkers:
The initial mapping of types to their checking functions.
"""
_type_checkers: PMap[
str, typing.Callable[["TypeChecker", typing.Any], bool],
] = attr.ib(
default=pmap(),
converter=_typed_pmap_converter,
)
def __repr__(self):
types = ", ".join(repr(k) for k in sorted(self._type_checkers))
return f"<{self.__class__.__name__} types={{{types}}}>"
def is_type(self, instance, type: str) -> bool:
"""
Check if the instance is of the appropriate type.
Arguments:
instance:
The instance to check
type:
The name of the type that is expected.
Raises:
`jsonschema.exceptions.UndefinedTypeCheck`:
if ``type`` is unknown to this object.
"""
try:
fn = self._type_checkers[type]
except KeyError:
raise UndefinedTypeCheck(type) from None
return fn(self, instance)
def redefine(self, type: str, fn) -> "TypeChecker":
"""
Produce a new checker with the given type redefined.
Arguments:
type:
The name of the type to check.
fn (collections.abc.Callable):
A callable taking exactly two parameters - the type
checker calling the function and the instance to check.
The function should return true if instance is of this
type and false otherwise.
"""
return self.redefine_many({type: fn})
def redefine_many(self, definitions=()) -> "TypeChecker":
"""
Produce a new checker with the given types redefined.
Arguments:
definitions (dict):
A dictionary mapping types to their checking functions.
"""
type_checkers = self._type_checkers.update(definitions)
return attr.evolve(self, type_checkers=type_checkers)
def remove(self, *types) -> "TypeChecker":
"""
Produce a new checker with the given types forgotten.
Arguments:
types:
the names of the types to remove.
Raises:
`jsonschema.exceptions.UndefinedTypeCheck`:
if any given type is unknown to this object
"""
type_checkers = self._type_checkers
for each in types:
try:
type_checkers = type_checkers.remove(each)
except KeyError:
raise UndefinedTypeCheck(each)
return attr.evolve(self, type_checkers=type_checkers)
draft3_type_checker = TypeChecker(
{
"any": is_any,
"array": is_array,
"boolean": is_bool,
"integer": is_integer,
"object": is_object,
"null": is_null,
"number": is_number,
"string": is_string,
},
)
draft4_type_checker = draft3_type_checker.remove("any")
draft6_type_checker = draft4_type_checker.redefine(
"integer",
lambda checker, instance: (
is_integer(checker, instance)
or isinstance(instance, float) and instance.is_integer()
),
)
draft7_type_checker = draft6_type_checker
draft201909_type_checker = draft7_type_checker
draft202012_type_checker = draft201909_type_checker
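
A minimal usage sketch (not part of the vendored module, using only names defined above): redefine() returns a new, immutable checker, so adding tuple support to the draft 2020-12 "array" check looks like this:

def is_array_or_tuple(checker, instance):
    # Sketch only: also accept tuples as JSON Schema arrays.
    return is_array(checker, instance) or isinstance(instance, tuple)

tuple_aware_checker = draft202012_type_checker.redefine("array", is_array_or_tuple)

assert tuple_aware_checker.is_type((1, 2, 3), "array")
assert not draft202012_type_checker.is_type((1, 2, 3), "array")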

View File

@ -0,0 +1,345 @@
from collections.abc import Mapping, MutableMapping, Sequence
from urllib.parse import urlsplit
import itertools
import json
import os
import re
class URIDict(MutableMapping):
"""
Dictionary which uses normalized URIs as keys.
"""
def normalize(self, uri):
return urlsplit(uri).geturl()
def __init__(self, *args, **kwargs):
self.store = dict()
self.store.update(*args, **kwargs)
def __getitem__(self, uri):
return self.store[self.normalize(uri)]
def __setitem__(self, uri, value):
self.store[self.normalize(uri)] = value
def __delitem__(self, uri):
del self.store[self.normalize(uri)]
def __iter__(self):
return iter(self.store)
def __len__(self):
return len(self.store)
def __repr__(self):
return repr(self.store)
class Unset:
"""
An as-of-yet unset attribute or unprovided default parameter.
"""
def __repr__(self):
return "<unset>"
def load_schema(name):
"""
Load a schema from ./schemas/``name``.json and return it.
"""
with open(
os.path.join(os.path.dirname(__file__), "schemas", "{0}.json".format(name)),
encoding="utf-8"
) as f:
data = f.read()
return json.loads(data)
def format_as_index(container, indices):
"""
Construct a single string containing indexing operations for the indices.
For example for a container ``bar``, [1, 2, "foo"] -> bar[1][2]["foo"]
Arguments:
container (str):
A word to use for the thing being indexed
indices (sequence):
The indices to format.
"""
if not indices:
return container
return f"{container}[{']['.join(repr(index) for index in indices)}]"
def find_additional_properties(instance, schema):
"""
Return the set of additional properties for the given ``instance``.
Weeds out properties that should have been validated by ``properties`` and
/ or ``patternProperties``.
Assumes ``instance`` is dict-like already.
"""
properties = schema.get("properties", {})
patterns = "|".join(schema.get("patternProperties", {}))
for property in instance:
if property not in properties:
if patterns and re.search(patterns, property):
continue
yield property
def extras_msg(extras):
"""
Create an error message for extra items or properties.
"""
if len(extras) == 1:
verb = "was"
else:
verb = "were"
return ", ".join(repr(extra) for extra in sorted(extras)), verb
def ensure_list(thing):
"""
Wrap ``thing`` in a list if it's a single str.
Otherwise, return it unchanged.
"""
if isinstance(thing, str):
return [thing]
return thing
def _mapping_equal(one, two):
"""
Check if two mappings are equal using the semantics of `equal`.
"""
if len(one) != len(two):
return False
return all(
key in two and equal(value, two[key])
for key, value in one.items()
)
def _sequence_equal(one, two):
"""
Check if two sequences are equal using the semantics of `equal`.
"""
if len(one) != len(two):
return False
return all(equal(i, j) for i, j in zip(one, two))
def equal(one, two):
"""
Check if two things are equal evading some Python type hierarchy semantics.
Specifically in JSON Schema, evade `bool` inheriting from `int`,
recursing into sequences to do the same.
"""
if isinstance(one, str) or isinstance(two, str):
return one == two
if isinstance(one, Sequence) and isinstance(two, Sequence):
return _sequence_equal(one, two)
if isinstance(one, Mapping) and isinstance(two, Mapping):
return _mapping_equal(one, two)
return unbool(one) == unbool(two)
def unbool(element, true=object(), false=object()):
"""
A hack to make True and 1 and False and 0 unique for ``uniq``.
"""
if element is True:
return true
elif element is False:
return false
return element
def uniq(container):
"""
Check if all of a container's elements are unique.
Tries to rely on the container being recursively sortable, or otherwise
falls back on (slow) brute force.
"""
try:
sort = sorted(unbool(i) for i in container)
sliced = itertools.islice(sort, 1, None)
for i, j in zip(sort, sliced):
if equal(i, j):
return False
except (NotImplementedError, TypeError):
seen = []
for e in container:
e = unbool(e)
for i in seen:
if equal(i, e):
return False
seen.append(e)
return True
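
An illustrative sketch (not part of the vendored module) of why unbool() exists: JSON Schema treats true/false as distinct from 1/0, so equal() and uniq() must not let Python's bool-subclasses-int behaviour collapse them:

# Python says True == 1 and False == 0, but JSON Schema keeps them distinct.
assert equal(1, 1) and equal(True, True)
assert not equal(True, 1) and not equal(False, 0)

# uniqueItems therefore treats [0, False, 1, True] as four unique items,
# while 1 and 1.0 still compare equal.
assert uniq([0, False, 1, True])
assert not uniq([1, 1.0])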
def find_evaluated_item_indexes_by_schema(validator, instance, schema):
"""
Get all indexes of items that get evaluated under the current schema
Covers all keywords related to unevaluatedItems: items, prefixItems, if,
then, else, contains, unevaluatedItems, allOf, oneOf, anyOf
"""
if validator.is_type(schema, "boolean"):
return []
evaluated_indexes = []
if "items" in schema:
return list(range(0, len(instance)))
if "$ref" in schema:
scope, resolved = validator.resolver.resolve(schema["$ref"])
validator.resolver.push_scope(scope)
try:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, resolved,
)
finally:
validator.resolver.pop_scope()
if "prefixItems" in schema:
evaluated_indexes += list(range(0, len(schema["prefixItems"])))
if "if" in schema:
if validator.evolve(schema=schema["if"]).is_valid(instance):
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["if"],
)
if "then" in schema:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["then"],
)
else:
if "else" in schema:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, schema["else"],
)
for keyword in ["contains", "unevaluatedItems"]:
if keyword in schema:
for k, v in enumerate(instance):
if validator.evolve(schema=schema[keyword]).is_valid(v):
evaluated_indexes.append(k)
for keyword in ["allOf", "oneOf", "anyOf"]:
if keyword in schema:
for subschema in schema[keyword]:
errs = list(validator.descend(instance, subschema))
if not errs:
evaluated_indexes += find_evaluated_item_indexes_by_schema(
validator, instance, subschema,
)
return evaluated_indexes
def find_evaluated_property_keys_by_schema(validator, instance, schema):
"""
Get all keys of items that get evaluated under the current schema
Covers all keywords related to unevaluatedProperties: properties,
additionalProperties, unevaluatedProperties, patternProperties,
dependentSchemas, allOf, oneOf, anyOf, if, then, else
"""
if validator.is_type(schema, "boolean"):
return []
evaluated_keys = []
if "$ref" in schema:
scope, resolved = validator.resolver.resolve(schema["$ref"])
validator.resolver.push_scope(scope)
try:
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, resolved,
)
finally:
validator.resolver.pop_scope()
for keyword in [
"properties", "additionalProperties", "unevaluatedProperties",
]:
if keyword in schema:
if validator.is_type(schema[keyword], "boolean"):
for property, value in instance.items():
if validator.evolve(schema=schema[keyword]).is_valid(
{property: value},
):
evaluated_keys.append(property)
if validator.is_type(schema[keyword], "object"):
for property, subschema in schema[keyword].items():
if property in instance and validator.evolve(
schema=subschema,
).is_valid(instance[property]):
evaluated_keys.append(property)
if "patternProperties" in schema:
for property, value in instance.items():
for pattern, _ in schema["patternProperties"].items():
if re.search(pattern, property) and validator.evolve(
schema=schema["patternProperties"],
).is_valid({property: value}):
evaluated_keys.append(property)
if "dependentSchemas" in schema:
for property, subschema in schema["dependentSchemas"].items():
if property not in instance:
continue
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, subschema,
)
for keyword in ["allOf", "oneOf", "anyOf"]:
if keyword in schema:
for subschema in schema[keyword]:
errs = list(validator.descend(instance, subschema))
if not errs:
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, subschema,
)
if "if" in schema:
if validator.evolve(schema=schema["if"]).is_valid(instance):
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, schema["if"],
)
if "then" in schema:
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, schema["then"],
)
else:
if "else" in schema:
evaluated_keys += find_evaluated_property_keys_by_schema(
validator, instance, schema["else"],
)
return evaluated_keys

View File

@ -0,0 +1,467 @@
from fractions import Fraction
from urllib.parse import urldefrag, urljoin
import re
from jsonschema._utils import (
ensure_list,
equal,
extras_msg,
find_additional_properties,
find_evaluated_item_indexes_by_schema,
find_evaluated_property_keys_by_schema,
unbool,
uniq,
)
from jsonschema.exceptions import FormatError, ValidationError
def patternProperties(validator, patternProperties, instance, schema):
if not validator.is_type(instance, "object"):
return
for pattern, subschema in patternProperties.items():
for k, v in instance.items():
if re.search(pattern, k):
yield from validator.descend(
v, subschema, path=k, schema_path=pattern,
)
def propertyNames(validator, propertyNames, instance, schema):
if not validator.is_type(instance, "object"):
return
for property in instance:
yield from validator.descend(instance=property, schema=propertyNames)
def additionalProperties(validator, aP, instance, schema):
if not validator.is_type(instance, "object"):
return
extras = set(find_additional_properties(instance, schema))
if validator.is_type(aP, "object"):
for extra in extras:
yield from validator.descend(instance[extra], aP, path=extra)
elif not aP and extras:
if "patternProperties" in schema:
if len(extras) == 1:
verb = "does"
else:
verb = "do"
joined = ", ".join(repr(each) for each in sorted(extras))
patterns = ", ".join(
repr(each) for each in sorted(schema["patternProperties"])
)
error = f"{joined} {verb} not match any of the regexes: {patterns}"
yield ValidationError(error)
else:
error = "Additional properties are not allowed (%s %s unexpected)"
yield ValidationError(error % extras_msg(extras))
def items(validator, items, instance, schema):
if not validator.is_type(instance, "array"):
return
prefix = len(schema.get("prefixItems", []))
total = len(instance)
if items is False and total > prefix:
message = f"Expected at most {prefix} items, but found {total}"
yield ValidationError(message)
else:
for index in range(prefix, total):
yield from validator.descend(
instance=instance[index],
schema=items,
path=index,
)
def additionalItems(validator, aI, instance, schema):
if (
not validator.is_type(instance, "array")
or validator.is_type(schema.get("items", {}), "object")
):
return
len_items = len(schema.get("items", []))
if validator.is_type(aI, "object"):
for index, item in enumerate(instance[len_items:], start=len_items):
yield from validator.descend(item, aI, path=index)
elif not aI and len(instance) > len(schema.get("items", [])):
error = "Additional items are not allowed (%s %s unexpected)"
yield ValidationError(
error % extras_msg(instance[len(schema.get("items", [])):]),
)
def const(validator, const, instance, schema):
if not equal(instance, const):
yield ValidationError(f"{const!r} was expected")
def contains(validator, contains, instance, schema):
if not validator.is_type(instance, "array"):
return
matches = 0
min_contains = schema.get("minContains", 1)
max_contains = schema.get("maxContains", len(instance))
for each in instance:
if validator.evolve(schema=contains).is_valid(each):
matches += 1
if matches > max_contains:
yield ValidationError(
"Too many items match the given schema "
f"(expected at most {max_contains})",
validator="maxContains",
validator_value=max_contains,
)
return
if matches < min_contains:
if not matches:
yield ValidationError(
f"{instance!r} does not contain items "
"matching the given schema",
)
else:
yield ValidationError(
"Too few items match the given schema (expected at least "
f"{min_contains} but only {matches} matched)",
validator="minContains",
validator_value=min_contains,
)
def exclusiveMinimum(validator, minimum, instance, schema):
if not validator.is_type(instance, "number"):
return
if instance <= minimum:
yield ValidationError(
f"{instance!r} is less than or equal to "
f"the minimum of {minimum!r}",
)
def exclusiveMaximum(validator, maximum, instance, schema):
if not validator.is_type(instance, "number"):
return
if instance >= maximum:
yield ValidationError(
f"{instance!r} is greater than or equal "
f"to the maximum of {maximum!r}",
)
def minimum(validator, minimum, instance, schema):
if not validator.is_type(instance, "number"):
return
if instance < minimum:
message = f"{instance!r} is less than the minimum of {minimum!r}"
yield ValidationError(message)
def maximum(validator, maximum, instance, schema):
if not validator.is_type(instance, "number"):
return
if instance > maximum:
message = f"{instance!r} is greater than the maximum of {maximum!r}"
yield ValidationError(message)
def multipleOf(validator, dB, instance, schema):
if not validator.is_type(instance, "number"):
return
if isinstance(dB, float):
quotient = instance / dB
try:
failed = int(quotient) != quotient
except OverflowError:
# When `instance` is large and `dB` is less than one,
# quotient can overflow to infinity; and then casting to int
# raises an error.
#
# In this case we fall back to Fraction logic, which is
# exact and cannot overflow. The performance is also
# acceptable: we try the fast all-float option first, and
# we know that fraction(dB) can have at most a few hundred
# digits in each part. The worst-case slowdown is therefore
# for already-slow enormous integers or Decimals.
failed = (Fraction(instance) / Fraction(dB)).denominator != 1
else:
failed = instance % dB
if failed:
yield ValidationError(f"{instance!r} is not a multiple of {dB}")
def minItems(validator, mI, instance, schema):
if validator.is_type(instance, "array") and len(instance) < mI:
yield ValidationError(f"{instance!r} is too short")
def maxItems(validator, mI, instance, schema):
if validator.is_type(instance, "array") and len(instance) > mI:
yield ValidationError(f"{instance!r} is too long")
def uniqueItems(validator, uI, instance, schema):
if (
uI
and validator.is_type(instance, "array")
and not uniq(instance)
):
yield ValidationError(f"{instance!r} has non-unique elements")
def pattern(validator, patrn, instance, schema):
if (
validator.is_type(instance, "string")
and not re.search(patrn, instance)
):
yield ValidationError(f"{instance!r} does not match {patrn!r}")
def format(validator, format, instance, schema):
if validator.format_checker is not None:
try:
validator.format_checker.check(instance, format)
except FormatError as error:
yield ValidationError(error.message, cause=error.cause)
def minLength(validator, mL, instance, schema):
if validator.is_type(instance, "string") and len(instance) < mL:
yield ValidationError(f"{instance!r} is too short")
def maxLength(validator, mL, instance, schema):
if validator.is_type(instance, "string") and len(instance) > mL:
yield ValidationError(f"{instance!r} is too long")
def dependentRequired(validator, dependentRequired, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, dependency in dependentRequired.items():
if property not in instance:
continue
for each in dependency:
if each not in instance:
message = f"{each!r} is a dependency of {property!r}"
yield ValidationError(message)
def dependentSchemas(validator, dependentSchemas, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, dependency in dependentSchemas.items():
if property not in instance:
continue
yield from validator.descend(
instance, dependency, schema_path=property,
)
def enum(validator, enums, instance, schema):
if instance == 0 or instance == 1:
unbooled = unbool(instance)
if all(unbooled != unbool(each) for each in enums):
yield ValidationError(f"{instance!r} is not one of {enums!r}")
elif instance not in enums:
yield ValidationError(f"{instance!r} is not one of {enums!r}")
def ref(validator, ref, instance, schema):
resolve = getattr(validator.resolver, "resolve", None)
if resolve is None:
with validator.resolver.resolving(ref) as resolved:
yield from validator.descend(instance, resolved)
else:
scope, resolved = validator.resolver.resolve(ref)
validator.resolver.push_scope(scope)
try:
yield from validator.descend(instance, resolved)
finally:
validator.resolver.pop_scope()
def dynamicRef(validator, dynamicRef, instance, schema):
_, fragment = urldefrag(dynamicRef)
for url in validator.resolver._scopes_stack:
lookup_url = urljoin(url, dynamicRef)
with validator.resolver.resolving(lookup_url) as subschema:
if ("$dynamicAnchor" in subschema
and fragment == subschema["$dynamicAnchor"]):
yield from validator.descend(instance, subschema)
break
else:
with validator.resolver.resolving(dynamicRef) as subschema:
yield from validator.descend(instance, subschema)
def type(validator, types, instance, schema):
types = ensure_list(types)
if not any(validator.is_type(instance, type) for type in types):
reprs = ", ".join(repr(type) for type in types)
yield ValidationError(f"{instance!r} is not of type {reprs}")
def properties(validator, properties, instance, schema):
if not validator.is_type(instance, "object"):
return
for property, subschema in properties.items():
if property in instance:
yield from validator.descend(
instance[property],
subschema,
path=property,
schema_path=property,
)
def required(validator, required, instance, schema):
if not validator.is_type(instance, "object"):
return
for property in required:
if property not in instance:
yield ValidationError(f"{property!r} is a required property")
def minProperties(validator, mP, instance, schema):
if validator.is_type(instance, "object") and len(instance) < mP:
yield ValidationError(f"{instance!r} does not have enough properties")
def maxProperties(validator, mP, instance, schema):
    if not validator.is_type(instance, "object"):
        return
    if len(instance) > mP:
        yield ValidationError(f"{instance!r} has too many properties")
def allOf(validator, allOf, instance, schema):
for index, subschema in enumerate(allOf):
yield from validator.descend(instance, subschema, schema_path=index)
def anyOf(validator, anyOf, instance, schema):
all_errors = []
for index, subschema in enumerate(anyOf):
errs = list(validator.descend(instance, subschema, schema_path=index))
if not errs:
break
all_errors.extend(errs)
else:
yield ValidationError(
f"{instance!r} is not valid under any of the given schemas",
context=all_errors,
)
def oneOf(validator, oneOf, instance, schema):
subschemas = enumerate(oneOf)
all_errors = []
for index, subschema in subschemas:
errs = list(validator.descend(instance, subschema, schema_path=index))
if not errs:
first_valid = subschema
break
all_errors.extend(errs)
else:
yield ValidationError(
f"{instance!r} is not valid under any of the given schemas",
context=all_errors,
)
more_valid = [
each for _, each in subschemas
if validator.evolve(schema=each).is_valid(instance)
]
if more_valid:
more_valid.append(first_valid)
reprs = ", ".join(repr(schema) for schema in more_valid)
yield ValidationError(f"{instance!r} is valid under each of {reprs}")
def not_(validator, not_schema, instance, schema):
if validator.evolve(schema=not_schema).is_valid(instance):
message = f"{instance!r} should not be valid under {not_schema!r}"
yield ValidationError(message)
def if_(validator, if_schema, instance, schema):
if validator.evolve(schema=if_schema).is_valid(instance):
if "then" in schema:
then = schema["then"]
yield from validator.descend(instance, then, schema_path="then")
elif "else" in schema:
else_ = schema["else"]
yield from validator.descend(instance, else_, schema_path="else")
def unevaluatedItems(validator, unevaluatedItems, instance, schema):
if not validator.is_type(instance, "array"):
return
evaluated_item_indexes = find_evaluated_item_indexes_by_schema(
validator, instance, schema,
)
unevaluated_items = [
item for index, item in enumerate(instance)
if index not in evaluated_item_indexes
]
if unevaluated_items:
error = "Unevaluated items are not allowed (%s %s unexpected)"
yield ValidationError(error % extras_msg(unevaluated_items))
def unevaluatedProperties(validator, unevaluatedProperties, instance, schema):
if not validator.is_type(instance, "object"):
return
evaluated_property_keys = find_evaluated_property_keys_by_schema(
validator, instance, schema,
)
unevaluated_property_keys = []
for property in instance:
if property not in evaluated_property_keys:
for _ in validator.descend(
instance[property],
unevaluatedProperties,
path=property,
schema_path=property,
):
unevaluated_property_keys.append(property)
if unevaluated_property_keys:
error = "Unevaluated properties are not allowed (%s %s unexpected)"
yield ValidationError(error % extras_msg(unevaluated_property_keys))
def prefixItems(validator, prefixItems, instance, schema):
if not validator.is_type(instance, "array"):
return
for (index, item), subschema in zip(enumerate(instance), prefixItems):
yield from validator.descend(
instance=item,
schema=subschema,
schema_path=index,
path=index,
)
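
Every keyword function above has the same shape: it takes the validator, the keyword's value from the schema, the instance, and the enclosing schema, and lazily yields ValidationError objects (yielding nothing means the keyword is satisfied). A small sketch, assuming the public Draft202012Validator from jsonschema.validators:

from jsonschema.validators import Draft202012Validator

validator = Draft202012Validator({"minimum": 5})

# Calling the keyword function directly, the same way the validator core does.
errors = list(minimum(validator, 5, 3, {"minimum": 5}))
assert errors[0].message == "3 is less than the minimum of 5"

# Nothing is yielded when the keyword is satisfied.
assert list(minimum(validator, 5, 7, {"minimum": 5})) == []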

View File

@ -0,0 +1,5 @@
"""
Benchmarks for validation.
This package is *not* public API.
"""

View File

@ -0,0 +1,25 @@
"""
A performance benchmark using the example from issue #232.
See https://github.com/python-jsonschema/jsonschema/pull/232.
"""
from pathlib import Path
from pyperf import Runner
from pyrsistent import m
from jsonschema.tests._suite import Version
import jsonschema
issue232 = Version(
path=Path(__file__).parent / "issue232",
remotes=m(),
name="issue232",
)
if __name__ == "__main__":
issue232.benchmark(
runner=Runner(),
Validator=jsonschema.Draft4Validator,
)

File diff suppressed because it is too large

View File

@ -0,0 +1,12 @@
"""
A performance benchmark using the official test suite.
This benchmarks jsonschema using every valid example in the
JSON-Schema-Test-Suite. It will take some time to complete.
"""
from pyperf import Runner
from jsonschema.tests._suite import Suite
if __name__ == "__main__":
Suite().benchmark(runner=Runner())

View File

@ -0,0 +1,299 @@
"""
The ``jsonschema`` command line.
"""
from json import JSONDecodeError
from textwrap import dedent
import argparse
import json
import sys
import traceback
import warnings
try:
from importlib import metadata
except ImportError:
import importlib_metadata as metadata # type: ignore
try:
from pkgutil import resolve_name
except ImportError:
from pkgutil_resolve_name import resolve_name # type: ignore
import attr
from jsonschema.exceptions import SchemaError
from jsonschema.validators import RefResolver, validator_for
warnings.warn(
(
"The jsonschema CLI is deprecated and will be removed in a future "
"version. Please use check-jsonschema instead, which can be installed "
"from https://pypi.org/project/check-jsonschema/"
),
DeprecationWarning,
stacklevel=2,
)
class _CannotLoadFile(Exception):
pass
@attr.s
class _Outputter:
_formatter = attr.ib()
_stdout = attr.ib()
_stderr = attr.ib()
@classmethod
def from_arguments(cls, arguments, stdout, stderr):
if arguments["output"] == "plain":
formatter = _PlainFormatter(arguments["error_format"])
elif arguments["output"] == "pretty":
formatter = _PrettyFormatter()
return cls(formatter=formatter, stdout=stdout, stderr=stderr)
def load(self, path):
try:
file = open(path)
except FileNotFoundError:
self.filenotfound_error(path=path, exc_info=sys.exc_info())
raise _CannotLoadFile()
with file:
try:
return json.load(file)
except JSONDecodeError:
self.parsing_error(path=path, exc_info=sys.exc_info())
raise _CannotLoadFile()
def filenotfound_error(self, **kwargs):
self._stderr.write(self._formatter.filenotfound_error(**kwargs))
def parsing_error(self, **kwargs):
self._stderr.write(self._formatter.parsing_error(**kwargs))
def validation_error(self, **kwargs):
self._stderr.write(self._formatter.validation_error(**kwargs))
def validation_success(self, **kwargs):
self._stdout.write(self._formatter.validation_success(**kwargs))
@attr.s
class _PrettyFormatter:
_ERROR_MSG = dedent(
"""\
===[{type}]===({path})===
{body}
-----------------------------
""",
)
_SUCCESS_MSG = "===[SUCCESS]===({path})===\n"
def filenotfound_error(self, path, exc_info):
return self._ERROR_MSG.format(
path=path,
type="FileNotFoundError",
body="{!r} does not exist.".format(path),
)
def parsing_error(self, path, exc_info):
exc_type, exc_value, exc_traceback = exc_info
exc_lines = "".join(
traceback.format_exception(exc_type, exc_value, exc_traceback),
)
return self._ERROR_MSG.format(
path=path,
type=exc_type.__name__,
body=exc_lines,
)
def validation_error(self, instance_path, error):
return self._ERROR_MSG.format(
path=instance_path,
type=error.__class__.__name__,
body=error,
)
def validation_success(self, instance_path):
return self._SUCCESS_MSG.format(path=instance_path)
@attr.s
class _PlainFormatter:
_error_format = attr.ib()
def filenotfound_error(self, path, exc_info):
return "{!r} does not exist.\n".format(path)
def parsing_error(self, path, exc_info):
return "Failed to parse {}: {}\n".format(
"<stdin>" if path == "<stdin>" else repr(path),
exc_info[1],
)
def validation_error(self, instance_path, error):
return self._error_format.format(file_name=instance_path, error=error)
def validation_success(self, instance_path):
return ""
def _resolve_name_with_default(name):
if "." not in name:
name = "jsonschema." + name
return resolve_name(name)
parser = argparse.ArgumentParser(
description="JSON Schema Validation CLI",
)
parser.add_argument(
"-i", "--instance",
action="append",
dest="instances",
help="""
a path to a JSON instance (i.e. filename.json) to validate (may
be specified multiple times). If no instances are provided via this
option, one will be expected on standard input.
""",
)
parser.add_argument(
"-F", "--error-format",
help="""
the format to use for each validation error message, specified
in a form suitable for str.format. This string will be passed
one formatted object named 'error' for each ValidationError.
Only provide this option when using --output=plain, which is the
default. If this argument is unprovided and --output=plain is
used, a simple default representation will be used.
""",
)
parser.add_argument(
"-o", "--output",
choices=["plain", "pretty"],
default="plain",
help="""
an output format to use. 'plain' (default) will produce minimal
text with one line for each error, while 'pretty' will produce
more detailed human-readable output on multiple lines.
""",
)
parser.add_argument(
"-V", "--validator",
type=_resolve_name_with_default,
help="""
the fully qualified object name of a validator to use, or, for
validators that are registered with jsonschema, simply the name
of the class.
""",
)
parser.add_argument(
"--base-uri",
help="""
a base URI to assign to the provided schema, even if it does not
declare one (via e.g. $id). This option can be used if you wish to
resolve relative references to a particular URI (or local path)
""",
)
parser.add_argument(
"--version",
action="version",
version=metadata.version("jsonschema"),
)
parser.add_argument(
"schema",
help="the path to a JSON Schema to validate with (i.e. schema.json)",
)
def parse_args(args):
arguments = vars(parser.parse_args(args=args or ["--help"]))
if arguments["output"] != "plain" and arguments["error_format"]:
raise parser.error(
"--error-format can only be used with --output plain",
)
if arguments["output"] == "plain" and arguments["error_format"] is None:
arguments["error_format"] = "{error.instance}: {error.message}\n"
return arguments
def _validate_instance(instance_path, instance, validator, outputter):
invalid = False
for error in validator.iter_errors(instance):
invalid = True
outputter.validation_error(instance_path=instance_path, error=error)
if not invalid:
outputter.validation_success(instance_path=instance_path)
return invalid
def main(args=sys.argv[1:]):
sys.exit(run(arguments=parse_args(args=args)))
def run(arguments, stdout=sys.stdout, stderr=sys.stderr, stdin=sys.stdin):
outputter = _Outputter.from_arguments(
arguments=arguments,
stdout=stdout,
stderr=stderr,
)
try:
schema = outputter.load(arguments["schema"])
except _CannotLoadFile:
return 1
if arguments["validator"] is None:
arguments["validator"] = validator_for(schema)
try:
arguments["validator"].check_schema(schema)
except SchemaError as error:
outputter.validation_error(
instance_path=arguments["schema"],
error=error,
)
return 1
if arguments["instances"]:
load, instances = outputter.load, arguments["instances"]
else:
def load(_):
try:
return json.load(stdin)
except JSONDecodeError:
outputter.parsing_error(
path="<stdin>", exc_info=sys.exc_info(),
)
raise _CannotLoadFile()
instances = ["<stdin>"]
resolver = RefResolver(
base_uri=arguments["base_uri"],
referrer=schema,
) if arguments["base_uri"] is not None else None
validator = arguments["validator"](schema, resolver=resolver)
exit_code = 0
for each in instances:
try:
instance = load(each)
except _CannotLoadFile:
exit_code = 1
else:
exit_code |= _validate_instance(
instance_path=each,
instance=instance,
validator=validator,
outputter=outputter,
)
return exit_code
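
A usage sketch (the file names are hypothetical): the console script built on this module validates instance documents against a schema, e.g. jsonschema -i sample.json schema.json; the same behaviour is reachable from Python through parse_args() and run():

# Hypothetical file names; run() returns 0 only if every instance was valid.
arguments = parse_args(["-i", "sample.json", "schema.json"])
exit_code = run(arguments=arguments)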

View File

@ -0,0 +1,396 @@
"""
Validation errors, and some surrounding helpers.
"""
from __future__ import annotations
from collections import defaultdict, deque
from pprint import pformat
from textwrap import dedent, indent
import heapq
import itertools
import attr
from jsonschema import _utils
WEAK_MATCHES: frozenset[str] = frozenset(["anyOf", "oneOf"])
STRONG_MATCHES: frozenset[str] = frozenset()
_unset = _utils.Unset()
class _Error(Exception):
def __init__(
self,
message,
validator=_unset,
path=(),
cause=None,
context=(),
validator_value=_unset,
instance=_unset,
schema=_unset,
schema_path=(),
parent=None,
type_checker=_unset,
):
super(_Error, self).__init__(
message,
validator,
path,
cause,
context,
validator_value,
instance,
schema,
schema_path,
parent,
)
self.message = message
self.path = self.relative_path = deque(path)
self.schema_path = self.relative_schema_path = deque(schema_path)
self.context = list(context)
self.cause = self.__cause__ = cause
self.validator = validator
self.validator_value = validator_value
self.instance = instance
self.schema = schema
self.parent = parent
self._type_checker = type_checker
for error in context:
error.parent = self
def __repr__(self):
return f"<{self.__class__.__name__}: {self.message!r}>"
def __str__(self):
essential_for_verbose = (
self.validator, self.validator_value, self.instance, self.schema,
)
if any(m is _unset for m in essential_for_verbose):
return self.message
schema_path = _utils.format_as_index(
container=self._word_for_schema_in_error_message,
indices=list(self.relative_schema_path)[:-1],
)
instance_path = _utils.format_as_index(
container=self._word_for_instance_in_error_message,
indices=self.relative_path,
)
prefix = 16 * " "
return dedent(
f"""\
{self.message}
Failed validating {self.validator!r} in {schema_path}:
{indent(pformat(self.schema, width=72), prefix).lstrip()}
On {instance_path}:
{indent(pformat(self.instance, width=72), prefix).lstrip()}
""".rstrip(),
)
@classmethod
def create_from(cls, other):
return cls(**other._contents())
@property
def absolute_path(self):
parent = self.parent
if parent is None:
return self.relative_path
path = deque(self.relative_path)
path.extendleft(reversed(parent.absolute_path))
return path
@property
def absolute_schema_path(self):
parent = self.parent
if parent is None:
return self.relative_schema_path
path = deque(self.relative_schema_path)
path.extendleft(reversed(parent.absolute_schema_path))
return path
@property
def json_path(self):
path = "$"
for elem in self.absolute_path:
if isinstance(elem, int):
path += "[" + str(elem) + "]"
else:
path += "." + elem
return path
def _set(self, type_checker=None, **kwargs):
if type_checker is not None and self._type_checker is _unset:
self._type_checker = type_checker
for k, v in kwargs.items():
if getattr(self, k) is _unset:
setattr(self, k, v)
def _contents(self):
attrs = (
"message", "cause", "context", "validator", "validator_value",
"path", "schema_path", "instance", "schema", "parent",
)
return dict((attr, getattr(self, attr)) for attr in attrs)
def _matches_type(self):
try:
expected = self.schema["type"]
except (KeyError, TypeError):
return False
if isinstance(expected, str):
return self._type_checker.is_type(self.instance, expected)
return any(
self._type_checker.is_type(self.instance, expected_type)
for expected_type in expected
)
class ValidationError(_Error):
"""
An instance was invalid under a provided schema.
"""
_word_for_schema_in_error_message = "schema"
_word_for_instance_in_error_message = "instance"
class SchemaError(_Error):
"""
A schema was invalid under its corresponding metaschema.
"""
_word_for_schema_in_error_message = "metaschema"
_word_for_instance_in_error_message = "schema"
@attr.s(hash=True)
class RefResolutionError(Exception):
"""
A ref could not be resolved.
"""
_cause = attr.ib()
def __str__(self):
return str(self._cause)
class UndefinedTypeCheck(Exception):
"""
A type checker was asked to check a type it did not have registered.
"""
def __init__(self, type):
self.type = type
def __str__(self):
return f"Type {self.type!r} is unknown to this type checker"
class UnknownType(Exception):
"""
A validator was asked to validate an instance against an unknown type.
"""
def __init__(self, type, instance, schema):
self.type = type
self.instance = instance
self.schema = schema
def __str__(self):
prefix = 16 * " "
return dedent(
f"""\
Unknown type {self.type!r} for validator with schema:
{indent(pformat(self.schema, width=72), prefix).lstrip()}
While checking instance:
{indent(pformat(self.instance, width=72), prefix).lstrip()}
""".rstrip(),
)
class FormatError(Exception):
"""
Validating a format failed.
"""
def __init__(self, message, cause=None):
super(FormatError, self).__init__(message, cause)
self.message = message
self.cause = self.__cause__ = cause
def __str__(self):
return self.message
class ErrorTree:
"""
ErrorTrees make it easier to check which validations failed.
"""
_instance = _unset
def __init__(self, errors=()):
self.errors = {}
self._contents = defaultdict(self.__class__)
for error in errors:
container = self
for element in error.path:
container = container[element]
container.errors[error.validator] = error
container._instance = error.instance
def __contains__(self, index):
"""
Check whether ``instance[index]`` has any errors.
"""
return index in self._contents
def __getitem__(self, index):
"""
Retrieve the child tree one level down at the given ``index``.
If the index is not in the instance that this tree corresponds
to and is not known by this tree, whatever error would be raised
by ``instance.__getitem__`` will be propagated (usually this is
        some subclass of `LookupError`).
"""
if self._instance is not _unset and index not in self:
self._instance[index]
return self._contents[index]
def __setitem__(self, index, value):
"""
Add an error to the tree at the given ``index``.
"""
self._contents[index] = value
def __iter__(self):
"""
Iterate (non-recursively) over the indices in the instance with errors.
"""
return iter(self._contents)
def __len__(self):
"""
Return the `total_errors`.
"""
return self.total_errors
def __repr__(self):
total = len(self)
errors = "error" if total == 1 else "errors"
return f"<{self.__class__.__name__} ({total} total {errors})>"
@property
def total_errors(self):
"""
The total number of errors in the entire tree, including children.
"""
child_errors = sum(len(tree) for _, tree in self._contents.items())
return len(self.errors) + child_errors
def by_relevance(weak=WEAK_MATCHES, strong=STRONG_MATCHES):
"""
Create a key function that can be used to sort errors by relevance.
Arguments:
weak (set):
a collection of validation keywords to consider to be
"weak". If there are two errors at the same level of the
instance and one is in the set of weak validation keywords,
the other error will take priority. By default, :kw:`anyOf`
and :kw:`oneOf` are considered weak keywords and will be
superseded by other same-level validation errors.
strong (set):
a collection of validation keywords to consider to be
"strong"
"""
def relevance(error):
validator = error.validator
return (
-len(error.path),
validator not in weak,
validator in strong,
not error._matches_type(),
)
return relevance
relevance = by_relevance()
def best_match(errors, key=relevance):
"""
Try to find an error that appears to be the best match among given errors.
In general, errors that are higher up in the instance (i.e. for which
`ValidationError.path` is shorter) are considered better matches,
since they indicate "more" is wrong with the instance.
If the resulting match is either :kw:`oneOf` or :kw:`anyOf`, the
*opposite* assumption is made -- i.e. the deepest error is picked,
since these keywords only need to match once, and any other errors
may not be relevant.
Arguments:
errors (collections.abc.Iterable):
the errors to select from. Do not provide a mixture of
errors from different validation attempts (i.e. from
different instances or schemas), since it won't produce
            sensible output.
key (collections.abc.Callable):
the key to use when sorting errors. See `relevance` and
transitively `by_relevance` for more details (the default is
to sort with the defaults of that function). Changing the
default is only useful if you want to change the function
that rates errors but still want the error context descent
done by this function.
Returns:
the best matching error, or ``None`` if the iterable was empty
.. note::
This function is a heuristic. Its return value may change for a given
set of inputs from version to version if better heuristics are added.
"""
errors = iter(errors)
best = next(errors, None)
if best is None:
return
best = max(itertools.chain([best], errors), key=key)
while best.context:
# Calculate the minimum via nsmallest, because we don't recurse if
# all nested errors have the same relevance (i.e. if min == max == all)
smallest = heapq.nsmallest(2, best.context, key=key)
if len(smallest) == 2 and key(smallest[0]) == key(smallest[1]):
return best
best = smallest[0]
return best
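
A sketch of best_match in use (assuming the public Draft202012Validator): the instance below is an object, so the relevance heuristic descends into the anyOf error's context and prefers the object branch's error over the string branch's type error:

from jsonschema.validators import Draft202012Validator

schema = {"anyOf": [{"type": "object", "required": ["name"]}, {"type": "string"}]}
errors = Draft202012Validator(schema).iter_errors({})

# Prints "'name' is a required property" rather than the generic anyOf message.
print(best_match(errors).message)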

View File

@ -0,0 +1,224 @@
"""
typing.Protocol classes for jsonschema interfaces.
"""
# for reference material on Protocols, see
# https://www.python.org/dev/peps/pep-0544/
from __future__ import annotations
from collections.abc import Callable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, Iterable
import sys
# doing these imports with `try ... except ImportError` doesn't pass mypy
# checking because mypy sees `typing._SpecialForm` and
# `typing_extensions._SpecialForm` as incompatible
#
# see:
# https://mypy.readthedocs.io/en/stable/runtime_troubles.html#using-new-additions-to-the-typing-module
# https://github.com/python/mypy/issues/4427
if sys.version_info >= (3, 8):
from typing import Protocol, runtime_checkable
else:
from typing_extensions import Protocol, runtime_checkable
# in order for Sphinx to resolve references accurately from type annotations,
# it needs to see names like `jsonschema.TypeChecker`
# therefore, only import at type-checking time (to avoid circular references),
# but use `jsonschema` for any types which will otherwise not be resolvable
if TYPE_CHECKING:
import jsonschema
from jsonschema.exceptions import ValidationError
# For code authors working on the validator protocol, these are the three
# use-cases which should be kept in mind:
#
# 1. As a protocol class, it can be used in type annotations to describe the
# available methods and attributes of a validator
# 2. It is the source of autodoc for the validator documentation
# 3. It is runtime_checkable, meaning that it can be used in isinstance()
# checks.
#
# Since protocols are not base classes, isinstance() checking is limited in
# its capabilities. See docs on runtime_checkable for detail
@runtime_checkable
class Validator(Protocol):
"""
The protocol to which all validator classes adhere.
Arguments:
schema:
The schema that the validator object will validate with.
It is assumed to be valid, and providing
an invalid schema can lead to undefined behavior. See
`Validator.check_schema` to validate a schema first.
resolver:
a resolver that will be used to resolve :kw:`$ref`
properties (JSON references). If unprovided, one will be created.
format_checker:
if provided, a checker which will be used to assert about
:kw:`format` properties present in the schema. If unprovided,
*no* format validation is done, and the presence of format
within schemas is strictly informational. Certain formats
require additional packages to be installed in order to assert
against instances. Ensure you've installed `jsonschema` with
its `extra (optional) dependencies <index:extras>` when
invoking ``pip``.
.. deprecated:: v4.12.0
Subclassing validator classes now explicitly warns this is not part of
their public API.
"""
#: An object representing the validator's meta schema (the schema that
#: describes valid schemas in the given version).
META_SCHEMA: ClassVar[Mapping]
#: A mapping of validation keywords (`str`\s) to functions that
#: validate the keyword with that name. For more information see
#: `creating-validators`.
VALIDATORS: ClassVar[Mapping]
#: A `jsonschema.TypeChecker` that will be used when validating
#: :kw:`type` keywords in JSON schemas.
TYPE_CHECKER: ClassVar[jsonschema.TypeChecker]
#: A `jsonschema.FormatChecker` that will be used when validating
#: :kw:`format` keywords in JSON schemas.
FORMAT_CHECKER: ClassVar[jsonschema.FormatChecker]
#: A function which given a schema returns its ID.
ID_OF: Callable[[Any], str | None]
#: The schema that will be used to validate instances
schema: Mapping | bool
def __init__(
self,
schema: Mapping | bool,
resolver: jsonschema.RefResolver | None = None,
format_checker: jsonschema.FormatChecker | None = None,
) -> None:
...
@classmethod
def check_schema(cls, schema: Mapping | bool) -> None:
"""
Validate the given schema against the validator's `META_SCHEMA`.
Raises:
`jsonschema.exceptions.SchemaError`:
if the schema is invalid
"""
def is_type(self, instance: Any, type: str) -> bool:
"""
Check if the instance is of the given (JSON Schema) type.
Arguments:
instance:
the value to check
type:
the name of a known (JSON Schema) type
Returns:
whether the instance is of the given type
Raises:
`jsonschema.exceptions.UnknownType`:
if ``type`` is not a known type
"""
def is_valid(self, instance: Any) -> bool:
"""
Check if the instance is valid under the current `schema`.
Returns:
whether the instance is valid or not
>>> schema = {"maxItems" : 2}
>>> Draft202012Validator(schema).is_valid([2, 3, 4])
False
"""
def iter_errors(self, instance: Any) -> Iterable[ValidationError]:
r"""
Lazily yield each of the validation errors in the given instance.
>>> schema = {
... "type" : "array",
... "items" : {"enum" : [1, 2, 3]},
... "maxItems" : 2,
... }
>>> v = Draft202012Validator(schema)
>>> for error in sorted(v.iter_errors([2, 3, 4]), key=str):
... print(error.message)
4 is not one of [1, 2, 3]
[2, 3, 4] is too long
.. deprecated:: v4.0.0
Calling this function with a second schema argument is deprecated.
Use `Validator.evolve` instead.
"""
def validate(self, instance: Any) -> None:
"""
Check if the instance is valid under the current `schema`.
Raises:
`jsonschema.exceptions.ValidationError`:
if the instance is invalid
>>> schema = {"maxItems" : 2}
>>> Draft202012Validator(schema).validate([2, 3, 4])
Traceback (most recent call last):
...
ValidationError: [2, 3, 4] is too long
"""
def evolve(self, **kwargs) -> "Validator":
"""
Create a new validator like this one, but with given changes.
Preserves all other attributes, so can be used to e.g. create a
validator with a different schema but with the same :kw:`$ref`
resolution behavior.
>>> validator = Draft202012Validator({})
>>> validator.evolve(schema={"type": "number"})
Draft202012Validator(schema={'type': 'number'}, format_checker=None)
The returned object satisfies the validator protocol, but may not
be of the same concrete class! In particular this occurs
when a :kw:`$ref` occurs to a schema with a different
:kw:`$schema` than this one (i.e. for a different draft).
>>> validator.evolve(
... schema={"$schema": Draft7Validator.META_SCHEMA["$id"]}
... )
Draft7Validator(schema=..., format_checker=None)
"""

View File

@ -0,0 +1,42 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://json-schema.org/draft/2019-09/schema",
"$vocabulary": {
"https://json-schema.org/draft/2019-09/vocab/core": true,
"https://json-schema.org/draft/2019-09/vocab/applicator": true,
"https://json-schema.org/draft/2019-09/vocab/validation": true,
"https://json-schema.org/draft/2019-09/vocab/meta-data": true,
"https://json-schema.org/draft/2019-09/vocab/format": false,
"https://json-schema.org/draft/2019-09/vocab/content": true
},
"$recursiveAnchor": true,
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"properties": {
"definitions": {
"$comment": "While no longer an official keyword as it is replaced by $defs, this keyword is retained in the meta-schema to prevent incompatible extensions as it remains in common use.",
"type": "object",
"additionalProperties": { "$recursiveRef": "#" },
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" is no longer a keyword, but schema authors should avoid redefining it to facilitate a smooth transition to \"dependentSchemas\" and \"dependentRequired\"",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$recursiveRef": "#" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
}
}
}
}

View File

@ -0,0 +1,58 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://json-schema.org/draft/2020-12/schema",
"$vocabulary": {
"https://json-schema.org/draft/2020-12/vocab/core": true,
"https://json-schema.org/draft/2020-12/vocab/applicator": true,
"https://json-schema.org/draft/2020-12/vocab/unevaluated": true,
"https://json-schema.org/draft/2020-12/vocab/validation": true,
"https://json-schema.org/draft/2020-12/vocab/meta-data": true,
"https://json-schema.org/draft/2020-12/vocab/format-annotation": true,
"https://json-schema.org/draft/2020-12/vocab/content": true
},
"$dynamicAnchor": "meta",
"title": "Core and Validation specifications meta-schema",
"allOf": [
{"$ref": "meta/core"},
{"$ref": "meta/applicator"},
{"$ref": "meta/unevaluated"},
{"$ref": "meta/validation"},
{"$ref": "meta/meta-data"},
{"$ref": "meta/format-annotation"},
{"$ref": "meta/content"}
],
"type": ["object", "boolean"],
"$comment": "This meta-schema also defines keywords that have appeared in previous drafts in order to prevent incompatible extensions as they remain in common use.",
"properties": {
"definitions": {
"$comment": "\"definitions\" has been replaced by \"$defs\".",
"type": "object",
"additionalProperties": { "$dynamicRef": "#meta" },
"deprecated": true,
"default": {}
},
"dependencies": {
"$comment": "\"dependencies\" has been split and replaced by \"dependentSchemas\" and \"dependentRequired\" in order to serve their differing semantics.",
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$dynamicRef": "#meta" },
{ "$ref": "meta/validation#/$defs/stringArray" }
]
},
"deprecated": true,
"default": {}
},
"$recursiveAnchor": {
"$comment": "\"$recursiveAnchor\" has been replaced by \"$dynamicAnchor\".",
"$ref": "meta/core#/$defs/anchorString",
"deprecated": true
},
"$recursiveRef": {
"$comment": "\"$recursiveRef\" has been replaced by \"$dynamicRef\".",
"$ref": "meta/core#/$defs/uriReferenceString",
"deprecated": true
}
}
}
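
This is the meta-schema that Draft202012Validator.check_schema() validates schemas against; a small sketch, assuming the public jsonschema package:

from jsonschema.exceptions import SchemaError
from jsonschema.validators import Draft202012Validator

Draft202012Validator.check_schema({"type": "integer"})  # passes silently

try:
    Draft202012Validator.check_schema({"type": 12})  # 12 is not a simple type name
except SchemaError as error:
    print(error.message)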

View File

@ -0,0 +1,172 @@
{
"$schema" : "http://json-schema.org/draft-03/schema#",
"id" : "http://json-schema.org/draft-03/schema#",
"type" : "object",
"properties" : {
"type" : {
"type" : ["string", "array"],
"items" : {
"type" : ["string", {"$ref" : "#"}]
},
"uniqueItems" : true,
"default" : "any"
},
"properties" : {
"type" : "object",
"additionalProperties" : {"$ref" : "#"},
"default" : {}
},
"patternProperties" : {
"type" : "object",
"additionalProperties" : {"$ref" : "#"},
"default" : {}
},
"additionalProperties" : {
"type" : [{"$ref" : "#"}, "boolean"],
"default" : {}
},
"items" : {
"type" : [{"$ref" : "#"}, "array"],
"items" : {"$ref" : "#"},
"default" : {}
},
"additionalItems" : {
"type" : [{"$ref" : "#"}, "boolean"],
"default" : {}
},
"required" : {
"type" : "boolean",
"default" : false
},
"dependencies" : {
"type" : "object",
"additionalProperties" : {
"type" : ["string", "array", {"$ref" : "#"}],
"items" : {
"type" : "string"
}
},
"default" : {}
},
"minimum" : {
"type" : "number"
},
"maximum" : {
"type" : "number"
},
"exclusiveMinimum" : {
"type" : "boolean",
"default" : false
},
"exclusiveMaximum" : {
"type" : "boolean",
"default" : false
},
"minItems" : {
"type" : "integer",
"minimum" : 0,
"default" : 0
},
"maxItems" : {
"type" : "integer",
"minimum" : 0
},
"uniqueItems" : {
"type" : "boolean",
"default" : false
},
"pattern" : {
"type" : "string",
"format" : "regex"
},
"minLength" : {
"type" : "integer",
"minimum" : 0,
"default" : 0
},
"maxLength" : {
"type" : "integer"
},
"enum" : {
"type" : "array",
"minItems" : 1,
"uniqueItems" : true
},
"default" : {
"type" : "any"
},
"title" : {
"type" : "string"
},
"description" : {
"type" : "string"
},
"format" : {
"type" : "string"
},
"divisibleBy" : {
"type" : "number",
"minimum" : 0,
"exclusiveMinimum" : true,
"default" : 1
},
"disallow" : {
"type" : ["string", "array"],
"items" : {
"type" : ["string", {"$ref" : "#"}]
},
"uniqueItems" : true
},
"extends" : {
"type" : [{"$ref" : "#"}, "array"],
"items" : {"$ref" : "#"},
"default" : {}
},
"id" : {
"type" : "string"
},
"$ref" : {
"type" : "string"
},
"$schema" : {
"type" : "string",
"format" : "uri"
}
},
"dependencies" : {
"exclusiveMinimum" : "minimum",
"exclusiveMaximum" : "maximum"
},
"default" : {}
}

View File

@ -0,0 +1,149 @@
{
"id": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"positiveInteger": {
"type": "integer",
"minimum": 0
},
"positiveIntegerDefault0": {
"allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ]
},
"simpleTypes": {
"enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"minItems": 1,
"uniqueItems": true
}
},
"type": "object",
"properties": {
"id": {
"type": "string"
},
"$schema": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"multipleOf": {
"type": "number",
"minimum": 0,
"exclusiveMinimum": true
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "boolean",
"default": false
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "boolean",
"default": false
},
"maxLength": { "$ref": "#/definitions/positiveInteger" },
"minLength": { "$ref": "#/definitions/positiveIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/positiveInteger" },
"minItems": { "$ref": "#/definitions/positiveIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"maxProperties": { "$ref": "#/definitions/positiveInteger" },
"minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": {
"anyOf": [
{ "type": "boolean" },
{ "$ref": "#" }
],
"default": {}
},
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"enum": {
"type": "array",
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"dependencies": {
"exclusiveMaximum": [ "maximum" ],
"exclusiveMinimum": [ "minimum" ]
},
"default": {}
}

View File

@ -0,0 +1,153 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$id": "http://json-schema.org/draft-06/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": {},
"examples": {
"type": "array",
"items": {}
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": {}
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": {},
"enum": {
"type": "array"
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": {}
}

View File

@ -0,0 +1,166 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": {"$ref": "#"},
"then": {"$ref": "#"},
"else": {"$ref": "#"},
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}
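The two documents above are the draft-06 and draft-07 meta-schemas: JSON Schemas that constrain other JSON Schemas (for instance, "type" must be one of the simpleTypes values or a unique, non-empty array of them, and "required" must be a stringArray). A minimal sketch of what that buys you, assuming the `jsonschema` package these files appear to ship with is importable:

import jsonschema

good = {
    "type": "object",
    "required": ["name"],
    "properties": {"name": {"type": "string"}},
}
bad = {
    "type": "objectt",    # not one of the simpleTypes values
    "required": "name",   # must be a stringArray, not a plain string
}

# check_schema() validates a schema against the bundled meta-schema.
jsonschema.Draft7Validator.check_schema(good)   # passes silently
try:
    jsonschema.Draft7Validator.check_schema(bad)
except jsonschema.SchemaError as exc:
    print(exc.message)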

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@ -0,0 +1,18 @@
Copyright © 2017 Erez Shinan
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,38 @@
from .exceptions import (
GrammarError,
LarkError,
LexError,
ParseError,
UnexpectedCharacters,
UnexpectedEOF,
UnexpectedInput,
UnexpectedToken,
)
from .lark import Lark
from .lexer import Token
from .tree import ParseTree, Tree
from .utils import logger
from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args
__version__: str = "1.1.4"
__all__ = (
"GrammarError",
"LarkError",
"LexError",
"ParseError",
"UnexpectedCharacters",
"UnexpectedEOF",
"UnexpectedInput",
"UnexpectedToken",
"Lark",
"Token",
"ParseTree",
"Tree",
"logger",
"Discard",
"Transformer",
"Transformer_NonRecursive",
"Visitor",
"v_args",
)
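The module above is lark's public entry point; everything listed in __all__ is meant to be imported from the top-level package. A minimal usage sketch with a hypothetical grammar (imports assume a top-level `lark` install rather than the vendored `poetry.core._vendor` path):

from lark import Lark, Token, Tree

# Hypothetical grammar: a comma-separated list of words.
parser = Lark(r"""
    start: WORD ("," WORD)*

    %import common.WORD
    %import common.WS
    %ignore WS
""")

tree = parser.parse("alpha, beta, gamma")
assert isinstance(tree, Tree)
assert all(isinstance(child, Token) for child in tree.children)
print(tree.pretty())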

View File

@ -0,0 +1,6 @@
# For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html
import os
def get_hook_dirs():
return [os.path.dirname(__file__)]

View File

@ -0,0 +1,14 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2017-2020, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
from PyInstaller.utils.hooks import collect_data_files
datas = collect_data_files('lark')

View File

@ -0,0 +1,59 @@
"""
Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree
"""
import inspect, re
import types
from typing import Optional, Callable
from lark import Transformer, v_args
class Ast:
"""Abstract class
Subclasses will be collected by `create_transformer()`
"""
pass
class AsList:
"""Abstract class
Subclasses will be instantiated with the parse results as a single list, instead of as arguments.
"""
class WithMeta:
"""Abstract class
Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail)
"""
pass
def camel_to_snake(name):
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
def create_transformer(ast_module: types.ModuleType,
transformer: Optional[Transformer]=None,
decorator_factory: Callable=v_args) -> Transformer:
"""Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.
For each class, we create a corresponding rule in the transformer, with a matching name.
CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".
Classes starting with an underscore (`_`) will be skipped.
Parameters:
ast_module: A Python module containing all the subclasses of ``ast_utils.Ast``
transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta,
and returning a decorator for the methods of ``transformer``. (default: ``v_args``).
"""
t = transformer or Transformer()
for name, obj in inspect.getmembers(ast_module):
if not name.startswith('_') and inspect.isclass(obj):
if issubclass(obj, Ast):
wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta))
obj = wrapper(obj).__get__(t)
setattr(t, camel_to_snake(name), obj)
return t
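A minimal sketch of `create_transformer` with a hypothetical grammar and two Ast dataclasses: `Assign` handles the `assign` rule inline, while `CodeBlock` (an `AsList` subclass) receives all statements as a single list. The grammar, class names and input below are made up for illustration:

import sys
from dataclasses import dataclass
from typing import List

from lark import Lark, Transformer, ast_utils

grammar = r"""
    code_block: assign+
    assign: NAME "=" NUMBER

    %import common.CNAME -> NAME
    %import common.SIGNED_NUMBER -> NUMBER
    %import common.WS
    %ignore WS
"""

@dataclass
class Assign(ast_utils.Ast):
    name: str
    value: str

@dataclass
class CodeBlock(ast_utils.Ast, ast_utils.AsList):
    statements: List[Assign]

# Collect the Ast subclasses defined in this module and wire them up as rule
# callbacks: Assign -> "assign", CodeBlock -> "code_block".
to_ast = ast_utils.create_transformer(sys.modules[__name__], Transformer())

parser = Lark(grammar, parser="lalr", start="code_block")
print(to_ast.transform(parser.parse("x = 1  y = 2")))
# CodeBlock(statements=[Assign(...), Assign(...)])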

View File

@ -0,0 +1,82 @@
from copy import deepcopy
import sys
from types import ModuleType
from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from .lark import PostLex
from .lexer import Lexer
from typing import Union, Type
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
from .utils import Serialize
from .lexer import TerminalDef, Token
###{standalone
_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
_Callback = Callable[[Token], Token]
class LexerConf(Serialize):
__serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
__serialize_namespace__ = TerminalDef,
terminals: Collection[TerminalDef]
re_module: ModuleType
ignore: Collection[str]
postlex: 'Optional[PostLex]'
callbacks: Dict[str, _Callback]
g_regex_flags: int
skip_validation: bool
use_bytes: bool
lexer_type: Optional[_LexerArgType]
def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
self.terminals = terminals
self.terminals_by_name = {t.name: t for t in self.terminals}
assert len(self.terminals) == len(self.terminals_by_name)
self.ignore = ignore
self.postlex = postlex
self.callbacks = callbacks or {}
self.g_regex_flags = g_regex_flags
self.re_module = re_module
self.skip_validation = skip_validation
self.use_bytes = use_bytes
self.lexer_type = None
def _deserialize(self):
self.terminals_by_name = {t.name: t for t in self.terminals}
def __deepcopy__(self, memo=None):
return type(self)(
deepcopy(self.terminals, memo),
self.re_module,
deepcopy(self.ignore, memo),
deepcopy(self.postlex, memo),
deepcopy(self.callbacks, memo),
deepcopy(self.g_regex_flags, memo),
deepcopy(self.skip_validation, memo),
deepcopy(self.use_bytes, memo),
)
class ParserConf(Serialize):
__serialize_fields__ = 'rules', 'start', 'parser_type'
def __init__(self, rules, callbacks, start):
assert isinstance(start, list)
self.rules = rules
self.callbacks = callbacks
self.start = start
self.parser_type = None
###}

View File

@ -0,0 +1,292 @@
from .utils import logger, NO_VALUE
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING
if TYPE_CHECKING:
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser
from .tree import Tree
###{standalone
class LarkError(Exception):
pass
class ConfigurationError(LarkError, ValueError):
pass
def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
if value not in options:
raise ConfigurationError(msg % (value, options))
class GrammarError(LarkError):
pass
class ParseError(LarkError):
pass
class LexError(LarkError):
pass
T = TypeVar('T')
class UnexpectedInput(LarkError):
"""UnexpectedInput Error.
Used as a base class for the following exceptions:
- ``UnexpectedCharacters``: The lexer encountered an unexpected string
- ``UnexpectedToken``: The parser received an unexpected token
- ``UnexpectedEOF``: The parser expected a token, but the input ended
After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
"""
line: int
column: int
pos_in_stream = None
state: Any
_terminals_by_name = None
def get_context(self, text: str, span: int=40) -> str:
"""Returns a pretty string pinpointing the error in the text,
with span amount of context characters around it.
Note:
The parser doesn't hold a copy of the text it has to parse,
so you have to provide it again
"""
assert self.pos_in_stream is not None, self
pos = self.pos_in_stream
start = max(pos - span, 0)
end = pos + span
if not isinstance(text, bytes):
before = text[start:pos].rsplit('\n', 1)[-1]
after = text[pos:end].split('\n', 1)[0]
return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
else:
before = text[start:pos].rsplit(b'\n', 1)[-1]
after = text[pos:end].split(b'\n', 1)[0]
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
def match_examples(self, parse_fn: 'Callable[[str], Tree]',
examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
token_type_match_fallback: bool=False,
use_accepts: bool=True
) -> Optional[T]:
"""Allows you to detect what's wrong in the input text by matching
against example errors.
Given a parser instance and a dictionary mapping some label with
some malformed syntax examples, it'll return the label for the
example that best matches the current error. The function will
iterate the dictionary until it finds a matching error, and
return the corresponding value.
For an example usage, see `examples/error_reporting_lalr.py`
Parameters:
parse_fn: parse function (usually ``lark_instance.parse``)
examples: dictionary of ``{'example_string': value}``.
use_accepts: Recommended to keep this as ``use_accepts=True``.
"""
assert self.state is not None, "Not supported for this exception"
if isinstance(examples, Mapping):
examples = examples.items()
candidate = (None, False)
for i, (label, example) in enumerate(examples):
assert not isinstance(example, str), "Expecting a list"
for j, malformed in enumerate(example):
try:
parse_fn(malformed)
except UnexpectedInput as ut:
if ut.state == self.state:
if (
use_accepts
and isinstance(self, UnexpectedToken)
and isinstance(ut, UnexpectedToken)
and ut.accepts != self.accepts
):
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
continue
if (
isinstance(self, (UnexpectedToken, UnexpectedEOF))
and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
):
if ut.token == self.token: # Try exact match first
logger.debug("Exact Match at example [%s][%s]" % (i, j))
return label
if token_type_match_fallback:
# Fallback to token types match
if (ut.token.type == self.token.type) and not candidate[-1]:
logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
candidate = label, True
if candidate[0] is None:
logger.debug("Same State match at example [%s][%s]" % (i, j))
candidate = label, False
return candidate[0]
def _format_expected(self, expected):
if self._terminals_by_name:
d = self._terminals_by_name
expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
class UnexpectedEOF(ParseError, UnexpectedInput):
"""An exception that is raised by the parser, when the input ends while it still expects a token.
"""
expected: 'List[Token]'
def __init__(self, expected, state=None, terminals_by_name=None):
super(UnexpectedEOF, self).__init__()
self.expected = expected
self.state = state
from .lexer import Token
self.token = Token("<EOF>", "") # , line=-1, column=-1, pos_in_stream=-1)
self.pos_in_stream = -1
self.line = -1
self.column = -1
self._terminals_by_name = terminals_by_name
def __str__(self):
message = "Unexpected end-of-input. "
message += self._format_expected(self.expected)
return message
class UnexpectedCharacters(LexError, UnexpectedInput):
"""An exception that is raised by the lexer, when it cannot match the next
string of characters to any of its terminals.
"""
allowed: Set[str]
considered_tokens: Set[Any]
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
terminals_by_name=None, considered_rules=None):
super(UnexpectedCharacters, self).__init__()
# TODO considered_tokens and allowed can be figured out using state
self.line = line
self.column = column
self.pos_in_stream = lex_pos
self.state = state
self._terminals_by_name = terminals_by_name
self.allowed = allowed
self.considered_tokens = considered_tokens
self.considered_rules = considered_rules
self.token_history = token_history
if isinstance(seq, bytes):
self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
else:
self.char = seq[lex_pos]
self._context = self.get_context(seq)
def __str__(self):
message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
message += '\n\n' + self._context
if self.allowed:
message += self._format_expected(self.allowed)
if self.token_history:
message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
return message
class UnexpectedToken(ParseError, UnexpectedInput):
"""An exception that is raised by the parser, when the token it received
doesn't match any valid step forward.
Parameters:
token: The mismatched token
expected: The set of expected tokens
considered_rules: Which rules were considered, to deduce the expected tokens
state: A value representing the parser state. Do not rely on its value or type.
interactive_parser: An instance of ``InteractiveParser`` that is initialized to the point of failure,
and can be used for debugging and error handling.
Note: These parameters are available as attributes of the instance.
"""
expected: Set[str]
considered_rules: Set[str]
interactive_parser: 'InteractiveParser'
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
super(UnexpectedToken, self).__init__()
# TODO considered_rules and expected can be figured out using state
self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?')
self.pos_in_stream = getattr(token, 'start_pos', None)
self.state = state
self.token = token
self.expected = expected # XXX deprecate? `accepts` is better
self._accepts = NO_VALUE
self.considered_rules = considered_rules
self.interactive_parser = interactive_parser
self._terminals_by_name = terminals_by_name
self.token_history = token_history
@property
def accepts(self) -> Set[str]:
if self._accepts is NO_VALUE:
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
return self._accepts
def __str__(self):
message = ("Unexpected token %r at line %s, column %s.\n%s"
% (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
if self.token_history:
message += "Previous tokens: %r\n" % self.token_history
return message
class VisitError(LarkError):
"""VisitError is raised when visitors are interrupted by an exception
It provides the following attributes for inspection:
Parameters:
rule: the name of the visit rule that failed
obj: the tree-node or token that was being processed
orig_exc: the exception that caused it to fail
Note: These parameters are available as attributes
"""
obj: 'Union[Tree, Token]'
orig_exc: Exception
def __init__(self, rule, obj, orig_exc):
message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
super(VisitError, self).__init__(message)
self.rule = rule
self.obj = obj
self.orig_exc = orig_exc
class MissingVariableError(LarkError):
pass
###}
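A minimal sketch of the two helpers documented above, `get_context` and `match_examples`, using a hypothetical one-assignment grammar:

from lark import Lark, UnexpectedInput

parser = Lark(r"""
    start: NAME "=" NUMBER

    %import common.CNAME -> NAME
    %import common.SIGNED_NUMBER -> NUMBER
    %import common.WS
    %ignore WS
""", parser="lalr")

text = "answer = 42 oops"
try:
    parser.parse(text)
except UnexpectedInput as u:
    # The parser keeps no copy of the input, so it must be passed in again.
    print(u.get_context(text))
    # Classify the error by replaying known-bad examples through the parser.
    label = u.match_examples(parser.parse, {
        "trailing junk": ["x = 1 y", "x = 1 1"],
        "missing value": ["x ="],
    })
    print(label)   # expected to match the "trailing junk" examples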

View File

@ -0,0 +1,122 @@
from typing import Optional, Tuple, ClassVar
from .utils import Serialize
###{standalone
TOKEN_DEFAULT_PRIORITY = 0
class Symbol(Serialize):
__slots__ = ('name',)
name: str
is_term: ClassVar[bool] = NotImplemented
def __init__(self, name: str) -> None:
self.name = name
def __eq__(self, other):
assert isinstance(other, Symbol), other
return self.is_term == other.is_term and self.name == other.name
def __ne__(self, other):
return not (self == other)
def __hash__(self):
return hash(self.name)
def __repr__(self):
return '%s(%r)' % (type(self).__name__, self.name)
fullrepr = property(__repr__)
def renamed(self, f):
return type(self)(f(self.name))
class Terminal(Symbol):
__serialize_fields__ = 'name', 'filter_out'
is_term: ClassVar[bool] = True
def __init__(self, name, filter_out=False):
self.name = name
self.filter_out = filter_out
@property
def fullrepr(self):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
def renamed(self, f):
return type(self)(f(self.name), self.filter_out)
class NonTerminal(Symbol):
__serialize_fields__ = 'name',
is_term: ClassVar[bool] = False
class RuleOptions(Serialize):
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
keep_all_tokens: bool
expand1: bool
priority: Optional[int]
template_source: Optional[str]
empty_indices: Tuple[bool, ...]
def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.priority = priority
self.template_source = template_source
self.empty_indices = empty_indices
def __repr__(self):
return 'RuleOptions(%r, %r, %r, %r)' % (
self.keep_all_tokens,
self.expand1,
self.priority,
self.template_source
)
class Rule(Serialize):
"""
origin : a symbol
expansion : a list of symbols
order : index of this expansion amongst all rules of the same name
"""
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
__serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
__serialize_namespace__ = Terminal, NonTerminal, RuleOptions
def __init__(self, origin, expansion, order=0, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.order = order
self.options = options or RuleOptions()
self._hash = hash((self.origin, tuple(self.expansion)))
def _deserialize(self):
self._hash = hash((self.origin, tuple(self.expansion)))
def __str__(self):
return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
def __repr__(self):
return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
def __hash__(self):
return self._hash
def __eq__(self, other):
if not isinstance(other, Rule):
return False
return self.origin == other.origin and self.expansion == other.expansion
###}
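These classes are the building blocks of a compiled grammar; Lark constructs them internally, but a small sketch shows the shape of a `Rule` (the import path assumes a top-level `lark` install, and the rule itself is made up):

from lark.grammar import NonTerminal, Rule, RuleOptions, Terminal

# A hand-built rule roughly equivalent to:  start: NAME "=" NUMBER
rule = Rule(
    NonTerminal("start"),
    [Terminal("NAME"), Terminal("EQUAL", filter_out=True), Terminal("NUMBER")],
    options=RuleOptions(keep_all_tokens=False, priority=None),
)

print(rule)              # <start : NAME EQUAL NUMBER>
print(rule.options)      # RuleOptions(False, False, None, None)
print(rule.origin.is_term, rule.expansion[0].is_term)   # False True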

View File

@ -0,0 +1,59 @@
// Basic terminals for common use
//
// Numbers
//
DIGIT: "0".."9"
HEXDIGIT: "a".."f"|"A".."F"|DIGIT
INT: DIGIT+
SIGNED_INT: ["+"|"-"] INT
DECIMAL: INT "." INT? | "." INT
// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
_EXP: ("e"|"E") SIGNED_INT
FLOAT: INT _EXP | DECIMAL _EXP?
SIGNED_FLOAT: ["+"|"-"] FLOAT
NUMBER: FLOAT | INT
SIGNED_NUMBER: ["+"|"-"] NUMBER
//
// Strings
//
_STRING_INNER: /.*?/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
//
// Names (Variables)
//
LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
LETTER: UCASE_LETTER | LCASE_LETTER
WORD: LETTER+
CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
//
// Whitespace
//
WS_INLINE: (" "|/\t/)+
WS: /[ \t\f\r\n]/+
CR : /\r/
LF : /\n/
NEWLINE: (CR? LF)+
// Comments
SH_COMMENT: /#[^\n]*/
CPP_COMMENT: /\/\/[^\n]*/
C_COMMENT: "/*" /(.|\n)*?/ "*/"
SQL_COMMENT: /--[^\n]*/
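Grammars reuse these shared terminals with `%import common.<NAME>`; a minimal sketch with a hypothetical key/value grammar:

from lark import Lark

# Pulls ESCAPED_STRING, SIGNED_NUMBER, CNAME and WS in from common.lark.
parser = Lark(r"""
    start: pair+
    pair: CNAME ":" value
    value: ESCAPED_STRING | SIGNED_NUMBER

    %import common.CNAME
    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS
""", parser="lalr")

print(parser.parse('name: "lark"  version: 1.1').pretty())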

View File

@ -0,0 +1,59 @@
start: (_item? _NL)* _item?
_item: rule
| token
| statement
rule: RULE rule_params priority? ":" expansions
token: TOKEN token_params priority? ":" expansions
rule_params: ["{" RULE ("," RULE)* "}"]
token_params: ["{" TOKEN ("," TOKEN)* "}"]
priority: "." NUMBER
statement: "%ignore" expansions -> ignore
| "%import" import_path ["->" name] -> import
| "%import" import_path name_list -> multi_import
| "%override" rule -> override_rule
| "%declare" name+ -> declare
!import_path: "."? name ("." name)*
name_list: "(" name ("," name)* ")"
?expansions: alias (_VBAR alias)*
?alias: expansion ["->" RULE]
?expansion: expr*
?expr: atom [OP | "~" NUMBER [".." NUMBER]]
?atom: "(" expansions ")"
| "[" expansions "]" -> maybe
| value
?value: STRING ".." STRING -> literal_range
| name
| (REGEXP | STRING) -> literal
| name "{" value ("," value)* "}" -> template_usage
name: RULE
| TOKEN
_VBAR: _NL? "|"
OP: /[+*]|[?](?![a-z])/
RULE: /!?[_?]?[a-z][_a-z0-9]*/
TOKEN: /_?[A-Z][_A-Z0-9]*/
STRING: _STRING "i"?
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/
_NL: /(\r?\n)+\s*/
%import common.ESCAPED_STRING -> _STRING
%import common.SIGNED_INT -> NUMBER
%import common.WS_INLINE
COMMENT: /\s*/ "//" /[^\n]/*
%ignore WS_INLINE
%ignore COMMENT

View File

@ -0,0 +1,304 @@
// Python 3 grammar for Lark
// This grammar should parse all python 3.x code successfully.
// Adapted from: https://docs.python.org/3/reference/grammar.html
// Start symbols for the grammar:
// single_input is a single interactive statement;
// file_input is a module or sequence of commands read from an input file;
// eval_input is the input for the eval() functions.
// NB: compound_stmt in single_input is followed by extra NEWLINE!
//
single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
file_input: (_NEWLINE | stmt)*
eval_input: testlist _NEWLINE*
decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: "async" funcdef
funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite
parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
| starparams
| kwparams
SLASH: "/" // Otherwise it would completely disappear and be indistinguishable in the result
starparams: (starparam | starguard) poststarparams
starparam: "*" typedparam
starguard: "*"
poststarparams: ("," paramvalue)* ["," kwparams]
kwparams: "**" typedparam ","?
?paramvalue: typedparam ("=" test)?
?typedparam: name (":" test)?
lambdef: "lambda" [lambda_params] ":" test
lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
| lambda_starparams
| lambda_kwparams
?lambda_paramvalue: name ("=" test)?
lambda_starparams: "*" [name] ("," lambda_paramvalue)* ["," [lambda_kwparams]]
lambda_kwparams: "**" name ","?
?stmt: simple_stmt | compound_stmt
?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr
assign_stmt: annassign | augassign | assign
annassign: testlist_star_expr ":" test ["=" test]
assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
augassign: testlist_star_expr augassign_op (yield_expr|testlist)
!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
?testlist_star_expr: test_or_star_expr
| test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple
| test_or_star_expr "," -> tuple
// For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: "del" exprlist
pass_stmt: "pass"
?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: "break"
continue_stmt: "continue"
return_stmt: "return" [testlist]
yield_stmt: yield_expr
raise_stmt: "raise" [test ["from" test]]
import_stmt: import_name | import_from
import_name: "import" dotted_as_names
// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS
import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)
!dots: "."+
import_as_name: name ["as" name]
dotted_as_name: dotted_name ["as" name]
import_as_names: import_as_name ("," import_as_name)* [","]
dotted_as_names: dotted_as_name ("," dotted_as_name)*
dotted_name: name ("." name)*
global_stmt: "global" name ("," name)*
nonlocal_stmt: "nonlocal" name ("," name)*
assert_stmt: "assert" test ["," test]
?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt
| with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: "async" (funcdef | with_stmt | for_stmt)
if_stmt: "if" test ":" suite elifs ["else" ":" suite]
elifs: elif_*
elif_: "elif" test ":" suite
while_stmt: "while" test ":" suite ["else" ":" suite]
for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
| "try" ":" suite finally -> try_finally
finally: "finally" ":" suite
except_clauses: except_clause+
except_clause: "except" [test ["as" name]] ":" suite
// NB compile.c makes sure that the default except clause is last
with_stmt: "with" with_items ":" suite
with_items: with_item ("," with_item)*
with_item: test ["as" name]
match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT
case: "case" pattern ["if" test] ":" suite
?pattern: sequence_item_pattern "," _sequence_pattern -> sequence_pattern
| as_pattern
?as_pattern: or_pattern ("as" NAME)?
?or_pattern: closed_pattern ("|" closed_pattern)*
?closed_pattern: literal_pattern
| NAME -> capture_pattern
| "_" -> any_pattern
| attr_pattern
| "(" as_pattern ")"
| "[" _sequence_pattern "]" -> sequence_pattern
| "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern
| "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern
| "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern
| class_pattern
literal_pattern: inner_literal_pattern
?inner_literal_pattern: "None" -> const_none
| "True" -> const_true
| "False" -> const_false
| STRING -> string
| number
attr_pattern: NAME ("." NAME)+ -> value
name_or_attr_pattern: NAME ("." NAME)* -> value
mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern
_sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)?
?sequence_item_pattern: as_pattern
| "*" NAME -> star_pattern
class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")"
arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern]
| keyws_arg_pattern -> no_pos_arguments
pos_arg_pattern: as_pattern ("," as_pattern)*
keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)*
keyw_arg_pattern: NAME "=" as_pattern
suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT
?test: or_test ("if" or_test "else" test)?
| lambdef
| assign_expr
assign_expr: name ":=" test
?test_nocond: or_test | lambdef_nocond
?or_test: and_test ("or" and_test)*
?and_test: not_test_ ("and" not_test_)*
?not_test_: "not" not_test_ -> not_test
| comparison
?comparison: expr (comp_op expr)*
star_expr: "*" expr
?expr: or_expr
?or_expr: xor_expr ("|" xor_expr)*
?xor_expr: and_expr ("^" and_expr)*
?and_expr: shift_expr ("&" shift_expr)*
?shift_expr: arith_expr (_shift_op arith_expr)*
?arith_expr: term (_add_op term)*
?term: factor (_mul_op factor)*
?factor: _unary_op factor | power
!_unary_op: "+"|"-"|"~"
!_add_op: "+"|"-"
!_shift_op: "<<"|">>"
!_mul_op: "*"|"@"|"/"|"%"|"//"
// <> isn't actually a valid comparison operator in Python. It's here for the
// sake of a __future__ import described in PEP 401 (which really works :-)
!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
?power: await_expr ("**" factor)?
?await_expr: AWAIT? atom_expr
AWAIT: "await"
?atom_expr: atom_expr "(" [arguments] ")" -> funccall
| atom_expr "[" subscriptlist "]" -> getitem
| atom_expr "." name -> getattr
| atom
?atom: "(" yield_expr ")"
| "(" _tuple_inner? ")" -> tuple
| "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
| "[" _testlist_comp? "]" -> list
| "[" comprehension{test_or_star_expr} "]" -> list_comprehension
| "{" _dict_exprlist? "}" -> dict
| "{" comprehension{key_value} "}" -> dict_comprehension
| "{" _set_exprlist "}" -> set
| "{" comprehension{test} "}" -> set_comprehension
| name -> var
| number
| string_concat
| "(" test ")"
| "..." -> ellipsis
| "None" -> const_none
| "True" -> const_true
| "False" -> const_false
?string_concat: string+
_testlist_comp: test | _tuple_inner
_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")
?test_or_star_expr: test
| star_expr
?subscriptlist: subscript
| subscript (("," subscript)+ [","] | ",") -> subscript_tuple
?subscript: test | ([test] ":" [test] [sliceop]) -> slice
sliceop: ":" [test]
?exprlist: (expr|star_expr)
| (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
?testlist: test | testlist_tuple
testlist_tuple: test (("," test)+ [","] | ",")
_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]
key_value: test ":" test
_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","]
classdef: "class" name ["(" [arguments] ")"] ":" suite
arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])?
| starargs
| kwargs
| comprehension{test}
starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
stararg: "*" test
kwargs: "**" test ("," argvalue)*
?argvalue: test ("=" test)?
comprehension{comp_result}: comp_result comp_fors [comp_if]
comp_fors: comp_for+
comp_for: [ASYNC] "for" exprlist "in" or_test
ASYNC: "async"
?comp_if: "if" test_nocond
// not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: name
yield_expr: "yield" [testlist]
| "yield" "from" test -> yield_from
number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
string: STRING | LONG_STRING
// Other terminals
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT
%declare _INDENT _DEDENT
// Python terminals
!name: NAME | "match" | "case"
NAME: /[^\W\d]\w*/
COMMENT: /#[^\n]*/
STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
LONG_STRING: /([ubf]?r?|r[ubf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
_SPECIAL_DEC: "0".."9" ("_"? "0".."9" )*
DEC_NUMBER: "1".."9" ("_"? "0".."9" )*
| "0" ("_"? "0" )* /(?![1-9])/
HEX_NUMBER.2: "0" ("x" | "X") ("_"? ("0".."9" | "a".."f" | "A".."F"))+
OCT_NUMBER.2: "0" ("o" | "O") ("_"? "0".."7" )+
BIN_NUMBER.2: "0" ("b" | "B") ("_"? "0".."1" )+
_EXP: ("e"|"E") ["+" | "-"] _SPECIAL_DEC
DECIMAL: "." _SPECIAL_DEC | _SPECIAL_DEC "." _SPECIAL_DEC?
FLOAT_NUMBER.2: _SPECIAL_DEC _EXP | DECIMAL _EXP?
IMAG_NUMBER.2: (_SPECIAL_DEC | FLOAT_NUMBER) ("J" | "j")
// Comma-separated list (with an optional trailing comma)
cs_list{item}: item ("," item)* ","?
_cs_list{item}: item ("," item)* ","?
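This grammar ships inside the package and can be loaded by name via `Lark.open_from_package`. A minimal sketch, assuming `lark` is importable as a top-level package; the grammar `%declare`s _INDENT/_DEDENT, so it needs the `PythonIndenter` post-lexer defined further below:

from lark import Lark
from lark.indenter import PythonIndenter

python_parser = Lark.open_from_package(
    "lark", "python.lark", ("grammars",),
    parser="lalr", postlex=PythonIndenter(), start="file_input",
)

source = "def double(x):\n    return x * 2\n\nprint(double(21))\n"
print(python_parser.parse(source).pretty())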

View File

@ -0,0 +1,7 @@
// TODO: LETTER, WORD, etc.
//
// Whitespace
//
WS_INLINE: /[ \t\xa0]/+
WS: /[ \t\xa0\f\r\n]/+

View File

@ -0,0 +1,112 @@
"Provides Indentation services for languages with indentation similar to Python"
from abc import ABC, abstractmethod
from typing import List, Iterator
from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token
###{standalone
class DedentError(LarkError):
pass
class Indenter(PostLex, ABC):
paren_level: int
indent_level: List[int]
def __init__(self) -> None:
self.paren_level = 0
self.indent_level = [0]
assert self.tab_len > 0
def handle_NL(self, token: Token) -> Iterator[Token]:
if self.paren_level > 0:
return
yield token
indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
if indent > self.indent_level[-1]:
self.indent_level.append(indent)
yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
else:
while indent < self.indent_level[-1]:
self.indent_level.pop()
yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
if indent != self.indent_level[-1]:
raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
def _process(self, stream):
for token in stream:
if token.type == self.NL_type:
yield from self.handle_NL(token)
else:
yield token
if token.type in self.OPEN_PAREN_types:
self.paren_level += 1
elif token.type in self.CLOSE_PAREN_types:
self.paren_level -= 1
assert self.paren_level >= 0
while len(self.indent_level) > 1:
self.indent_level.pop()
yield Token(self.DEDENT_type, '')
assert self.indent_level == [0], self.indent_level
def process(self, stream):
self.paren_level = 0
self.indent_level = [0]
return self._process(stream)
# XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
@property
def always_accept(self):
return (self.NL_type,)
@property
@abstractmethod
def NL_type(self) -> str:
raise NotImplementedError()
@property
@abstractmethod
def OPEN_PAREN_types(self) -> List[str]:
raise NotImplementedError()
@property
@abstractmethod
def CLOSE_PAREN_types(self) -> List[str]:
raise NotImplementedError()
@property
@abstractmethod
def INDENT_type(self) -> str:
raise NotImplementedError()
@property
@abstractmethod
def DEDENT_type(self) -> str:
raise NotImplementedError()
@property
@abstractmethod
def tab_len(self) -> int:
raise NotImplementedError()
class PythonIndenter(Indenter):
NL_type = '_NEWLINE'
OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
INDENT_type = '_INDENT'
DEDENT_type = '_DEDENT'
tab_len = 8
###}
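Beyond the ready-made `PythonIndenter`, the `Indenter` base class can be subclassed for any indentation-sensitive DSL by naming the newline, indent and dedent terminal types. A minimal sketch with a hypothetical outline grammar:

from lark import Lark
from lark.indenter import Indenter

grammar = r"""
    ?start: _NL* node+
    node: NAME _NL [_INDENT node+ _DEDENT]

    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %ignore WS_INLINE
    %declare _INDENT _DEDENT

    _NL: /(\r?\n[\t ]*)+/
"""

class OutlineIndenter(Indenter):
    NL_type = "_NL"
    OPEN_PAREN_types = []     # no bracket tokens suspend indentation here
    CLOSE_PAREN_types = []
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"
    tab_len = 4

parser = Lark(grammar, parser="lalr", postlex=OutlineIndenter())
print(parser.parse("a\n    b\n    c\n        d\ne\n").pretty())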

View File

@ -0,0 +1,648 @@
from abc import ABC, abstractmethod
import getpass
import sys, os, pickle
import tempfile
import types
import re
from typing import (
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, Sequence,
Tuple, Iterable, IO, Any, TYPE_CHECKING, Collection
)
if TYPE_CHECKING:
from .parsers.lalr_interactive_parser import InteractiveParser
from .tree import ParseTree
from .visitors import Transformer
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
from .parser_frontends import ParsingFrontend
from .exceptions import ConfigurationError, assert_config, UnexpectedInput
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, md5_digest
from .tree import Tree
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
from .lexer import Lexer, BasicLexer, TerminalDef, LexerThread, Token
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import _validate_frontend_args, _get_lexer_callbacks, _deserialize_parsing_frontend, _construct_parsing_frontend
from .grammar import Rule
try:
import regex
_has_regex = True
except ImportError:
_has_regex = False
###{standalone
class PostLex(ABC):
@abstractmethod
def process(self, stream: Iterator[Token]) -> Iterator[Token]:
return stream
always_accept: Iterable[str] = ()
class LarkOptions(Serialize):
"""Specifies the options for Lark
"""
start: List[str]
debug: bool
transformer: 'Optional[Transformer]'
propagate_positions: Union[bool, str]
maybe_placeholders: bool
cache: Union[bool, str]
regex: bool
g_regex_flags: int
keep_all_tokens: bool
tree_class: Any
parser: _ParserArgType
lexer: _LexerArgType
ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
postlex: Optional[PostLex]
priority: 'Optional[Literal["auto", "normal", "invert"]]'
lexer_callbacks: Dict[str, Callable[[Token], Token]]
use_bytes: bool
edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
source_path: Optional[str]
OPTIONS_DOC = """
**=== General Options ===**
start
The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
debug
Display debug information and extra warnings. Use only when debugging (Default: ``False``)
When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
transformer
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
maybe_placeholders
When ``True``, the ``[]`` operator returns ``None`` when not matched.
When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
(default= ``True``)
cache
Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
- When ``False``, does nothing (default)
- When ``True``, caches to a temporary file in the local directory
- When given a string, caches to the path pointed by the string
regex
When True, uses the ``regex`` module instead of the stdlib ``re``.
g_regex_flags
Flags that are applied to all terminals (both regex and strings)
keep_all_tokens
Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
tree_class
Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
**=== Algorithm Options ===**
parser
Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
(there is also a "cyk" option for legacy)
lexer
Decides whether or not to use a lexer stage
- "auto" (default): Choose for me based on the parser
- "basic": Use a basic lexer
- "contextual": Stronger lexer (only works with parser="lalr")
- "dynamic": Flexible and powerful (only with parser="earley")
- "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
ambiguity
Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
- "resolve": The parser will automatically choose the simplest derivation
(it chooses consistently: greedy for tokens, non-greedy for rules)
- "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
- "forest": The parser will return the root of the shared packed parse forest.
**=== Misc. / Domain Specific Options ===**
postlex
Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers.
priority
How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
lexer_callbacks
Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
use_bytes
Accept an input of type ``bytes`` instead of ``str``.
edit_terminals
A callback for editing the terminals before parse.
import_paths
A List of either paths or loader functions to specify from where grammars are imported
source_path
Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
**=== End of Options ===**
"""
if __doc__:
__doc__ += OPTIONS_DOC
# Adding a new option needs to be done in multiple places:
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
# - As an attribute of `LarkOptions` above
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
_defaults: Dict[str, Any] = {
'debug': False,
'keep_all_tokens': False,
'tree_class': None,
'cache': False,
'postlex': None,
'parser': 'earley',
'lexer': 'auto',
'transformer': None,
'start': 'start',
'priority': 'auto',
'ambiguity': 'auto',
'regex': False,
'propagate_positions': False,
'lexer_callbacks': {},
'maybe_placeholders': True,
'edit_terminals': None,
'g_regex_flags': 0,
'use_bytes': False,
'import_paths': [],
'source_path': None,
'_plugins': {},
}
def __init__(self, options_dict: Dict[str, Any]) -> None:
o = dict(options_dict)
options = {}
for name, default in self._defaults.items():
if name in o:
value = o.pop(name)
if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
value = bool(value)
else:
value = default
options[name] = value
if isinstance(options['start'], str):
options['start'] = [options['start']]
self.__dict__['options'] = options
assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
if self.parser == 'earley' and self.transformer:
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
if o:
raise ConfigurationError("Unknown options: %s" % o.keys())
def __getattr__(self, name: str) -> Any:
try:
return self.__dict__['options'][name]
except KeyError as e:
raise AttributeError(e)
def __setattr__(self, name: str, value: str) -> None:
assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
self.options[name] = value
def serialize(self, memo = None) -> Dict[str, Any]:
return self.options
@classmethod
def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions":
return cls(data)
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These options are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
_T = TypeVar('_T', bound="Lark")
class Lark(Serialize):
"""Main interface for the library.
It's mostly a thin wrapper for the many different parsers, and for the tree constructor.
Parameters:
grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax)
options: a dictionary controlling various aspects of Lark.
Example:
>>> Lark(r'''start: "foo" ''')
Lark(...)
"""
source_path: str
source_grammar: str
grammar: 'Grammar'
options: LarkOptions
lexer: Lexer
terminals: Collection[TerminalDef]
def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
self.options = LarkOptions(options)
re_module: types.ModuleType
# Set regex or re module
use_regex = self.options.regex
if use_regex:
if _has_regex:
re_module = regex
else:
raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
else:
re_module = re
# Some, but not all file-like objects have a 'name' attribute
if self.options.source_path is None:
try:
self.source_path = grammar.name # type: ignore[union-attr]
except AttributeError:
self.source_path = '<string>'
else:
self.source_path = self.options.source_path
# Drain file-like objects to get their contents
try:
read = grammar.read # type: ignore[union-attr]
except AttributeError:
pass
else:
grammar = read()
cache_fn = None
cache_md5 = None
if isinstance(grammar, str):
self.source_grammar = grammar
if self.options.use_bytes:
if not isascii(grammar):
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
if self.options.cache:
if self.options.parser != 'lalr':
raise ConfigurationError("cache only works with parser='lalr' for now")
unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
from . import __version__
s = grammar + options_str + __version__ + str(sys.version_info[:2])
cache_md5 = md5_digest(s)
if isinstance(self.options.cache, str):
cache_fn = self.options.cache
else:
if self.options.cache is not True:
raise ConfigurationError("cache argument must be bool or str")
try:
username = getpass.getuser()
except Exception:
# The exception raised may be ImportError or OSError in
# the future. For the cache, we don't care about the
# specific reason - we just want a username.
username = "unknown"
cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_md5, *sys.version_info[:2])
old_options = self.options
try:
with FS.open(cache_fn, 'rb') as f:
logger.debug('Loading grammar from cache: %s', cache_fn)
# Remove options that aren't relevant for loading from cache
for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
del options[name]
file_md5 = f.readline().rstrip(b'\n')
cached_used_files = pickle.load(f)
if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
cached_parser_data = pickle.load(f)
self._load(cached_parser_data, **options)
return
except FileNotFoundError:
# The cache file doesn't exist; parse and compose the grammar as normal
pass
except Exception: # We should probably narrow down which errors we catch here.
logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
# In theory, the Lark instance might have been messed up by the call to `_load`.
# In practice the only relevant thing that might have been overwritten should be `options`
self.options = old_options
# Parse the grammar file and compose the grammars
self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
else:
assert isinstance(grammar, Grammar)
self.grammar = grammar
if self.options.lexer == 'auto':
if self.options.parser == 'lalr':
self.options.lexer = 'contextual'
elif self.options.parser == 'earley':
if self.options.postlex is not None:
logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
"Consider using lalr with contextual instead of earley")
self.options.lexer = 'basic'
else:
self.options.lexer = 'dynamic'
elif self.options.parser == 'cyk':
self.options.lexer = 'basic'
else:
assert False, self.options.parser
lexer = self.options.lexer
if isinstance(lexer, type):
assert issubclass(lexer, Lexer) # XXX Is this really important? Maybe just ensure interface compliance
else:
assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
if self.options.postlex is not None and 'dynamic' in lexer:
raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
if self.options.ambiguity == 'auto':
if self.options.parser == 'earley':
self.options.ambiguity = 'resolve'
else:
assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
if self.options.priority == 'auto':
self.options.priority = 'normal'
if self.options.priority not in _VALID_PRIORITY_OPTIONS:
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
if self.options.parser is None:
terminals_to_keep = '*'
elif self.options.postlex is not None:
terminals_to_keep = set(self.options.postlex.always_accept)
else:
terminals_to_keep = set()
# Compile the EBNF grammar into BNF
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
if self.options.edit_terminals:
for t in self.terminals:
self.options.edit_terminals(t)
self._terminals_dict = {t.name: t for t in self.terminals}
# If the user asked to invert the priorities, negate them all here.
if self.options.priority == 'invert':
for rule in self.rules:
if rule.options.priority is not None:
rule.options.priority = -rule.options.priority
for term in self.terminals:
term.priority = -term.priority
# Else, if the user asked to disable priorities, strip them from the
# rules and terminals. This allows the Earley parsers to skip an extra forest walk
# for improved performance, if you don't need them (or didn't specify any).
elif self.options.priority is None:
for rule in self.rules:
if rule.options.priority is not None:
rule.options.priority = None
for term in self.terminals:
term.priority = 0
# TODO Deprecate lexer_callbacks?
self.lexer_conf = LexerConf(
self.terminals, re_module, self.ignore_tokens, self.options.postlex,
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
)
if self.options.parser:
self.parser = self._build_parser()
elif lexer:
self.lexer = self._build_lexer()
if cache_fn:
logger.debug('Saving grammar to cache: %s', cache_fn)
try:
with FS.open(cache_fn, 'wb') as f:
assert cache_md5 is not None
f.write(cache_md5.encode('utf8') + b'\n')
pickle.dump(used_files, f)
self.save(f, _LOAD_ALLOWED_OPTIONS)
except IOError as e:
logger.exception("Failed to save Lark to cache: %r. Error: %r", cache_fn, e)
if __doc__:
__doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
__serialize_fields__ = 'parser', 'rules', 'options'
def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
lexer_conf = self.lexer_conf
if dont_ignore:
from copy import copy
lexer_conf = copy(lexer_conf)
lexer_conf.ignore = ()
return BasicLexer(lexer_conf)
def _prepare_callbacks(self) -> None:
self._callbacks = {}
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(
self.rules,
self.options.tree_class or Tree,
self.options.propagate_positions,
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
def _build_parser(self) -> "ParsingFrontend":
self._prepare_callbacks()
_validate_frontend_args(self.options.parser, self.options.lexer)
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
return _construct_parsing_frontend(
self.options.parser,
self.options.lexer,
self.lexer_conf,
parser_conf,
options=self.options
)
def save(self, f, exclude_options: Collection[str] = ()) -> None:
"""Saves the instance into the given file object
Useful for caching and multiprocessing.
"""
data, m = self.memo_serialize([TerminalDef, Rule])
if exclude_options:
data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
@classmethod
def load(cls: Type[_T], f) -> _T:
"""Loads an instance from the given file object
Useful for caching and multiprocessing.
"""
inst = cls.__new__(cls)
return inst._load(f)
def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf:
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
lexer_conf.callbacks = options.lexer_callbacks or {}
lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex
return lexer_conf
def _load(self: _T, f: Any, **kwargs) -> _T:
if isinstance(f, dict):
d = f
else:
d = pickle.load(f)
memo_json = d['memo']
data = d['data']
assert memo_json
memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
options = dict(data['options'])
if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
.format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
options.update(kwargs)
self.options = LarkOptions.deserialize(options, memo)
self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
self.source_path = '<deserialized>'
_validate_frontend_args(self.options.parser, self.options.lexer)
self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
self.terminals = self.lexer_conf.terminals
self._prepare_callbacks()
self._terminals_dict = {t.name: t for t in self.terminals}
self.parser = _deserialize_parsing_frontend(
data['parser'],
memo,
self.lexer_conf,
self._callbacks,
self.options, # Not all, but multiple attributes are used
)
return self
@classmethod
def _load_from_dict(cls, data, memo, **kwargs):
inst = cls.__new__(cls)
return inst._load({'data': data, 'memo': memo}, **kwargs)
@classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
"""Create an instance of Lark with the grammar given by its filename
If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
Example:
>>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
Lark(...)
"""
if rel_to:
basepath = os.path.dirname(rel_to)
grammar_filename = os.path.join(basepath, grammar_filename)
with open(grammar_filename, encoding='utf8') as f:
return cls(f, **options)
@classmethod
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
"""Create an instance of Lark with the grammar loaded from within the package `package`.
This allows grammar loading from zipapps.
Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
Example:
Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
"""
package_loader = FromPackageLoader(package, search_paths)
full_path, text = package_loader(None, grammar_path)
options.setdefault('source_path', full_path)
options.setdefault('import_paths', [])
options['import_paths'].append(package_loader)
return cls(text, **options)
def __repr__(self):
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='basic'
When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
:raises UnexpectedCharacters: In case the lexer cannot find a suitable match.
"""
lexer: Lexer
if not hasattr(self, 'lexer') or dont_ignore:
lexer = self._build_lexer(dont_ignore)
else:
lexer = self.lexer
lexer_thread = LexerThread.from_text(lexer, text)
stream = lexer_thread.lex(None)
if self.options.postlex:
return self.options.postlex.process(stream)
return stream
def get_terminal(self, name: str) -> TerminalDef:
"""Get information about a terminal"""
return self._terminals_dict[name]
def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
"""Start an interactive parsing session.
Parameters:
text (str, optional): Text to be parsed. Required for ``resume_parse()``.
start (str, optional): Start symbol
Returns:
A new InteractiveParser instance.
See Also: ``Lark.parse()``
"""
return self.parser.parse_interactive(text, start=start)
def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
"""Parse the given text, according to the options provided.
Parameters:
text (str): Text to be parsed.
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.
Returns:
If a transformer is supplied to ``__init__``, returns whatever is the
result of the transformation. Otherwise, returns a Tree instance.
:raises UnexpectedInput: On a parse error, one of these sub-exceptions will rise:
``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``.
For convenience, these sub-exceptions also inherit from ``ParserError`` and ``LexerError``.
"""
return self.parser.parse(text, start=start, on_error=on_error)
###}
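# --- Illustrative usage sketch (not part of the vendored file). ---
# A minimal example of the entry points documented above: ``parse()`` returns
# a Tree when no transformer is supplied, and ``parse_interactive()`` (LALR
# only) lets the caller drive parsing step by step. The grammar, the input
# text and the helper name ``_example_lark_usage`` are assumptions made for
# illustration; ``exhaust_lexer()``/``feed_eof()`` are used here as the
# interactive-parser helpers, to the best of my knowledge.
def _example_lark_usage():
    parser = Lark(r"""
        start: WORD "," WORD "!"
        %import common.WORD
        %ignore " "
    """, parser="lalr")

    tree = parser.parse("Hello, World!")      # -> Tree('start', [Token(WORD, 'Hello'), Token(WORD, 'World')])

    interactive = parser.parse_interactive("Hello, World!")
    interactive.exhaust_lexer()               # feed every remaining token
    result = interactive.feed_eof()           # finish the parse and get the tree
    return tree, result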

View File

@ -0,0 +1,603 @@
# Lexer Implementation
from abc import abstractmethod, ABC
import re
from contextlib import suppress
from typing import (
TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern, ClassVar, TYPE_CHECKING, overload
)
from types import ModuleType
import warnings
if TYPE_CHECKING:
from .common import LexerConf
from .utils import classify, get_regexp_width, Serialize
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
from .grammar import TOKEN_DEFAULT_PRIORITY
###{standalone
from copy import copy
class Pattern(Serialize, ABC):
value: str
flags: Collection[str]
raw: Optional[str]
type: ClassVar[str]
def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
self.value = value
self.flags = frozenset(flags)
self.raw = raw
def __repr__(self):
return repr(self.to_regexp())
# Pattern Hashing assumes all subclasses have a different priority!
def __hash__(self):
return hash((type(self), self.value, self.flags))
def __eq__(self, other):
return type(self) == type(other) and self.value == other.value and self.flags == other.flags
@abstractmethod
def to_regexp(self) -> str:
raise NotImplementedError()
@property
@abstractmethod
def min_width(self) -> int:
raise NotImplementedError()
@property
@abstractmethod
def max_width(self) -> int:
raise NotImplementedError()
def _get_flags(self, value):
for f in self.flags:
value = ('(?%s:%s)' % (f, value))
return value
class PatternStr(Pattern):
__serialize_fields__ = 'value', 'flags'
type: ClassVar[str] = "str"
def to_regexp(self) -> str:
return self._get_flags(re.escape(self.value))
@property
def min_width(self) -> int:
return len(self.value)
@property
def max_width(self) -> int:
return len(self.value)
class PatternRE(Pattern):
__serialize_fields__ = 'value', 'flags', '_width'
type: ClassVar[str] = "re"
def to_regexp(self) -> str:
return self._get_flags(self.value)
_width = None
def _get_width(self):
if self._width is None:
self._width = get_regexp_width(self.to_regexp())
return self._width
@property
def min_width(self) -> int:
return self._get_width()[0]
@property
def max_width(self) -> int:
return self._get_width()[1]
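# --- Illustrative sketch (not part of the vendored file). ---
# Shows how the two Pattern flavours above render to regexps (flags become
# inline ``(?f:...)`` groups) and report their widths. The helper name and the
# sample values are assumptions for illustration.
def _example_patterns():
    literal = PatternStr("if", flags=["i"])
    assert literal.to_regexp() == "(?i:if)"
    assert (literal.min_width, literal.max_width) == (2, 2)   # a literal has a fixed width

    number = PatternRE(r"\d+")
    assert number.to_regexp() == r"\d+"       # no flags, so the value is returned as-is
    assert number.min_width == 1              # width bounds are derived from the regexp
    return literal, number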
class TerminalDef(Serialize):
__serialize_fields__ = 'name', 'pattern', 'priority'
__serialize_namespace__ = PatternStr, PatternRE
name: str
pattern: Pattern
priority: int
def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None:
assert isinstance(pattern, Pattern), pattern
self.name = name
self.pattern = pattern
self.priority = priority
def __repr__(self):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
def user_repr(self) -> str:
if self.name.startswith('__'): # We represent a generated terminal
return self.pattern.raw or self.name
else:
return self.name
_T = TypeVar('_T', bound="Token")
class Token(str):
"""A string with meta-information, that is produced by the lexer.
When parsing text, the resulting chunks of the input that haven't been discarded,
will end up in the tree as Token instances. The Token class inherits from Python's ``str``,
so normal string comparisons and operations will work as expected.
Attributes:
type: Name of the token (as specified in grammar)
value: Value of the token (redundant, as ``token.value == token`` will always be true)
start_pos: The index of the token in the text
line: The line of the token in the text (starting with 1)
column: The column of the token in the text (starting with 1)
end_line: The line where the token ends
end_column: The next column after the end of the token. For example,
if the token is a single character with a column value of 4,
end_column will be 5.
end_pos: the index where the token ends (basically ``start_pos + len(token)``)
"""
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
__match_args__ = ('type', 'value')
type: str
start_pos: Optional[int]
value: Any
line: Optional[int]
column: Optional[int]
end_line: Optional[int]
end_column: Optional[int]
end_pos: Optional[int]
@overload
def __new__(
cls,
type: str,
value: Any,
start_pos: Optional[int]=None,
line: Optional[int]=None,
column: Optional[int]=None,
end_line: Optional[int]=None,
end_column: Optional[int]=None,
end_pos: Optional[int]=None
) -> 'Token':
...
@overload
def __new__(
cls,
type_: str,
value: Any,
start_pos: Optional[int]=None,
line: Optional[int]=None,
column: Optional[int]=None,
end_line: Optional[int]=None,
end_column: Optional[int]=None,
end_pos: Optional[int]=None
) -> 'Token': ...
def __new__(cls, *args, **kwargs):
if "type_" in kwargs:
            warnings.warn("`type_` is deprecated; use `type` instead", DeprecationWarning)
if "type" in kwargs:
raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
kwargs["type"] = kwargs.pop("type_")
return cls._future_new(*args, **kwargs)
@classmethod
def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
inst = super(Token, cls).__new__(cls, value)
inst.type = type
inst.start_pos = start_pos
inst.value = value
inst.line = line
inst.column = column
inst.end_line = end_line
inst.end_column = end_column
inst.end_pos = end_pos
return inst
@overload
def update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
...
@overload
def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
...
def update(self, *args, **kwargs):
if "type_" in kwargs:
            warnings.warn("`type_` is deprecated; use `type` instead", DeprecationWarning)
if "type" in kwargs:
raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
kwargs["type"] = kwargs.pop("type_")
return self._future_update(*args, **kwargs)
def _future_update(self, type: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
return Token.new_borrow_pos(
type if type is not None else self.type,
value if value is not None else self.value,
self
)
@classmethod
def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
def __reduce__(self):
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value)
def __deepcopy__(self, memo):
return Token(self.type, self.value, self.start_pos, self.line, self.column)
def __eq__(self, other):
if isinstance(other, Token) and self.type != other.type:
return False
return str.__eq__(self, other)
__hash__ = str.__hash__
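# --- Illustrative sketch (not part of the vendored file). ---
# Demonstrates that ``Token`` behaves like ``str`` while carrying the position
# attributes documented in its docstring, and that ``update()`` copies the
# position info onto a modified token. The helper name is an assumption.
def _example_token_usage():
    tok = Token("NAME", "foo", start_pos=0, line=1, column=1)
    assert tok == "foo"                       # plain string comparison still works
    assert tok.type == "NAME" and tok.line == 1

    renamed = tok.update(type="IDENT")        # same value and positions, new type
    assert renamed.type == "IDENT" and renamed.column == tok.column
    return renamed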
class LineCounter:
__slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
def __init__(self, newline_char):
self.newline_char = newline_char
self.char_pos = 0
self.line = 1
self.column = 1
self.line_start_pos = 0
def __eq__(self, other):
if not isinstance(other, LineCounter):
return NotImplemented
return self.char_pos == other.char_pos and self.newline_char == other.newline_char
def feed(self, token: Token, test_newline=True):
"""Consume a token and calculate the new line & column.
As an optional optimization, set test_newline=False if token doesn't contain a newline.
"""
if test_newline:
newlines = token.count(self.newline_char)
if newlines:
self.line += newlines
self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
self.char_pos += len(token)
self.column = self.char_pos - self.line_start_pos + 1
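# --- Illustrative sketch (not part of the vendored file). ---
# Shows how ``LineCounter.feed`` advances the line/column bookkeeping. A plain
# string is enough here, since ``feed`` only needs ``count``/``rindex``/``len``.
# The helper name is an assumption.
def _example_line_counter():
    lc = LineCounter('\n')
    lc.feed("ab\ncd")
    assert lc.line == 2            # one newline was consumed
    assert lc.column == 3          # 'cd' occupies columns 1-2; the next column is 3
    assert lc.char_pos == 5
    return lc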
class UnlessCallback:
def __init__(self, scanner):
self.scanner = scanner
def __call__(self, t):
res = self.scanner.match(t.value, 0)
if res:
_value, t.type = res
return t
class CallChain:
def __init__(self, callback1, callback2, cond):
self.callback1 = callback1
self.callback2 = callback2
self.cond = cond
def __call__(self, t):
t2 = self.callback1(t)
return self.callback2(t) if self.cond(t2) else t2
def _get_match(re_, regexp, s, flags):
m = re_.match(regexp, s, flags)
if m:
return m.group(0)
def _create_unless(terminals, g_regex_flags, re_, use_bytes):
tokens_by_type = classify(terminals, lambda t: type(t.pattern))
assert len(tokens_by_type) <= 2, tokens_by_type.keys()
embedded_strs = set()
callback = {}
for retok in tokens_by_type.get(PatternRE, []):
unless = []
for strtok in tokens_by_type.get(PatternStr, []):
if strtok.priority != retok.priority:
continue
s = strtok.pattern.value
if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
unless.append(strtok)
if strtok.pattern.flags <= retok.pattern.flags:
embedded_strs.add(strtok)
if unless:
callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
new_terminals = [t for t in terminals if t not in embedded_strs]
return new_terminals, callback
class Scanner:
def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
self.terminals = terminals
self.g_regex_flags = g_regex_flags
self.re_ = re_
self.use_bytes = use_bytes
self.match_whole = match_whole
self.allowed_types = {t.name for t in self.terminals}
self._mres = self._build_mres(terminals, len(terminals))
def _build_mres(self, terminals, max_size):
# Python sets an unreasonable group limit (currently 100) in its re module
# Worse, the only way to know we reached it is by catching an AssertionError!
        # This function recursively tries fewer and fewer groups until it's successful.
postfix = '$' if self.match_whole else ''
mres = []
while terminals:
pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
if self.use_bytes:
pattern = pattern.encode('latin-1')
try:
mre = self.re_.compile(pattern, self.g_regex_flags)
except AssertionError: # Yes, this is what Python provides us.. :/
return self._build_mres(terminals, max_size//2)
mres.append(mre)
terminals = terminals[max_size:]
return mres
def match(self, text, pos):
for mre in self._mres:
m = mre.match(text, pos)
if m:
return m.group(0), m.lastgroup
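# --- Illustrative sketch (not part of the vendored file). ---
# Builds a tiny ``Scanner`` straight from ``TerminalDef`` objects to show the
# (value, terminal-name) pairs returned by ``match``, as used by ``BasicLexer``.
# The terminal names and the helper name are assumptions for illustration.
def _example_scanner():
    terminals = [
        TerminalDef("NUMBER", PatternRE(r"\d+")),
        TerminalDef("PLUS", PatternStr("+")),
    ]
    scanner = Scanner(terminals, g_regex_flags=0, re_=re, use_bytes=False)
    assert scanner.match("12+3", 0) == ("12", "NUMBER")
    assert scanner.match("12+3", 2) == ("+", "PLUS")
    assert scanner.match("12+3", 4) is None    # nothing left to match
    return scanner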
def _regexp_has_newline(r: str):
r"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
- escaped newline (\\n)
- anything but ([^...])
- any-char (.) when the flag (?s) exists
- spaces (\s)
"""
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
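# --- Illustrative sketch (not part of the vendored file). ---
# A few checks against the newline heuristics listed in the docstring above.
# The helper name is an assumption.
def _example_regexp_has_newline():
    assert _regexp_has_newline(r"\n")           # escaped newline
    assert _regexp_has_newline(r"[^a]")         # negated class may match a newline
    assert _regexp_has_newline(r"(?s).")        # '.' with the DOTALL flag
    assert not _regexp_has_newline(r"[a-z]+")   # plain pattern, no newline possible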
class LexerState:
"""Represents the current state of the lexer as it scans the text
    (Lexer objects are only instantiated per grammar, not per text)
"""
__slots__ = 'text', 'line_ctr', 'last_token'
def __init__(self, text, line_ctr=None, last_token=None):
self.text = text
self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
self.last_token = last_token
def __eq__(self, other):
if not isinstance(other, LexerState):
return NotImplemented
return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
def __copy__(self):
return type(self)(self.text, copy(self.line_ctr), self.last_token)
class LexerThread:
"""A thread that ties a lexer instance and a lexer state, to be used by the parser
"""
def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
self.lexer = lexer
self.state = lexer_state
@classmethod
def from_text(cls, lexer: 'Lexer', text: str):
return cls(lexer, LexerState(text))
def lex(self, parser_state):
return self.lexer.lex(self.state, parser_state)
def __copy__(self):
return type(self)(self.lexer, copy(self.state))
_Token = Token
_Callback = Callable[[Token], Token]
class Lexer(ABC):
"""Lexer interface
Method Signatures:
lex(self, lexer_state, parser_state) -> Iterator[Token]
"""
@abstractmethod
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
return NotImplemented
def make_lexer_state(self, text):
"Deprecated"
return LexerState(text)
class BasicLexer(Lexer):
terminals: Collection[TerminalDef]
ignore_types: FrozenSet[str]
newline_types: FrozenSet[str]
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
re: ModuleType
def __init__(self, conf: 'LexerConf') -> None:
terminals = list(conf.terminals)
assert all(isinstance(t, TerminalDef) for t in terminals), terminals
self.re = conf.re_module
if not conf.skip_validation:
# Sanitization
for t in terminals:
try:
self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags)
except self.re.error:
raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
if t.pattern.min_width == 0:
raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
if not (set(conf.ignore) <= {t.name for t in terminals}):
raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
# Init
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
self.ignore_types = frozenset(conf.ignore)
terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
self.terminals = terminals
self.user_callbacks = conf.callbacks
self.g_regex_flags = conf.g_regex_flags
self.use_bytes = conf.use_bytes
self.terminals_by_name = conf.terminals_by_name
self._scanner = None
def _build_scanner(self):
terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
assert all(self.callback.values())
for type_, f in self.user_callbacks.items():
if type_ in self.callback:
# Already a callback there, probably UnlessCallback
self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
else:
self.callback[type_] = f
self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
@property
def scanner(self):
if self._scanner is None:
self._build_scanner()
return self._scanner
def match(self, text, pos):
return self.scanner.match(text, pos)
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
with suppress(EOFError):
while True:
yield self.next_token(state, parser_state)
def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
line_ctr = lex_state.line_ctr
while line_ctr.char_pos < len(lex_state.text):
res = self.match(lex_state.text, line_ctr.char_pos)
if not res:
allowed = self.scanner.allowed_types - self.ignore_types
if not allowed:
allowed = {"<END-OF-FILE>"}
raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
state=parser_state, terminals_by_name=self.terminals_by_name)
value, type_ = res
if type_ not in self.ignore_types:
t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
line_ctr.feed(value, type_ in self.newline_types)
t.end_line = line_ctr.line
t.end_column = line_ctr.column
t.end_pos = line_ctr.char_pos
if t.type in self.callback:
t = self.callback[t.type](t)
if not isinstance(t, Token):
raise LexError("Callbacks must return a token (returned %r)" % t)
lex_state.last_token = t
return t
else:
if type_ in self.callback:
t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
self.callback[type_](t2)
line_ctr.feed(value, type_ in self.newline_types)
# EOF
raise EOFError(self)
class ContextualLexer(Lexer):
lexers: Dict[str, BasicLexer]
root_lexer: BasicLexer
def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
terminals = list(conf.terminals)
terminals_by_name = conf.terminals_by_name
trad_conf = copy(conf)
trad_conf.terminals = terminals
lexer_by_tokens: Dict[FrozenSet[str], BasicLexer] = {}
self.lexers = {}
for state, accepts in states.items():
key = frozenset(accepts)
try:
lexer = lexer_by_tokens[key]
except KeyError:
accepts = set(accepts) | set(conf.ignore) | set(always_accept)
lexer_conf = copy(trad_conf)
lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
lexer = BasicLexer(lexer_conf)
lexer_by_tokens[key] = lexer
self.lexers[state] = lexer
assert trad_conf.terminals is terminals
self.root_lexer = BasicLexer(trad_conf)
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
try:
while True:
lexer = self.lexers[parser_state.position]
yield lexer.next_token(lexer_state, parser_state)
except EOFError:
pass
except UnexpectedCharacters as e:
# In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
# This tests the input against the global context, to provide a nicer error.
try:
last_token = lexer_state.last_token # Save last_token. Calling root_lexer.next_token will change this to the wrong token
token = self.root_lexer.next_token(lexer_state, parser_state)
raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
except UnexpectedCharacters:
raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set.
###}

File diff suppressed because it is too large

View File

@ -0,0 +1,387 @@
from typing import List
from .exceptions import GrammarError, ConfigurationError
from .lexer import Token
from .tree import Tree
from .visitors import Transformer_InPlace
from .visitors import _vargs_meta, _vargs_meta_inline
###{standalone
from functools import partial, wraps
from itertools import repeat, product
class ExpandSingleChild:
def __init__(self, node_builder):
self.node_builder = node_builder
def __call__(self, children):
if len(children) == 1:
return children[0]
else:
return self.node_builder(children)
class PropagatePositions:
def __init__(self, node_builder, node_filter=None):
self.node_builder = node_builder
self.node_filter = node_filter
def __call__(self, children):
res = self.node_builder(children)
if isinstance(res, Tree):
# Calculate positions while the tree is streaming, according to the rule:
# - nodes start at the start of their first child's container,
# and end at the end of their last child's container.
# Containers are nodes that take up space in text, but have been inlined in the tree.
res_meta = res.meta
first_meta = self._pp_get_meta(children)
if first_meta is not None:
if not hasattr(res_meta, 'line'):
                    # positions are only filled in when still missing; if meta was already set, the rule has probably been inlined (e.g. `?rule`)
res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
res_meta.empty = False
res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
last_meta = self._pp_get_meta(reversed(children))
if last_meta is not None:
if not hasattr(res_meta, 'end_line'):
res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
res_meta.empty = False
res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
return res
def _pp_get_meta(self, children):
for c in children:
if self.node_filter is not None and not self.node_filter(c):
continue
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
return c
elif hasattr(c, '__lark_meta__'):
return c.__lark_meta__()
def make_propagate_positions(option):
if callable(option):
return partial(PropagatePositions, node_filter=option)
elif option is True:
return PropagatePositions
elif option is False:
return None
raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
class ChildFilter:
def __init__(self, to_include, append_none, node_builder):
self.node_builder = node_builder
self.to_include = to_include
self.append_none = append_none
def __call__(self, children):
filtered = []
for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand:
filtered += children[i].children
else:
filtered.append(children[i])
if self.append_none:
filtered += [None] * self.append_none
return self.node_builder(filtered)
class ChildFilterLALR(ChildFilter):
"""Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
def __call__(self, children):
filtered = []
for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand:
if filtered:
filtered += children[i].children
else: # Optimize for left-recursion
filtered = children[i].children
else:
filtered.append(children[i])
if self.append_none:
filtered += [None] * self.append_none
return self.node_builder(filtered)
class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
self.node_builder = node_builder
self.to_include = to_include
def __call__(self, children):
filtered = []
for i, to_expand in self.to_include:
if to_expand:
if filtered:
filtered += children[i].children
else: # Optimize for left-recursion
filtered = children[i].children
else:
filtered.append(children[i])
return self.node_builder(filtered)
def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
assert _empty_indices.count(False) == len(expansion)
s = ''.join(str(int(b)) for b in _empty_indices)
empty_indices = [len(ones) for ones in s.split('0')]
assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
else:
empty_indices = [0] * (len(expansion)+1)
to_include = []
nones_to_add = 0
for i, sym in enumerate(expansion):
nones_to_add += empty_indices[i]
if keep_all_tokens or not (sym.is_term and sym.filter_out):
to_include.append((i, _should_expand(sym), nones_to_add))
nones_to_add = 0
nones_to_add += empty_indices[len(expansion)]
if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
if _empty_indices or ambiguous:
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
else:
# LALR without placeholders
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
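# --- Illustrative sketch (not part of the vendored file). ---
# Worked example of the ``empty_indices`` preparation above. For a rule that
# keeps three symbols, with placeholders before the first and the third, the
# bool list encodes to "10010"; splitting on '0' yields how many ``None``
# placeholders to insert before each kept symbol (plus one trailing count).
# The helper name is an assumption.
def _example_empty_indices():
    _empty_indices = [True, False, False, True, False]
    s = ''.join(str(int(b)) for b in _empty_indices)       # "10010"
    empty_indices = [len(ones) for ones in s.split('0')]   # one entry per kept symbol + 1
    assert empty_indices == [1, 0, 1, 0]
    return empty_indices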
class AmbiguousExpander:
"""Deal with the case where we're expanding children ('_rule') into a parent but the children
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
    ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
into the right parents in the right places, essentially shifting the ambiguity up the tree."""
def __init__(self, to_expand, tree_class, node_builder):
self.node_builder = node_builder
self.tree_class = tree_class
self.to_expand = to_expand
def __call__(self, children):
def _is_ambig_tree(t):
return hasattr(t, 'data') and t.data == '_ambig'
# -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
# All children of an _ambig node should be a derivation of that ambig node, hence
# it is safe to assume that if we see an _ambig node nested within an ambig node
# it is safe to simply expand it into the parent _ambig node as an alternative derivation.
ambiguous = []
for i, child in enumerate(children):
if _is_ambig_tree(child):
if i in self.to_expand:
ambiguous.append(i)
child.expand_kids_by_data('_ambig')
if not ambiguous:
return self.node_builder(children)
expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
to_expand = [i for i, sym in enumerate(expansion)
if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
if to_expand:
return partial(AmbiguousExpander, to_expand, tree_class)
class AmbiguousIntermediateExpander:
"""
Propagate ambiguous intermediate nodes and their derivations up to the
current rule.
In general, converts
rule
_iambig
_inter
someChildren1
...
_inter
someChildren2
...
someChildren3
...
to
_ambig
rule
someChildren1
...
someChildren3
...
rule
someChildren2
...
someChildren3
...
rule
childrenFromNestedIambigs
...
someChildren3
...
...
propagating up any nested '_iambig' nodes along the way.
"""
def __init__(self, tree_class, node_builder):
self.node_builder = node_builder
self.tree_class = tree_class
def __call__(self, children):
def _is_iambig_tree(child):
return hasattr(child, 'data') and child.data == '_iambig'
def _collapse_iambig(children):
"""
Recursively flatten the derivations of the parent of an '_iambig'
node. Returns a list of '_inter' nodes guaranteed not
to contain any nested '_iambig' nodes, or None if children does
not contain an '_iambig' node.
"""
# Due to the structure of the SPPF,
# an '_iambig' node can only appear as the first child
if children and _is_iambig_tree(children[0]):
iambig_node = children[0]
result = []
for grandchild in iambig_node.children:
collapsed = _collapse_iambig(grandchild.children)
if collapsed:
for child in collapsed:
child.children += children[1:]
result += collapsed
else:
new_tree = self.tree_class('_inter', grandchild.children + children[1:])
result.append(new_tree)
return result
collapsed = _collapse_iambig(children)
if collapsed:
processed_nodes = [self.node_builder(c.children) for c in collapsed]
return self.tree_class('_ambig', processed_nodes)
return self.node_builder(children)
def inplace_transformer(func):
@wraps(func)
def f(children):
# function name in a Transformer is a rule name.
tree = Tree(func.__name__, children)
return func(tree)
return f
def apply_visit_wrapper(func, name, wrapper):
if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
raise NotImplementedError("Meta args not supported for internal transformer")
@wraps(func)
def f(children):
return wrapper(func, name, children, None)
return f
class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders
self.rule_builders = list(self._init_builders(rules))
def _init_builders(self, rules):
propagate_positions = make_propagate_positions(self.propagate_positions)
for rule in rules:
options = rule.options
keep_all_tokens = options.keep_all_tokens
expand_single_child = options.expand1
wrapper_chain = list(filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
propagate_positions,
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
]))
yield rule, wrapper_chain
def create_callback(self, transformer=None):
callbacks = {}
default_handler = getattr(transformer, '__default__', None)
if default_handler:
def default_callback(data, children):
return default_handler(data, children, None)
else:
default_callback = self.tree_class
for rule, wrapper_chain in self.rule_builders:
user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
try:
f = getattr(transformer, user_callback_name)
wrapper = getattr(f, 'visit_wrapper', None)
if wrapper is not None:
f = apply_visit_wrapper(f, user_callback_name, wrapper)
elif isinstance(transformer, Transformer_InPlace):
f = inplace_transformer(f)
except AttributeError:
f = partial(default_callback, user_callback_name)
for w in wrapper_chain:
f = w(f)
if rule in callbacks:
raise GrammarError("Rule '%s' already exists" % (rule,))
callbacks[rule] = f
return callbacks
###}

View File

@ -0,0 +1,245 @@
from typing import Any, Callable, Dict, Tuple
from .exceptions import ConfigurationError, GrammarError, assert_config
from .utils import get_regexp_width, Serialize
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer
from .parsers import earley, xearley, cyk
from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
###{standalone
def _wrap_lexer(lexer_class):
future_interface = getattr(lexer_class, '__future_interface__', False)
if future_interface:
return lexer_class
else:
class CustomLexerWrapper(Lexer):
def __init__(self, lexer_conf):
self.lexer = lexer_class(lexer_conf)
def lex(self, lexer_state, parser_state):
return self.lexer.lex(lexer_state.text)
return CustomLexerWrapper
def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
class ParsingFrontend(Serialize):
__serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
def __init__(self, lexer_conf, parser_conf, options, parser=None):
self.parser_conf = parser_conf
self.lexer_conf = lexer_conf
self.options = options
# Set-up parser
if parser: # From cache
self.parser = parser
else:
create_parser = _parser_creators.get(parser_conf.parser_type)
assert create_parser is not None, "{} is not supported in standalone mode".format(
parser_conf.parser_type
)
self.parser = create_parser(lexer_conf, parser_conf, options)
# Set-up lexer
lexer_type = lexer_conf.lexer_type
self.skip_lexer = False
if lexer_type in ('dynamic', 'dynamic_complete'):
assert lexer_conf.postlex is None
self.skip_lexer = True
return
try:
create_lexer = {
'basic': create_basic_lexer,
'contextual': create_contextual_lexer,
}[lexer_type]
except KeyError:
assert issubclass(lexer_type, Lexer), lexer_type
self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
else:
self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
if lexer_conf.postlex:
self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
def _verify_start(self, start=None):
if start is None:
start_decls = self.parser_conf.start
if len(start_decls) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
            start, = start_decls
elif start not in self.parser_conf.start:
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
return start
def _make_lexer_thread(self, text):
cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
return text if self.skip_lexer else cls.from_text(self.lexer, text)
def parse(self, text, start=None, on_error=None):
chosen_start = self._verify_start(start)
kw = {} if on_error is None else {'on_error': on_error}
stream = self._make_lexer_thread(text)
return self.parser.parse(stream, chosen_start, **kw)
def parse_interactive(self, text=None, start=None):
chosen_start = self._verify_start(start)
if self.parser_conf.parser_type != 'lalr':
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
stream = self._make_lexer_thread(text)
return self.parser.parse_interactive(stream, chosen_start)
def _validate_frontend_args(parser, lexer) -> None:
assert_config(parser, ('lalr', 'earley', 'cyk'))
if not isinstance(lexer, type): # not custom lexer?
expected = {
'lalr': ('basic', 'contextual'),
'earley': ('basic', 'dynamic', 'dynamic_complete'),
'cyk': ('basic', ),
}[parser]
assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
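# --- Illustrative sketch (not part of the vendored file). ---
# Exercises the parser/lexer combination table enforced above; an unsupported
# pairing raises ConfigurationError. The helper name is an assumption.
def _example_validate_frontend_args():
    _validate_frontend_args('lalr', 'contextual')     # supported
    _validate_frontend_args('earley', 'dynamic')      # supported
    try:
        _validate_frontend_args('cyk', 'contextual')  # CYK only supports the basic lexer
    except ConfigurationError:
        return True
    raise AssertionError("expected a ConfigurationError")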
def _get_lexer_callbacks(transformer, terminals):
result = {}
for terminal in terminals:
callback = getattr(transformer, terminal.name, None)
if callback is not None:
result[terminal.name] = callback
return result
class PostLexConnector:
def __init__(self, lexer, postlexer):
self.lexer = lexer
self.postlexer = postlexer
def lex(self, lexer_state, parser_state):
i = self.lexer.lex(lexer_state, parser_state)
return self.postlexer.process(i)
def create_basic_lexer(lexer_conf, parser, postlex, options):
cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
return cls(lexer_conf)
def create_contextual_lexer(lexer_conf, parser, postlex, options):
cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
always_accept = postlex.always_accept if postlex else ()
return cls(lexer_conf, states, always_accept=always_accept)
def create_lalr_parser(lexer_conf, parser_conf, options=None):
debug = options.debug if options else False
cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
return cls(parser_conf, debug=debug)
_parser_creators['lalr'] = create_lalr_parser
###}
class EarleyRegexpMatcher:
def __init__(self, lexer_conf):
self.regexps = {}
for t in lexer_conf.terminals:
regexp = t.pattern.to_regexp()
try:
width = get_regexp_width(regexp)[0]
except ValueError:
raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
else:
if width == 0:
raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
if lexer_conf.use_bytes:
regexp = regexp.encode('utf-8')
self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)
def match(self, term, text, index=0):
return self.regexps[term.name].match(text, index)
def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
if lexer_conf.callbacks:
raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
earley_matcher = EarleyRegexpMatcher(lexer_conf)
return xearley.Parser(lexer_conf, parser_conf, earley_matcher.match, **kw)
def _match_earley_basic(term, token):
return term.name == token.type
def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)
def create_earley_parser(lexer_conf, parser_conf, options):
resolve_ambiguity = options.ambiguity == 'resolve'
debug = options.debug if options else False
tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
extra = {}
if lexer_conf.lexer_type == 'dynamic':
f = create_earley_parser__dynamic
elif lexer_conf.lexer_type == 'dynamic_complete':
        extra['complete_lex'] = True
f = create_earley_parser__dynamic
else:
f = create_earley_parser__basic
return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
class CYK_FrontEnd:
def __init__(self, lexer_conf, parser_conf, options=None):
self._analysis = GrammarAnalyzer(parser_conf)
self.parser = cyk.Parser(parser_conf.rules)
self.callbacks = parser_conf.callbacks
def parse(self, lexer_thread, start):
tokens = list(lexer_thread.lex(None))
tree = self.parser.parse(tokens, start)
return self._transform(tree)
def _transform(self, tree):
subtrees = list(tree.iter_subtrees())
for subtree in subtrees:
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
return self._apply_callback(tree)
def _apply_callback(self, tree):
return self.callbacks[tree.rule](tree.children)
_parser_creators['earley'] = create_earley_parser
_parser_creators['cyk'] = CYK_FrontEnd
def _construct_parsing_frontend(
parser_type: _ParserArgType,
lexer_type: _LexerArgType,
lexer_conf,
parser_conf,
options
):
assert isinstance(lexer_conf, LexerConf)
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = parser_type
lexer_conf.lexer_type = lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)

View File

@ -0,0 +1,345 @@
"""This module implements a CYK parser."""
# Author: https://github.com/ehudt (2018)
#
# Adapted by Erez
from collections import defaultdict
import itertools
from ..exceptions import ParseError
from ..lexer import Token
from ..tree import Tree
from ..grammar import Terminal as T, NonTerminal as NT, Symbol
try:
xrange
except NameError:
xrange = range
def match(t, s):
assert isinstance(t, T)
return t.name == s.type
class Rule:
"""Context-free grammar rule."""
def __init__(self, lhs, rhs, weight, alias):
super(Rule, self).__init__()
assert isinstance(lhs, NT), lhs
assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
self.lhs = lhs
self.rhs = rhs
self.weight = weight
self.alias = alias
def __str__(self):
return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self.lhs, tuple(self.rhs)))
def __eq__(self, other):
return self.lhs == other.lhs and self.rhs == other.rhs
def __ne__(self, other):
return not (self == other)
class Grammar:
"""Context-free grammar."""
def __init__(self, rules):
self.rules = frozenset(rules)
def __eq__(self, other):
return self.rules == other.rules
def __str__(self):
return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
def __repr__(self):
return str(self)
# Parse tree data structures
class RuleNode:
"""A node in the parse tree, which also contains the full rhs rule."""
def __init__(self, rule, children, weight=0):
self.rule = rule
self.children = children
self.weight = weight
def __repr__(self):
return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))
class Parser:
"""Parser wrapper."""
def __init__(self, rules):
super(Parser, self).__init__()
self.orig_rules = {rule: rule for rule in rules}
rules = [self._to_rule(rule) for rule in rules]
self.grammar = to_cnf(Grammar(rules))
def _to_rule(self, lark_rule):
"""Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
assert isinstance(lark_rule.origin, NT)
assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
return Rule(
lark_rule.origin, lark_rule.expansion,
weight=lark_rule.options.priority if lark_rule.options.priority else 0,
alias=lark_rule)
def parse(self, tokenized, start): # pylint: disable=invalid-name
"""Parses input, which is a list of tokens."""
assert start
start = NT(start)
table, trees = _parse(tokenized, self.grammar)
# Check if the parse succeeded.
if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
raise ParseError('Parsing failed.')
parse = trees[(0, len(tokenized) - 1)][start]
return self._to_tree(revert_cnf(parse))
def _to_tree(self, rule_node):
"""Converts a RuleNode parse tree to a lark Tree."""
orig_rule = self.orig_rules[rule_node.rule.alias]
children = []
for child in rule_node.children:
if isinstance(child, RuleNode):
children.append(self._to_tree(child))
else:
assert isinstance(child.name, Token)
children.append(child.name)
t = Tree(orig_rule.origin, children)
t.rule=orig_rule
return t
def print_parse(node, indent=0):
if isinstance(node, RuleNode):
print(' ' * (indent * 2) + str(node.rule.lhs))
for child in node.children:
print_parse(child, indent + 1)
else:
print(' ' * (indent * 2) + str(node.s))
def _parse(s, g):
"""Parses sentence 's' using CNF grammar 'g'."""
# The CYK table. Indexed with a 2-tuple: (start pos, end pos)
table = defaultdict(set)
# Top-level structure is similar to the CYK table. Each cell is a dict from
# rule name to the best (lightest) tree for that rule.
trees = defaultdict(dict)
# Populate base case with existing terminal production rules
for i, w in enumerate(s):
for terminal, rules in g.terminal_rules.items():
if match(terminal, w):
for rule in rules:
table[(i, i)].add(rule)
if (rule.lhs not in trees[(i, i)] or
rule.weight < trees[(i, i)][rule.lhs].weight):
trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
# Iterate over lengths of sub-sentences
for l in xrange(2, len(s) + 1):
# Iterate over sub-sentences with the given length
for i in xrange(len(s) - l + 1):
# Choose partition of the sub-sentence in [1, l)
for p in xrange(i + 1, i + l):
span1 = (i, p - 1)
span2 = (p, i + l - 1)
for r1, r2 in itertools.product(table[span1], table[span2]):
for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
table[(i, i + l - 1)].add(rule)
r1_tree = trees[span1][r1.lhs]
r2_tree = trees[span2][r2.lhs]
rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
if (rule.lhs not in trees[(i, i + l - 1)]
or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
return table, trees
# This section implements context-free grammar converter to Chomsky normal form.
# It also implements a conversion of parse trees from its CNF to the original
# grammar.
# Overview:
# Applies the following operations in this order:
# * TERM: Eliminates non-solitary terminals from all rules
# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
# * UNIT: Eliminates non-terminal unit rules
#
# The following grammar characteristics aren't featured:
# * Start symbol appears on RHS
# * Empty rules (epsilon rules)
class CnfWrapper:
"""CNF wrapper for grammar.
Validates that the input grammar is CNF and provides helper data structures.
"""
def __init__(self, grammar):
super(CnfWrapper, self).__init__()
self.grammar = grammar
self.rules = grammar.rules
self.terminal_rules = defaultdict(list)
self.nonterminal_rules = defaultdict(list)
for r in self.rules:
# Validate that the grammar is CNF and populate auxiliary data structures.
assert isinstance(r.lhs, NT), r
if len(r.rhs) not in [1, 2]:
raise ParseError("CYK doesn't support empty rules")
if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
self.terminal_rules[r.rhs[0]].append(r)
elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
self.nonterminal_rules[tuple(r.rhs)].append(r)
else:
assert False, r
def __eq__(self, other):
return self.grammar == other.grammar
def __repr__(self):
return repr(self.grammar)
class UnitSkipRule(Rule):
"""A rule that records NTs that were skipped during transformation."""
def __init__(self, lhs, rhs, skipped_rules, weight, alias):
super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
self.skipped_rules = skipped_rules
def __eq__(self, other):
return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules
__hash__ = Rule.__hash__
def build_unit_skiprule(unit_rule, target_rule):
skipped_rules = []
if isinstance(unit_rule, UnitSkipRule):
skipped_rules += unit_rule.skipped_rules
skipped_rules.append(target_rule)
if isinstance(target_rule, UnitSkipRule):
skipped_rules += target_rule.skipped_rules
return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)
def get_any_nt_unit_rule(g):
"""Returns a non-terminal unit rule from 'g', or None if there is none."""
for rule in g.rules:
if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
return rule
return None
def _remove_unit_rule(g, rule):
    """Removes 'rule' from 'g' without changing the language produced by 'g'."""
new_rules = [x for x in g.rules if x != rule]
refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
return Grammar(new_rules)
def _split(rule):
"""Splits a rule whose len(rhs) > 2 into shorter rules."""
rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
rule_name = '__SP_%s' % (rule_str) + '_%d'
yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
for i in xrange(1, len(rule.rhs) - 2):
yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
def _term(g):
"""Applies the TERM rule on 'g' (see top comment)."""
all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
new_rules = []
for rule in g.rules:
if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
else:
new_rules.append(rule)
return Grammar(new_rules)
def _bin(g):
"""Applies the BIN rule to 'g' (see top comment)."""
new_rules = []
for rule in g.rules:
if len(rule.rhs) > 2:
new_rules += _split(rule)
else:
new_rules.append(rule)
return Grammar(new_rules)
def _unit(g):
"""Applies the UNIT rule to 'g' (see top comment)."""
nt_unit_rule = get_any_nt_unit_rule(g)
while nt_unit_rule:
g = _remove_unit_rule(g, nt_unit_rule)
nt_unit_rule = get_any_nt_unit_rule(g)
return g
def to_cnf(g):
"""Creates a CNF grammar from a general context-free grammar 'g'."""
g = _unit(_bin(_term(g)))
return CnfWrapper(g)
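# --- Illustrative sketch (not part of the vendored file). ---
# Runs the TERM/BIN/UNIT pipeline described in the overview comment above on a
# tiny grammar (s -> a X b, a -> A, b -> B) and checks that every resulting
# rule is in CNF: either a single terminal or exactly two non-terminals on the
# right-hand side. The symbol names, aliases and helper name are assumptions.
def _example_to_cnf():
    rules = [
        Rule(NT('s'), [NT('a'), T('X'), NT('b')], weight=0, alias='s0'),
        Rule(NT('a'), [T('A')], weight=0, alias='a0'),
        Rule(NT('b'), [T('B')], weight=0, alias='b0'),
    ]
    cnf = to_cnf(Grammar(rules))
    for r in cnf.rules:
        assert (len(r.rhs) == 1 and isinstance(r.rhs[0], T)) or \
               (len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs))
    return cnf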
def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
if not skipped_rules:
return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
else:
weight = weight - skipped_rules[0].weight
return RuleNode(
Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
skipped_rules[1:], children,
skipped_rules[0].weight, skipped_rules[0].alias)
], weight=weight)
def revert_cnf(node):
"""Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
if isinstance(node, T):
return node
# Reverts TERM rule.
if node.rule.lhs.name.startswith('__T_'):
return node.children[0]
else:
children = []
for child in map(revert_cnf, node.children):
# Reverts BIN rule.
if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
children += child.children
else:
children.append(child)
# Reverts UNIT rule.
if isinstance(node.rule, UnitSkipRule):
return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
node.rule.skipped_rules, children,
node.rule.weight, node.rule.alias)
else:
return RuleNode(node.rule, children)

View File

@ -0,0 +1,295 @@
"""This module implements an Earley parser.
The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
https://www.sciencedirect.com/science/article/pii/S1571066108001497
That is probably the best reference for understanding the algorithm here.
The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
"""
from collections import deque
from ..lexer import Token
from ..tree import Tree
from ..exceptions import UnexpectedEOF, UnexpectedToken
from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item
from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree
class Parser:
def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
analysis = GrammarAnalyzer(parser_conf)
self.lexer_conf = lexer_conf
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
self.debug = debug
self.tree_class = tree_class
self.FIRST = analysis.FIRST
self.NULLABLE = analysis.NULLABLE
self.callbacks = parser_conf.callbacks
self.predictions = {}
## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
# the slow 'isupper' in is_terminal.
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
self.forest_sum_visitor = None
for rule in parser_conf.rules:
if rule.origin not in self.predictions:
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
## Detect if any rules/terminals have priorities set. If the user specified priority = None, then
# the priorities will be stripped from all rules/terminals before they reach us, allowing us to
# skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
# on any rules/terminals.
if self.forest_sum_visitor is None and rule.options.priority is not None:
self.forest_sum_visitor = ForestSumVisitor
# Check terminals for priorities
# Ignore terminal priorities if the basic lexer is used
if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None:
for term in self.lexer_conf.terminals:
if term.priority:
self.forest_sum_visitor = ForestSumVisitor
break
self.term_matcher = term_matcher
def predict_and_complete(self, i, to_scan, columns, transitives):
"""The core Earley Predictor and Completer.
        At each stage of the input, we handle any completed items (things
that matched on the last cycle) and use those to predict what should
come next in the input stream. The completions and any predicted
        non-terminals are recursively processed until we reach a set of items that expect terminals,
which can be added to the scan list for the next scanner cycle."""
        # Held Completions (H in E. Scott's paper).
node_cache = {}
held_completions = {}
column = columns[i]
# R (items) = Ei (column.items)
items = deque(column)
while items:
item = items.pop() # remove an element, A say, from R
### The Earley completer
if item.is_complete: ### (item.s == string)
if item.node is None:
label = (item.s, item.start, i)
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
item.node.add_family(item.s, item.rule, item.start, None, None)
# create_leo_transitives(item.rule.origin, item.start)
###R Joop Leo right recursion Completer
if item.rule.origin in transitives[item.start]:
transitive = transitives[item.start][item.s]
if transitive.previous in transitives[transitive.column]:
root_transitive = transitives[transitive.column][transitive.previous]
else:
root_transitive = transitive
new_item = Item(transitive.rule, transitive.ptr, transitive.start)
label = (root_transitive.s, root_transitive.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_path(root_transitive, item.node)
if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q
to_scan.add(new_item)
elif new_item not in column:
# Add (B :: aC.B, h, y) to Ei and R
column.add(new_item)
items.append(new_item)
###R Regular Earley completer
else:
# Empty has 0 length. If we complete an empty symbol in a particular
# parse step, we need to be able to use that same empty symbol to complete
# any predictions that result, that themselves require empty. Avoids
# infinite recursion on empty symbols.
# held_completions is 'H' in E.Scott's paper.
is_empty_item = item.start == i
if is_empty_item:
held_completions[item.rule.origin] = item.node
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
for originator in originators:
new_item = originator.advance()
label = (new_item.s, originator.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q
to_scan.add(new_item)
elif new_item not in column:
# Add (B :: aC.B, h, y) to Ei and R
column.add(new_item)
items.append(new_item)
### The Earley predictor
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
new_items = []
for rule in self.predictions[item.expect]:
new_item = Item(rule, 0, i)
new_items.append(new_item)
# Process any held completions (H).
if item.expect in held_completions:
new_item = item.advance()
label = (new_item.s, item.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
new_items.append(new_item)
for new_item in new_items:
if new_item.expect in self.TERMINALS:
to_scan.add(new_item)
elif new_item not in column:
column.add(new_item)
items.append(new_item)
def _parse(self, lexer, columns, to_scan, start_symbol=None):
def is_quasi_complete(item):
if item.is_complete:
return True
quasi = item.advance()
while not quasi.is_complete:
if quasi.expect not in self.NULLABLE:
return False
if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
return False
quasi = quasi.advance()
return True
# def create_leo_transitives(origin, start):
# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
def scan(i, token, to_scan):
"""The core Earley Scanner.
This is a custom implementation of the scanner that uses the
Lark lexer to match tokens. The scan list is built by the
Earley predictor, based on the previously completed tokens.
This ensures that at each phase of the parse we have a custom
lexer context, allowing for more complex ambiguities."""
next_to_scan = set()
next_set = set()
columns.append(next_set)
transitives.append({})
node_cache = {}
for item in set(to_scan):
if match(item.expect, token):
new_item = item.advance()
label = (new_item.s, new_item.start, i)
# 'terminals' may not contain token.type when using %declare
# Additionally, token is not always a Token
# For example, it can be a Tree when using TreeMatcher
term = terminals.get(token.type) if isinstance(token, Token) else None
# Set the priority of the token node to 0 so that the
# terminal priorities do not affect the Tree chosen by
# ForestSumVisitor after the basic lexer has already
# "used up" the terminal priorities
token_node = TokenNode(token, term, priority=0)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
if new_item.expect in self.TERMINALS:
# add (B ::= Aai+1.B, h, y) to Q'
next_to_scan.add(new_item)
else:
# add (B ::= Aa+1.B, h, y) to Ei+1
next_set.add(new_item)
if not next_set and not next_to_scan:
expect = {i.expect.name for i in to_scan}
raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
return next_to_scan
# Define parser functions
match = self.term_matcher
terminals = self.lexer_conf.terminals_by_name
# Cache for nodes & tokens created in a particular parse step.
transitives = [{}]
## The main Earley loop.
# Run the Prediction/Completion cycle for any Items in the current Earley set.
# Completions will be added to the SPPF tree, and predictions will be recursively
# processed down to terminals/empty nodes to be added to the scanner for the next
# step.
expects = {i.expect for i in to_scan}
i = 0
for token in lexer.lex(expects):
self.predict_and_complete(i, to_scan, columns, transitives)
to_scan = scan(i, token, to_scan)
i += 1
expects.clear()
expects |= {i.expect for i in to_scan}
self.predict_and_complete(i, to_scan, columns, transitives)
## Column is now the final column in the parse.
assert i == len(columns)-1
return to_scan
def parse(self, lexer, start):
assert start, start
start_symbol = NonTerminal(start)
columns = [set()]
to_scan = set() # The scan buffer. 'Q' in E.Scott's paper.
## Predict for the start_symbol.
# Add predicted items to the first Earley set (for the predictor) if they
# result in a non-terminal, or the scanner if they result in a terminal.
for rule in self.predictions[start_symbol]:
item = Item(rule, 0, 0)
if item.expect in self.TERMINALS:
to_scan.add(item)
else:
columns[0].add(item)
to_scan = self._parse(lexer, columns, to_scan, start_symbol)
# If the parse was successful, the start
# symbol should have been completed in the last step of the Earley cycle, and will be in
# this column. Find the item for the start_symbol, which is the root of the SPPF tree.
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
if not solutions:
expected_terminals = [t.expect.name for t in to_scan]
raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
if self.debug:
from .earley_forest import ForestToPyDotVisitor
try:
debug_walker = ForestToPyDotVisitor()
except ImportError:
logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
else:
debug_walker.visit(solutions[0], "sppf.png")
if len(solutions) > 1:
assert False, 'Earley should not generate multiple start symbol items!'
if self.tree_class is not None:
# Perform our SPPF -> AST conversion
transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
return transformer.transform(solutions[0])
# return the root of the SPPF
return solutions[0]
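# Illustrative usage sketch (not part of this module). The parser above is
# normally driven through the public ``lark.Lark`` facade; the grammar and
# input below are assumptions chosen for illustration. With
# ambiguity="explicit" the SPPF built here is converted into a tree that keeps
# '_ambig' nodes instead of resolving them to a single derivation.
def _explicit_ambiguity_example():
    from lark import Lark   # assumes the public lark package is importable
    parser = Lark(r"""
        start: x x
        x: "a" | "aa" | "aaa"
    """, parser="earley", ambiguity="explicit")
    return parser.parse("aaaa")   # the result contains an '_ambig' subtree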


@ -0,0 +1,42 @@
"""This module implements useful building blocks for the Earley parser
"""
class Item:
"An Earley Item, the atom of the algorithm."
__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
def __init__(self, rule, ptr, start):
self.is_complete = len(rule.expansion) == ptr
self.rule = rule # rule
self.ptr = ptr # ptr
self.start = start # j
self.node = None # w
if self.is_complete:
self.s = rule.origin
self.expect = None
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
else:
self.s = (rule, ptr)
self.expect = rule.expansion[ptr]
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
self._hash = hash((self.s, self.start))
def advance(self):
return Item(self.rule, self.ptr + 1, self.start)
def __eq__(self, other):
return self is other or (self.s == other.s and self.start == other.start)
def __hash__(self):
return self._hash
def __repr__(self):
before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
return '%s (%d)' % (symbol, self.start)
# class TransitiveItem(Item):
# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
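# Illustrative sketch (not part of this module): how an Item's dot advances.
# It assumes Rule/NonTerminal/Terminal keep the constructor signatures used
# elsewhere in this vendored copy; the symbol names are made up.
def _item_example():
    from lark.grammar import Rule, NonTerminal, Terminal
    rule = Rule(NonTerminal('sum'), [NonTerminal('sum'), Terminal('PLUS'), NonTerminal('prod')])
    item = Item(rule, 0, 0)      # dot at the far left, parse started at column 0
    stepped = item.advance()     # the dot moves past 'sum'; the start stays the same
    assert not item.is_complete and not stepped.is_complete
    assert stepped.start == item.start and stepped.ptr == item.ptr + 1
    return item, stepped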


@ -0,0 +1,804 @@
""""This module implements an SPPF implementation
This is used as the primary output mechanism for the Earley parser
in order to store complex ambiguities.
Full reference and more details are here:
https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
"""
from random import randint
from collections import deque
from operator import attrgetter
from importlib import import_module
from functools import partial
from ..parse_tree_builder import AmbiguousIntermediateExpander
from ..visitors import Discard
from ..lexer import Token
from ..utils import logger
from ..tree import Tree
class ForestNode:
pass
class SymbolNode(ForestNode):
"""
A Symbol Node represents a symbol (or Intermediate LR0).
Symbol nodes are keyed by the symbol (s). For intermediate nodes
s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
nodes, s will be a string representing the non-terminal origin (i.e.
the left hand side of the rule).
The children of a Symbol or Intermediate Node will always be Packed Nodes;
with each Packed Node child representing a single derivation of a production.
Hence a Symbol Node with a single child is unambiguous.
Parameters:
s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
start: The index of the start of the substring matched by this symbol (inclusive).
end: The index of the end of the substring matched by this symbol (exclusive).
Properties:
is_intermediate: True if this node is an intermediate node.
priority: The priority of the node's symbol.
"""
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
def __init__(self, s, start, end):
self.s = s
self.start = start
self.end = end
self._children = set()
self.paths = set()
self.paths_loaded = False
### We use inf here as it can be safely negated without resorting to conditionals,
# unlike None or float('NaN'), and sorts appropriately.
self.priority = float('-inf')
self.is_intermediate = isinstance(s, tuple)
self._hash = hash((self.s, self.start, self.end))
def add_family(self, lr0, rule, start, left, right):
self._children.add(PackedNode(self, lr0, rule, start, left, right))
def add_path(self, transitive, node):
self.paths.add((transitive, node))
def load_paths(self):
for transitive, node in self.paths:
if transitive.next_titem is not None:
vn = SymbolNode(transitive.next_titem.s, transitive.next_titem.start, self.end)
vn.add_path(transitive.next_titem, node)
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
else:
self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
self.paths_loaded = True
@property
def is_ambiguous(self):
"""Returns True if this node is ambiguous."""
return len(self.children) > 1
@property
def children(self):
"""Returns a list of this node's children sorted from greatest to
least priority."""
if not self.paths_loaded: self.load_paths()
return sorted(self._children, key=attrgetter('sort_key'))
def __iter__(self):
return iter(self._children)
def __eq__(self, other):
if not isinstance(other, SymbolNode):
return False
return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end == other.end)
def __hash__(self):
return self._hash
def __repr__(self):
if self.is_intermediate:
rule = self.s[0]
ptr = self.s[1]
before = ( expansion.name for expansion in rule.expansion[:ptr] )
after = ( expansion.name for expansion in rule.expansion[ptr:] )
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else:
symbol = self.s.name
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
class PackedNode(ForestNode):
"""
A Packed Node represents a single derivation in a symbol node.
Parameters:
rule: The rule associated with this node.
parent: The parent of this node.
left: The left child of this node. ``None`` if one does not exist.
right: The right child of this node. ``None`` if one does not exist.
priority: The priority of this node.
"""
__slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
def __init__(self, parent, s, rule, start, left, right):
self.parent = parent
self.s = s
self.start = start
self.rule = rule
self.left = left
self.right = right
self.priority = float('-inf')
self._hash = hash((self.left, self.right))
@property
def is_empty(self):
return self.left is None and self.right is None
@property
def sort_key(self):
"""
Used to sort PackedNode children of SymbolNodes.
A SymbolNode has multiple PackedNodes if it matched
ambiguously. Hence, we use the sort order to identify
the order in which ambiguous children should be considered.
"""
return self.is_empty, -self.priority, self.rule.order
@property
def children(self):
"""Returns a list of this node's children."""
return [x for x in [self.left, self.right] if x is not None]
def __iter__(self):
yield self.left
yield self.right
def __eq__(self, other):
if not isinstance(other, PackedNode):
return False
return self is other or (self.left == other.left and self.right == other.right)
def __hash__(self):
return self._hash
def __repr__(self):
if isinstance(self.s, tuple):
rule = self.s[0]
ptr = self.s[1]
before = ( expansion.name for expansion in rule.expansion[:ptr] )
after = ( expansion.name for expansion in rule.expansion[ptr:] )
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else:
symbol = self.s.name
return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
class TokenNode(ForestNode):
"""
A Token Node represents a matched terminal and is always a leaf node.
Parameters:
token: The Token associated with this node.
term: The TerminalDef matched by the token.
priority: The priority of this node.
"""
__slots__ = ('token', 'term', 'priority', '_hash')
def __init__(self, token, term, priority=None):
self.token = token
self.term = term
if priority is not None:
self.priority = priority
else:
self.priority = term.priority if term is not None else 0
self._hash = hash(token)
def __eq__(self, other):
if not isinstance(other, TokenNode):
return False
return self is other or (self.token == other.token)
def __hash__(self):
return self._hash
def __repr__(self):
return repr(self.token)
class ForestVisitor:
"""
An abstract base class for building forest visitors.
This class performs a controllable depth-first walk of an SPPF.
The visitor will not enter cycles and will backtrack if one is encountered.
Subclasses are notified of cycles through the ``on_cycle`` method.
Behavior for visit events is defined by overriding the
``visit*node*`` functions.
The walk is controlled by the return values of the ``visit*node_in``
methods. Returning a node(s) will schedule them to be visited. The visitor
will begin to backtrack if no nodes are returned.
Parameters:
single_visit: If ``True``, non-Token nodes will only be visited once.
"""
def __init__(self, single_visit=False):
self.single_visit = single_visit
def visit_token_node(self, node):
"""Called when a ``Token`` is visited. ``Token`` nodes are always leaves."""
pass
def visit_symbol_node_in(self, node):
"""Called when a symbol node is visited. Nodes that are returned
will be scheduled to be visited. If ``visit_intermediate_node_in``
is not implemented, this function will be called for intermediate
nodes as well."""
pass
def visit_symbol_node_out(self, node):
"""Called after all nodes returned from a corresponding ``visit_symbol_node_in``
call have been visited. If ``visit_intermediate_node_out``
is not implemented, this function will be called for intermediate
nodes as well."""
pass
def visit_packed_node_in(self, node):
"""Called when a packed node is visited. Nodes that are returned
will be scheduled to be visited. """
pass
def visit_packed_node_out(self, node):
"""Called after all nodes returned from a corresponding ``visit_packed_node_in``
call have been visited."""
pass
def on_cycle(self, node, path):
"""Called when a cycle is encountered.
Parameters:
node: The node that causes a cycle.
path: The list of nodes being visited: nodes that have been
entered but not exited. The first element is the root in a forest
visit, and the last element is the node visited most recently.
``path`` should be treated as read-only.
"""
pass
def get_cycle_in_path(self, node, path):
"""A utility function for use in ``on_cycle`` to obtain a slice of
``path`` that only contains the nodes that make up the cycle."""
index = len(path) - 1
while id(path[index]) != id(node):
index -= 1
return path[index:]
def visit(self, root):
# Visiting is a list of IDs of all symbol/intermediate nodes currently in
# the stack. It serves two purposes: to detect when we 'recurse' in and out
# of a symbol/intermediate so that we can process both up and down. Also,
# since the SPPF can have cycles it allows us to detect if we're trying
# to recurse into a node that's already on the stack (infinite recursion).
visiting = set()
# set of all nodes that have been visited
visited = set()
# a list of nodes that are currently being visited
# used for the `on_cycle` callback
path = []
# We do not use recursion here to walk the Forest due to the limited
# stack size in python. Therefore input_stack is essentially our stack.
input_stack = deque([root])
# It is much faster to cache these as locals since they are called
# many times in large parses.
vpno = getattr(self, 'visit_packed_node_out')
vpni = getattr(self, 'visit_packed_node_in')
vsno = getattr(self, 'visit_symbol_node_out')
vsni = getattr(self, 'visit_symbol_node_in')
vino = getattr(self, 'visit_intermediate_node_out', vsno)
vini = getattr(self, 'visit_intermediate_node_in', vsni)
vtn = getattr(self, 'visit_token_node')
oc = getattr(self, 'on_cycle')
while input_stack:
current = next(reversed(input_stack))
try:
next_node = next(current)
except StopIteration:
input_stack.pop()
continue
except TypeError:
### If the current object is not an iterator, pass through to Token/SymbolNode
pass
else:
if next_node is None:
continue
if id(next_node) in visiting:
oc(next_node, path)
continue
input_stack.append(next_node)
continue
if isinstance(current, TokenNode):
vtn(current.token)
input_stack.pop()
continue
current_id = id(current)
if current_id in visiting:
if isinstance(current, PackedNode):
vpno(current)
elif current.is_intermediate:
vino(current)
else:
vsno(current)
input_stack.pop()
path.pop()
visiting.remove(current_id)
visited.add(current_id)
elif self.single_visit and current_id in visited:
input_stack.pop()
else:
visiting.add(current_id)
path.append(current)
if isinstance(current, PackedNode):
next_node = vpni(current)
elif current.is_intermediate:
next_node = vini(current)
else:
next_node = vsni(current)
if next_node is None:
continue
if not isinstance(next_node, ForestNode):
next_node = iter(next_node)
elif id(next_node) in visiting:
oc(next_node, path)
continue
input_stack.append(next_node)
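# Illustrative sketch (not part of this module): a minimal ForestVisitor
# subclass that counts SPPF nodes. Intermediate nodes are counted together
# with symbol nodes because no dedicated intermediate hooks are defined.
class _NodeCounter(ForestVisitor):
    def __init__(self):
        super(_NodeCounter, self).__init__(single_visit=True)
        self.symbols = self.packed = self.tokens = 0
    def visit_token_node(self, node):
        # ``node`` is the matched Token itself; TokenNodes are unwrapped by visit().
        self.tokens += 1
    def visit_symbol_node_in(self, node):
        self.symbols += 1
        return node.children    # returning children schedules them for visiting
    def visit_packed_node_in(self, node):
        self.packed += 1
        return node.children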
class ForestTransformer(ForestVisitor):
"""The base class for a bottom-up forest transformation. Most users will
want to use ``TreeForestTransformer`` instead as it has a friendlier
interface and covers most use cases.
Transformations are applied via inheritance and overriding of the
``transform*node`` methods.
``transform_token_node`` receives a ``Token`` as an argument.
All other methods receive the node that is being transformed and
a list of the results of the transformations of that node's children.
The return values of these methods are the resulting transformations.
If ``Discard`` is raised in a node's transformation, no data from that node
will be passed to its parent's transformation.
"""
def __init__(self):
super(ForestTransformer, self).__init__()
# results of transformations
self.data = dict()
# used to track parent nodes
self.node_stack = deque()
def transform(self, root):
"""Perform a transformation on an SPPF."""
self.node_stack.append('result')
self.data['result'] = []
self.visit(root)
assert len(self.data['result']) <= 1
if self.data['result']:
return self.data['result'][0]
def transform_symbol_node(self, node, data):
"""Transform a symbol node."""
return node
def transform_intermediate_node(self, node, data):
"""Transform an intermediate node."""
return node
def transform_packed_node(self, node, data):
"""Transform a packed node."""
return node
def transform_token_node(self, node):
"""Transform a ``Token``."""
return node
def visit_symbol_node_in(self, node):
self.node_stack.append(id(node))
self.data[id(node)] = []
return node.children
def visit_packed_node_in(self, node):
self.node_stack.append(id(node))
self.data[id(node)] = []
return node.children
def visit_token_node(self, node):
transformed = self.transform_token_node(node)
if transformed is not Discard:
self.data[self.node_stack[-1]].append(transformed)
def _visit_node_out_helper(self, node, method):
self.node_stack.pop()
transformed = method(node, self.data[id(node)])
if transformed is not Discard:
self.data[self.node_stack[-1]].append(transformed)
del self.data[id(node)]
def visit_symbol_node_out(self, node):
self._visit_node_out_helper(node, self.transform_symbol_node)
def visit_intermediate_node_out(self, node):
self._visit_node_out_helper(node, self.transform_intermediate_node)
def visit_packed_node_out(self, node):
self._visit_node_out_helper(node, self.transform_packed_node)
class ForestSumVisitor(ForestVisitor):
"""
A visitor for prioritizing ambiguous parts of the Forest.
This visitor is used when support for explicit priorities on
rules is requested (whether normal, or invert). It walks the
forest (or subsets thereof) and cascades properties upwards
from the leaves.
It would be ideal to do this during parsing; however, this would
require processing each Earley item multiple times. That's
a big performance drawback; so running a forest walk is the
lesser of two evils: there can be significantly more Earley
items created during parsing than there are SPPF nodes in the
final tree.
"""
def __init__(self):
super(ForestSumVisitor, self).__init__(single_visit=True)
def visit_packed_node_in(self, node):
yield node.left
yield node.right
def visit_symbol_node_in(self, node):
return iter(node.children)
def visit_packed_node_out(self, node):
priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
priority += getattr(node.right, 'priority', 0)
priority += getattr(node.left, 'priority', 0)
node.priority = priority
def visit_symbol_node_out(self, node):
node.priority = max(child.priority for child in node.children)
class PackedData():
"""Used in transformationss of packed nodes to distinguish the data
that comes from the left child and the right child.
"""
class _NoData():
pass
NO_DATA = _NoData()
def __init__(self, node, data):
self.left = self.NO_DATA
self.right = self.NO_DATA
if data:
if node.left is not None:
self.left = data[0]
if len(data) > 1:
self.right = data[1]
else:
self.right = data[0]
class ForestToParseTree(ForestTransformer):
"""Used by the earley parser when ambiguity equals 'resolve' or
'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
Parameters:
tree_class: The tree class to use for construction
callbacks: A dictionary of rules to functions that output a tree
prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
resolve_ambiguity: If True, ambiguities will be resolved based on
priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
use_cache: If True, the results of packed node transformations will be cached.
"""
def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
super(ForestToParseTree, self).__init__()
self.tree_class = tree_class
self.callbacks = callbacks
self.prioritizer = prioritizer
self.resolve_ambiguity = resolve_ambiguity
self._use_cache = use_cache
self._cache = {}
self._on_cycle_retreat = False
self._cycle_node = None
self._successful_visits = set()
def visit(self, root):
if self.prioritizer:
self.prioritizer.visit(root)
super(ForestToParseTree, self).visit(root)
self._cache = {}
def on_cycle(self, node, path):
logger.debug("Cycle encountered in the SPPF at node: %s. "
"As infinite ambiguities cannot be represented in a tree, "
"this family of derivations will be discarded.", node)
self._cycle_node = node
self._on_cycle_retreat = True
def _check_cycle(self, node):
if self._on_cycle_retreat:
if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
self._cycle_node = None
self._on_cycle_retreat = False
else:
return Discard
def _collapse_ambig(self, children):
new_children = []
for child in children:
if hasattr(child, 'data') and child.data == '_ambig':
new_children += child.children
else:
new_children.append(child)
return new_children
def _call_rule_func(self, node, data):
# called when transforming children of symbol nodes
# data is a list of trees or tokens that correspond to the
# symbol's rule expansion
return self.callbacks[node.rule](data)
def _call_ambig_func(self, node, data):
# called when transforming a symbol node
# data is a list of trees where each tree's data is
# equal to the name of the symbol or one of its aliases.
if len(data) > 1:
return self.tree_class('_ambig', data)
elif data:
return data[0]
return Discard
def transform_symbol_node(self, node, data):
if id(node) not in self._successful_visits:
return Discard
r = self._check_cycle(node)
if r is Discard:
return r
self._successful_visits.remove(id(node))
data = self._collapse_ambig(data)
return self._call_ambig_func(node, data)
def transform_intermediate_node(self, node, data):
if id(node) not in self._successful_visits:
return Discard
r = self._check_cycle(node)
if r is Discard:
return r
self._successful_visits.remove(id(node))
if len(data) > 1:
children = [self.tree_class('_inter', c) for c in data]
return self.tree_class('_iambig', children)
return data[0]
def transform_packed_node(self, node, data):
r = self._check_cycle(node)
if r is Discard:
return r
if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
return Discard
if self._use_cache and id(node) in self._cache:
return self._cache[id(node)]
children = []
assert len(data) <= 2
data = PackedData(node, data)
if data.left is not PackedData.NO_DATA:
if node.left.is_intermediate and isinstance(data.left, list):
children += data.left
else:
children.append(data.left)
if data.right is not PackedData.NO_DATA:
children.append(data.right)
if node.parent.is_intermediate:
return self._cache.setdefault(id(node), children)
return self._cache.setdefault(id(node), self._call_rule_func(node, children))
def visit_symbol_node_in(self, node):
super(ForestToParseTree, self).visit_symbol_node_in(node)
if self._on_cycle_retreat:
return
return node.children
def visit_packed_node_in(self, node):
self._on_cycle_retreat = False
to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
if not self._use_cache or id(node) not in self._cache:
return to_visit
def visit_packed_node_out(self, node):
super(ForestToParseTree, self).visit_packed_node_out(node)
if not self._on_cycle_retreat:
self._successful_visits.add(id(node.parent))
def handles_ambiguity(func):
"""Decorator for methods of subclasses of ``TreeForestTransformer``.
Denotes that the method should receive a list of transformed derivations."""
func.handles_ambiguity = True
return func
class TreeForestTransformer(ForestToParseTree):
"""A ``ForestTransformer`` with a tree ``Transformer``-like interface.
By default, it will construct a tree.
Methods provided via inheritance are called based on the rule/symbol
names of nodes in the forest.
Methods that act on rules will receive a list of the results of the
transformations of the rule's children. By default, trees and tokens.
Methods that act on tokens will receive a token.
Alternatively, methods that act on rules may be annotated with
``handles_ambiguity``. In this case, the function will receive a list
of all the transformations of all the derivations of the rule.
By default, a list of trees where each tree.data is equal to the
rule name or one of its aliases.
Non-tree transformations are made possible by override of
``__default__``, ``__default_token__``, and ``__default_ambig__``.
Note:
Tree shaping features such as inlined rules and token filtering are
not built into the transformation. Positions are also not propagated.
Parameters:
tree_class: The tree class to use for construction
prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
use_cache (bool): If True, caches the results of some transformations,
potentially improving performance when ``resolve_ambiguity==False``.
Only use if you know what you are doing, i.e. all transformation
functions are pure and referentially transparent.
"""
def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
def __default__(self, name, data):
"""Default operation on tree (for override).
Returns a tree with name with data as children.
"""
return self.tree_class(name, data)
def __default_ambig__(self, name, data):
"""Default operation on ambiguous rule (for override).
Wraps data in an '_ambig_' node if it contains more than
one element.
"""
if len(data) > 1:
return self.tree_class('_ambig', data)
elif data:
return data[0]
return Discard
def __default_token__(self, node):
"""Default operation on ``Token`` (for override).
Returns ``node``.
"""
return node
def transform_token_node(self, node):
return getattr(self, node.type, self.__default_token__)(node)
def _call_rule_func(self, node, data):
name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
user_func = getattr(self, name, self.__default__)
if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
user_func = partial(self.__default__, name)
if not self.resolve_ambiguity:
wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
user_func = wrapper(user_func)
return user_func(data)
def _call_ambig_func(self, node, data):
name = node.s.name
user_func = getattr(self, name, self.__default_ambig__)
if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
user_func = partial(self.__default_ambig__, name)
return user_func(data)
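# Illustrative usage sketch (not part of this module). The grammar, input and
# class names below are assumptions; ``ambiguity='forest'`` makes the public
# Lark facade return the raw SPPF root, which can then be transformed here.
class _KeepFirstDerivation(TreeForestTransformer):
    # Methods named after rules receive the transformed children; a method
    # marked with ``handles_ambiguity`` receives one result per derivation.
    @handles_ambiguity
    def start(self, derivations):
        return derivations[0]   # arbitrarily keep the first derivation
def _tree_forest_transformer_example():
    from lark import Lark   # assumes the public lark package is importable
    parser = Lark(r"""
        start: x x
        x: "a" | "aa" | "aaa"
    """, parser="earley", ambiguity="forest")
    forest = parser.parse("aaaa")            # SymbolNode root of the SPPF
    return _KeepFirstDerivation(resolve_ambiguity=False).transform(forest)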
class ForestToPyDotVisitor(ForestVisitor):
"""
A Forest visitor which writes the SPPF to a PNG.
The SPPF can get really large, really quickly because
of the amount of meta-data it stores, so this is probably
only useful for trivial trees and learning how the SPPF
is structured.
"""
def __init__(self, rankdir="TB"):
super(ForestToPyDotVisitor, self).__init__(single_visit=True)
self.pydot = import_module('pydot')
self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
def visit(self, root, filename):
super(ForestToPyDotVisitor, self).visit(root)
try:
self.graph.write_png(filename)
except FileNotFoundError as e:
logger.error("Could not write png: ", e)
def visit_token_node(self, node):
graph_node_id = str(id(node))
graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
graph_node_color = 0x808080
graph_node_style = "\"filled,rounded\""
graph_node_shape = "diamond"
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
self.graph.add_node(graph_node)
def visit_packed_node_in(self, node):
graph_node_id = str(id(node))
graph_node_label = repr(node)
graph_node_color = 0x808080
graph_node_style = "filled"
graph_node_shape = "diamond"
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
self.graph.add_node(graph_node)
yield node.left
yield node.right
def visit_packed_node_out(self, node):
graph_node_id = str(id(node))
graph_node = self.graph.get_node(graph_node_id)[0]
for child in [node.left, node.right]:
if child is not None:
child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
else:
# Try to pick an id above the range of real Python object ids; this is probably
# implementation-specific, but is good enough for a dummy placeholder node.
child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
child_graph_node_style = "invis"
child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
child_edge_style = "invis"
self.graph.add_node(child_graph_node)
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
def visit_symbol_node_in(self, node):
graph_node_id = str(id(node))
graph_node_label = repr(node)
graph_node_color = 0x808080
graph_node_style = "\"filled\""
if node.is_intermediate:
graph_node_shape = "ellipse"
else:
graph_node_shape = "rectangle"
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
self.graph.add_node(graph_node)
return iter(node.children)
def visit_symbol_node_out(self, node):
graph_node_id = str(id(node))
graph_node = self.graph.get_node(graph_node_id)[0]
for child in node.children:
child_graph_node_id = str(id(child))
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))


@ -0,0 +1,185 @@
from collections import Counter, defaultdict
from ..utils import bfs, fzset, classify
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal
class RulePtr:
__slots__ = ('rule', 'index')
def __init__(self, rule, index):
assert isinstance(rule, Rule)
assert index <= len(rule.expansion)
self.rule = rule
self.index = index
def __repr__(self):
before = [x.name for x in self.rule.expansion[:self.index]]
after = [x.name for x in self.rule.expansion[self.index:]]
return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))
@property
def next(self):
return self.rule.expansion[self.index]
def advance(self, sym):
assert self.next == sym
return RulePtr(self.rule, self.index+1)
@property
def is_satisfied(self):
return self.index == len(self.rule.expansion)
def __eq__(self, other):
return self.rule == other.rule and self.index == other.index
def __hash__(self):
return hash((self.rule, self.index))
# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
__slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
def __init__(self, kernel, closure):
self.kernel = fzset(kernel)
self.closure = fzset(closure)
self.transitions = {}
self.lookaheads = defaultdict(set)
def __repr__(self):
return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
def update_set(set1, set2):
if not set2 or set1 > set2:
return False
copy = set(set1)
set1 |= set2
return set1 != copy
def calculate_sets(rules):
"""Calculate FOLLOW sets.
Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
# foreach grammar rule X ::= Y(1) ... Y(k)
# if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
# NULLABLE = NULLABLE union {X}
# for i = 1 to k
# if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
# FIRST(X) = FIRST(X) union FIRST(Y(i))
# for j = i+1 to k
# if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
# if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
# until none of NULLABLE,FIRST,FOLLOW changed in last iteration
NULLABLE = set()
FIRST = {}
FOLLOW = {}
for sym in symbols:
FIRST[sym]={sym} if sym.is_term else set()
FOLLOW[sym]=set()
# Calculate NULLABLE and FIRST
changed = True
while changed:
changed = False
for rule in rules:
if set(rule.expansion) <= NULLABLE:
if update_set(NULLABLE, {rule.origin}):
changed = True
for i, sym in enumerate(rule.expansion):
if set(rule.expansion[:i]) <= NULLABLE:
if update_set(FIRST[rule.origin], FIRST[sym]):
changed = True
else:
break
# Calculate FOLLOW
changed = True
while changed:
changed = False
for rule in rules:
for i, sym in enumerate(rule.expansion):
if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
changed = True
for j in range(i+1, len(rule.expansion)):
if set(rule.expansion[i+1:j]) <= NULLABLE:
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
changed = True
return FIRST, FOLLOW, NULLABLE
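# Worked example (illustrative, not part of this module). The grammar is
#     S -> A B ,   A -> "a" | <empty> ,   B -> "b"
# using the Rule/Terminal/NonTerminal classes already imported above; the
# symbol names and the expected sets are assumptions spelled out for clarity.
def _calculate_sets_example():
    S, A, B = NonTerminal('S'), NonTerminal('A'), NonTerminal('B')
    a, b = Terminal('A_T'), Terminal('B_T')
    rules = [Rule(S, [A, B]), Rule(A, [a]), Rule(A, []), Rule(B, [b])]
    FIRST, FOLLOW, NULLABLE = calculate_sets(rules)
    assert NULLABLE == {A}       # only A can derive the empty string
    assert FIRST[S] == {a, b}    # A is nullable, so FIRST(B) leaks into FIRST(S)
    assert FOLLOW[A] == {b}      # whatever starts B can follow A
    return FIRST, FOLLOW, NULLABLE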
class GrammarAnalyzer:
def __init__(self, parser_conf, debug=False):
self.debug = debug
root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
for start in parser_conf.start}
rules = parser_conf.rules + list(root_rules.values())
self.rules_by_origin = classify(rules, lambda r: r.origin)
if len(rules) != len(set(rules)):
duplicates = [item for item, count in Counter(rules).items() if count > 1]
raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))
for r in rules:
for sym in r.expansion:
if not (sym.is_term or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)
self.start_states = {start: self.expand_rule(root_rule.origin)
for start, root_rule in root_rules.items()}
self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
for start, root_rule in root_rules.items()}
lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
for start in parser_conf.start}
lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
assert(len(lr0_rules) == len(set(lr0_rules)))
self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
# cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
for start, root_rule in lr0_root_rules.items()}
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
def expand_rule(self, source_rule, rules_by_origin=None):
"Returns all init_ptrs accessible by rule (recursive)"
if rules_by_origin is None:
rules_by_origin = self.rules_by_origin
init_ptrs = set()
def _expand_rule(rule):
assert not rule.is_term, rule
for r in rules_by_origin[rule]:
init_ptr = RulePtr(r, 0)
init_ptrs.add(init_ptr)
if r.expansion: # if not empty rule
new_r = init_ptr.next
if not new_r.is_term:
yield new_r
for _ in bfs([source_rule], _expand_rule):
pass
return fzset(init_ptrs)


@ -0,0 +1,303 @@
"""This module builds a LALR(1) transition-table for lalr_parser.py
For now, shift/reduce conflicts are automatically resolved as shifts.
"""
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com
from collections import defaultdict
from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
from ..exceptions import GrammarError
from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
from ..grammar import Rule
###{standalone
class Action:
def __init__(self, name):
self.name = name
def __str__(self):
return self.name
def __repr__(self):
return str(self)
Shift = Action('Shift')
Reduce = Action('Reduce')
class ParseTable:
def __init__(self, states, start_states, end_states):
self.states = states
self.start_states = start_states
self.end_states = end_states
def serialize(self, memo):
tokens = Enumerator()
states = {
state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
for token, (action, arg) in actions.items()}
for state, actions in self.states.items()
}
return {
'tokens': tokens.reversed(),
'states': states,
'start_states': self.start_states,
'end_states': self.end_states,
}
@classmethod
def deserialize(cls, data, memo):
tokens = data['tokens']
states = {
state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
for token, (action, arg) in actions.items()}
for state, actions in data['states'].items()
}
return cls(states, data['start_states'], data['end_states'])
class IntParseTable(ParseTable):
@classmethod
def from_ParseTable(cls, parse_table):
enum = list(parse_table.states)
state_to_idx = {s:i for i,s in enumerate(enum)}
int_states = {}
for s, la in parse_table.states.items():
la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
for k,v in la.items()}
int_states[ state_to_idx[s] ] = la
start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
return cls(int_states, start_states, end_states)
###}
# digraph and traverse, see The Theory and Practice of Compiler Writing
# computes F(x) = G(x) union (union { G(y) | x R y })
# X: nodes
# R: relation (function mapping node -> list of nodes that satisfy the relation)
# G: set valued function
def digraph(X, R, G):
F = {}
S = []
N = {}
for x in X:
N[x] = 0
for x in X:
# this is always true for the first iteration, but N[x] may be updated in traverse below
if N[x] == 0:
traverse(x, S, N, X, R, G, F)
return F
# x: single node
# S: stack
# N: weights
# X: nodes
# R: relation (see above)
# G: set valued function
# F: set valued function we are computing (map of input -> output)
def traverse(x, S, N, X, R, G, F):
S.append(x)
d = len(S)
N[x] = d
F[x] = G[x]
for y in R[x]:
if N[y] == 0:
traverse(y, S, N, X, R, G, F)
n_x = N[x]
assert(n_x > 0)
n_y = N[y]
assert(n_y != 0)
if (n_y > 0) and (n_y < n_x):
N[x] = n_y
F[x].update(F[y])
if N[x] == d:
f_x = F[x]
while True:
z = S.pop()
N[z] = -1
F[z] = f_x
if z == x:
break
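# Tiny illustrative check of the helpers above (not part of this module; the
# node names and sets are arbitrary). F(x) should collect G of every node
# reachable from x through the relation R, including x itself.
def _digraph_example():
    X = ['a', 'b', 'c']
    R = {'a': ['b'], 'b': ['c'], 'c': []}
    G = {'a': {1}, 'b': {2}, 'c': {3}}
    F = digraph(X, R, G)
    assert F['a'] == {1, 2, 3} and F['b'] == {2, 3} and F['c'] == {3}
    return F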
class LALR_Analyzer(GrammarAnalyzer):
def __init__(self, parser_conf, debug=False):
GrammarAnalyzer.__init__(self, parser_conf, debug)
self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
self.includes = defaultdict(set)
self.lookback = defaultdict(set)
def compute_lr0_states(self):
self.lr0_states = set()
# map of kernels to LR0ItemSets
cache = {}
def step(state):
_, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
d = classify(unsat, lambda rp: rp.next)
for sym, rps in d.items():
kernel = fzset({rp.advance(sym) for rp in rps})
new_state = cache.get(kernel, None)
if new_state is None:
closure = set(kernel)
for rp in kernel:
if not rp.is_satisfied and not rp.next.is_term:
closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
new_state = LR0ItemSet(kernel, closure)
cache[kernel] = new_state
state.transitions[sym] = new_state
yield new_state
self.lr0_states.add(state)
for _ in bfs(self.lr0_start_states.values(), step):
pass
def compute_reads_relations(self):
# handle start state
for root in self.lr0_start_states.values():
assert(len(root.kernel) == 1)
for rp in root.kernel:
assert(rp.index == 0)
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
for state in self.lr0_states:
seen = set()
for rp in state.closure:
if rp.is_satisfied:
continue
s = rp.next
# if s is not a nonterminal
if s not in self.lr0_rules_by_origin:
continue
if s in seen:
continue
seen.add(s)
nt = (state, s)
self.nonterminal_transitions.append(nt)
dr = self.directly_reads[nt]
r = self.reads[nt]
next_state = state.transitions[s]
for rp2 in next_state.closure:
if rp2.is_satisfied:
continue
s2 = rp2.next
# if s2 is a terminal
if s2 not in self.lr0_rules_by_origin:
dr.add(s2)
if s2 in self.NULLABLE:
r.add((next_state, s2))
def compute_includes_lookback(self):
for nt in self.nonterminal_transitions:
state, nonterminal = nt
includes = []
lookback = self.lookback[nt]
for rp in state.closure:
if rp.rule.origin != nonterminal:
continue
# traverse the states for rp(.rule)
state2 = state
for i in range(rp.index, len(rp.rule.expansion)):
s = rp.rule.expansion[i]
nt2 = (state2, s)
state2 = state2.transitions[s]
if nt2 not in self.reads:
continue
for j in range(i + 1, len(rp.rule.expansion)):
if not rp.rule.expansion[j] in self.NULLABLE:
break
else:
includes.append(nt2)
# state2 is at the final state for rp.rule
if rp.index == 0:
for rp2 in state2.closure:
if (rp2.rule == rp.rule) and rp2.is_satisfied:
lookback.add((state2, rp2.rule))
for nt2 in includes:
self.includes[nt2].add(nt)
def compute_lookaheads(self):
read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
for nt, lookbacks in self.lookback.items():
for state, rule in lookbacks:
for s in follow_sets[nt]:
state.lookaheads[s].add(rule)
def compute_lalr1_states(self):
m = {}
reduce_reduce = []
for state in self.lr0_states:
actions = {}
for la, next_state in state.transitions.items():
actions[la] = (Shift, next_state.closure)
for la, rules in state.lookaheads.items():
if len(rules) > 1:
# Try to resolve conflict based on priority
p = [(r.options.priority or 0, r) for r in rules]
p.sort(key=lambda r: r[0], reverse=True)
best, second_best = p[:2]
if best[0] > second_best[0]:
rules = [best[1]]
else:
reduce_reduce.append((state, la, rules))
if la in actions:
if self.debug:
logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
logger.warning(' * %s', list(rules)[0])
else:
actions[la] = (Reduce, list(rules)[0])
m[state] = { k.name: v for k, v in actions.items() }
if reduce_reduce:
msgs = []
for state, la, rules in reduce_reduce:
msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
if self.debug:
msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in state.closure])
msgs.append(msg)
raise GrammarError('\n\n'.join(msgs))
states = { k.closure: v for k, v in m.items() }
# compute end states
end_states = {}
for state in states:
for rp in state:
for start in self.lr0_start_states:
if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
assert(start not in end_states)
end_states[start] = state
_parse_table = ParseTable(states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states)
if self.debug:
self.parse_table = _parse_table
else:
self.parse_table = IntParseTable.from_ParseTable(_parse_table)
def compute_lalr(self):
self.compute_lr0_states()
self.compute_reads_relations()
self.compute_includes_lookback()
self.compute_lookaheads()
self.compute_lalr1_states()


@ -0,0 +1,148 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling
from typing import Iterator, List
from copy import copy
import warnings
from lark.exceptions import UnexpectedToken
from lark.lexer import Token, LexerThread
class InteractiveParser:
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
"""
def __init__(self, parser, parser_state, lexer_thread: LexerThread):
self.parser = parser
self.parser_state = parser_state
self.lexer_thread = lexer_thread
self.result = None
@property
def lexer_state(self) -> LexerThread:
warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
return self.lexer_thread
def feed_token(self, token: Token):
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
Note that ``token`` has to be an instance of ``Token``.
"""
return self.parser_state.feed_token(token, token.type == '$END')
def iter_parse(self) -> Iterator[Token]:
"""Step through the different stages of the parse, by reading tokens from the lexer
and feeding them to the parser, one per iteration.
Returns an iterator of the tokens it encounters.
When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
"""
for token in self.lexer_thread.lex(self.parser_state):
yield token
self.result = self.feed_token(token)
def exhaust_lexer(self) -> List[Token]:
"""Try to feed the rest of the lexer state into the interactive parser.
Note that this modifies the instance in place and does not feed an '$END' Token
"""
return list(self.iter_parse())
def feed_eof(self, last_token=None):
"""Feed a '$END' Token. Borrows from 'last_token' if given."""
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
return self.feed_token(eof)
def __copy__(self):
"""Create a new interactive parser with a separate state.
Calls to feed_token() won't affect the old instance, and vice-versa.
"""
return type(self)(
self.parser,
copy(self.parser_state),
copy(self.lexer_thread),
)
def copy(self):
return copy(self)
def __eq__(self, other):
if not isinstance(other, InteractiveParser):
return False
return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread
def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)
def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read."""
out = ["Parser choices:"]
for k, v in self.choices().items():
out.append('\t- %s -> %r' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack))
return '\n'.join(out)
def choices(self):
"""Returns a dictionary of token types, matched to their action in the parser.
Only returns token types that are accepted by the current state.
Updated by ``feed_token()``.
"""
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
def accepts(self):
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
accepts = set()
for t in self.choices():
if t.isupper(): # is terminal?
new_cursor = copy(self)
try:
new_cursor.feed_token(self.lexer_thread._Token(t, ''))
except UnexpectedToken:
pass
else:
accepts.add(t)
return accepts
def resume_parse(self):
"""Resume automated parsing from the current state."""
return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
"""Same as ``InteractiveParser``, but operations create a new instance instead
of changing it in-place.
"""
result = None
def __hash__(self):
return hash((self.parser_state, self.lexer_thread))
def feed_token(self, token):
c = copy(self)
c.result = InteractiveParser.feed_token(c, token)
return c
def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the parser.
Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
cursor = self.as_mutable()
cursor.exhaust_lexer()
return cursor.as_immutable()
def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_thread)
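# Illustrative usage sketch (not part of this module); it assumes the public
# ``lark`` package is importable, and the grammar and input are made up.
def _interactive_parser_example():
    from lark import Lark
    calc = Lark(r"""
        start: NUMBER "+" NUMBER
        %import common.NUMBER
        %import common.WS
        %ignore WS
    """, parser="lalr")
    ip = calc.parse_interactive("1 + 2")
    last = None
    for last in ip.iter_parse():                      # lex and feed one token at a time
        print(last.type, "->", sorted(ip.accepts()))  # token types valid in the next step
    return ip.feed_eof(last)                          # feed '$END' and obtain the tree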


@ -0,0 +1,199 @@
"""This module implements a LALR(1) Parser
"""
# Author: Erez Shinan (2017)
# Email : erezshin@gmail.com
from copy import deepcopy, copy
from typing import Dict, Any
from ..lexer import Token
from ..utils import Serialize
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
from .lalr_interactive_parser import InteractiveParser
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
###{standalone
class LALR_Parser(Serialize):
def __init__(self, parser_conf, debug=False):
analysis = LALR_Analyzer(parser_conf, debug=debug)
analysis.compute_lalr()
callbacks = parser_conf.callbacks
self._parse_table = analysis.parse_table
self.parser_conf = parser_conf
self.parser = _Parser(analysis.parse_table, callbacks, debug)
@classmethod
def deserialize(cls, data, memo, callbacks, debug=False):
inst = cls.__new__(cls)
inst._parse_table = IntParseTable.deserialize(data, memo)
inst.parser = _Parser(inst._parse_table, callbacks, debug)
return inst
def serialize(self, memo: Any = None) -> Dict[str, Any]:
return self._parse_table.serialize(memo)
def parse_interactive(self, lexer, start):
return self.parser.parse(lexer, start, start_interactive=True)
def parse(self, lexer, start, on_error=None):
try:
return self.parser.parse(lexer, start)
except UnexpectedInput as e:
if on_error is None:
raise
while True:
if isinstance(e, UnexpectedCharacters):
s = e.interactive_parser.lexer_thread.state
p = s.line_ctr.char_pos
if not on_error(e):
raise e
if isinstance(e, UnexpectedCharacters):
# If the user didn't change the character position, skip the offending character ourselves
if p == s.line_ctr.char_pos:
s.line_ctr.feed(s.text[p:p+1])
try:
return e.interactive_parser.resume_parse()
except UnexpectedToken as e2:
if (isinstance(e, UnexpectedToken)
and e.token.type == e2.token.type == '$END'
and e.interactive_parser == e2.interactive_parser):
# Prevent infinite loop
raise e2
e = e2
except UnexpectedCharacters as e2:
e = e2
class ParseConf:
__slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
def __init__(self, parse_table, callbacks, start):
self.parse_table = parse_table
self.start_state = self.parse_table.start_states[start]
self.end_state = self.parse_table.end_states[start]
self.states = self.parse_table.states
self.callbacks = callbacks
self.start = start
class ParserState:
__slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
self.parse_conf = parse_conf
self.lexer = lexer
self.state_stack = state_stack or [self.parse_conf.start_state]
self.value_stack = value_stack or []
@property
def position(self):
return self.state_stack[-1]
# Necessary for match_examples() to work
def __eq__(self, other):
if not isinstance(other, ParserState):
return NotImplemented
return len(self.state_stack) == len(other.state_stack) and self.position == other.position
def __copy__(self):
return type(self)(
self.parse_conf,
self.lexer, # XXX copy
copy(self.state_stack),
deepcopy(self.value_stack),
)
def copy(self):
return copy(self)
def feed_token(self, token, is_end=False):
state_stack = self.state_stack
value_stack = self.value_stack
states = self.parse_conf.states
end_state = self.parse_conf.end_state
callbacks = self.parse_conf.callbacks
while True:
state = state_stack[-1]
try:
action, arg = states[state][token.type]
except KeyError:
expected = {s for s in states[state].keys() if s.isupper()}
raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
assert arg != end_state
if action is Shift:
# shift once and return
assert not is_end
state_stack.append(arg)
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
return
else:
# reduce+shift as many times as necessary
rule = arg
size = len(rule.expansion)
if size:
s = value_stack[-size:]
del state_stack[-size:]
del value_stack[-size:]
else:
s = []
value = callbacks[rule](s)
_action, new_state = states[state_stack[-1]][rule.origin.name]
assert _action is Shift
state_stack.append(new_state)
value_stack.append(value)
if is_end and state_stack[-1] == end_state:
return value_stack[-1]
class _Parser:
def __init__(self, parse_table, callbacks, debug=False):
self.parse_table = parse_table
self.callbacks = callbacks
self.debug = debug
def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
if start_interactive:
return InteractiveParser(self, parser_state, parser_state.lexer)
return self.parse_from_state(parser_state)
def parse_from_state(self, state):
# Main LALR-parser loop
try:
token = None
for token in state.lexer.lex(state):
state.feed_token(token)
end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
return state.feed_token(end_token, True)
except UnexpectedInput as e:
try:
e.interactive_parser = InteractiveParser(self, state, state.lexer)
except NameError:
pass
raise e
except Exception as e:
if self.debug:
print("")
print("STATE STACK DUMP")
print("----------------")
for i, s in enumerate(state.state_stack):
print('%d)' % i , s)
print("")
raise
###}
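# Illustrative sketch of the error-handling loop above as reached through
# ``Lark.parse(..., on_error=...)``. It assumes the public ``lark`` package is
# importable; the grammar, input and handler are made up for illustration.
def _on_error_example():
    from lark import Lark, UnexpectedInput
    parser = Lark(r"""start: "a"+""", parser="lalr")
    def skip_errors(e: UnexpectedInput) -> bool:
        # Returning True asks the loop above to resume from the interactive
        # parser attached to the exception; returning False re-raises it.
        return True
    return parser.parse("aabaa", on_error=skip_errors)   # the stray 'b' is skipped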


@ -0,0 +1,109 @@
from ..utils import compare
from functools import cmp_to_key
from ..tree import Tree
# Standard ambiguity resolver (uses comparison)
#
# Author: Erez Sh
def _compare_rules(rule1, rule2):
return -compare( len(rule1.expansion), len(rule2.expansion))
def _sum_priority(tree):
p = 0
for n in tree.iter_subtrees():
try:
p += n.meta.rule.options.priority or 0
except AttributeError:
pass
return p
def _compare_priority(tree1, tree2):
tree1.iter_subtrees()
def _compare_drv(tree1, tree2):
try:
rule1 = tree1.meta.rule
except AttributeError:
rule1 = None
try:
rule2 = tree2.meta.rule
except AttributeError:
rule2 = None
if None == rule1 == rule2:
return compare(tree1, tree2)
elif rule1 is None:
return -1
elif rule2 is None:
return 1
assert tree1.data != '_ambig'
assert tree2.data != '_ambig'
p1 = _sum_priority(tree1)
p2 = _sum_priority(tree2)
c = (p1 or p2) and compare(p1, p2)
if c:
return c
c = _compare_rules(tree1.meta.rule, tree2.meta.rule)
if c:
return c
# rules are "equal", so compare trees
if len(tree1.children) == len(tree2.children):
for t1, t2 in zip(tree1.children, tree2.children):
c = _compare_drv(t1, t2)
if c:
return c
return compare(len(tree1.children), len(tree2.children))
def _standard_resolve_ambig(tree):
assert tree.data == '_ambig'
key_f = cmp_to_key(_compare_drv)
best = max(tree.children, key=key_f)
assert best.data == 'drv'
tree.set('drv', best.children)
tree.meta.rule = best.meta.rule # needed for applying callbacks
def standard_resolve_ambig(tree):
for ambig in tree.find_data('_ambig'):
_standard_resolve_ambig(ambig)
return tree
# Anti-score Sum
#
# Author: Uriva (https://github.com/uriva)
def _antiscore_sum_drv(tree):
if not isinstance(tree, Tree):
return 0
assert tree.data != '_ambig'
return _sum_priority(tree)
def _antiscore_sum_resolve_ambig(tree):
assert tree.data == '_ambig'
best = min(tree.children, key=_antiscore_sum_drv)
assert best.data == 'drv'
tree.set('drv', best.children)
tree.meta.rule = best.meta.rule # needed for applying callbacks
def antiscore_sum_resolve_ambig(tree):
for ambig in tree.find_data('_ambig'):
_antiscore_sum_resolve_ambig(ambig)
return tree


@ -0,0 +1,159 @@
"""This module implements an experimental Earley parser with a dynamic lexer
The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
https://www.sciencedirect.com/science/article/pii/S1571066108001497
That is probably the best reference for understanding the algorithm here.
The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
is better documented here:
http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
uses regular expressions by necessity, achieving high performance while maintaining all of
Earley's power in parsing any CFG.
"""
from collections import defaultdict
from ..tree import Tree
from ..exceptions import UnexpectedCharacters
from ..lexer import Token
from ..grammar import Terminal
from .earley import Parser as BaseParser
from .earley_forest import SymbolNode, TokenNode
class Parser(BaseParser):
def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True, complete_lex = False, debug=False, tree_class=Tree):
BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class)
self.ignore = [Terminal(t) for t in lexer_conf.ignore]
self.complete_lex = complete_lex
def _parse(self, stream, columns, to_scan, start_symbol=None):
def scan(i, to_scan):
"""The core Earley Scanner.
This is a custom implementation of the scanner that uses the
Lark lexer to match tokens. The scan list is built by the
Earley predictor, based on the previously completed tokens.
This ensures that at each phase of the parse we have a custom
lexer context, allowing for more complex ambiguities."""
node_cache = {}
# 1) Loop the expectations and ask the lexer to match.
# Since regexp is forward looking on the input stream, and we only
# want to process tokens when we hit the point in the stream at which
# they complete, we push all tokens into a buffer (delayed_matches), to
# be held possibly for a later parse step when we reach the point in the
# input stream at which they complete.
for item in set(to_scan):
m = match(item.expect, stream, i)
if m:
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
delayed_matches[m.end()].append( (item, i, t) )
if self.complete_lex:
s = m.group(0)
for j in range(1, len(s)):
m = match(item.expect, s[:-j])
if m:
t = Token(item.expect.name, m.group(0), i, text_line, text_column)
delayed_matches[i+m.end()].append( (item, i, t) )
# XXX The following 3 lines were commented out for causing a bug. See issue #768
# # Remove any items that successfully matched in this pass from the to_scan buffer.
# # This ensures we don't carry over tokens that already matched, if we're ignoring below.
# to_scan.remove(item)
# 3) Process any ignores. This is typically used for e.g. whitespace.
# We carry over any unmatched items from the to_scan buffer to be matched again after
# the ignore. This should allow us to use ignored symbols in non-terminals to implement
# e.g. mandatory spacing.
for x in self.ignore:
m = match(x, stream, i)
if m:
# Carry over any items still in the scan buffer, to past the end of the ignored items.
delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
# If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
next_to_scan = set()
next_set = set()
columns.append(next_set)
transitives.append({})
## 4) Process Tokens from delayed_matches.
# This is the core of the Earley scanner. Create an SPPF node for each Token,
# and create the symbol node in the SPPF tree. Advance the item that completed,
# and add the resulting new item to either the Earley set (for processing by the
# completer/predictor) or the to_scan buffer for the next parse step.
for item, start, token in delayed_matches[i+1]:
if token is not None:
token.end_line = text_line
token.end_column = text_column + 1
token.end_pos = i + 1
new_item = item.advance()
label = (new_item.s, new_item.start, i)
token_node = TokenNode(token, terminals[token.type])
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
else:
new_item = item
if new_item.expect in self.TERMINALS:
# add (B ::= Aai+1.B, h, y) to Q'
next_to_scan.add(new_item)
else:
# add (B ::= Aa+1.B, h, y) to Ei+1
next_set.add(new_item)
del delayed_matches[i+1] # No longer needed, so unburden memory
if not next_set and not delayed_matches and not next_to_scan:
considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
set(to_scan), state=frozenset(i.s for i in to_scan),
considered_rules=considered_rules
)
return next_to_scan
delayed_matches = defaultdict(list)
match = self.term_matcher
terminals = self.lexer_conf.terminals_by_name
# Cache for nodes & tokens created in a particular parse step.
transitives = [{}]
text_line = 1
text_column = 1
## The main Earley loop.
# Run the Prediction/Completion cycle for any Items in the current Earley set.
# Completions will be added to the SPPF tree, and predictions will be recursively
# processed down to terminals/empty nodes to be added to the scanner for the next
# step.
i = 0
for token in stream:
self.predict_and_complete(i, to_scan, columns, transitives)
to_scan = scan(i, to_scan)
if token == '\n':
text_line += 1
text_column = 1
else:
text_column += 1
i += 1
self.predict_and_complete(i, to_scan, columns, transitives)
## Column is now the final column in the parse.
assert i == len(columns)-1
return to_scan
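# A minimal usage sketch (not part of the vendored file): the dynamic-lexer parser above is
# normally reached through the public Lark API by selecting parser='earley' together with
# lexer='dynamic' (lexer='dynamic_complete' maps to complete_lex=True). The toy grammar and
# input below are invented for illustration.
if __name__ == '__main__':
    from lark import Lark

    demo = Lark(r"""
        start: pair ("," pair)*
        pair: WORD "=" WORD
        WORD: /[a-z]+/
        %ignore " "
    """, parser='earley', lexer='dynamic')

    print(demo.parse("a=b, c=d").pretty())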

View File

View File

@ -0,0 +1,106 @@
"""Reconstruct text from a tree, based on Lark grammar"""
from typing import List, Dict, Union, Callable, Iterable, Optional
from .lark import Lark
from .tree import Tree, ParseTree
from .visitors import Transformer_InPlace
from .lexer import Token, PatternStr, TerminalDef
from .grammar import Terminal, NonTerminal, Symbol
from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import is_id_continue
def is_iter_empty(i):
try:
_ = next(i)
return False
except StopIteration:
return True
class WriteTokensTransformer(Transformer_InPlace):
"Inserts discarded tokens into their correct place, according to the rules of grammar"
tokens: Dict[str, TerminalDef]
term_subs: Dict[str, Callable[[Symbol], str]]
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
self.tokens = tokens
self.term_subs = term_subs
def __default__(self, data, children, meta):
if not getattr(meta, 'match_tree', False):
return Tree(data, children)
iter_args = iter(children)
to_write = []
for sym in meta.orig_expansion:
if is_discarded_terminal(sym):
try:
v = self.term_subs[sym.name](sym)
except KeyError:
t = self.tokens[sym.name]
if not isinstance(t.pattern, PatternStr):
raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
v = t.pattern.value
to_write.append(v)
else:
x = next(iter_args)
if isinstance(x, list):
to_write += x
else:
if isinstance(x, Token):
assert Terminal(x.type) == sym, x
else:
assert NonTerminal(x.data) == sym, (sym, x)
to_write.append(x)
assert is_iter_empty(iter_args)
return to_write
class Reconstructor(TreeMatcher):
"""
A Reconstructor that will, given a full parse Tree, generate source code.
Note:
The reconstructor cannot generate values from regexps. If you need to produce discarded
regexes, such as newlines, use `term_subs` and provide default values for them.
Parameters:
parser: a Lark instance
term_subs: a dictionary of [Terminal name as str] to [output text as str]
"""
write_tokens: WriteTokensTransformer
def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
TreeMatcher.__init__(self, parser)
self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
def _reconstruct(self, tree):
unreduced_tree = self.match_tree(tree, tree.data)
res = self.write_tokens.transform(unreduced_tree)
for item in res:
if isinstance(item, Tree):
# TODO use orig_expansion.rulename to support templates
yield from self._reconstruct(item)
else:
yield item
def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
x = self._reconstruct(tree)
if postproc:
x = postproc(x)
y = []
prev_item = ''
for item in x:
if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
y.append(' ')
y.append(item)
prev_item = item
return ''.join(y)
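# A minimal usage sketch (not part of the vendored file): parse some text, then feed the
# resulting tree back through Reconstructor to regenerate equivalent source. The grammar and
# input are invented for illustration; maybe_placeholders=False is used here as a conservative
# choice, since the reconstructor matches trees without placeholder children.
if __name__ == '__main__':
    demo_parser = Lark(r"""
        start: item ("," item)*
        item: WORD
        WORD: /[a-z]+/
        %ignore " "
    """, parser='lalr', maybe_placeholders=False)

    demo_tree = demo_parser.parse("foo, bar, baz")
    # Ignored whitespace is not restored, so this prints something like "foo,bar,baz".
    print(Reconstructor(demo_parser).reconstruct(demo_tree))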

View File

@ -0,0 +1,64 @@
import sys
from argparse import ArgumentParser, FileType
from textwrap import indent
from logging import DEBUG, INFO, WARN, ERROR
from typing import Optional
import warnings
from lark import Lark, logger
lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
flags = [
('d', 'debug'),
'keep_all_tokens',
'regex',
'propagate_positions',
'maybe_placeholders',
'use_bytes'
]
options = ['start', 'lexer']
lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
lalr_argparser.add_argument('-s', '--start', action='append', default=[])
lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual'))
encoding: Optional[str] = 'utf-8' if sys.version_info > (3, 4) else None
lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding=encoding), default=sys.stdout, help='the output file (default=stdout)')
lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding=encoding), help='A valid .lark file')
for flag in flags:
if isinstance(flag, tuple):
options.append(flag[1])
lalr_argparser.add_argument('-' + flag[0], '--' + flag[1], action='store_true')
elif isinstance(flag, str):
options.append(flag)
lalr_argparser.add_argument('--' + flag, action='store_true')
else:
raise NotImplementedError("flags must only contain strings or tuples of strings")
def build_lalr(namespace):
logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
if len(namespace.start) == 0:
namespace.start.append('start')
kwargs = {n: getattr(namespace, n) for n in options}
return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
# Based on warnings._showwarnmsg_impl
text = warnings.formatwarning(message, category, filename, lineno, line)
text = indent(text, '# ')
if file is None:
file = sys.stderr
if file is None:
return
try:
file.write(text)
except OSError:
pass
def make_warnings_comments():
warnings.showwarning = showwarning_as_comment
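# A minimal sketch (not from the vendored sources) of how a hypothetical command-line tool can
# reuse lalr_argparser and build_lalr, in the same way the serialize and standalone tools below
# do; 'demo-tool' is an invented name and the ArgumentParser import above is reused.
def _demo_tool(argv=None):
    ap = ArgumentParser(prog='demo-tool', parents=[lalr_argparser])
    ns = ap.parse_args(argv)
    lark_inst, out = build_lalr(ns)   # -> (Lark instance, opened output file)
    print('compiled LALR grammar; start rule(s):', ns.start, file=out)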

View File

@ -0,0 +1,202 @@
"Converts Nearley grammars to Lark"
import os.path
import sys
import codecs
import argparse
from lark import Lark, Transformer, v_args
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item (":" /[+*?]/)?
?item: rule|string|regexp|null
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: _STRING "i"?
%import common.ESCAPED_STRING -> _STRING
%import common.WS
%ignore WS
%ignore COMMENT
"""
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
def _get_rulename(name):
name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
return 'n_' + name.replace('$', '__DOLLAR__').lower()
@v_args(inline=True)
class NearleyToLark(Transformer):
def __init__(self):
self._count = 0
self.extra_rules = {}
self.extra_rules_rev = {}
self.alias_js_code = {}
def _new_function(self, code):
name = 'alias_%d' % self._count
self._count += 1
self.alias_js_code[name] = code
return name
def _extra_rule(self, rule):
if rule in self.extra_rules_rev:
return self.extra_rules_rev[rule]
name = 'xrule_%d' % len(self.extra_rules)
assert name not in self.extra_rules
self.extra_rules[name] = rule
self.extra_rules_rev[rule] = name
return name
def rule(self, name):
return _get_rulename(name)
def ruledef(self, name, exps):
return '!%s: %s' % (_get_rulename(name), exps)
def expr(self, item, op):
rule = '(%s)%s' % (item, op)
return self._extra_rule(rule)
def regexp(self, r):
return '/%s/' % r
def null(self):
return ''
def string(self, s):
return self._extra_rule(s)
def expansion(self, *x):
x, js = x[:-1], x[-1]
if js.children:
js_code ,= js.children
js_code = js_code[2:-2]
alias = '-> ' + self._new_function(js_code)
else:
alias = ''
return ' '.join(x) + alias
def expansions(self, *x):
return '%s' % ('\n |'.join(x))
def start(self, *rules):
return '\n'.join(filter(None, rules))
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
rule_defs = []
tree = nearley_grammar_parser.parse(g)
for statement in tree.children:
if statement.data == 'directive':
directive, arg = statement.children
if directive in ('builtin', 'include'):
folder = builtin_path if directive == 'builtin' else folder_path
path = os.path.join(folder, arg[1:-1])
if path not in includes:
includes.add(path)
with codecs.open(path, encoding='utf8') as f:
text = f.read()
rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
else:
assert False, directive
elif statement.data == 'js_code':
code ,= statement.children
code = code[2:-2]
js_code.append(code)
elif statement.data == 'macro':
pass # TODO Add support for macros!
elif statement.data == 'ruledef':
rule_defs.append(n2l.transform(statement))
else:
raise Exception("Unknown statement: %s" % statement)
return rule_defs
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
import js2py
emit_code = []
def emit(x=None):
if x:
emit_code.append(x)
emit_code.append('\n')
js_code = ['function id(x) {return x[0];}']
n2l = NearleyToLark()
rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
lark_g = '\n'.join(rule_defs)
lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
emit('from lark import Lark, Transformer')
emit()
emit('grammar = ' + repr(lark_g))
emit()
for alias, code in n2l.alias_js_code.items():
js_code.append('%s = (%s);' % (alias, code))
if es6:
emit(js2py.translate_js6('\n'.join(js_code)))
else:
emit(js2py.translate_js('\n'.join(js_code)))
emit('class TransformNearley(Transformer):')
for alias in n2l.alias_js_code:
emit(" %s = var.get('%s').to_python()" % (alias, alias))
emit(" __default__ = lambda self, n, c, m: c if c else None")
emit()
emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
emit('def parse(text):')
emit(' return TransformNearley().transform(parser.parse(text))')
return ''.join(emit_code)
def main(fn, start, nearley_lib, es6=False):
with codecs.open(fn, encoding='utf8') as f:
grammar = f.read()
return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
def get_arg_parser():
parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
return parser
if __name__ == '__main__':
parser = get_arg_parser()
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
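# A usage sketch (not from the vendored sources; paths are placeholders and the js2py package
# must be installed). Assuming this module is installed as lark.tools.nearley, a Nearley
# grammar can be converted into a self-contained Python parser module either from the shell:
#
#   python -m lark.tools.nearley csv.ne main /path/to/nearley > csv_parser.py
#
# or programmatically, via main() defined above:
#
#   with open('csv_parser.py', 'w', encoding='utf8') as out:
#       out.write(main(fn='csv.ne', start='main', nearley_lib='/path/to/nearley'))
#
# The generated module exposes `parser` (a Lark instance) and `parse(text)`.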

View File

@ -0,0 +1,34 @@
import codecs
import sys
import json
from lark import Lark
from lark.grammar import RuleOptions, Rule
from lark.lexer import TerminalDef
from lark.tools import lalr_argparser, build_lalr
import argparse
argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
epilog='Look at the Lark documentation for more info on the options')
def serialize(lark_inst, outfile):
data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
outfile.write('{\n')
outfile.write(' "data": %s,\n' % json.dumps(data))
outfile.write(' "memo": %s\n' % json.dumps(memo))
outfile.write('}\n')
def main():
if len(sys.argv)==1:
argparser.print_help(sys.stderr)
sys.exit(1)
ns = argparser.parse_args()
serialize(*build_lalr(ns))
if __name__ == '__main__':
main()
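# A usage sketch (not from the vendored sources; file names are placeholders). Assuming this
# module is installed as lark.tools.serialize, the LALR analysis for a grammar can be dumped
# as JSON with the options contributed by lalr_argparser:
#
#   python -m lark.tools.serialize my_grammar.lark -o my_grammar.json -s start
#
# The resulting file holds the "data"/"memo" pair produced by memo_serialize([TerminalDef, Rule]).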

View File

@ -0,0 +1,194 @@
###{standalone
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark,
# and are subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, you may contact me via email or otherwise.
#
# If MPL2 is incompatible with your free or open-source project,
# contact me and we'll work it out.
#
#
from abc import ABC, abstractmethod
from collections.abc import Sequence
from types import ModuleType
from typing import (
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Union, Iterable, IO, TYPE_CHECKING, overload,
Pattern as REPattern, ClassVar, Set, Mapping
)
###}
import sys
import token, tokenize
import os
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser
import lark
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
from lark.grammar import Rule
from lark.lexer import TerminalDef
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)
EXTRACT_STANDALONE_FILES = [
'tools/standalone.py',
'exceptions.py',
'utils.py',
'tree.py',
'visitors.py',
'grammar.py',
'lexer.py',
'common.py',
'parse_tree_builder.py',
'parsers/lalr_parser.py',
'parsers/lalr_analysis.py',
'parser_frontends.py',
'lark.py',
'indenter.py',
]
def extract_sections(lines):
section = None
text = []
sections = defaultdict(list)
for line in lines:
if line.startswith('###'):
if line[3] == '{':
section = line[4:].strip()
elif line[3] == '}':
sections[section] += text
section = None
text = []
else:
raise ValueError(line)
elif section:
text.append(line)
return {name: ''.join(text) for name, text in sections.items()}
def strip_docstrings(line_gen):
""" Strip comments and docstrings from a file.
Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
"""
res = []
prev_toktype = token.INDENT
last_lineno = -1
last_col = 0
tokgen = tokenize.generate_tokens(line_gen)
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
if slineno > last_lineno:
last_col = 0
if scol > last_col:
res.append(" " * (scol - last_col))
if toktype == token.STRING and prev_toktype == token.INDENT:
# Docstring
res.append("#--")
elif toktype == tokenize.COMMENT:
# Comment
res.append("##\n")
else:
res.append(ttext)
prev_toktype = toktype
last_col = ecol
last_lineno = elineno
return ''.join(res)
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
if output is None:
output = partial(print, file=out)
import pickle, zlib, base64
def compressed_output(obj):
s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
c = zlib.compress(s)
output(repr(base64.b64encode(c)))
def output_decompress(name):
output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
output('# The file was automatically generated by Lark v%s' % lark.__version__)
output('__version__ = "%s"' % lark.__version__)
output()
for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
with open(os.path.join(_larkdir, pyfile)) as f:
code = extract_sections(f)['standalone']
if i: # if not this file
code = strip_docstrings(partial(next, iter(code.splitlines(True))))
output(code)
data, m = lark_inst.memo_serialize([TerminalDef, Rule])
output('import pickle, zlib, base64')
if compress:
output('DATA = (')
compressed_output(data)
output(')')
output_decompress('DATA')
output('MEMO = (')
compressed_output(m)
output(')')
output_decompress('MEMO')
else:
output('DATA = (')
output(data)
output(')')
output('MEMO = (')
output(m)
output(')')
output('Shift = 0')
output('Reduce = 1')
output("def Lark_StandAlone(**kwargs):")
output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
def main():
make_warnings_comments()
parser = ArgumentParser(prog='python -m lark.tools.standalone', description="Lark Stand-alone Generator Tool",
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
ns = parser.parse_args()
lark_inst, out = build_lalr(ns)
gen_standalone(lark_inst, out=out, compress=ns.compress)
ns.out.close()
ns.grammar_file.close()
if __name__ == '__main__':
main()
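# A usage sketch (not from the vendored sources; file names are placeholders). Assuming this
# module is installed as lark.tools.standalone, a self-contained parser can be generated and
# then used without importing lark at runtime:
#
#   python -m lark.tools.standalone my_grammar.lark > my_parser.py
#
# The generated my_parser.py exposes Lark_StandAlone, as emitted by gen_standalone() above:
#
#   from my_parser import Lark_StandAlone
#   parser = Lark_StandAlone()
#   tree = parser.parse("some input text")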

View File

@ -0,0 +1,263 @@
import sys
from copy import deepcopy
from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, Any, TYPE_CHECKING
if TYPE_CHECKING:
from .lexer import TerminalDef, Token
import rich
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
###{standalone
from collections import OrderedDict
class Meta:
empty: bool
line: int
column: int
start_pos: int
end_line: int
end_column: int
end_pos: int
orig_expansion: 'List[TerminalDef]'
match_tree: bool
def __init__(self):
self.empty = True
_Leaf_T = TypeVar("_Leaf_T")
Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
class Tree(Generic[_Leaf_T]):
"""The main tree class.
Creates a new tree, and stores "data" and "children" in attributes of the same name.
Trees can be hashed and compared.
Parameters:
data: The name of the rule or alias
children: List of matched sub-rules and terminals
meta: Line & Column numbers (if ``propagate_positions`` is enabled).
meta attributes: line, column, start_pos, end_line, end_column, end_pos
"""
data: str
children: 'List[Branch[_Leaf_T]]'
def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
self.data = data
self.children = children
self._meta = meta
@property
def meta(self) -> Meta:
if self._meta is None:
self._meta = Meta()
return self._meta
def __repr__(self):
return 'Tree(%r, %r)' % (self.data, self.children)
def _pretty_label(self):
return self.data
def _pretty(self, level, indent_str):
if len(self.children) == 1 and not isinstance(self.children[0], Tree):
return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
l = [indent_str*level, self._pretty_label(), '\n']
for n in self.children:
if isinstance(n, Tree):
l += n._pretty(level+1, indent_str)
else:
l += [indent_str*(level+1), '%s' % (n,), '\n']
return l
def pretty(self, indent_str: str=' ') -> str:
"""Returns an indented string representation of the tree.
Great for debugging.
"""
return ''.join(self._pretty(0, indent_str))
def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree':
"""Returns a tree widget for the 'rich' library.
Example:
::
from rich import print
from lark import Tree
tree = Tree('root', ['node1', 'node2'])
print(tree)
"""
return self._rich(parent)
def _rich(self, parent):
if parent:
tree = parent.add(f'[bold]{self.data}[/bold]')
else:
import rich.tree
tree = rich.tree.Tree(self.data)
for c in self.children:
if isinstance(c, Tree):
c._rich(tree)
else:
tree.add(f'[green]{c}[/green]')
return tree
def __eq__(self, other):
try:
return self.data == other.data and self.children == other.children
except AttributeError:
return False
def __ne__(self, other):
return not (self == other)
def __hash__(self) -> int:
return hash((self.data, tuple(self.children)))
def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
"""Depth-first iteration.
Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
"""
queue = [self]
subtrees = OrderedDict()
for subtree in queue:
subtrees[id(subtree)] = subtree
# Reason for type ignore https://github.com/python/mypy/issues/10999
queue += [c for c in reversed(subtree.children) # type: ignore[misc]
if isinstance(c, Tree) and id(c) not in subtrees]
del queue
return reversed(list(subtrees.values()))
def iter_subtrees_topdown(self):
"""Breadth-first iteration.
Iterates over all the subtrees, return nodes in order like pretty() does.
"""
stack = [self]
while stack:
node = stack.pop()
if not isinstance(node, Tree):
continue
yield node
for child in reversed(node.children):
stack.append(child)
def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
"""Returns all nodes of the tree that evaluate pred(node) as true."""
return filter(pred, self.iter_subtrees())
def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
"""Returns all nodes of the tree whose data equals the given data."""
return self.find_pred(lambda t: t.data == data)
###}
def expand_kids_by_data(self, *data_values):
"""Expand (inline) children with any of the given data values. Returns True if anything changed"""
changed = False
for i in range(len(self.children)-1, -1, -1):
child = self.children[i]
if isinstance(child, Tree) and child.data in data_values:
self.children[i:i+1] = child.children
changed = True
return changed
def scan_values(self, pred: 'Callable[[Branch[_Leaf_T]], bool]') -> Iterator[_Leaf_T]:
"""Return all values in the tree that evaluate pred(value) as true.
This can be used to find all the tokens in the tree.
Example:
>>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token))
"""
for c in self.children:
if isinstance(c, Tree):
for t in c.scan_values(pred):
yield t
else:
if pred(c):
yield c
def __deepcopy__(self, memo):
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
def copy(self) -> 'Tree[_Leaf_T]':
return type(self)(self.data, self.children)
def set(self, data: str, children: 'List[Branch[_Leaf_T]]') -> None:
self.data = data
self.children = children
ParseTree = Tree['Token']
class SlottedTree(Tree):
__slots__ = 'data', 'children', 'rule', '_meta'
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
graph.write_png(filename)
def pydot__tree_to_dot(tree: Tree, filename, rankdir="LR", **kwargs):
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
graph.write(filename)
def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
"""Creates a colorful image that represents the tree (data+children, without meta)
Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to
directed graphs drawn from top to bottom, from left to right, from bottom to
top, and from right to left, respectively.
`kwargs` can be any graph attribute (e.g. `dpi=200`). For a list of
possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
"""
import pydot # type: ignore[import]
graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
i = [0]
def new_leaf(leaf):
node = pydot.Node(i[0], label=repr(leaf))
i[0] += 1
graph.add_node(node)
return node
def _to_pydot(subtree):
color = hash(subtree.data) & 0xffffff
color |= 0x808080
subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
for child in subtree.children]
node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
i[0] += 1
graph.add_node(node)
for subnode in subnodes:
graph.add_edge(pydot.Edge(node, subnode))
return node
_to_pydot(tree)
return graph
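# A minimal usage sketch (not part of the vendored file): build a small tree by hand, walk it
# with the iteration helpers above, and optionally render it with pydot (the 'pydot' package
# and Graphviz are required for the commented-out last step).
if __name__ == '__main__':
    t = Tree('start', [Tree('pair', ['a', 'b']), Tree('pair', ['c', 'd'])])
    print(t.pretty())
    print([st.data for st in t.iter_subtrees_topdown()])        # ['start', 'pair', 'pair']
    print(list(t.scan_values(lambda v: isinstance(v, str))))    # ['a', 'b', 'c', 'd']
    # pydot__tree_to_png(t, 'tree.png', rankdir='TB', dpi='200')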

Some files were not shown because too many files have changed in this diff.