commit 473845b537f992836a4493144f5e5e4a3727f76d Author: su-fang Date: Tue Mar 14 14:48:50 2023 +0800 Import Upstream version 2.2.2 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..e32c802 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,13 @@ +root = true + +[*] +indent_style = space +indent_size = 4 +insert_final_newline = true +trim_trailing_whitespace = true +end_of_line = lf +charset = utf-8 +max_line_length = 88 + +[*.{yml,yaml,json,js,css,html}] +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..764a442 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,8 @@ +# Normalize CRLF to LF for all text files +* text=auto + +# Declare binary file types so they won't be normalized +*.png binary +*.jpg binary +tests/**/*.http binary +tests/res/test.txt binary diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..eb5e22b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Report a bug in Werkzeug (not other projects which depend on Werkzeug) +--- + + + + + + + +Environment: + +- Python version: +- Werkzeug version: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..9df4cec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,11 @@ +blank_issues_enabled: false +contact_links: + - name: Security issue + url: security@palletsprojects.com + about: Do not report security issues publicly. Email our security contact. + - name: Questions + url: https://stackoverflow.com/questions/tagged/werkzeug?tab=Frequent + about: Search for and ask questions about your code on Stack Overflow. + - name: Questions and discussions + url: https://discord.gg/pallets + about: Discuss questions about your code on our Discord chat. 
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 0000000..4869879 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,15 @@ +--- +name: Feature request +about: Suggest a new feature for Werkzeug +--- + + + + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..90f94bc --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 +updates: +- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + day: "monday" + time: "16:00" + timezone: "UTC" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..29fd35f --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,30 @@ + + + + +- fixes # + + + +Checklist: + +- [ ] Add tests that demonstrate the correct behavior of the change. Tests should fail without the change. +- [ ] Add or update relevant docs, in the docs folder and in code. +- [ ] Add an entry in `CHANGES.rst` summarizing the change and linking to the issue. +- [ ] Add `.. versionchanged::` entries in any relevant code docs. +- [ ] Run `pre-commit` hooks and fix any issues. +- [ ] Run `pytest` and `tox`, no tests failed. 
diff --git a/.github/workflows/lock.yaml b/.github/workflows/lock.yaml new file mode 100644 index 0000000..b4f7633 --- /dev/null +++ b/.github/workflows/lock.yaml @@ -0,0 +1,15 @@ +name: 'Lock threads' + +on: + schedule: + - cron: '0 0 * * *' + +jobs: + lock: + runs-on: ubuntu-latest + steps: + - uses: dessant/lock-threads@v3 + with: + github-token: ${{ github.token }} + issue-inactive-days: 14 + pr-inactive-days: 14 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..d4441ff --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,55 @@ +name: Tests +on: + push: + branches: + - main + - '*.x' + paths-ignore: + - 'docs/**' + - '*.md' + - '*.rst' + pull_request: + branches: + - main + - '*.x' + paths-ignore: + - 'docs/**' + - '*.md' + - '*.rst' +jobs: + tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - {name: Linux, python: '3.10', os: ubuntu-latest, tox: py310} + - {name: Windows, python: '3.10', os: windows-latest, tox: py310} + - {name: Mac, python: '3.10', os: macos-latest, tox: py310} + - {name: '3.11-dev', python: '3.11-dev', os: ubuntu-latest, tox: py311} + - {name: '3.9', python: '3.9', os: ubuntu-latest, tox: py39} + - {name: '3.8', python: '3.8', os: ubuntu-latest, tox: py38} + - {name: '3.7', python: '3.7', os: ubuntu-latest, tox: py37} + - {name: 'PyPy', python: 'pypy-3.7', os: ubuntu-latest, tox: pypy37} + - {name: Typing, python: '3.10', os: ubuntu-latest, tox: typing} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + cache: 'pip' + cache-dependency-path: 'requirements/*.txt' + - name: update pip + run: | + pip install -U wheel + pip install -U setuptools + python -m pip install -U pip + - name: cache mypy + uses: actions/cache@v3.0.4 + with: + path: ./.mypy_cache + key: mypy|${{ matrix.python }}|${{ hashFiles('setup.cfg') }} + if: matrix.tox == 'typing' + 
- run: pip install tox + - run: tox -e ${{ matrix.tox }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36f3670 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +MANIFEST +build +dist +/src/Werkzeug.egg-info +*.pyc +*.pyo +env +.DS_Store +docs/_build +bench/a +bench/b +.tox +.coverage +.coverage.* +coverage_out +htmlcov +.cache +.xprocess +.hypothesis +test_uwsgi_failed +.idea +.pytest_cache/ +venv/ +.vscode +.mypy_cache/ +.dmypy.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..55f8c13 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,44 @@ +ci: + autoupdate_branch: "2.2.x" + autoupdate_schedule: monthly +repos: + - repo: https://github.com/asottile/pyupgrade + rev: v2.37.3 + hooks: + - id: pyupgrade + args: ["--py37-plus"] + - repo: https://github.com/asottile/reorder_python_imports + rev: v3.8.2 + hooks: + - id: reorder-python-imports + name: Reorder Python imports (src, tests) + files: "^(?!examples/)" + args: ["--application-directories", ".:src"] + additional_dependencies: ["setuptools>60.9"] + - id: reorder-python-imports + name: Reorder Python imports (examples) + files: "^examples/" + args: ["--application-directories", "examples"] + additional_dependencies: ["setuptools>60.9"] + - repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black + - repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + additional_dependencies: + - flake8-bugbear + - flake8-implicit-str-concat + - repo: https://github.com/peterdemin/pip-compile-multi + rev: v2.4.6 + hooks: + - id: pip-compile-multi-verify + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: fix-byte-order-marker + - id: trailing-whitespace + - id: end-of-file-fixer + exclude: "^tests/.*.http$" diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..346900b --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,13 @@ +version: 2 +build: 
+ os: ubuntu-20.04 + tools: + python: "3.10" +python: + install: + - requirements: requirements/docs.txt + - method: pip + path: . +sphinx: + builder: dirhtml + fail_on_warning: true diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..18e68af --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,2259 @@ +.. currentmodule:: werkzeug + +Version 2.2.2 +------------- + +Released 2022-08-08 + +- Fix router to restore the 2.1 ``strict_slashes == False`` behaviour + whereby leaf-requests match branch rules and vice + versa. :pr:`2489` +- Fix router to identify invalid rules rather than hang parsing them, + and to correctly parse ``/`` within converter arguments. :pr:`2489` +- Update subpackage imports in :mod:`werkzeug.routing` to use the + ``import as`` syntax for explicitly re-exporting public attributes. + :pr:`2493` +- Parsing of some invalid header characters is more robust. :pr:`2494` +- When starting the development server, a warning not to use it in a + production deployment is always shown. :issue:`2480` +- ``LocalProxy.__wrapped__`` is always set to the wrapped object when + the proxy is unbound, fixing an issue in doctest that would cause it + to fail. :issue:`2485` +- Address one ``ResourceWarning`` related to the socket used by + ``run_simple``. :issue:`2421` + + +Version 2.2.1 +------------- + +Released 2022-07-27 + +- Fix router so that ``/path/`` will match a rule ``/path`` if strict + slashes mode is disabled for the rule. :issue:`2467` +- Fix router so that partial part matches are not allowed + i.e. ``/2df`` does not match ``/<int>``. :pr:`2470` +- Fix router static part weighting, so that simpler routes are matched + before more complex ones. :issue:`2471` +- Restore ``ValidationError`` to be importable from + ``werkzeug.routing``. :issue:`2465` + + +Version 2.2.0 +------------- + +Released 2022-07-23 + +- Deprecated ``get_script_name``, ``get_query_string``, + ``peek_path_info``, ``pop_path_info``, and + ``extract_path_info``. 
:pr:`2461` +- Remove previously deprecated code. :pr:`2461` +- Add MarkupSafe as a dependency and use it to escape values when + rendering HTML. :issue:`2419` +- Added the ``werkzeug.debug.preserve_context`` mechanism for + restoring context-local data for a request when running code in the + debug console. :pr:`2439` +- Fix compatibility with Python 3.11 by ensuring that ``end_lineno`` + and ``end_col_offset`` are present on AST nodes. :issue:`2425` +- Add a new faster matching router based on a state + machine. :pr:`2433` +- Fix branch leaf path masking branch paths when strict-slashes is + disabled. :issue:`1074` +- Names within options headers are always converted to lowercase. This + matches :rfc:`6266` that the case is not relevant. :issue:`2442` +- ``AnyConverter`` validates the value passed for it when building + URLs. :issue:`2388` +- The debugger shows enhanced error locations in tracebacks in Python + 3.11. :issue:`2407` +- Added Sans-IO ``is_resource_modified`` and ``parse_cookie`` functions + based on WSGI versions. :issue:`2408` +- Added Sans-IO ``get_content_length`` function. :pr:`2415` +- Don't assume a mimetype for test responses. :issue:`2450` +- Type checking ``FileStorage`` accepts ``os.PathLike``. :pr:`2418` + + +Version 2.1.2 +------------- + +Released 2022-04-28 + +- The development server does not set ``Transfer-Encoding: chunked`` + for 1xx, 204, 304, and HEAD responses. :issue:`2375` +- Response HTML for exceptions and redirects starts with + ``<!doctype html>`` and ``<html lang=en>``. :issue:`2390` +- Fix ability to set some ``cache_control`` attributes to ``False``. + :issue:`2379` +- Disable ``keep-alive`` connections in the development server, which + are not supported sufficiently by Python's ``http.server``. + :issue:`2397` + + +Version 2.1.1 +------------- + +Released 2022-04-01 + +- ``ResponseCacheControl.s_maxage`` converts its value to an int, like + ``max_age``. 
:issue:`2364` + + +Version 2.1.0 +------------- + +Released 2022-03-28 + +- Drop support for Python 3.6. :pr:`2277` +- Using gevent or eventlet requires greenlet>=1.0 or PyPy>=7.3.7. + ``werkzeug.locals`` and ``contextvars`` will not work correctly with + older versions. :pr:`2278` +- Remove previously deprecated code. :pr:`2276` + + - Remove the non-standard ``shutdown`` function from the WSGI + environ when running the development server. See the docs for + alternatives. + - Request and response mixins have all been merged into the + ``Request`` and ``Response`` classes. + - The user agent parser and the ``useragents`` module is removed. + The ``user_agent`` module provides an interface that can be + subclassed to add a parser, such as ua-parser. By default it + only stores the whole string. + - The test client returns ``TestResponse`` instances and can no + longer be treated as a tuple. All data is available as + properties on the response. + - Remove ``locals.get_ident`` and related thread-local code from + ``locals``, it no longer makes sense when moving to a + contextvars-based implementation. + - Remove the ``python -m werkzeug.serving`` CLI. + - The ``has_key`` method on some mapping datastructures; use + ``key in data`` instead. + - ``Request.disable_data_descriptor`` is removed, pass + ``shallow=True`` instead. + - Remove the ``no_etag`` parameter from ``Response.freeze()``. + - Remove the ``HTTPException.wrap`` class method. + - Remove the ``cookie_date`` function. Use ``http_date`` instead. + - Remove the ``pbkdf2_hex``, ``pbkdf2_bin``, and ``safe_str_cmp`` + functions. Use equivalents in ``hashlib`` and ``hmac`` modules + instead. + - Remove the ``Href`` class. + - Remove the ``HTMLBuilder`` class. + - Remove the ``invalidate_cached_property`` function. Use + ``del obj.attr`` instead. + - Remove ``bind_arguments`` and ``validate_arguments``. Use + :meth:`Signature.bind` and :func:`inspect.signature` instead. 
+ - Remove ``detect_utf_encoding``, it's built-in to ``json.loads``. + - Remove ``format_string``, use :class:`string.Template` instead. + - Remove ``escape`` and ``unescape``. Use MarkupSafe instead. + +- The ``multiple`` parameter of ``parse_options_header`` is + deprecated. :pr:`2357` +- Rely on :pep:`538` and :pep:`540` to handle decoding file names + with the correct filesystem encoding. The ``filesystem`` module is + removed. :issue:`1760` +- Default values passed to ``Headers`` are validated the same way + values added later are. :issue:`1608` +- Setting ``CacheControl`` int properties, such as ``max_age``, will + convert the value to an int. :issue:`2230` +- Always use ``socket.fromfd`` when restarting the dev server. + :pr:`2287` +- When passing a dict of URL values to ``Map.build``, list values do + not filter out ``None`` or collapse to a single value. Passing a + ``MultiDict`` does collapse single items. This undoes a previous + change that made it difficult to pass a list, or ``None`` values in + a list, to custom URL converters. :issue:`2249` +- ``run_simple`` shows instructions for dealing with "address already + in use" errors, including extra instructions for macOS. :pr:`2321` +- Extend list of characters considered always safe in URLs based on + :rfc:`3986`. :issue:`2319` +- Optimize the stat reloader to avoid watching unnecessary files in + more cases. The watchdog reloader is still recommended for + performance and accuracy. :issue:`2141` +- The development server uses ``Transfer-Encoding: chunked`` for + streaming responses when it is configured for HTTP/1.1. + :issue:`2090, 1327`, :pr:`2091` +- The development server uses HTTP/1.1, which enables keep-alive + connections and chunked streaming responses, when ``threaded`` or + ``processes`` is enabled. :pr:`2323` +- ``cached_property`` works for classes with ``__slots__`` if a + corresponding ``_cache_{name}`` slot is added. 
:pr:`2332` +- Refactor the debugger traceback formatter to use Python's built-in + ``traceback`` module as much as possible. :issue:`1753` +- The ``TestResponse.text`` property is a shortcut for + ``r.get_data(as_text=True)``, for convenient testing against text + instead of bytes. :pr:`2337` +- ``safe_join`` ensures that the path remains relative if the trusted + directory is the empty string. :pr:`2349` +- Percent-encoded newlines (``%0a``), which are decoded by WSGI + servers, are considered when routing instead of terminating the + match early. :pr:`2350` +- The test client doesn't set duplicate headers for ``CONTENT_LENGTH`` + and ``CONTENT_TYPE``. :pr:`2348` +- ``append_slash_redirect`` handles ``PATH_INFO`` with internal + slashes. :issue:`1972`, :pr:`2338` +- The default status code for ``append_slash_redirect`` is 308 instead + of 301. This preserves the request body, and matches a previous + change to ``strict_slashes`` in routing. :issue:`2351` +- Fix ``ValueError: I/O operation on closed file.`` with the test + client when following more than one redirect. :issue:`2353` +- ``Response.autocorrect_location_header`` is disabled by default. + The ``Location`` header URL will remain relative, and exclude the + scheme and domain, by default. :issue:`2352` +- ``Request.get_json()`` will raise a 400 ``BadRequest`` error if the + ``Content-Type`` header is not ``application/json``. This makes a + very common source of confusion more visible. :issue:`2339` + + +Version 2.0.3 +------------- + +Released 2022-02-07 + +- ``ProxyFix`` supports IPv6 addresses. :issue:`2262` +- Type annotation for ``Response.make_conditional``, + ``HTTPException.get_response``, and ``Map.bind_to_environ`` accepts + ``Request`` in addition to ``WSGIEnvironment`` for the first + parameter. :pr:`2290` +- Fix type annotation for ``Request.user_agent_class``. 
:issue:`2273` +- Accessing ``LocalProxy.__class__`` and ``__doc__`` on an unbound + proxy returns the fallback value instead of a method object. + :issue:`2188` +- Redirects with the test client set ``RAW_URI`` and ``REQUEST_URI`` + correctly. :issue:`2151` + + +Version 2.0.2 +------------- + +Released 2021-10-05 + +- Handle multiple tokens in ``Connection`` header when routing + WebSocket requests. :issue:`2131` +- Set the debugger pin cookie secure flag when on https. :pr:`2150` +- Fix type annotation for ``MultiDict.update`` to accept iterable + values :pr:`2142` +- Prevent double encoding of redirect URL when ``merge_slash=True`` + for ``Rule.match``. :issue:`2157` +- ``CombinedMultiDict.to_dict`` with ``flat=False`` considers all + component dicts when building value lists. :issue:`2189` +- ``send_file`` only sets a detected ``Content-Encoding`` if + ``as_attachment`` is disabled to avoid browsers saving + decompressed ``.tar.gz`` files. :issue:`2149` +- Fix type annotations for ``TypeConversionDict.get`` to not return an + ``Optional`` value if both ``default`` and ``type`` are not + ``None``. :issue:`2169` +- Fix type annotation for routing rule factories to accept + ``Iterable[RuleFactory]`` instead of ``Iterable[Rule]`` for the + ``rules`` parameter. :issue:`2183` +- Add missing type annotation for ``FileStorage.__getattr__`` + :issue:`2155` +- The debugger pin cookie is set with ``SameSite`` set to ``Strict`` + instead of ``None`` to be compatible with modern browser security. + :issue:`2156` +- Type annotations use ``IO[bytes]`` and ``IO[str]`` instead of + ``BinaryIO`` and ``TextIO`` for wider type compatibility. + :issue:`2130` +- Ad-hoc TLS certs are generated with SAN matching CN. :issue:`2158` +- Fix memory usage for locals when using Python 3.6 or pre 0.4.17 + greenlet versions. :pr:`2212` +- Fix type annotation in ``CallbackDict``, because it is not + utilizing a bound TypeVar. :issue:`2235` +- Fix setting CSP header options on the response. 
:pr:`2237` +- Fix an issue with the interactive debugger where lines would + not expand on click for very long tracebacks. :pr:`2239` +- The interactive debugger handles displaying an exception that does + not have a traceback, such as from ``ProcessPoolExecutor``. + :issue:`2217` + + +Version 2.0.1 +------------- + +Released 2021-05-17 + +- Fix type annotation for ``send_file`` ``max_age`` callable. Don't + pass ``pathlib.Path`` to ``max_age``. :issue:`2119` +- Mark top-level names as exported so type checking understands + imports in user projects. :issue:`2122` +- Fix some types that weren't available in Python 3.6.0. :issue:`2123` +- ``cached_property`` is generic over its return type, properties + decorated with it report the correct type. :issue:`2113` +- Fix multipart parsing bug when boundary contains special regex + characters. :issue:`2125` +- Type checking understands that calling ``headers.get`` with a string + default will always return a string. :issue:`2128` +- If ``HTTPException.description`` is not a string, + ``get_description`` will convert it to a string. :issue:`2115` + + +Version 2.0.0 +------------- + +Released 2021-05-11 + +- Drop support for Python 2 and 3.5. :pr:`1693` +- Deprecate :func:`utils.format_string`, use :class:`string.Template` + instead. :issue:`1756` +- Deprecate :func:`utils.bind_arguments` and + :func:`utils.validate_arguments`, use :meth:`Signature.bind` and + :func:`inspect.signature` instead. :issue:`1757` +- Deprecate :class:`utils.HTMLBuilder`. :issue:`1761` +- Deprecate :func:`utils.escape` and :func:`utils.unescape`, use + MarkupSafe instead. :issue:`1758` +- Deprecate the undocumented ``python -m werkzeug.serving`` CLI. + :issue:`1834` +- Deprecate the ``environ["werkzeug.server.shutdown"]`` function + that is available when running the development server. :issue:`1752` +- Deprecate the ``useragents`` module and the built-in user agent + parser. 
Use a dedicated parser library instead by subclassing + ``user_agent.UserAgent`` and setting ``Request.user_agent_class``. + :issue:`2078` +- Remove the unused, internal ``posixemulation`` module. :issue:`1759` +- All ``datetime`` values are timezone-aware with + ``tzinfo=timezone.utc``. This applies to anything using + ``http.parse_date``: ``Request.date``, ``.if_modified_since``, + ``.if_unmodified_since``; ``Response.date``, ``.expires``, + ``.last_modified``, ``.retry_after``; ``parse_if_range_header``, and + ``IfRange.date``. When comparing values, the other values must also + be aware, or these values must be made naive. When passing + parameters or setting attributes, naive values are still assumed to + be in UTC. :pr:`2040` +- Merge all request and response wrapper mixin code into single + ``Request`` and ``Response`` classes. Using the mixin classes is no + longer necessary and will show a deprecation warning. Checking + ``isinstance`` or ``issubclass`` against ``BaseRequest`` and + ``BaseResponse`` will show a deprecation warning and check against + ``Request`` or ``Response`` instead. :issue:`1963` +- JSON support no longer uses simplejson if it's installed. To use + another JSON module, override ``Request.json_module`` and + ``Response.json_module``. :pr:`1766` +- ``Response.get_json()`` no longer caches the result, and the + ``cache`` parameter is removed. :issue:`1698` +- ``Response.freeze()`` generates an ``ETag`` header if one is not + set. The ``no_etag`` parameter (which usually wasn't visible + anyway) is no longer used. :issue:`1963` +- Add a ``url_scheme`` argument to :meth:`~routing.MapAdapter.build` + to override the bound scheme. :pr:`1721` +- Passing an empty list as a query string parameter to ``build()`` + won't append an unnecessary ``?``. Also drop any number of ``None`` + items in a list. 
:issue:`1992` +- When passing a ``Headers`` object to a test client method or + ``EnvironBuilder``, multiple values for a key are joined into one + comma separated value. This matches the HTTP spec on multi-value + headers. :issue:`1655` +- Setting ``Response.status`` and ``status_code`` uses identical + parsing and error checking. :issue:`1658`, :pr:`1728` +- ``MethodNotAllowed`` and ``RequestedRangeNotSatisfiable`` take a + ``response`` kwarg, consistent with other HTTP errors. :pr:`1748` +- The response generated by :exc:`~exceptions.Unauthorized` produces + one ``WWW-Authenticate`` header per value in ``www_authenticate``, + rather than joining them into a single value, to improve + interoperability with browsers and other clients. :pr:`1755` +- If ``parse_authorization_header`` can't decode the header value, it + returns ``None`` instead of raising a ``UnicodeDecodeError``. + :issue:`1816` +- The debugger no longer uses jQuery. :issue:`1807` +- The test client includes the query string in ``REQUEST_URI`` and + ``RAW_URI``. :issue:`1781` +- Switch the parameter order of ``default_stream_factory`` to match + the order used when calling it. :pr:`1085` +- Add ``send_file`` function to generate a response that serves a + file. Adapted from Flask's implementation. :issue:`265`, :pr:`1850` +- Add ``send_from_directory`` function to safely serve an untrusted + path within a trusted directory. Adapted from Flask's + implementation. :issue:`1880` +- ``send_file`` takes ``download_name``, which is passed even if + ``as_attachment=False`` by using ``Content-Disposition: inline``. + ``download_name`` replaces Flask's ``attachment_filename``. + :issue:`1869` +- ``send_file`` sets ``conditional=True`` and ``max_age=None`` by + default. ``Cache-Control`` is set to ``no-cache`` if ``max_age`` is + not set, otherwise ``public``. This tells browsers to validate + conditional requests instead of using a timed cache. + ``max_age=None`` replaces Flask's ``cache_timeout=43200``. 
+ :issue:`1882` +- ``send_file`` can be called with ``etag="string"`` to set a custom + ETag instead of generating one. ``etag`` replaces Flask's + ``add_etags``. :issue:`1868` +- ``send_file`` sets the ``Content-Encoding`` header if an encoding is + returned when guessing ``mimetype`` from ``download_name``. + :pr:`3896` +- Update the defaults used by ``generate_password_hash``. Increase + PBKDF2 iterations to 260000 from 150000. Increase salt length to 16 + from 8. Use ``secrets`` module to generate salt. :pr:`1935` +- The reloader doesn't crash if ``sys.stdin`` is somehow ``None``. + :pr:`1915` +- Add arguments to ``delete_cookie`` to match ``set_cookie`` and the + attributes modern browsers expect. :pr:`1889` +- ``utils.cookie_date`` is deprecated, use ``utils.http_date`` + instead. The value for ``Set-Cookie expires`` is no longer "-" + delimited. :pr:`2040` +- Use ``request.headers`` instead of ``request.environ`` to look up + header attributes. :pr:`1808` +- The test ``Client`` request methods (``client.get``, etc.) always + return an instance of ``TestResponse``. In addition to the normal + behavior of ``Response``, this class provides ``request`` with the + request that produced the response, and ``history`` to track + intermediate responses when ``follow_redirects`` is used. + :issue:`763, 1894` +- The test ``Client`` request methods takes an ``auth`` parameter to + add an ``Authorization`` header. It can be an ``Authorization`` + object or a ``(username, password)`` tuple for ``Basic`` auth. + :pr:`1809` +- Calling ``response.close()`` on a response from the test ``Client`` + will close the request input stream. This matches file behavior + and can prevent a ``ResourceWarning`` in some cases. :issue:`1785` +- ``EnvironBuilder.from_environ`` decodes values encoded for WSGI, to + avoid double encoding the new values. 
:pr:`1959` +- The default stat reloader will watch Python files under + non-system/virtualenv ``sys.path`` entries, which should contain + most user code. It will also watch all Python files under + directories given in ``extra_files``. :pr:`1945` +- The reloader ignores ``__pycache__`` directories again. :pr:`1945` +- ``run_simple`` takes ``exclude_patterns`` a list of ``fnmatch`` + patterns that will not be scanned by the reloader. :issue:`1333` +- Cookie names are no longer unquoted. This was against :rfc:`6265` + and potentially allowed setting ``__Secure`` prefixed cookies. + :pr:`1965` +- Fix some word matches for user agent platform when the word can be a + substring. :issue:`1923` +- The development server logs ignored SSL errors. :pr:`1967` +- Temporary files for form data are opened in ``rb+`` instead of + ``wb+`` mode for better compatibility with some libraries. + :issue:`1961` +- Use SHA-1 instead of MD5 for generating ETags and the debugger pin, + and in some tests. MD5 is not available in some environments, such + as FIPS 140. This may invalidate some caches since the ETag will be + different. :issue:`1897` +- Add ``Cross-Origin-Opener-Policy`` and + ``Cross-Origin-Embedder-Policy`` response header properties. + :pr:`2008` +- ``run_simple`` tries to show a valid IP address when binding to all + addresses, instead of ``0.0.0.0`` or ``::``. It also warns about not + running the development server in production in this case. + :issue:`1964` +- Colors in the development server log are displayed if Colorama is + installed on Windows. For all platforms, style support no longer + requires Click. :issue:`1832` +- A range request for an empty file (or other data with length 0) will + return a 200 response with the empty file instead of a 416 error. + :issue:`1937` +- New sans-IO base classes for ``Request`` and ``Response`` have been + extracted to contain all the behavior that is not WSGI or IO + dependent. 
These are not a public API, they are part of an ongoing + refactor to let ASGI frameworks use Werkzeug. :pr:`2005` +- Parsing ``multipart/form-data`` has been refactored to use sans-io + patterns. This should also make parsing forms with large binary file + uploads significantly faster. :issue:`1788, 875` +- ``LocalProxy`` matches the current Python data model special + methods, including all r-ops, in-place ops, and async. ``__class__`` + is proxied, so the proxy will look like the object in more cases, + including ``isinstance``. Use ``issubclass(type(obj), LocalProxy)`` + to check if an object is actually a proxy. :issue:`1754` +- ``Local`` uses ``ContextVar`` on Python 3.7+ instead of + ``threading.local``. :pr:`1778` +- ``request.values`` does not include ``form`` for GET requests (even + though GET bodies are undefined). This prevents bad caching proxies + from caching form data instead of query strings. :pr:`2037` +- The development server adds the underlying socket to ``environ`` as + ``werkzeug.socket``. This is non-standard and specific to the dev + server, other servers may expose this under their own key. It is + useful for handling a WebSocket upgrade request. :issue:`2052` +- URL matching assumes ``websocket=True`` mode for WebSocket upgrade + requests. :issue:`2052` +- Updated ``UserAgentParser`` to handle more cases. :issue:`1971` +- ``werkzeug.DechunkedInput.readinto`` will not read beyond the size of + the buffer. :issue:`2021` +- Fix connection reset when exceeding max content size. :pr:`2051` +- ``pbkdf2_hex``, ``pbkdf2_bin``, and ``safe_str_cmp`` are deprecated. + ``hashlib`` and ``hmac`` provide equivalents. :pr:`2083` +- ``invalidate_cached_property`` is deprecated. Use ``del obj.name`` + instead. :pr:`2084` +- ``Href`` is deprecated. Use ``werkzeug.routing`` instead. + :pr:`2085` +- ``Request.disable_data_descriptor`` is deprecated. Create the + request with ``shallow=True`` instead. :pr:`2085` +- ``HTTPException.wrap`` is deprecated. 
Create a subclass manually + instead. :pr:`2085` + + +Version 1.0.1 +------------- + +Released 2020-03-31 + +- Make the argument to ``RequestRedirect.get_response`` optional. + :issue:`1718` +- Only allow a single access control allow origin value. :pr:`1723` +- Fix crash when trying to parse a non-existent Content Security + Policy header. :pr:`1731` +- ``http_date`` zero fills years < 1000 to always output four digits. + :issue:`1739` +- Fix missing local variables in interactive debugger console. + :issue:`1746` +- Fix passing file-like objects like ``io.BytesIO`` to + ``FileStorage.save``. :issue:`1733` + + +Version 1.0.0 +------------- + +Released 2020-02-06 + +- Drop support for Python 3.4. (:issue:`1478`) +- Remove code that issued deprecation warnings in version 0.15. + (:issue:`1477`) +- Remove most top-level attributes provided by the ``werkzeug`` + module in favor of direct imports. For example, instead of + ``import werkzeug; werkzeug.url_quote``, do + ``from werkzeug.urls import url_quote``. Install version 0.16 first + to see deprecation warnings while upgrading. :issue:`2`, :pr:`1640` +- Added ``utils.invalidate_cached_property()`` to invalidate cached + properties. (:pr:`1474`) +- Directive keys for the ``Set-Cookie`` response header are not + ignored when parsing the ``Cookie`` request header. This allows + cookies with names such as "expires" and "version". (:issue:`1495`) +- Request cookies are parsed into a ``MultiDict`` to capture all + values for cookies with the same key. ``cookies[key]`` returns the + first value rather than the last. Use ``cookies.getlist(key)`` to + get all values. ``parse_cookie`` also defaults to a ``MultiDict``. + :issue:`1562`, :pr:`1458` +- Add ``charset=utf-8`` to an HTTP exception response's + ``CONTENT_TYPE`` header. (:pr:`1526`) +- The interactive debugger handles outer variables in nested scopes + such as lambdas and comprehensions. 
:issue:`913`, :issue:`1037`, + :pr:`1532` +- The user agent for Opera 60 on Mac is correctly reported as + "opera" instead of "chrome". :issue:`1556` +- The platform for Crosswalk on Android is correctly reported as + "android" instead of "chromeos". (:pr:`1572`) +- Issue a warning when the current server name does not match the + configured server name. :issue:`760` +- A configured server name with the default port for a scheme will + match the current server name without the port if the current scheme + matches. :pr:`1584` +- :exc:`~exceptions.InternalServerError` has an ``original_exception`` + attribute that frameworks can use to track the original cause of the + error. :pr:`1590` +- Headers are tested for equality independent of the header key case, + such that ``X-Foo`` is the same as ``x-foo``. :pr:`1605` +- :meth:`http.dump_cookie` accepts ``'None'`` as a value for + ``samesite``. :issue:`1549` +- :meth:`~test.Client.set_cookie` accepts a ``samesite`` argument. + :pr:`1705` +- Support the Content Security Policy header through the + ``Response.content_security_policy`` data structure. :pr:`1617` +- ``LanguageAccept`` will fall back to matching "en" for "en-US" or + "en-US" for "en" to better support clients or translations that + only match at the primary language tag. :issue:`450`, :pr:`1507` +- ``MIMEAccept`` uses MIME parameters for specificity when matching. + :issue:`458`, :pr:`1574` +- If the development server is started with an ``SSLContext`` + configured to verify client certificates, the certificate in PEM + format will be available as ``environ["SSL_CLIENT_CERT"]``. + :pr:`1469` +- ``is_resource_modified`` will run for methods other than ``GET`` and + ``HEAD``, rather than always returning ``False``. :issue:`409` +- ``SharedDataMiddleware`` returns 404 rather than 500 when trying to + access a directory instead of a file with the package loader. The + dependency on setuptools and pkg_resources is removed. 
+ :issue:`1599` +- Add a ``response.cache_control.immutable`` flag. Keep in mind that + browser support for this ``Cache-Control`` header option is still + experimental and may not be implemented. :issue:`1185` +- Optional request log highlighting with the development server is + handled by Click instead of termcolor. :issue:`1235` +- Optional ad-hoc TLS support for the development server is handled + by cryptography instead of pyOpenSSL. :pr:`1555` +- ``FileStorage.save()`` supports ``pathlib`` and :pep:`519` + ``PathLike`` objects. :issue:`1653` +- The debugger security pin is unique in containers managed by Podman. + :issue:`1661` +- Building a URL when ``host_matching`` is enabled takes into account + the current host when there are duplicate endpoints with different + hosts. :issue:`488` +- The ``429 TooManyRequests`` and ``503 ServiceUnavailable`` HTTP + exceptions take a ``retry_after`` parameter to set the + ``Retry-After`` header. :issue:`1657` +- ``Map`` and ``Rule`` have a ``merge_slashes`` option to collapse + multiple slashes into one, similar to how many HTTP servers behave. + This is enabled by default. :pr:`1286, 1694` +- Add HTTP 103, 208, 306, 425, 506, 508, and 511 to the list of status + codes. :pr:`1678` +- Add ``update``, ``setlist``, and ``setlistdefault`` methods to the + ``Headers`` data structure. The ``extend`` method can take ``MultiDict`` + and kwargs. :pr:`1687, 1697` +- The development server accepts paths that start with two slashes, + rather than stripping off the first path segment. :issue:`491` +- Add access control (Cross-Origin Resource Sharing, CORS) header + properties to the ``Request`` and ``Response`` wrappers. :pr:`1699` +- ``Accept`` values are no longer ordered alphabetically for equal + quality tags. Instead the initial order is preserved. :issue:`1686` +- Added ``Map.lock_class`` attribute for alternative + implementations. 
:pr:`1702` +- Support matching and building WebSocket rules in the routing system, + for use by async frameworks. :pr:`1709` +- Range requests that span an entire file respond with 206 instead of + 200, to be more compliant with :rfc:`7233`. This may help serving + media to older browsers. :issue:`410, 1704` +- The :class:`~middleware.shared_data.SharedDataMiddleware` default + ``fallback_mimetype`` is ``application/octet-stream``. If a filename + looks like a text mimetype, the ``utf-8`` charset is added to it. + This matches the behavior of :class:`~wrappers.BaseResponse` and + Flask's ``send_file()``. :issue:`1689` + + +Version 0.16.1 +-------------- + +Released 2020-01-27 + +- Fix import location in deprecation messages for subpackages. + :issue:`1663` +- Fix an SSL error on Python 3.5 when the dev server responds with no + content. :issue:`1659` + + +Version 0.16.0 +-------------- + +Released 2019-09-19 + +- Deprecate most top-level attributes provided by the ``werkzeug`` + module in favor of direct imports. The deprecated imports will be + removed in version 1.0. + + For example, instead of ``import werkzeug; werkzeug.url_quote``, do + ``from werkzeug.urls import url_quote``. A deprecation warning will + show the correct import to use. ``werkzeug.exceptions`` and + ``werkzeug.routing`` should also be imported instead of accessed, + but for technical reasons can't show a warning. + + :issue:`2`, :pr:`1640` + + +Version 0.15.6 +-------------- + +Released 2019-09-04 + +- Work around a bug in pip that caused the reloader to fail on + Windows when the script was an entry point. This fixes the issue + with Flask's `flask run` command failing with "No module named + Scripts\flask". :issue:`1614` +- ``ProxyFix`` trusts the ``X-Forwarded-Proto`` header by default. + :issue:`1630` +- The deprecated ``num_proxies`` argument to ``ProxyFix`` sets + ``x_for``, ``x_proto``, and ``x_host`` to match 0.14 behavior. 
This + is intended to make intermediate upgrades less disruptive, but the + argument will still be removed in 1.0. :issue:`1630` + + +Version 0.15.5 +-------------- + +Released 2019-07-17 + +- Fix a ``TypeError`` due to changes to ``ast.Module`` in Python 3.8. + :issue:`1551` +- Fix a C assertion failure in debug builds of some Python 2.7 + releases. :issue:`1553` +- :class:`~exceptions.BadRequestKeyError` adds the ``KeyError`` + message to the description if ``e.show_exception`` is set to + ``True``. This is a more secure default than the original 0.15.0 + behavior and makes it easier to control without losing information. + :pr:`1592` +- Upgrade the debugger to jQuery 3.4.1. :issue:`1581` +- Work around an issue in some external debuggers that caused the + reloader to fail. :issue:`1607` +- Work around an issue where the reloader couldn't introspect a + setuptools script installed as an egg. :issue:`1600` +- The reloader will use ``sys.executable`` even if the script is + marked executable, reverting a behavior intended for NixOS + introduced in 0.15. The reloader should no longer cause + ``OSError: [Errno 8] Exec format error``. :issue:`1482`, + :issue:`1580` +- ``SharedDataMiddleware`` safely handles paths with Windows drive + names. :issue:`1589` + + +Version 0.15.4 +-------------- + +Released 2019-05-14 + +- Fix a ``SyntaxError`` on Python 2.7.5. (:issue:`1544`) + + +Version 0.15.3 +-------------- + +Released 2019-05-14 + +- Properly handle multi-line header folding in development server in + Python 2.7. (:issue:`1080`) +- Restore the ``response`` argument to :exc:`~exceptions.Unauthorized`. + (:pr:`1527`) +- :exc:`~exceptions.Unauthorized` doesn't add the ``WWW-Authenticate`` + header if ``www_authenticate`` is not given. (:issue:`1516`) +- The default URL converter correctly encodes bytes to string rather + than representing them with ``b''``. 
(:issue:`1502`) +- Fix the filename format string in + :class:`~middleware.profiler.ProfilerMiddleware` to correctly handle + float values. (:issue:`1511`) +- Update :class:`~middleware.lint.LintMiddleware` to work on Python 3. + (:issue:`1510`) +- The debugger detects cycles in chained exceptions and does not time + out in that case. (:issue:`1536`) +- When running the development server in Docker, the debugger security + pin is now unique per container. + + +Version 0.15.2 +-------------- + +Released 2019-04-02 + +- ``Rule`` code generation uses a filename that coverage will ignore. + The previous value, "generated", was causing coverage to fail. + (:issue:`1487`) +- The test client removes the cookie header if there are no persisted + cookies. This fixes an issue introduced in 0.15.0 where the cookies + from the original request were used for redirects, causing functions + such as logout to fail. (:issue:`1491`) +- The test client copies the environ before passing it to the app, to + prevent in-place modifications from affecting redirect requests. + (:issue:`1498`) +- The ``"werkzeug"`` logger only adds a handler if there is no handler + configured for its level in the logging chain. This avoids double + logging if other code configures logging first. (:issue:`1492`) + + +Version 0.15.1 +-------------- + +Released 2019-03-21 + +- :exc:`~exceptions.Unauthorized` takes ``description`` as the first + argument, restoring previous behavior. The new ``www_authenticate`` + argument is listed second. (:issue:`1483`) + + +Version 0.15.0 +-------------- + +Released 2019-03-19 + +- Building URLs is ~7x faster. Each :class:`~routing.Rule` compiles + an optimized function for building itself. (:pr:`1281`) +- :meth:`MapAdapter.build() ` can be passed + a :class:`~datastructures.MultiDict` to represent multiple values + for a key. It already did this when passing a dict with a list + value. (:pr:`724`) +- ``path_info`` defaults to ``'/'`` for + :meth:`Map.bind() `. 
(:issue:`740`, :pr:`768`, + :pr:`1316`) +- Change ``RequestRedirect`` code from 301 to 308, preserving the verb + and request body (form data) during redirect. (:pr:`1342`) +- ``int`` and ``float`` converters in URL rules will handle negative + values if passed the ``signed=True`` parameter. For example, + ``/jump/``. (:pr:`1355`) +- ``Location`` autocorrection in :func:`Response.get_wsgi_headers() + ` is relative to the current + path rather than the root path. (:issue:`693`, :pr:`718`, + :pr:`1315`) +- 412 responses once again include entity headers and an error message + in the body. They were originally omitted when implementing + ``If-Match`` (:pr:`1233`), but the spec doesn't seem to disallow it. + (:issue:`1231`, :pr:`1255`) +- The Content-Length header is removed for 1xx and 204 responses. This + fixes a previous change where no body would be sent, but the header + would still be present. The new behavior matches RFC 7230. + (:pr:`1294`) +- :class:`~exceptions.Unauthorized` takes a ``www_authenticate`` + parameter to set the ``WWW-Authenticate`` header for the response, + which is technically required for a valid 401 response. + (:issue:`772`, :pr:`795`) +- Add support for status code 424 :exc:`~exceptions.FailedDependency`. + (:pr:`1358`) +- :func:`http.parse_cookie` ignores empty segments rather than + producing a cookie with no key or value. (:issue:`1245`, :pr:`1301`) +- :func:`~http.parse_authorization_header` (and + :class:`~datastructures.Authorization`, + :attr:`~wrappers.Request.authorization`) treats the authorization + header as UTF-8. On Python 2, basic auth username and password are + ``unicode``. (:pr:`1325`) +- :func:`~http.parse_options_header` understands :rfc:`2231` parameter + continuations. (:pr:`1417`) +- :func:`~urls.uri_to_iri` does not unquote ASCII characters in the + unreserved class, such as space, and leaves invalid bytes quoted + when decoding. :func:`~urls.iri_to_uri` does not quote reserved + characters. 
See :rfc:`3987` for these character classes. + (:pr:`1433`) +- ``get_content_type`` appends a charset for any mimetype that ends + with ``+xml``, not just those that start with ``application/``. + Known text types such as ``application/javascript`` are also given + charsets. (:pr:`1439`) +- Clean up ``werkzeug.security`` module, remove outdated hashlib + support. (:pr:`1282`) +- In :func:`~security.generate_password_hash`, PBKDF2 uses 150000 + iterations by default, increased from 50000. (:pr:`1377`) +- :class:`~wsgi.ClosingIterator` calls ``close`` on the wrapped + *iterable*, not the internal iterator. This doesn't affect objects + where ``__iter__`` returned ``self``. For other objects, the method + was not called before. (:issue:`1259`, :pr:`1260`) +- Bytes may be used as keys in :class:`~datastructures.Headers`, they + will be decoded as Latin-1 like values are. (:pr:`1346`) +- :class:`~datastructures.Range` validates that list of range tuples + passed to it would produce a valid ``Range`` header. (:pr:`1412`) +- :class:`~datastructures.FileStorage` looks up attributes on + ``stream._file`` if they don't exist on ``stream``, working around + an issue where :func:`tempfile.SpooledTemporaryFile` didn't + implement all of :class:`io.IOBase`. See + https://github.com/python/cpython/pull/3249. (:pr:`1409`) +- :class:`CombinedMultiDict.copy() ` + returns a shallow mutable copy as a + :class:`~datastructures.MultiDict`. The copy no longer reflects + changes to the combined dicts, but is more generally useful. + (:pr:`1420`) +- The version of jQuery used by the debugger is updated to 3.3.1. + (:pr:`1390`) +- The debugger correctly renders long ``markupsafe.Markup`` instances. + (:pr:`1393`) +- The debugger can serve resources when Werkzeug is installed as a + zip file. ``DebuggedApplication.get_resource`` uses + ``pkgutil.get_data``. (:pr:`1401`) +- The debugger and server log support Python 3's chained exceptions. 
+ (:pr:`1396`) +- The interactive debugger highlights frames that come from user code + to make them easy to pick out in a long stack trace. Note that if an + env was created with virtualenv instead of venv, the debugger may + incorrectly classify some frames. (:pr:`1421`) +- Clicking the error message at the top of the interactive debugger + will jump down to the bottom of the traceback. (:pr:`1422`) +- When generating a PIN, the debugger will ignore a ``KeyError`` + raised when the current UID doesn't have an associated username, + which can happen in Docker. (:issue:`1471`) +- :class:`~exceptions.BadRequestKeyError` adds the ``KeyError`` + message to the description, making it clearer what caused the 400 + error. Frameworks like Flask can omit this information in production + by setting ``e.args = ()``. (:pr:`1395`) +- If a nested ``ImportError`` occurs from :func:`~utils.import_string` + the traceback mentions the nested import. Removes an untested code + path for handling "modules not yet set up by the parent." + (:pr:`735`) +- Triggering a reload while using a tool such as PDB no longer hides + input. (:pr:`1318`) +- The reloader will not prepend the Python executable to the command + line if the Python file is marked executable. This allows the + reloader to work on NixOS. (:pr:`1242`) +- Fix an issue where ``sys.path`` would change between reloads when + running with ``python -m app``. The reloader can detect that a + module was run with "-m" and reconstructs that instead of the file + path in ``sys.argv`` when reloading. (:pr:`1416`) +- The dev server can bind to a Unix socket by passing a hostname like + ``unix://app.socket``. (:pr:`209`, :pr:`1019`) +- Server uses ``IPPROTO_TCP`` constant instead of ``SOL_TCP`` for + Jython compatibility. (:pr:`1375`) +- When using an adhoc SSL cert with :func:`~serving.run_simple`, the + cert is shown as self-signed rather than signed by an invalid + authority. 
(:pr:`1430`) +- The development server logs the unquoted IRI rather than the raw + request line, to make it easier to work with Unicode in request + paths during development. (:issue:`1115`) +- The development server recognizes ``ConnectionError`` on Python 3 to + silence client disconnects, and does not silence other ``OSErrors`` + that may have been raised inside the application. (:pr:`1418`) +- The environ keys ``REQUEST_URI`` and ``RAW_URI`` contain the raw + path before it was percent-decoded. This is non-standard, but many + WSGI servers add them. Middleware could replace ``PATH_INFO`` with + this to route based on the raw value. (:pr:`1419`) +- :class:`~test.EnvironBuilder` doesn't set ``CONTENT_TYPE`` or + ``CONTENT_LENGTH`` in the environ if they aren't set. Previously + these used default values if they weren't set. Now it's possible to + distinguish between empty and unset values. (:pr:`1308`) +- The test client raises a ``ValueError`` if a query string argument + would overwrite a query string in the path. (:pr:`1338`) +- :class:`test.EnvironBuilder` and :class:`test.Client` take a + ``json`` argument instead of manually passing ``data`` and + ``content_type``. This is serialized using the + :meth:`test.EnvironBuilder.json_dumps` method. (:pr:`1404`) +- :class:`test.Client` redirect handling is rewritten. (:pr:`1402`) + + - The redirect environ is copied from the initial request environ. + - Script root and path are correctly distinguished when + redirecting to a path under the root. + - The HEAD method is not changed to GET. + - 307 and 308 codes preserve the method and body. All others + ignore the body and related headers. + - Headers are passed to the new request for all codes, following + what browsers do. + - :class:`test.EnvironBuilder` sets the content type and length + headers in addition to the WSGI keys when detecting them from + the data. 
+ - Intermediate response bodies are iterated over even when + ``buffered=False`` to ensure iterator middleware can run cleanup + code safely. Only the last response is not buffered. (:pr:`988`) + +- :class:`~test.EnvironBuilder`, :class:`~datastructures.FileStorage`, + and :func:`wsgi.get_input_stream` no longer share a global + ``_empty_stream`` instance. This improves test isolation by + preventing cases where closing the stream in one request would + affect other usages. (:pr:`1340`) +- The default ``SecureCookie.serialization_method`` will change from + :mod:`pickle` to :mod:`json` in 1.0. To upgrade existing tokens, + override :meth:`~contrib.securecookie.SecureCookie.unquote` to try + ``pickle`` if ``json`` fails. (:pr:`1413`) +- ``CGIRootFix`` no longer modifies ``PATH_INFO`` for very old + versions of Lighttpd. ``LighttpdCGIRootFix`` was renamed to + ``CGIRootFix`` in 0.9. Both are deprecated and will be removed in + version 1.0. (:pr:`1141`) +- :class:`werkzeug.wrappers.json.JSONMixin` has been replaced with + Flask's implementation. Check the docs for the full API. + (:pr:`1445`) +- The contrib modules are deprecated and will either be moved into + ``werkzeug`` core or removed completely in version 1.0. Some modules + that already issued deprecation warnings have been removed. Be sure + to run or test your code with + ``python -W default::DeprecationWarning`` to catch any deprecated + code you're using. (:issue:`4`) + + - ``LintMiddleware`` has moved to :mod:`werkzeug.middleware.lint`. + - ``ProfilerMiddleware`` has moved to + :mod:`werkzeug.middleware.profiler`. + - ``ProxyFix`` has moved to :mod:`werkzeug.middleware.proxy_fix`. + - ``JSONRequestMixin`` has moved to :mod:`werkzeug.wrappers.json`. + - ``cache`` has been extracted into a separate project, + `cachelib <https://github.com/pallets/cachelib>`_. The version + in Werkzeug is deprecated. + - ``securecookie`` and ``sessions`` have been extracted into a + separate project, + `secure-cookie <https://github.com/pallets/secure-cookie>`_. The + version in Werkzeug is deprecated. 
+ - Everything in ``fixers``, except ``ProxyFix``, is deprecated. + - Everything in ``wrappers``, except ``JSONMixin``, is deprecated. + - ``atom`` is deprecated. This did not fit in with the rest of + Werkzeug, and is better served by a dedicated library in the + community. + - ``jsrouting`` is removed. Set URLs when rendering templates + or JSON responses instead. + - ``limiter`` is removed. Its specific use is handled by Werkzeug + directly, but stream limiting is better handled by the WSGI + server in general. + - ``testtools`` is removed. It did not offer significant benefit + over the default test client. + - ``iterio`` is deprecated. + +- :func:`wsgi.get_host` no longer looks at ``X-Forwarded-For``. Use + :class:`~middleware.proxy_fix.ProxyFix` to handle that. + (:issue:`609`, :pr:`1303`) +- :class:`~middleware.proxy_fix.ProxyFix` is refactored to support + more headers, multiple values, and more secure configuration. + + - Each header supports multiple values. The trusted number of + proxies is configured separately for each header. The + ``num_proxies`` argument is deprecated. (:pr:`1314`) + - Sets ``SERVER_NAME`` and ``SERVER_PORT`` based on + ``X-Forwarded-Host``. (:pr:`1314`) + - Sets ``SERVER_PORT`` and modifies ``HTTP_HOST`` based on + ``X-Forwarded-Port``. (:issue:`1023`, :pr:`1304`) + - Sets ``SCRIPT_NAME`` based on ``X-Forwarded-Prefix``. + (:issue:`1237`) + - The original WSGI environment values are stored in the + ``werkzeug.proxy_fix.orig`` key, a dict. The individual keys + ``werkzeug.proxy_fix.orig_remote_addr``, + ``werkzeug.proxy_fix.orig_wsgi_url_scheme``, and + ``werkzeug.proxy_fix.orig_http_host`` are deprecated. + +- Middleware from ``werkzeug.wsgi`` has moved to separate modules + under ``werkzeug.middleware``, along with the middleware moved from + ``werkzeug.contrib``. The old ``werkzeug.wsgi`` imports are + deprecated and will be removed in version 1.0. 
(:pr:`1452`) + + - ``werkzeug.wsgi.DispatcherMiddleware`` has moved to + :class:`werkzeug.middleware.dispatcher.DispatcherMiddleware`. + - ``werkzeug.wsgi.ProxyMiddleware`` has moved to + :class:`werkzeug.middleware.http_proxy.ProxyMiddleware`. + - ``werkzeug.wsgi.SharedDataMiddleware`` has moved to + :class:`werkzeug.middleware.shared_data.SharedDataMiddleware`. + +- :class:`~middleware.http_proxy.ProxyMiddleware` proxies the query + string. (:pr:`1252`) +- The filenames generated by + :class:`~middleware.profiler.ProfilerMiddleware` can be customized. + (:issue:`1283`) +- The ``werkzeug.wrappers`` module has been converted to a package, + and its various classes have been organized into separate modules. + Any previously documented classes, understood to be the existing + public API, are still importable from ``werkzeug.wrappers``, or may + be imported from their specific modules. (:pr:`1456`) + + +Version 0.14.1 +-------------- + +Released on December 31st 2017 + +- Resolved a regression with status code handling in the integrated + development server. + +Version 0.14 +------------ + +Released on December 31st 2017 + +- HTTP exceptions are now automatically caught by + ``Request.application``. +- Added support for Edge as a browser. +- Added support for platforms that lack ``SpooledTemporaryFile``. +- Add support for ETag handling through If-Match. +- Added support for the SameSite cookie attribute. +- Added ``werkzeug.wsgi.ProxyMiddleware``. +- Implemented ``has`` for ``NullCache``. +- ``get_multi`` on cache clients now returns lists all the time. +- Improved the watchdog observer shutdown for the reloader to not crash + on exit on older Python versions. +- Added support for ``filename*`` filename attributes according to + RFC 2231. +- Resolved an issue where machine ID for the reloader PIN was not + read accurately on Windows. +- Added a workaround for syntax errors in init files in the reloader. 
+- Added support for using the reloader with console scripts on Windows. +- The built-in HTTP server will no longer close a connection in cases + where no HTTP body is expected (204, 304, HEAD requests, etc.). +- The ``EnvironHeaders`` object now skips over empty content type and + lengths if they are set to falsy values. +- Werkzeug will no longer send the content-length header on 1xx or + 204/304 responses. +- Cookie values are now also permitted to include slashes and equal + signs without quoting. +- Relaxed the regex for the routing converter arguments. +- If cookies are sent without values they are now assumed to have an + empty value and the parser accepts this. Previously this could have + corrupted cookies that followed the value. +- The test ``Client`` and ``EnvironBuilder`` now support mimetypes like + the request object does. +- Added support for static weights in URL rules. +- Better handle some more complex reloader scenarios where sys.path + contained non-directory paths. +- ``EnvironHeaders`` no longer raises weird errors if non-string keys + are passed to it. + + +Version 0.13 +------------ + +Released on December 7th 2017 + +- **Deprecate support for Python 2.6 and 3.3.** CI tests will not run + for these versions, and support will be dropped completely in the next + version. (:issue:`pallets/meta#24`) +- Raise ``TypeError`` when port is not an integer. (:pr:`1088`) +- Fully deprecate ``werkzeug.script``. Use `Click`_ instead. + (:pr:`1090`) +- ``response.age`` is parsed as a ``timedelta``. Previously, it was + incorrectly treated as a ``datetime``. The header value is an integer + number of seconds, not a date string. (:pr:`414`) +- Fix a bug in ``TypeConversionDict`` where errors are not propagated + when using the converter. (:issue:`1102`) +- ``Authorization.qop`` is a string instead of a set, to comply with + RFC 2617. (:pr:`984`) +- An exception is raised when an encoded cookie is larger than, by + default, 4093 bytes. 
Browsers may silently ignore cookies larger than + this. ``BaseResponse`` has a new attribute ``max_cookie_size`` and + ``dump_cookie`` has a new argument ``max_size`` to configure this. + (:pr:`780`, :pr:`1109`) +- Fix a TypeError in ``werkzeug.contrib.lint.GuardedIterator.close``. + (:pr:`1116`) +- ``BaseResponse.calculate_content_length`` now correctly works for + Unicode responses on Python 3. It first encodes using + ``iter_encoded``. (:issue:`705`) +- Secure cookie contrib works with string secret key on Python 3. + (:pr:`1205`) +- Shared data middleware accepts a list instead of a dict of static + locations to preserve lookup order. (:pr:`1197`) +- HTTP header values without encoding can contain single quotes. + (:pr:`1208`) +- The built-in dev server supports receiving requests with chunked + transfer encoding. (:pr:`1198`) + +.. _Click: https://palletsprojects.com/p/click/ + + +Version 0.12.2 +-------------- + +Released on May 16 2017 + +- Fix regression: Pull request ``#892`` prevented Werkzeug from correctly + logging the IP of a remote client behind a reverse proxy, even when using + `ProxyFix`. +- Fix a bug in `safe_join` on Windows. + +Version 0.12.1 +-------------- + +Released on March 15th 2017 + +- Fix crash of reloader (used on debug mode) on Windows. + (`OSError: [WinError 10038]`). See pull request ``#1081`` +- Partially revert change to class hierarchy of `Headers`. See ``#1084``. + +Version 0.12 +------------ + +Released on March 10th 2017 + +- Spit out big deprecation warnings for werkzeug.script +- Use `inspect.getfullargspec` internally when available as + `inspect.getargspec` is gone in 3.6 +- Added support for status code 451 and 423 +- Improved the build error suggestions. In particular only if + someone stringifies the error will the suggestions be calculated. +- Added support for uWSGI's caching backend. +- Fix a bug where iterating over a `FileStorage` would result in an infinite + loop. 
+- Datastructures now inherit from the relevant baseclasses from the + `collections` module in the stdlib. See #794. +- Add support for recognizing NetBSD, OpenBSD, FreeBSD, DragonFlyBSD platforms + in the user agent string. +- Recognize SeaMonkey browser name and version correctly. +- Recognize Baiduspider and bingbot user agents. +- If `LocalProxy`'s wrapped object is a function, refer to it with the + ``__wrapped__`` attribute. +- The defaults of ``generate_password_hash`` have been changed to more secure + ones, see pull request ``#753``. +- Add support for encoding in options header parsing, see pull request + ``#933``. +- ``test.Client`` now properly handles Location headers with relative URLs, see + pull request ``#879``. +- When `HTTPException` is raised, it now prints the description, for easier + debugging. +- Werkzeug's dict-like datastructures now have ``view``-methods under Python 2, + see pull request ``#968``. +- Fix a bug in ``MultiPartParser`` when no ``stream_factory`` was provided + during initialization, see pull request ``#973``. +- Disable autocorrect and spellchecker in the debugger middleware's Python + prompt, see pull request ``#994``. +- Don't redirect to slash route when method doesn't match, see pull request + ``#907``. +- Fix a bug when using ``SharedDataMiddleware`` with frozen packages, see pull + request ``#959``. +- `Range` header parsing function fixed for invalid values ``#974``. +- Add support for byte Range Requests, see pull request ``#978``. +- Use modern cryptographic defaults in the dev servers ``#1004``. +- The ``post()`` method of the test client now accepts file objects through the + ``data`` parameter. +- Color run_simple's terminal output based on HTTP codes ``#1013``. +- Fix self-XSS in debugger console, see ``#1031``. +- Fix IPython 5.x shell support, see ``#1033``. 
+- Change Accept datastructure to sort by specificity first, allowing for more + accurate results when using ``best_match`` for mime types (for example in + ``request.accept_mimetypes.best_match``). + +Version 0.11.16 +--------------- + +- werkzeug.serving: set CONTENT_TYPE / CONTENT_LENGTH only if they're provided by the client. +- werkzeug.serving: Fix crash of reloader when using `python -m werkzeug.serving`. + +Version 0.11.15 +--------------- + +Released on December 30th 2016. + +- Bugfix for the bugfix in the previous release. + +Version 0.11.14 +--------------- + +Released on December 30th 2016. + +- Check if platform can fork before importing ``ForkingMixIn``, raise exception + when creating ``ForkingWSGIServer`` on such a platform, see PR ``#999``. + +Version 0.11.13 +--------------- + +Released on December 26th 2016. + +- Correct fix for the reloader issue on certain Windows installations. + +Version 0.11.12 +--------------- + +Released on December 26th 2016. + +- Fix more bugs in multidicts regarding empty lists. See ``#1000``. +- Add some docstrings to some `EnvironBuilder` properties that were previously + unintentionally missing. +- Added a workaround for the reloader on Windows. + +Version 0.11.11 +--------------- + +Released on August 31st 2016. + +- Fix JSONRequestMixin for Python 3. See #731. +- Fix broken string handling in test client when passing integers. See #852. +- Fix a bug in ``parse_options_header`` where an invalid content type + starting with comma or semi-colon would result in an invalid return value, + see issue ``#995``. +- Fix a bug in multidicts when passing empty lists as values, see issue + ``#979``. +- Fix a security issue that allows XSS on the Werkzeug debugger. See ``#1001``. + +Version 0.11.10 +--------------- + +Released on May 24th 2016. + +- Fixed a bug that occurs when running on Python 2.6 and using a broken locale. + See pull request #912. +- Fixed a crash when running the debugger on Google App Engine. See issue #925. 
+- Fixed an issue with multipart parsing that could cause memory exhaustion. + +Version 0.11.9 +-------------- + +Released on April 24th 2016. + +- Corrected an issue that caused the debugger not to use the + machine GUID on POSIX systems. +- Corrected a Unicode error on Python 3 for the debugger's + PIN usage. +- Corrected the timestamp verification in the pin debug code. + Without this fix the pin was remembered for too long. + +Version 0.11.8 +-------------- + +Released on April 15th 2016. + +- fixed a problem with the machine GUID detection code on OS X + on Python 3. + +Version 0.11.7 +-------------- + +Released on April 14th 2016. + +- fixed a regression on Python 3 for the debugger. + +Version 0.11.6 +-------------- + +Released on April 14th 2016. + +- werkzeug.serving: Still show the client address on bad requests. +- improved the PIN based protection for the debugger to make it harder to + brute force via trying cookies. Please keep in mind that the debugger + *is not intended for running on production environments* +- increased the pin timeout to a week to make it less annoying for people + which should decrease the chance that users disable the pin check + entirely. +- werkzeug.serving: Fix broken HTTP_HOST when path starts with double slash. + +Version 0.11.5 +-------------- + +Released on March 22nd 2016. + +- werkzeug.serving: Fix crash when attempting SSL connection to HTTP server. + +Version 0.11.4 +-------------- + +Released on February 14th 2016. + +- Fixed werkzeug.serving not working from -m flag. +- Fixed incorrect weak etag handling. + +Version 0.11.3 +-------------- + +Released on December 20th 2015. + +- Fixed an issue with copy operations not working against + proxies. +- Changed the logging operations of the development server to + correctly log where the server is running in all situations + again. +- Fixed another regression with SSL wrapping similar to the + fix in 0.11.2 but for a different code path. 
+ +Version 0.11.2 +-------------- + +Released on November 12th 2015. + +- Fix inheritable sockets on Windows on Python 3. +- Fixed an issue with the forking server not starting any longer. +- Fixed SSL wrapping on platforms that supported opening sockets + by file descriptor. +- No longer log from the watchdog reloader. +- Unicode errors in hosts are now better caught or converted into + bad request errors. + +Version 0.11.1 +-------------- + +Released on November 10th 2015. + +- Fixed a regression on Python 3 in the debugger. + +Version 0.11 +------------ + +Released on November 8th 2015, codename Gleisbaumaschine. + +- Added ``reloader_paths`` option to ``run_simple`` and other functions in + ``werkzeug.serving``. This allows the user to completely override the Python + module watching of Werkzeug with custom paths. +- Many custom cached properties of Werkzeug's classes are now subclasses of + Python's ``property`` type (issue ``#616``). +- ``bind_to_environ`` now doesn't differentiate between implicit and explicit + default port numbers in ``HTTP_HOST`` (pull request ``#204``). +- ``BuildErrors`` are now more informative. They come with a complete sentence + as error message, and also provide suggestions (pull request ``#691``). +- Fix a bug in the user agent parser where Safari's build number instead of + version would be extracted (pull request ``#703``). +- Fixed issue where RedisCache set_many was broken for twemproxy, which doesn't + support the default MULTI command (pull request ``#702``). +- ``mimetype`` parameters on request and response classes are now always + converted to lowercase. +- Changed cache so that cache never expires if timeout is 0. This also fixes + an issue with redis setex (issue ``#550``) +- Werkzeug now assumes ``UTF-8`` as filesystem encoding on Unix if Python + detected it as ASCII. +- New optional `has` method on caches. +- Fixed various bugs in `parse_options_header` (pull request ``#643``). 
+- If the reloader is enabled the server will now open the socket in the parent + process if this is possible. This means that when the reloader kicks in + the connection from client will wait instead of tearing down. This does + not work on all Python versions. +- Implemented PIN based authentication for the debugger. This can optionally + be disabled but is discouraged. This change was necessary as it has been + discovered that too many people run the debugger in production. +- Devserver no longer requires SSL module to be installed. + +Version 0.10.5 +-------------- + +(bugfix release, release date yet to be decided) + +- Reloader: Correctly detect file changes made by moving temporary files over + the original, which is e.g. the case with PyCharm (pull request ``#722``). +- Fix bool behavior of ``werkzeug.datastructures.ETags`` under Python 3 (issue + ``#744``). + +Version 0.10.4 +-------------- + +(bugfix release, released on March 26th 2015) + +- Re-release of 0.10.3 with packaging artifacts manually removed. + +Version 0.10.3 +-------------- + +(bugfix release, released on March 26th 2015) + +- Re-release of 0.10.2 without packaging artifacts. + +Version 0.10.2 +-------------- + +(bugfix release, released on March 26th 2015) + +- Fixed issue where ``empty`` could break third-party libraries that relied on + keyword arguments (pull request ``#675``) +- Improved ``Rule.empty`` by providing a ``get_empty_kwargs`` to allow setting + custom kwargs without having to override entire ``empty`` method. (pull + request ``#675``) +- Fixed ``extra_files`` parameter for reloader to not cause startup + to crash when included in server params +- Using `MultiDict` when building URLs is now not supported again. The behavior + introduced several regressions. +- Fix performance problems with stat-reloader (pull request ``#715``). 
+ +Version 0.10.1 +-------------- + +(bugfix release, released on February 3rd 2015) + +- Fixed regression with multiple query values for URLs (pull request ``#667``). +- Fix issues with eventlet's monkeypatching and the builtin server (pull + request ``#663``). + +Version 0.10 +------------ + +Released on January 30th 2015, codename Bagger. + +- Changed the error handling of and improved testsuite for the caches in + ``contrib.cache``. +- Fixed a bug on Python 3 when creating adhoc ssl contexts, due to `sys.maxint` + not being defined. +- Fixed a bug on Python 3, that caused + :func:`~werkzeug.serving.make_ssl_devcert` to fail with an exception. +- Added exceptions for 504 and 505. +- Added support for ChromeOS detection. +- Added UUID converter to the routing system. +- Added message that explains how to quit the server. +- Fixed a bug on Python 2, that caused ``len`` for + :class:`werkzeug.datastructures.CombinedMultiDict` to crash. +- Added support for stdlib pbkdf2 hmac if a compatible digest + is found. +- Ported testsuite to use ``py.test``. +- Minor optimizations to various middlewares (pull requests ``#496`` and + ``#571``). +- Use stdlib ``ssl`` module instead of ``OpenSSL`` for the builtin server + (issue ``#434``). This means that OpenSSL contexts are not supported anymore, + but instead ``ssl.SSLContext`` from the stdlib. +- Allow protocol-relative URLs when building external URLs. +- Fixed Atom syndication to print time zone offset for tz-aware datetime + objects (pull request ``#254``). +- Improved reloader to track added files and to recover from broken + sys.modules setups with syntax errors in packages. +- ``cache.RedisCache`` now supports arbitrary ``**kwargs`` for the redis + object. +- ``werkzeug.test.Client`` now uses the original request method when resolving + 307 redirects (pull request ``#556``). +- ``werkzeug.datastructures.MIMEAccept`` now properly deals with mimetype + parameters (pull request ``#205``). 
+- ``werkzeug.datastructures.Accept`` now handles a quality of ``0`` as + intolerable, as per RFC 2616 (pull request ``#536``). +- ``werkzeug.urls.url_fix`` now properly encodes hostnames with ``idna`` + encoding (issue ``#559``). It also doesn't crash on malformed URLs anymore + (issue ``#582``). +- ``werkzeug.routing.MapAdapter.match`` now recognizes the difference between + the path ``/`` and an empty one (issue ``#360``). +- The interactive debugger now tries to decode non-ascii filenames (issue + ``#469``). +- Increased default key size of generated SSL certificates to 1024 bits (issue + ``#611``). +- Added support for specifying a ``Response`` subclass to use when calling + :func:`~werkzeug.utils.redirect`\ . +- ``werkzeug.test.EnvironBuilder`` now doesn't use the request method anymore + to guess the content type, and purely relies on the ``form``, ``files`` and + ``input_stream`` properties (issue ``#620``). +- Added Symbian to the user agent platform list. +- Fixed make_conditional to respect automatically_set_content_length +- Unset ``Content-Length`` when writing to response.stream (issue ``#451``) +- ``wrappers.Request.method`` is now always uppercase, eliminating + inconsistencies of the WSGI environment (issue ``#647``). +- ``routing.Rule.empty`` now works correctly with subclasses of ``Rule`` (pull + request ``#645``). +- Made map updating safe in light of concurrent updates. +- Allow multiple values for the same field for url building (issue ``#658``). + +Version 0.9.7 +------------- + +(bugfix release, release date to be decided) + +- Fix unicode problems in ``werkzeug.debug.tbtools``. +- Fix Python 3-compatibility problems in ``werkzeug.posixemulation``. +- Backport fix of fatal typo for ``ImmutableList`` (issue ``#492``). +- Make creation of the cache dir for ``FileSystemCache`` atomic (issue + ``#468``). +- Use native strings for memcached keys to work with Python 3 client (issue + ``#539``). 
+- Fix charset detection for ``werkzeug.debug.tbtools.Frame`` objects (issues + ``#547`` and ``#532``). +- Fix ``AttributeError`` masking in ``werkzeug.utils.import_string`` (issue + ``#182``). +- Explicitly shut down server (issue ``#519``). +- Fix timeouts greater than 2592000 being misinterpreted as UNIX timestamps in + ``werkzeug.contrib.cache.MemcachedCache`` (issue ``#533``). +- Fix bug where ``werkzeug.exceptions.abort`` would raise an arbitrary subclass + of the expected class (issue ``#422``). +- Fix broken ``jsrouting`` (due to removal of ``werkzeug.templates``) +- ``werkzeug.urls.url_fix`` now doesn't crash on malformed URLs anymore, but + returns them unmodified. This is a cheap workaround for ``#582``, the proper + fix is included in version 0.10. +- The repr of ``werkzeug.wrappers.Request`` doesn't crash on non-ASCII-values + anymore (pull request ``#466``). +- Fix bug in ``cache.RedisCache`` when combined with ``redis.StrictRedis`` + object (pull request ``#583``). +- The ``qop`` parameter for ``WWW-Authenticate`` headers is now always quoted, + as required by RFC 2617 (issue ``#633``). +- Fix bug in ``werkzeug.contrib.cache.SimpleCache`` with Python 3 where add/set + may throw an exception when pruning old entries from the cache (pull request + ``#651``). + +Version 0.9.6 +------------- + +(bugfix release, released on June 7th 2014) + +- Added a safe conversion for IRI to URI conversion and use that + internally to work around issues with spec violations for + protocols such as ``itms-service``. + +Version 0.9.7 +------------- + +- Fixed uri_to_iri() not re-encoding hashes in query string parameters. + +Version 0.9.5 +------------- + +(bugfix release, released on June 7th 2014) + +- Forward charset argument from request objects to the environ + builder. +- Fixed error handling for missing boundaries in multipart data. +- Fixed session creation on systems without ``os.urandom()``. +- Fixed pluses in dictionary keys not being properly URL encoded. 
+- Fixed a problem with deepcopy not working for multi dicts. +- Fixed a double quoting issue on redirects. +- Fixed a problem with unicode keys appearing in headers on 2.x. +- Fixed a bug with unicode strings in the test builder. +- Fixed a unicode bug on Python 3 in the WSGI profiler. +- Fixed an issue with the safe string compare function on + Python 2.7.7 and Python 3.4. + +Version 0.9.4 +------------- + +(bugfix release, released on August 26th 2013) + +- Fixed an issue with Python 3.3 and an edge case in cookie parsing. +- Fixed decoding errors not handled properly through the WSGI + decoding dance. +- Fixed URI to IRI conversion incorrectly decoding percent signs. + +Version 0.9.3 +------------- + +(bugfix release, released on July 25th 2013) + +- Restored behavior of the ``data`` descriptor of the request class to pre 0.9 + behavior. This now also means that ``.data`` and ``.get_data()`` have + different behavior. New code should use ``.get_data()`` always. + + In addition to that there is now a flag for the ``.get_data()`` method that + controls what should happen with form data parsing and the form parser will + honor cached data. This makes dealing with custom form data more consistent. + +Version 0.9.2 +------------- + +(bugfix release, released on July 18th 2013) + +- Added `unsafe` parameter to :func:`~werkzeug.urls.url_quote`. +- Fixed an issue with :func:`~werkzeug.urls.url_quote_plus` not quoting + `'+'` correctly. +- Ported remaining parts of :class:`~werkzeug.contrib.RedisCache` to + Python 3.3. +- Ported remaining parts of :class:`~werkzeug.contrib.MemcachedCache` to + Python 3.3 +- Fixed a deprecation warning in the contrib atom module. +- Fixed a regression with setting of content types through the + headers dictionary instead of with the content type parameter. +- Use correct name for stdlib secure string comparison function. +- Fixed a wrong reference in the docstring of + :func:`~werkzeug.local.release_local`. 
+- Fixed an `AttributeError` that sometimes occurred when accessing the + :attr:`werkzeug.wrappers.BaseResponse.is_streamed` attribute. + +Version 0.9.1 +------------- + +(bugfix release, released on June 14th 2013) + +- Fixed an issue with integers no longer being accepted in certain + parts of the routing system or URL quoting functions. +- Fixed an issue with `url_quote` not producing the right escape + codes for single digit codepoints. +- Fixed an issue with :class:`~werkzeug.wsgi.SharedDataMiddleware` not + reading the path correctly and breaking on etag generation in some + cases. +- Properly handle `Expect: 100-continue` in the development server + to resolve issues with curl. +- Automatically exhaust the input stream on request close. This should + fix issues where not touching request files results in a timeout. +- Fixed exhausting of streams not doing anything if a non-limited + stream was passed into the multipart parser. +- Raised the buffer sizes for the multipart parser. + +Version 0.9 +----------- + +Released on June 13th 2013, codename Planierraupe. + +- Added support for :meth:`~werkzeug.wsgi.LimitedStream.tell` + on the limited stream. +- :class:`~werkzeug.datastructures.ETags` now is nonzero if it + contains at least one etag of any kind, including weak ones. +- Added a workaround for a bug in the stdlib for SSL servers. +- Improved SSL interface of the devserver so that it can generate + certificates easily and load them from files. +- Refactored test client to invoke the open method on the class + for redirects. This makes subclassing more powerful. +- :func:`werkzeug.wsgi.make_chunk_iter` and + :func:`werkzeug.wsgi.make_line_iter` now support processing of + iterators and streams. +- URL generation by the routing system now no longer quotes + ``+``. +- URL fixing now no longer quotes certain reserved characters. +- The :func:`werkzeug.security.generate_password_hash` and + check functions now support any of the hashlib algorithms. 
+- `wsgi.get_current_url` is now ascii safe for browsers sending + non-ascii data in query strings. +- improved parsing behavior for :func:`werkzeug.http.parse_options_header` +- added more operators to local proxies. +- added a hook to override the default converter in the routing + system. +- The description field of HTTP exceptions is now always escaped. + Use markup objects to disable that. +- Added number of proxy argument to the proxy fix to make it more + secure out of the box on common proxy setups. It will by default + no longer trust the x-forwarded-for header as much as it did + before. +- Added support for fragment handling in URI/IRI functions. +- Added custom class support for :func:`werkzeug.http.parse_dict_header`. +- Renamed `LighttpdCGIRootFix` to `CGIRootFix`. +- Always treat `+` as safe when fixing URLs as people love misusing them. +- Added support for profiling into directories in the contrib profiler. +- The escape function now by default escapes quotes. +- Changed repr of exceptions to be less magical. +- Simplified exception interface to no longer require environments + to be passed to receive the response object. +- Added sentinel argument to IterIO objects. +- Added pbkdf2 support for the security module. +- Added a plain request type that disables all form parsing to only + leave the stream behind. +- Removed support for deprecated `fix_headers`. +- Removed support for deprecated `header_list`. +- Removed support for deprecated parameter for `iter_encoded`. +- Removed support for deprecated non-silent usage of the limited + stream object. +- Removed support for previous dummy `writable` parameter on + the cached property. +- Added support for explicitly closing request objects to close + associated resources. +- Conditional request handling or access to the data property on responses no + longer ignores direct passthrough mode. +- Removed werkzeug.templates and werkzeug.contrib.kickstart. 
+- Changed host lookup logic for forwarded hosts to allow lists of + hosts in which case only the first one is picked up. +- Added `wsgi.get_query_string`, `wsgi.get_path_info` and + `wsgi.get_script_name` and made the `wsgi.pop_path_info` and + `wsgi.peek_path_info` functions perform unicode decoding. This + was necessary to avoid having to expose the WSGI encoding dance + on Python 3. +- Added `content_encoding` and `content_md5` to the request object's + common request descriptor mixin. +- added `options` and `trace` to the test client. +- Overhauled the utilization of the input stream to be easier to use + and better to extend. The detection of content payload on the input + side is now more compliant with HTTP by detecting off the content + type header instead of the request method. This also now means that + the stream property on the request class is always available instead + of just when the parsing fails. +- Added support for using :class:`werkzeug.wrappers.BaseResponse` in a with + statement. +- Changed `get_app_iter` to fetch the response early so that it does not + fail when wrapping a response iterable. This makes filtering easier. +- Introduced `get_data` and `set_data` methods for responses. +- Introduced `get_data` for requests. +- Soft deprecated the `data` descriptors for request and response objects. +- Added `as_bytes` operations to some of the headers to simplify working + with things like cookies. +- Made the debugger paste tracebacks into github's gist service as + private pastes. + +Version 0.8.4 +------------- + +(bugfix release, release date to be announced) + +- Added a favicon to the debugger which fixes problem with + state changes being triggered through a request to + /favicon.ico in Google Chrome. This should fix some + problems with Flask and other frameworks that use + context local objects on a stack with context preservation + on errors. +- Fixed an issue with scrolling up in the debugger. 
+- Fixed an issue with debuggers running on a different URL + than the URL root. +- Fixed a problem with proxies not forwarding some rarely + used special methods properly. +- Added a workaround to prevent the XSS protection from Chrome + breaking the debugger. +- Skip redis tests if redis is not running. +- Fixed a typo in the multipart parser that caused content-type + to not be picked up properly. + +Version 0.8.3 +------------- + +(bugfix release, released on February 5th 2012) + +- Fixed another issue with :func:`werkzeug.wsgi.make_line_iter` + where lines longer than the buffer size were not handled + properly. +- Restore stdout after debug console finished executing so + that the debugger can be used on GAE better. +- Fixed a bug with the redis cache for int subclasses + (affects bool caching). +- Fixed an XSS problem with redirect targets coming from + untrusted sources. +- Redis cache backend now supports password authentication. + +Version 0.8.2 +------------- + +(bugfix release, released on December 16th 2011) + +- Fixed a problem with request handling of the builtin server + not responding to socket errors properly. +- The routing request redirect exception's code attribute is now + used properly. +- Fixed a bug with shutdowns on Windows. +- Fixed a few unicode issues with non-ascii characters being + hardcoded in URL rules. +- Fixed two property docstrings being assigned to fdel instead + of ``__doc__``. +- Fixed an issue where CRLF line endings could be split into two + by the line iter function, causing problems with multipart file + uploads. + +Version 0.8.1 +------------- + +(bugfix release, released on September 30th 2011) + +- Fixed an issue with the memcache not working properly. +- Fixed an issue for Python 2.7.1 and higher that broke + copying of multidicts with :func:`copy.copy`. +- Changed hashing methodology of immutable ordered multi dicts + for a potential problem with alternative Python implementations. 
+ +Version 0.8 +----------- + +Released on September 29th 2011, codename Lötkolben + +- Removed data structure specific KeyErrors for a general + purpose :exc:`~werkzeug.exceptions.BadRequestKeyError`. +- Documented :meth:`werkzeug.wrappers.BaseRequest._load_form_data`. +- The routing system now also accepts strings instead of + dictionaries for the `query_args` parameter since we're only + passing them through for redirects. +- Werkzeug now automatically sets the content length immediately when + the :attr:`~werkzeug.wrappers.BaseResponse.data` attribute is set + for efficiency and simplicity reasons. +- The routing system will now normalize server names to lowercase. +- The routing system will no longer raise ValueErrors in case the + configuration for the server name was incorrect. This should make + deployment much easier because you can ignore that factor now. +- Fixed a bug with parsing HTTP digest headers. It rejected headers + with missing nc and nonce params. +- Proxy fix now also updates wsgi.url_scheme based on X-Forwarded-Proto. +- Added support for key prefixes to the redis cache. +- Added the ability to suppress some auto corrections in the wrappers + that are now controlled via `autocorrect_location_header` and + `automatically_set_content_length` on the response objects. +- Werkzeug now uses a new method to check that the length of incoming + data is complete and will raise IO errors by itself if the server + fails to do so. +- :func:`~werkzeug.wsgi.make_line_iter` now requires a limit that is + not higher than the length the stream can provide. +- Refactored form parsing into a form parser class that makes it possible + to hook into individual parts of the parsing process for debugging and + extending. +- For conditional responses the content length is no longer set when it + is already there and added if missing. +- Immutable datastructures are hashable now. 
+- Headers datastructure no longer allows newlines in values to avoid + header injection attacks. +- Made it possible through subclassing to select a different remote + addr in the proxy fix. +- Added stream based URL decoding. This reduces memory usage on large + transmitted form data that is URL decoded since Werkzeug will no longer + load all the unparsed data into memory. +- Memcache client now no longer uses the buggy cmemcache module and + supports pylibmc. GAE is not tried automatically and the dedicated + class is no longer necessary. +- Redis cache now properly serializes data. +- Removed support for Python 2.4 + +Version 0.7.2 +------------- + +(bugfix release, released on September 30th 2011) + +- Fixed a CSRF problem with the debugger. +- The debugger is now generating private pastes on lodgeit. +- If URL maps are now bound to environments the query arguments + are properly decoded from it for redirects. + +Version 0.7.1 +------------- + +(bugfix release, released on July 26th 2011) + +- Fixed a problem with newer versions of IPython. +- Disabled pyinotify based reloader which does not work reliably. + +Version 0.7 +----------- + +Released on July 24th 2011, codename Schraubschlüssel + +- Add support for python-libmemcached to the Werkzeug cache abstraction + layer. +- Improved :func:`url_decode` and :func:`url_encode` performance. +- Fixed an issue where the SharedDataMiddleware could cause an + internal server error on weird paths when loading via pkg_resources. +- Fixed an URL generation bug that caused URLs to be invalid if a + generated component contains a colon. +- :func:`werkzeug.import_string` now works with partially set up + packages properly. +- Disabled automatic socket switching for IPv6 on the development + server due to problems it caused. +- Werkzeug no longer overrides the Date header when creating a + conditional HTTP response. +- The routing system provides a method to retrieve the matching + methods for a given path. 
+- The routing system now accepts a parameter to change the encoding + error behaviour. +- The local manager can now accept custom ident functions in the + constructor that are forwarded to the wrapped local objects. +- url_unquote_plus now accepts unicode strings again. +- Fixed an issue with the filesystem session support's prune + function and concurrent usage. +- Fixed a problem with external URL generation discarding the port. +- Added support for pylibmc to the Werkzeug cache abstraction layer. +- Fixed an issue with the new multipart parser that happened when + a linebreak happened to be on the chunk limit. +- Cookies are now set properly if ports are in use. A runtime error + is raised if one tries to set a cookie for a domain without a dot. +- Fixed an issue with Template.from_file not working for file + descriptors. +- Reloader can now use inotify to track reloads. This requires the + pyinotify library to be installed. +- Werkzeug debugger can now submit to custom lodgeit installations. +- redirect function's status code assertion now allows 201 to be used + as redirection code. While it's not a real redirect, it shares + enough with redirects for the function to still be useful. +- Fixed securecookie for pypy. +- Fixed `ValueErrors` being raised on calls to `best_match` on + `MIMEAccept` objects when invalid user data was supplied. +- Deprecated `werkzeug.contrib.kickstart` and `werkzeug.contrib.testtools` +- URL routing now can be passed the URL arguments to keep them for + redirects. In the future matching on URL arguments might also be + possible. +- Header encoding changed from utf-8 to latin1 to support a port to + Python 3. Bytestrings passed to the object stay untouched which + makes it possible to have utf-8 cookies. This is a part where + the Python 3 version will later change in that it will always + operate on latin1 values. 
+- Fixed a bug in the form parser that caused the last character to + be dropped off if certain values in multipart data are used. +- Multipart parser now looks at the part-individual content type + header to override the global charset. +- Introduced mimetype and mimetype_params attribute for the file + storage object. +- Changed FileStorage filename fallback logic to skip special filenames + that Python uses for marking special files like stdin. +- Introduced more HTTP exception classes. +- `call_on_close` now can be used as a decorator. +- Support for redis as cache backend. +- Added `BaseRequest.scheme`. +- Support for the RFC 5789 PATCH method. +- New custom routing parser and better ordering. +- Removed support for `is_behind_proxy`. Use a WSGI middleware + instead that rewrites the `REMOTE_ADDR` according to your setup. + Also see the :class:`werkzeug.contrib.fixers.ProxyFix` for + a drop-in replacement. +- Added cookie forging support to the test client. +- Added support for host based matching in the routing system. +- Switched from the default 'ignore' to the better 'replace' + unicode error handling mode. +- The builtin server now adds a function named 'werkzeug.server.shutdown' + into the WSGI env to initiate a shutdown. This currently only works + in Python 2.6 and later. +- Headers are now assumed to be latin1 for better compatibility with + Python 3 once we have support. +- Added :func:`werkzeug.security.safe_join`. +- Added `accept_json` property analogous to `accept_html` on the + :class:`werkzeug.datastructures.MIMEAccept`. +- :func:`werkzeug.utils.import_string` now fails with much better + error messages that pinpoint to the problem. +- Added support for parsing of the `If-Range` header + (:func:`werkzeug.http.parse_if_range_header` and + :class:`werkzeug.datastructures.IfRange`). +- Added support for parsing of the `Range` header + (:func:`werkzeug.http.parse_range_header` and + :class:`werkzeug.datastructures.Range`). 
+- Added support for parsing of the `Content-Range` header of responses + and provided an accessor object for it + (:func:`werkzeug.http.parse_content_range_header` and + :class:`werkzeug.datastructures.ContentRange`). + +Version 0.6.2 +------------- + +(bugfix release, released on April 23rd 2010) + +- renamed the `implicit_seqence_conversion` attribute of the + request object to `implicit_sequence_conversion`. + +Version 0.6.1 +------------- + +(bugfix release, released on April 13th 2010) + +- heavily improved local objects. Should pick up standalone greenlet + builds now and support proxies to free callables as well. There is + also a stacked local now that makes it possible to invoke the same + application from within itself by pushing current request/response + on top of the stack. +- routing build method will also build non-default method rules properly + if no method is provided. +- added proper IPv6 support for the builtin server. +- windows specific filesystem session store fixes. + (should now be more stable under high concurrency) +- fixed a `NameError` in the session system. +- fixed a bug with empty arguments in the werkzeug.script system. +- fixed a bug where log lines will be duplicated if an application uses + :meth:`logging.basicConfig` (#499) +- added secure password hashing and checking functions. +- `HEAD` is now implicitly added as method in the routing system if + `GET` is present. Not doing that was considered a bug because often + code assumed that this is the case and in web servers that do not + normalize `HEAD` to `GET` this could break `HEAD` requests. +- the script support can start SSL servers now. + +Version 0.6 +----------- + +Released on Feb 19th 2010, codename Hammer. + +- removed pending deprecations +- sys.path is now printed from the testapp. +- fixed an RFC 2068 incompatibility with cookie value quoting. +- the :class:`FileStorage` now gives access to the multipart headers. 
+- `cached_property.writeable` has been deprecated. +- :meth:`MapAdapter.match` now accepts a `return_rule` keyword argument + that returns the matched `Rule` instead of just the `endpoint` +- :meth:`routing.Map.bind_to_environ` raises a more correct error message + now if the map was bound to an invalid WSGI environment. +- added support for SSL to the builtin development server. +- Response objects are no longer modified in place when they are evaluated + as WSGI applications. For backwards compatibility the `fix_headers` + function is still called in case it was overridden. + You should however change your application to use `get_wsgi_headers` if + you need header modifications before responses are sent as the backwards + compatibility support will go away in future versions. +- :func:`append_slash_redirect` no longer requires the QUERY_STRING to be + in the WSGI environment. +- added :class:`~werkzeug.contrib.wrappers.DynamicCharsetResponseMixin` +- added :class:`~werkzeug.contrib.wrappers.DynamicCharsetRequestMixin` +- added :attr:`BaseRequest.url_charset` +- request and response objects have a default `__repr__` now. +- builtin data structures can be pickled now. +- the form data parser will now look at the filename instead of the + content type to figure out if it should treat the upload as regular + form data or file upload. This fixes a bug with Google Chrome. +- improved performance of `make_line_iter` and the multipart parser + for binary uploads. +- fixed :attr:`~werkzeug.BaseResponse.is_streamed` +- fixed a path quoting bug in `EnvironBuilder` that caused PATH_INFO and + SCRIPT_NAME to end up in the environ unquoted. +- :meth:`werkzeug.BaseResponse.freeze` now sets the content length. +- for unknown HTTP methods the request stream is now always limited + instead of being empty. This makes it easier to implement DAV + and other protocols on top of Werkzeug. 
+- added :meth:`werkzeug.MIMEAccept.best_match` +- multi-value test-client posts from a standard dictionary are now + supported. Previously you had to use a multi dict. +- rule templates properly work with submounts, subdomains and + other rule factories now. +- deprecated non-silent usage of the :class:`werkzeug.LimitedStream`. +- added support for IRI handling to many parts of Werkzeug. +- development server properly logs to the werkzeug logger now. +- added :func:`werkzeug.extract_path_info` +- fixed a querystring quoting bug in :func:`url_fix` +- added `fallback_mimetype` to :class:`werkzeug.SharedDataMiddleware`. +- deprecated :meth:`BaseResponse.iter_encoded`'s charset parameter. +- added :meth:`BaseResponse.make_sequence`, + :attr:`BaseResponse.is_sequence` and + :meth:`BaseResponse._ensure_sequence`. +- added better __repr__ of :class:`werkzeug.Map` +- `import_string` accepts unicode strings as well now. +- development server doesn't break on double slashes after the host name. +- better `__repr__` and `__str__` of + :exc:`werkzeug.exceptions.HTTPException` +- test client works correctly with multiple cookies now. +- the :class:`werkzeug.routing.Map` now has a class attribute with + the default converter mapping. This helps subclasses to override + the converters without passing them to the constructor. +- implemented :class:`OrderedMultiDict` +- improved the session support for more efficient session storing + on the filesystem. Also added support for listing of sessions + currently stored in the filesystem session store. +- werkzeug no longer utilizes the Python time module for parsing + which means that dates in a broader range can be parsed. +- the wrappers now have class attributes that make it possible to + swap out the dict and list types it uses. +- werkzeug debugger should work on the appengine dev server now. +- the URL builder supports dropping of unexpected arguments now. + Previously they were always appended to the URL as query string. 
+- profiler now writes to the correct stream. + +Version 0.5.1 +------------- +(bugfix release for 0.5, released on July 9th 2009) + +- fixed boolean check of :class:`FileStorage` +- url routing system properly supports unicode URL rules now. +- file upload streams no longer have to provide a truncate() + method. +- implemented :meth:`BaseRequest._form_parsing_failed`. +- fixed #394 +- :meth:`ImmutableDict.copy`, :meth:`ImmutableMultiDict.copy` and + :meth:`ImmutableTypeConversionDict.copy` return mutable shallow + copies. +- fixed a bug with the `make_runserver` script action. +- :meth:`MultiDict.items` and :meth:`MultiDict.iteritems` now accept an + argument to return a pair for each value of each key. +- the multipart parser works better with hand-crafted multipart + requests now that have extra newlines added. This fixes a bug + with setuptools uploads not handled properly (#390) +- fixed some minor bugs in the atom feed generator. +- fixed a bug with client cookie header parsing being case sensitive. +- fixed a not-working deprecation warning. +- fixed package loading for :class:`SharedDataMiddleware`. +- fixed a bug in the secure cookie that made server-side expiration + on servers with a local time that was not set to UTC impossible. +- fixed console of the interactive debugger. + + +Version 0.5 +----------- + +Released on April 24th, codename Schlagbohrer. + +- requires Python 2.4 now +- fixed a bug in :class:`~contrib.IterIO` +- added :class:`MIMEAccept` and :class:`CharsetAccept` that work like the + regular :class:`Accept` but have extra special normalization for mimetypes + and charsets and extra convenience methods. +- switched the serving system from wsgiref to something homebrew. +- the :class:`Client` now supports cookies. +- added the :mod:`~werkzeug.contrib.fixers` module with various + fixes for webserver bugs and hosting setup side-effects. 
+- added :mod:`werkzeug.contrib.wrappers` +- added :func:`is_hop_by_hop_header` +- added :func:`is_entity_header` +- added :func:`remove_hop_by_hop_headers` +- added :func:`pop_path_info` +- added :func:`peek_path_info` +- added :func:`wrap_file` and :class:`FileWrapper` +- moved `LimitedStream` from the contrib package into the regular + werkzeug one and changed the default behavior to raise exceptions + rather than stopping without warning. The old class will stick in + the module until 0.6. +- implemented experimental multipart parser that replaces the old CGI hack. +- added :func:`dump_options_header` and :func:`parse_options_header` +- added :func:`quote_header_value` and :func:`unquote_header_value` +- :func:`url_encode` and :func:`url_decode` now accept a separator + argument to switch between `&` and `;` as pair separator. The magic + switch is no longer in place. +- all form data parsing functions as well as the :class:`BaseRequest` + object have parameters (or attributes) to limit the number of + incoming bytes (either totally or per field). +- added :class:`LanguageAccept` +- request objects are now enforced to be read only for all collections. +- added many new collection classes, refactored collections in general. +- test support was refactored, semi-undocumented `werkzeug.test.File` + was replaced by :class:`werkzeug.FileStorage`. +- :class:`EnvironBuilder` was added and unifies the previous distinct + :func:`create_environ`, :class:`Client` and + :meth:`BaseRequest.from_values`. They all work the same now which + is less confusing. +- officially documented imports from the internal modules as undefined + behavior. These modules were never exposed as public interfaces. +- removed `FileStorage.__len__` which previously made the object + falsy for browsers not sending the content length which all browsers + do. +- :class:`SharedDataMiddleware` uses `wrap_file` now and has a + configurable cache timeout. 
+- added :class:`CommonRequestDescriptorsMixin` +- added :attr:`CommonResponseDescriptorsMixin.mimetype_params` +- added :mod:`werkzeug.contrib.lint` +- added `passthrough_errors` to `run_simple`. +- added `secure_filename` +- added :func:`make_line_iter` +- :class:`MultiDict` copies now instead of revealing internal + lists to the caller for `getlist` and iteration functions that + return lists. +- added :attr:`follow_redirect` to the :func:`open` of :class:`Client`. +- added support for `extra_files` in + :func:`~werkzeug.script.make_runserver` + +Version 0.4.1 +------------- + +(Bugfix release, released on January 11th 2009) + +- `werkzeug.contrib.cache.Memcached` accepts now objects that + implement the memcache.Client interface as alternative to a list of + strings with server addresses. + There is also now a `GAEMemcachedCache` that connects to the Google + appengine cache. +- explicitly convert secret keys to bytestrings now because Python + 2.6 no longer does that. +- `url_encode` and all interfaces that call it, support ordering of + options now which however is disabled by default. +- the development server no longer resolves the addresses of clients. +- Fixed a typo in `werkzeug.test` that broke `File`. +- `Map.bind_to_environ` uses the `Host` header now if available. +- Fixed `BaseCache.get_dict` (#345) +- `werkzeug.test.Client` can now run the application buffered in which + case the application is properly closed automatically. +- Fixed `Headers.set` (#354). Caused header duplication before. +- Fixed `Headers.pop` (#349). default parameter was not properly + handled. +- Fixed UnboundLocalError in `create_environ` (#351) +- `Headers` is more compatible with wsgiref now. +- `Template.render` accepts multidicts now. +- dropped support for Python 2.3 + +Version 0.4 +----------- + +Released on November 23rd 2008, codename Schraubenzieher. + +- `Client` supports an empty `data` argument now. 
+- fixed a bug in `Response.application` that made it impossible to use it + as method decorator. +- the session system should work on appengine now +- the secure cookie works properly in load balanced environments with + different cpu architectures now. +- `CacheControl.no_cache` and `CacheControl.private` behavior changed to + reflect the possibilities of the HTTP RFC. Setting these attributes to + `None` or `True` now sets the value to "the empty value". + More details in the documentation. +- fixed `werkzeug.contrib.atom.AtomFeed.__call__`. (#338) +- `BaseResponse.make_conditional` now always returns `self`. Previously + it didn't for post requests and such. +- fixed a bug in boolean attribute handling of `html` and `xhtml`. +- added graceful error handling to the debugger pastebin feature. +- added a more list like interface to `Headers` (slicing and indexing + works now) +- fixed a bug with the `__setitem__` method of `Headers` that didn't + properly remove all keys on replacing. +- added `remove_entity_headers` which removes all entity headers from + a list of headers (or a `Headers` object) +- the responses now automatically call `remove_entity_headers` if the + status code is 304. +- fixed a bug with `Href` query parameter handling. Previously the last + item of a call to `Href` was not handled properly if it was a dict. +- headers now support a `pop` operation to better work with environ + properties. + + +Version 0.3.1 +------------- + +(bugfix release, released on June 24th 2008) + +- fixed a security problem with `werkzeug.contrib.SecureCookie`. + + +Version 0.3 +----------- + +Released on June 14th 2008, codename EUR325CAT6. + +- added support for redirecting in url routing. 
+- added `Authorization` and `AuthorizationMixin` +- added `WWWAuthenticate` and `WWWAuthenticateMixin` +- added `parse_list_header` +- added `parse_dict_header` +- added `parse_authorization_header` +- added `parse_www_authenticate_header` +- added `_get_current_object` method to `LocalProxy` objects +- added `parse_form_data` +- `MultiDict`, `CombinedMultiDict`, `Headers`, and `EnvironHeaders` raise + special key errors now that are subclasses of `BadRequest` so if you + don't catch them they give meaningful HTTP responses. +- added support for alternative encoding error handling and the new + `HTTPUnicodeError` which (if not caught) behaves like a `BadRequest`. +- added `BadRequest.wrap`. +- added ETag support to the SharedDataMiddleware and added an option + to disable caching. +- fixed `is_xhr` on the request objects. +- fixed error handling of the url adapter's `dispatch` method. (#318) +- fixed bug with `SharedDataMiddleware`. +- fixed `Accept.values`. +- `EnvironHeaders` contain content-type and content-length now +- `url_encode` treats lists and tuples in dicts passed to it as multiple + values for the same key so that one doesn't have to pass a `MultiDict` + to the function. +- added `validate_arguments` +- added `BaseRequest.application` +- improved Python 2.3 support +- `run_simple` accepts `use_debugger` and `use_evalex` parameters now, + like the `make_runserver` factory function from the script module. +- the `environ_property` is now read-only by default +- it's now possible to initialize requests as "shallow" requests which + causes runtime errors if the request object tries to consume the + input stream. + + +Version 0.2 +----------- + +Released Feb 14th 2008, codename Faustkeil. + +- Added `AnyConverter` to the routing system. 
+- Added `werkzeug.contrib.securecookie` +- Exceptions have a ``get_response()`` method that returns a response object +- fixed the path ordering bug (#293), thanks Thomas Johansson +- `BaseReporterStream` is now part of the werkzeug contrib module. From + Werkzeug 0.3 onwards you will have to import it from there. +- added `DispatcherMiddleware`. +- `RequestRedirect` is now a subclass of `HTTPException` and uses a + 301 status code instead of 302. +- `url_encode` and `url_decode` can optionally treat keys as unicode strings + now, too. +- `werkzeug.script` has a different caller format for boolean arguments now. +- renamed `lazy_property` to `cached_property`. +- added `import_string`. +- added is_* properties to request objects. +- added `empty()` method to routing rules. +- added `werkzeug.contrib.profiler`. +- added `extends` to `Headers`. +- added `dump_cookie` and `parse_cookie`. +- added `as_tuple` to the `Client`. +- added `werkzeug.contrib.testtools`. +- added `werkzeug.unescape` +- added `BaseResponse.freeze` +- added `werkzeug.contrib.atom` +- the HTTPExceptions accept an argument `description` now which overrides the + default description. +- the `MapAdapter` has a default for path info now. If you use + `bind_to_environ` you don't have to pass the path later. +- the wsgiref subclass werkzeug uses for the dev server does not use direct + sys.stderr logging any more but a logger called "werkzeug". +- implemented `Href`. +- implemented `find_modules` +- refactored request and response objects into base objects, mixins and + full featured subclasses that implement all mixins. +- added simple user agent parser +- werkzeug's routing raises `MethodNotAllowed` now if it matches a + rule but for a different method. +- many fixes and small improvements + + +Version 0.1 +----------- + +Released on Dec 9th 2007, codename Wictorinoxger. 
+ +- Initial release diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..f4ba197 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at report@palletsprojects.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst new file mode 100644 index 0000000..9f40800 --- /dev/null +++ b/CONTRIBUTING.rst @@ -0,0 +1,222 @@ +How to contribute to Werkzeug +============================= + +Thank you for considering contributing to Werkzeug! + + +Support questions +----------------- + +Please don't use the issue tracker for this. The issue tracker is a +tool to address bugs and feature requests in Werkzeug itself. Use one of +the following resources for questions about using Werkzeug or issues +with your own code: + +- The ``#get-help`` channel on our Discord chat: + https://discord.gg/pallets +- The mailing list flask@python.org for long term discussion or larger + issues. +- Ask on `Stack Overflow`_. Search with Google first using: + ``site:stackoverflow.com werkzeug {search term, exception message, etc.}`` + +.. _Stack Overflow: https://stackoverflow.com/questions/tagged/werkzeug?tab=Frequent + + +Reporting issues +---------------- + +Include the following information in your post: + +- Describe what you expected to happen. +- If possible, include a `minimal reproducible example`_ to help us + identify the issue. This also helps check that the issue is not with + your own code. +- Describe what actually happened. Include the full traceback if there + was an exception. +- List your Python and Werkzeug versions. If possible, check if this + issue is already fixed in the latest releases or the latest code in + the repository. + +.. 
_minimal reproducible example: https://stackoverflow.com/help/minimal-reproducible-example + + +Submitting patches +------------------ + +If there is not an open issue for what you want to submit, prefer +opening one for discussion before working on a PR. You can work on any +issue that doesn't have an open PR linked to it or a maintainer assigned +to it. These show up in the sidebar. No need to ask if you can work on +an issue that interests you. + +Include the following in your patch: + +- Use `Black`_ to format your code. This and other tools will run + automatically if you install `pre-commit`_ using the instructions + below. +- Include tests if your patch adds or changes code. Make sure the test + fails without your patch. +- Update any relevant docs pages and docstrings. Docs pages and + docstrings should be wrapped at 72 characters. +- Add an entry in ``CHANGES.rst``. Use the same style as other + entries. Also include ``.. versionchanged::`` inline changelogs in + relevant docstrings. + +.. _Black: https://black.readthedocs.io +.. _pre-commit: https://pre-commit.com + + +First time setup +~~~~~~~~~~~~~~~~ + +- Download and install the `latest version of git`_. +- Configure git with your `username`_ and `email`_. + + .. code-block:: text + + $ git config --global user.name 'your name' + $ git config --global user.email 'your email' + +- Make sure you have a `GitHub account`_. +- Fork Werkzeug to your GitHub account by clicking the `Fork`_ button. +- `Clone`_ the main repository locally. + + .. code-block:: text + + $ git clone https://github.com/pallets/werkzeug + $ cd werkzeug + +- Add your fork as a remote to push your work to. Replace + ``{username}`` with your username. This names the remote "fork", the + default Pallets remote is "origin". + + .. code-block:: text + + $ git remote add fork https://github.com/{username}/werkzeug + +- Create a virtualenv. + + .. code-block:: text + + $ python3 -m venv env + $ . 
env/bin/activate + + On Windows, activating is different. + + .. code-block:: text + + > env\Scripts\activate + +- Upgrade pip and setuptools. + + .. code-block:: text + + $ python -m pip install --upgrade pip setuptools + +- Install the development dependencies, then install Werkzeug in + editable mode. + + .. code-block:: text + + $ pip install -r requirements/dev.txt && pip install -e . + +- Install the pre-commit hooks. + + .. code-block:: text + + $ pre-commit install + +.. _latest version of git: https://git-scm.com/downloads +.. _username: https://docs.github.com/en/github/using-git/setting-your-username-in-git +.. _email: https://docs.github.com/en/github/setting-up-and-managing-your-github-user-account/setting-your-commit-email-address +.. _GitHub account: https://github.com/join +.. _Fork: https://github.com/pallets/werkzeug/fork +.. _Clone: https://docs.github.com/en/github/getting-started-with-github/fork-a-repo#step-2-create-a-local-clone-of-your-fork + + +Start coding +~~~~~~~~~~~~ + +- Create a branch to identify the issue you would like to work on. If + you're submitting a bug or documentation fix, branch off of the + latest ".x" branch. + + .. code-block:: text + + $ git fetch origin + $ git checkout -b your-branch-name origin/2.0.x + + If you're submitting a feature addition or change, branch off of the + "main" branch. + + .. code-block:: text + + $ git fetch origin + $ git checkout -b your-branch-name origin/main + +- Using your favorite editor, make your changes, + `committing as you go`_. +- Include tests that cover any code changes you make. Make sure the + test fails without your patch. Run the tests as described below. +- Push your commits to your fork on GitHub and + `create a pull request`_. Link to the issue being addressed with + ``fixes #123`` in the pull request. + + .. code-block:: text + + $ git push --set-upstream fork your-branch-name + +.. 
_committing as you go: https://dont-be-afraid-to-commit.readthedocs.io/en/latest/git/commandlinegit.html#commit-your-changes +.. _create a pull request: https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request + + +Running the tests +~~~~~~~~~~~~~~~~~ + +Run the basic test suite with pytest. + +.. code-block:: text + + $ pytest + +This runs the tests for the current environment, which is usually +sufficient. CI will run the full suite when you submit your pull +request. You can run the full test suite with tox if you don't want to +wait. + +.. code-block:: text + + $ tox + + +Running test coverage +~~~~~~~~~~~~~~~~~~~~~ + +Generating a report of lines that do not have test coverage can indicate +where to start contributing. Run ``pytest`` using ``coverage`` and +generate a report. + +.. code-block:: text + + $ pip install coverage + $ coverage run -m pytest + $ coverage html + +Open ``htmlcov/index.html`` in your browser to explore the report. + +Read more about `coverage <https://coverage.readthedocs.io>`__. + + +Building the docs +~~~~~~~~~~~~~~~~~ + +Build the docs in the ``docs`` directory using Sphinx. + +.. code-block:: text + + $ cd docs + $ make html + +Open ``_build/html/index.html`` in your browser to view the docs. + +Read more about `Sphinx <https://www.sphinx-doc.org/en/stable/>`__. diff --git a/LICENSE.rst b/LICENSE.rst new file mode 100644 index 0000000..c37cae4 --- /dev/null +++ b/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2007 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..8942481 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,12 @@ +include CHANGES.rst +include tox.ini +include requirements/*.txt +graft artwork +graft docs +prune docs/_build +graft examples +graft tests +include src/werkzeug/py.typed +include src/werkzeug/*.pyi +graft src/werkzeug/debug/shared +global-exclude *.pyc diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..f1592a5 --- /dev/null +++ b/README.rst @@ -0,0 +1,91 @@ +Werkzeug +======== + +*werkzeug* German noun: "tool". Etymology: *werk* ("work"), *zeug* ("stuff") + +Werkzeug is a comprehensive `WSGI`_ web application library. It began as +a simple collection of various utilities for WSGI applications and has +become one of the most advanced WSGI utility libraries. + +It includes: + +- An interactive debugger that allows inspecting stack traces and + source code in the browser with an interactive interpreter for any + frame in the stack. 
+- A full-featured request object with objects to interact with + headers, query args, form data, files, and cookies. +- A response object that can wrap other WSGI applications and handle + streaming data. +- A routing system for matching URLs to endpoints and generating URLs + for endpoints, with an extensible system for capturing variables + from URLs. +- HTTP utilities to handle entity tags, cache control, dates, user + agents, cookies, files, and more. +- A threaded WSGI server for use while developing applications + locally. +- A test client for simulating HTTP requests during testing without + requiring running a server. + +Werkzeug doesn't enforce any dependencies. It is up to the developer to +choose a template engine, database adapter, and even how to handle +requests. It can be used to build all sorts of end user applications +such as blogs, wikis, or bulletin boards. + +`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while +providing more structure and patterns for defining powerful +applications. + +.. _WSGI: https://wsgi.readthedocs.io/en/latest/ +.. _Flask: https://www.palletsprojects.com/p/flask/ + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Werkzeug + +.. _pip: https://pip.pypa.io/en/stable/getting-started/ + + +A Simple Example +---------------- + +.. code-block:: python + + from werkzeug.wrappers import Request, Response + + @Request.application + def application(request): + return Response('Hello, World!') + + if __name__ == '__main__': + from werkzeug.serving import run_simple + run_simple('localhost', 4000, application) + + +Donate +------ + +The Pallets organization develops and supports Werkzeug and other +popular packages. In order to grow the community of contributors and +users, and allow the maintainers to devote more time to the projects, +`please donate today`_. + +.. 
_please donate today: https://palletsprojects.com/donate + + +Links +----- + +- Documentation: https://werkzeug.palletsprojects.com/ +- Changes: https://werkzeug.palletsprojects.com/changes/ +- PyPI Releases: https://pypi.org/project/Werkzeug/ +- Source Code: https://github.com/pallets/werkzeug/ +- Issue Tracker: https://github.com/pallets/werkzeug/issues/ +- Website: https://palletsprojects.com/p/werkzeug/ +- Twitter: https://twitter.com/PalletsTeam +- Chat: https://discord.gg/pallets diff --git a/artwork/logo.png b/artwork/logo.png new file mode 100644 index 0000000..61666ab Binary files /dev/null and b/artwork/logo.png differ diff --git a/artwork/logo.svg b/artwork/logo.svg new file mode 100644 index 0000000..bd65219 --- /dev/null +++ b/artwork/logo.svg @@ -0,0 +1,88 @@ + + + + + + + + + image/svg+xml + + + + + + + + + + + diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..5128596 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/debug-screenshot.png b/docs/_static/debug-screenshot.png new file mode 100644 index 0000000..1432646 Binary files /dev/null and b/docs/_static/debug-screenshot.png differ diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico new file mode 100644 index 0000000..a3b079a Binary files /dev/null and b/docs/_static/favicon.ico differ diff --git a/docs/_static/shortly.png b/docs/_static/shortly.png new file mode 100644 index 0000000..96c8525 Binary files /dev/null and b/docs/_static/shortly.png differ diff --git a/docs/_static/werkzeug.png b/docs/_static/werkzeug.png new file mode 100644 index 0000000..9cedb06 Binary files /dev/null and b/docs/_static/werkzeug.png differ diff --git a/docs/changes.rst b/docs/changes.rst new file mode 100644 index 0000000..955deaf --- /dev/null +++ b/docs/changes.rst @@ -0,0 +1,4 @@ +Changes +======= + +.. include:: ../CHANGES.rst diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..96e998b --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,55 @@ +from pallets_sphinx_themes import get_version +from pallets_sphinx_themes import ProjectLink + +# Project -------------------------------------------------------------- + +project = "Werkzeug" +copyright = "2007 Pallets" +author = "Pallets" +release, version = get_version("Werkzeug") + +# General -------------------------------------------------------------- + +master_doc = "index" +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "pallets_sphinx_themes", + "sphinx_issues", + "sphinxcontrib.log_cabinet", +] +autoclass_content = "both" +autodoc_typehints = "description" +intersphinx_mapping = {"python": ("https://docs.python.org/3/", None)} +issues_github_path = "pallets/werkzeug" + +# HTML ----------------------------------------------------------------- + +html_theme = "werkzeug" +html_context = { + "project_links": [ + 
ProjectLink("Donate", "https://palletsprojects.com/donate"), + ProjectLink("PyPI Releases", "https://pypi.org/project/Werkzeug/"), + ProjectLink("Source Code", "https://github.com/pallets/werkzeug/"), + ProjectLink("Issue Tracker", "https://github.com/pallets/werkzeug/issues/"), + ProjectLink("Website", "https://palletsprojects.com/p/werkzeug/"), + ProjectLink("Twitter", "https://twitter.com/PalletsTeam"), + ProjectLink("Chat", "https://discord.gg/pallets"), + ] +} +html_sidebars = { + "index": ["project.html", "localtoc.html", "searchbox.html", "ethicalads.html"], + "**": ["localtoc.html", "relations.html", "searchbox.html", "ethicalads.html"], +} +singlehtml_sidebars = {"index": ["project.html", "localtoc.html", "ethicalads.html"]} +html_static_path = ["_static"] +html_favicon = "_static/favicon.ico" +html_logo = "_static/werkzeug.png" +html_title = f"Werkzeug Documentation ({version})" +html_show_sourcelink = False + +# LaTeX ---------------------------------------------------------------- + +latex_documents = [ + (master_doc, f"Werkzeug-{version}.tex", html_title, author, "manual") +] diff --git a/docs/datastructures.rst b/docs/datastructures.rst new file mode 100644 index 0000000..01432f4 --- /dev/null +++ b/docs/datastructures.rst @@ -0,0 +1,138 @@ +=============== +Data Structures +=============== + +.. module:: werkzeug.datastructures + +Werkzeug provides some subclasses of common Python objects to extend them +with additional features. Some of them are used to make them immutable, others +are used to change some semantics to better work with HTTP. + +General Purpose +=============== + +.. versionchanged:: 0.6 + The general purpose classes are now pickleable in each protocol as long + as the contained objects are pickleable. This means that the + :class:`FileMultiDict` won't be pickleable as soon as it contains a + file. + +.. autoclass:: TypeConversionDict + :members: + +.. autoclass:: ImmutableTypeConversionDict + :members: copy + +.. 
autoclass:: MultiDict + :members: + :inherited-members: + +.. autoclass:: OrderedMultiDict + +.. autoclass:: ImmutableMultiDict + :members: copy + +.. autoclass:: ImmutableOrderedMultiDict + :members: copy + +.. autoclass:: CombinedMultiDict + +.. autoclass:: ImmutableDict + :members: copy + +.. autoclass:: ImmutableList + +.. autoclass:: FileMultiDict + :members: + +.. _http-datastructures: + +HTTP Related +============ + +.. autoclass:: Headers([defaults]) + :members: + +.. autoclass:: EnvironHeaders + +.. autoclass:: HeaderSet + :members: + +.. autoclass:: Accept + :members: + +.. autoclass:: MIMEAccept + :members: accept_html, accept_xhtml, accept_json + +.. autoclass:: CharsetAccept + +.. autoclass:: LanguageAccept + +.. autoclass:: RequestCacheControl + :members: + + .. autoattribute:: no_cache + + .. autoattribute:: no_store + + .. autoattribute:: max_age + + .. autoattribute:: no_transform + +.. autoclass:: ResponseCacheControl + :members: + + .. autoattribute:: no_cache + + .. autoattribute:: no_store + + .. autoattribute:: max_age + + .. autoattribute:: no_transform + +.. autoclass:: ETags + :members: + +.. autoclass:: Authorization + :members: + +.. autoclass:: WWWAuthenticate + :members: + +.. autoclass:: IfRange + :members: + +.. autoclass:: Range + :members: + +.. autoclass:: ContentRange + :members: + + +Others +====== + +.. autoclass:: FileStorage + :members: + + .. attribute:: stream + + The input stream for the uploaded file. This usually points to an + open temporary file. + + .. attribute:: filename + + The filename of the file on the client. Can be a ``str``, or an + instance of ``os.PathLike``. + + .. attribute:: name + + The name of the form field. + + .. attribute:: headers + + The multipart headers as :class:`Headers` object. This usually contains + irrelevant information but in combination with custom multipart requests + the raw headers might be interesting. + + .. 
versionadded:: 0.6 diff --git a/docs/debug.rst b/docs/debug.rst new file mode 100644 index 0000000..25a9f0b --- /dev/null +++ b/docs/debug.rst @@ -0,0 +1,101 @@ +Debugging Applications +====================== + +.. module:: werkzeug.debug + +Depending on the WSGI gateway/server, exceptions are handled +differently. Most of the time, exceptions go to stderr or the error log, +and a generic "500 Internal Server Error" message is displayed. + +Since this is not the best debugging environment, Werkzeug provides a +WSGI middleware that renders nice tracebacks, optionally with an +interactive debug console to execute code in any frame. + +.. danger:: + + The debugger allows the execution of arbitrary code which makes it a + major security risk. **The debugger must never be used on production + machines. We cannot stress this enough. Do not enable the debugger + in production.** + +.. note:: + + The interactive debugger does not work in forking environments, such + as a server that starts multiple processes. Most such environments + are production servers, where the debugger should not be enabled + anyway. + + +Enabling the Debugger +--------------------- + +Enable the debugger by wrapping the application with the +:class:`DebuggedApplication` middleware. Alternatively, you can pass +``use_debugger=True`` to :func:`run_simple` and it will do that for you. + +.. autoclass:: DebuggedApplication + + +Using the Debugger +------------------ + +Once enabled and an error happens during a request you will see a +detailed traceback instead of a generic "internal server error". The +traceback is still output to the terminal as well. + +The error message is displayed at the top. Clicking it jumps to the +bottom of the traceback. Frames that represent user code, as opposed to +built-ins or installed packages, are highlighted blue. Clicking a +frame will show more lines for context, clicking again will hide them. 
+ +If you have the ``evalex`` feature enabled you can get a console for +every frame in the traceback by hovering over a frame and clicking the +console icon that appears at the right. Once clicked a console opens +where you can execute Python code in: + +.. image:: _static/debug-screenshot.png + :alt: a screenshot of the interactive debugger + :align: center + +Inside the interactive consoles you can execute any kind of Python code. +Unlike regular Python consoles the output of the object reprs is colored +and stripped to a reasonable size by default. If the output is longer +than what the console decides to display a small plus sign is added to +the repr and a click will expand the repr. + +To display all variables that are defined in the current frame you can +use the ``dump()`` function. You can call it without arguments to get a +detailed list of all variables and their values, or with an object as +argument to get a detailed list of all the attributes it has. + + +Debugger PIN +------------ + +Starting with Werkzeug 0.11 the debug console is protected by a PIN. +This is a security helper to make it less likely for the debugger to be +exploited if you forget to disable it when deploying to production. The +PIN based authentication is enabled by default. + +The first time a console is opened, a dialog will prompt for a PIN that +is printed to the command line. The PIN is generated in a stable way +that is specific to the project. An explicit PIN can be provided through +the environment variable ``WERKZEUG_DEBUG_PIN``. This can be set to a +number and will become the PIN. This variable can also be set to the +value ``off`` to disable the PIN check entirely. + +If an incorrect PIN is entered too many times the server needs to be +restarted. + +**This feature is not meant to entirely secure the debugger. It is +intended to make it harder for an attacker to exploit the debugger. 
+Never enable the debugger in production.** + + +Pasting Errors +-------------- + +If you click on the "Traceback (most recent call last)" header, the +view switches to a traditional text-based traceback. You can copy and +paste this in order to provide information when asking a question or +reporting an issue. diff --git a/docs/deployment/apache-httpd.rst b/docs/deployment/apache-httpd.rst new file mode 100644 index 0000000..42fc01f --- /dev/null +++ b/docs/deployment/apache-httpd.rst @@ -0,0 +1,82 @@ +Apache httpd +============ + +`Apache httpd`_ is a fast, production level HTTP server. When serving +your application with one of the WSGI servers listed in :doc:`index`, it +is often good or necessary to put a dedicated HTTP server in front of +it. This "reverse proxy" can handle incoming requests, TLS, and other +security and performance concerns better than the WSGI server. + +httpd can be installed using your system package manager, or a pre-built +executable for Windows. Installing and running httpd itself is outside +the scope of this doc. This page outlines the basics of configuring +httpd to proxy your application. Be sure to read its documentation to +understand what features are available. + +.. _Apache httpd: https://httpd.apache.org/ + + +Domain Name +----------- + +Acquiring and configuring a domain name is outside the scope of this +doc. In general, you will buy a domain name from a registrar, pay for +server space with a hosting provider, and then point your registrar +at the hosting provider's name servers. + +To simulate this, you can also edit your ``hosts`` file, located at +``/etc/hosts`` on Linux. Add a line that associates a name with the +local IP. + +Modern Linux systems may be configured to treat any domain name that +ends with ``.localhost`` like this without adding it to the ``hosts`` +file. + +.. 
code-block:: python + :caption: ``/etc/hosts`` + + 127.0.0.1 hello.localhost + + +Configuration +------------- + +The httpd configuration is located at ``/etc/httpd/conf/httpd.conf`` on +Linux. It may be different depending on your operating system. Check the +docs and look for ``httpd.conf``. + +Remove or comment out any existing ``DocumentRoot`` directive. Add the +config lines below. We'll assume the WSGI server is listening locally at +``http://127.0.0.1:8000``. + +.. code-block:: apache + :caption: ``/etc/httpd/conf/httpd.conf`` + + LoadModule proxy_module modules/mod_proxy.so + LoadModule proxy_http_module modules/mod_proxy_http.so + ProxyPass / http://127.0.0.1:8000/ + RequestHeader set X-Forwarded-Proto http + RequestHeader set X-Forwarded-Prefix / + +The ``LoadModule`` lines might already exist. If so, make sure they are +uncommented instead of adding them manually. + +Then :doc:`proxy_fix` so that your application uses the ``X-Forwarded`` +headers. ``X-Forwarded-For`` and ``X-Forwarded-Host`` are automatically +set by ``ProxyPass``. + + +Static Files +------------ + +If your application has static files such as JavaScript, CSS, and +images, it will be more efficient to let httpd serve them directly +rather than going through the Python application. + +Assuming the static files are expected to be available under the +``/static/`` URL, and are stored at ``/home/project/static/``, add the +following to the config above. + +.. code-block:: apache + + Alias /static/ /home/project/static/ diff --git a/docs/deployment/eventlet.rst b/docs/deployment/eventlet.rst new file mode 100644 index 0000000..243be5e --- /dev/null +++ b/docs/deployment/eventlet.rst @@ -0,0 +1,80 @@ +eventlet +======== + +Prefer using :doc:`gunicorn` with eventlet workers rather than using +`eventlet`_ directly. Gunicorn provides a much more configurable and +production-tested server.
+ +`eventlet`_ allows writing asynchronous, coroutine-based code that looks +like standard synchronous Python. It uses `greenlet`_ to enable task +switching without writing ``async/await`` or using ``asyncio``. + +:doc:`gevent` is another library that does the same thing. Certain +dependencies you have, or other considerations, may affect which of the +two you choose to use. + +eventlet provides a WSGI server that can handle many connections at once +instead of one per worker process. You must actually use eventlet in +your own code to see any benefit to using the server. + +.. _eventlet: https://eventlet.net/ +.. _greenlet: https://greenlet.readthedocs.io/en/latest/ + + +Installing +---------- + +When using eventlet, greenlet>=1.0 is required, otherwise context locals +such as ``request`` will not work as expected. When using PyPy, +PyPy>=7.3.7 is required. + +Create a virtualenv, install your application, then install +``eventlet``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install eventlet + + +Running +------- + +To use eventlet to serve your application, write a script that imports +its ``wsgi.server``, as well as your app or app factory. + +.. code-block:: python + :caption: ``wsgi.py`` + + import eventlet + from eventlet import wsgi + from hello import create_app + + app = create_app() + wsgi.server(eventlet.listen(("127.0.0.1", 8000)), app) + +.. code-block:: text + + $ python wsgi.py + (x) wsgi starting up on http://127.0.0.1:8000 + + +Binding Externally +------------------ + +eventlet should not be run as root because it would cause your +application code to run as root, which is not secure. However, this +means it will not be possible to bind to port 80 or 443. Instead, a +reverse proxy such as :doc:`nginx` or :doc:`apache-httpd` should be used +in front of eventlet.
+ +You can bind to all external IPs on a non-privileged port by using +``0.0.0.0`` in the server arguments shown in the previous section. +Don't do this when using a reverse proxy setup, otherwise it will be +possible to bypass the proxy. + +``0.0.0.0`` is not a valid address to navigate to, you'd use a specific +IP address in your browser. diff --git a/docs/deployment/gevent.rst b/docs/deployment/gevent.rst new file mode 100644 index 0000000..aae63e8 --- /dev/null +++ b/docs/deployment/gevent.rst @@ -0,0 +1,80 @@ +gevent +====== + +Prefer using :doc:`gunicorn` or :doc:`uwsgi` with gevent workers rather +than using `gevent`_ directly. Gunicorn and uWSGI provide much more +configurable and production-tested servers. + +`gevent`_ allows writing asynchronous, coroutine-based code that looks +like standard synchronous Python. It uses `greenlet`_ to enable task +switching without writing ``async/await`` or using ``asyncio``. + +:doc:`eventlet` is another library that does the same thing. Certain +dependencies you have, or other considerations, may affect which of the +two you choose to use. + +gevent provides a WSGI server that can handle many connections at once +instead of one per worker process. You must actually use gevent in your +own code to see any benefit to using the server. + +.. _gevent: https://www.gevent.org/ +.. _greenlet: https://greenlet.readthedocs.io/en/latest/ + + +Installing +---------- + +When using gevent, greenlet>=1.0 is required, otherwise context locals +such as ``request`` will not work as expected. When using PyPy, +PyPy>=7.3.7 is required. + +Create a virtualenv, install your application, then install ``gevent``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install gevent + + +Running +------- + +To use gevent to serve your application, write a script that imports its +``WSGIServer``, as well as your app or app factory. + +.. 
code-block:: python + :caption: ``wsgi.py`` + + from gevent.pywsgi import WSGIServer + from hello import create_app + + app = create_app() + http_server = WSGIServer(("127.0.0.1", 8000), app) + http_server.serve_forever() + +.. code-block:: text + + $ python wsgi.py + +No output is shown when the server starts. + + +Binding Externally +------------------ + +gevent should not be run as root because it would cause your +application code to run as root, which is not secure. However, this +means it will not be possible to bind to port 80 or 443. Instead, a +reverse proxy such as :doc:`nginx` or :doc:`apache-httpd` should be used +in front of gevent. + +You can bind to all external IPs on a non-privileged port by using +``0.0.0.0`` in the server arguments shown in the previous section. Don't +do this when using a reverse proxy setup, otherwise it will be possible +to bypass the proxy. + +``0.0.0.0`` is not a valid address to navigate to, you'd use a specific +IP address in your browser. diff --git a/docs/deployment/gunicorn.rst b/docs/deployment/gunicorn.rst new file mode 100644 index 0000000..82fe8ab --- /dev/null +++ b/docs/deployment/gunicorn.rst @@ -0,0 +1,130 @@ +Gunicorn +======== + +`Gunicorn`_ is a pure Python WSGI server with simple configuration and +multiple worker implementations for performance tuning. + +* It tends to integrate easily with hosting platforms. +* It does not support Windows (but does run on WSL). +* It is easy to install as it does not require additional dependencies + or compilation. +* It has built-in async worker support using gevent or eventlet. + +This page outlines the basics of running Gunicorn. Be sure to read its +`documentation`_ and use ``gunicorn --help`` to understand what features +are available. + +.. _Gunicorn: https://gunicorn.org/ +.. _documentation: https://docs.gunicorn.org/ + + +Installing +---------- + +Gunicorn is easy to install, as it does not require external +dependencies or compilation. 
It runs on Windows only under WSL. + +Create a virtualenv, install your application, then install +``gunicorn``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install gunicorn + + +Running +------- + +The only required argument to Gunicorn tells it how to load your +application. The syntax is ``{module_import}:{app_variable}``. +``module_import`` is the dotted import name to the module with your +application. ``app_variable`` is the variable with the application. It +can also be a function call (with any arguments) if you're using the +app factory pattern. + +.. code-block:: text + + # equivalent to 'from hello import app' + $ gunicorn -w 4 'hello:app' + + # equivalent to 'from hello import create_app; create_app()' + $ gunicorn -w 4 'hello:create_app()' + + Starting gunicorn 20.1.0 + Listening at: http://127.0.0.1:8000 (x) + Using worker: sync + Booting worker with pid: x + Booting worker with pid: x + Booting worker with pid: x + Booting worker with pid: x + +The ``-w`` option specifies the number of processes to run; a starting +value could be ``CPU * 2``. The default is only 1 worker, which is +probably not what you want for the default worker type. + +Logs for each request aren't shown by default, only worker info and +errors are shown. To show access logs on stdout, use the +``--access-logfile=-`` option. + + +Binding Externally +------------------ + +Gunicorn should not be run as root because it would cause your +application code to run as root, which is not secure. However, this +means it will not be possible to bind to port 80 or 443. Instead, a +reverse proxy such as :doc:`nginx` or :doc:`apache-httpd` should be used +in front of Gunicorn. + +You can bind to all external IPs on a non-privileged port using the +``-b 0.0.0.0`` option. Don't do this when using a reverse proxy setup, +otherwise it will be possible to bypass the proxy. + +.. 
code-block:: text + + $ gunicorn -w 4 -b 0.0.0.0 'hello:create_app()' + Listening at: http://0.0.0.0:8000 (x) + +``0.0.0.0`` is not a valid address to navigate to, you'd use a specific +IP address in your browser. + + +Async with gevent or eventlet +----------------------------- + +The default sync worker is appropriate for many use cases. If you need +asynchronous support, Gunicorn provides workers using either `gevent`_ +or `eventlet`_. This is not the same as Python's ``async/await``, or the +ASGI server spec. You must actually use gevent/eventlet in your own code +to see any benefit to using the workers. + +When using either gevent or eventlet, greenlet>=1.0 is required, +otherwise context locals such as ``request`` will not work as expected. +When using PyPy, PyPy>=7.3.7 is required. + +To use gevent: + +.. code-block:: text + + $ gunicorn -k gevent 'hello:create_app()' + Starting gunicorn 20.1.0 + Listening at: http://127.0.0.1:8000 (x) + Using worker: gevent + Booting worker with pid: x + +To use eventlet: + +.. code-block:: text + + $ gunicorn -k eventlet 'hello:create_app()' + Starting gunicorn 20.1.0 + Listening at: http://127.0.0.1:8000 (x) + Using worker: eventlet + Booting worker with pid: x + +.. _gevent: https://www.gevent.org/ +.. _eventlet: https://eventlet.net/ diff --git a/docs/deployment/index.rst b/docs/deployment/index.rst new file mode 100644 index 0000000..f884f08 --- /dev/null +++ b/docs/deployment/index.rst @@ -0,0 +1,71 @@ +Deploying to Production +======================= + +After developing your application, you'll want to make it available +publicly to other users. When you're developing locally, you're probably +using the built-in development server, debugger, and reloader. These +should not be used in production. Instead, you should use a dedicated +WSGI server or hosting platform, some of which will be described here. 
+ +"Production" means "not development", which applies whether you're +serving your application publicly to millions of users or privately / +locally to a single user. **Do not use the development server when +deploying to production. It is intended for use only during local +development. It is not designed to be particularly secure, stable, or +efficient.** + +Self-Hosted Options +------------------- + +Werkzeug is a WSGI *application*. A WSGI *server* is used to run the +application, converting incoming HTTP requests to the standard WSGI +environ, and converting outgoing WSGI responses to HTTP responses. + +The primary goal of these docs is to familiarize you with the concepts +involved in running a WSGI application using a production WSGI server +and HTTP server. There are many WSGI servers and HTTP servers, with many +configuration possibilities. The pages below discuss the most common +servers, and show the basics of running each one. The next section +discusses platforms that can manage this for you. + +.. toctree:: + :maxdepth: 1 + + gunicorn + waitress + mod_wsgi + uwsgi + gevent + eventlet + +WSGI servers have HTTP servers built-in. However, a dedicated HTTP +server may be safer, more efficient, or more capable. Putting an HTTP +server in front of the WSGI server is called a "reverse proxy." + +.. toctree:: + :maxdepth: 1 + + proxy_fix + nginx + apache-httpd + +This list is not exhaustive, and you should evaluate these and other +servers based on your application's needs. Different servers will have +different capabilities, configuration, and support. + + +Hosting Platforms +----------------- + +There are many services available for hosting web applications without +needing to maintain your own server, networking, domain, etc. Some +services may have a free tier up to a certain time or bandwidth. Many of +these services use one of the WSGI servers described above, or a similar +interface. + +You should evaluate services based on your application's needs. 
+Different services will have different capabilities, configuration, +pricing, and support. + +You'll probably need to :doc:`proxy_fix` when using most hosting +platforms. diff --git a/docs/deployment/mod_wsgi.rst b/docs/deployment/mod_wsgi.rst new file mode 100644 index 0000000..f32631b --- /dev/null +++ b/docs/deployment/mod_wsgi.rst @@ -0,0 +1,94 @@ +mod_wsgi +======== + +`mod_wsgi`_ is a WSGI server integrated with the `Apache httpd`_ server. +The modern `mod_wsgi-express`_ command makes it easy to configure and +start the server without needing to write Apache httpd configuration. + +* Tightly integrated with Apache httpd. +* Supports Windows directly. +* Requires a compiler and the Apache development headers to install. +* Does not require a reverse proxy setup. + +This page outlines the basics of running mod_wsgi-express, not the more +complex installation and configuration with httpd. Be sure to read the +`mod_wsgi-express`_, `mod_wsgi`_, and `Apache httpd`_ documentation to +understand what features are available. + +.. _mod_wsgi-express: https://pypi.org/project/mod-wsgi/ +.. _mod_wsgi: https://modwsgi.readthedocs.io/ +.. _Apache httpd: https://httpd.apache.org/ + + +Installing +---------- + +Installing mod_wsgi requires a compiler and the Apache server and +development headers installed. You will get an error if they are not. +How to install them depends on the OS and package manager that you use. + +Create a virtualenv, install your application, then install +``mod_wsgi``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install mod_wsgi + + +Running +------- + +The only argument to ``mod_wsgi-express`` specifies a script containing +your application, which must be called ``application``. You can +write a small script to import your app with this name, or to create it +if using the app factory pattern. + +.. 
code-block:: python + :caption: ``wsgi.py`` + + from hello import app + + application = app + +.. code-block:: python + :caption: ``wsgi.py`` + + from hello import create_app + + application = create_app() + +Now run the ``mod_wsgi-express start-server`` command. + +.. code-block:: text + + $ mod_wsgi-express start-server wsgi.py --processes 4 + +The ``--processes`` option specifies the number of worker processes to +run; a starting value could be ``CPU * 2``. + +Logs for each request aren't shown in the terminal. If an error occurs, +its information is written to the error log file shown when starting the +server. + + +Binding Externally +------------------ + +Unlike the other WSGI servers in these docs, mod_wsgi can be run as +root to bind to privileged ports like 80 and 443. However, it must be +configured to drop permissions to a different user and group for the +worker processes. + +For example, if you created a ``hello`` user and group, you should +install your virtualenv and application as that user, then tell +mod_wsgi to drop to that user after starting. + +.. code-block:: text + + $ sudo /home/hello/venv/bin/mod_wsgi-express start-server \ + /home/hello/wsgi.py \ + --user hello --group hello --port 80 --processes 4 diff --git a/docs/deployment/nginx.rst b/docs/deployment/nginx.rst new file mode 100644 index 0000000..5b136e4 --- /dev/null +++ b/docs/deployment/nginx.rst @@ -0,0 +1,87 @@ +nginx +===== + +`nginx`_ is a fast, production level HTTP server. When serving your +application with one of the WSGI servers listed in :doc:`index`, it is +often good or necessary to put a dedicated HTTP server in front of it. +This "reverse proxy" can handle incoming requests, TLS, and other +security and performance concerns better than the WSGI server. + +Nginx can be installed using your system package manager, or a pre-built +executable for Windows. Installing and running Nginx itself is outside +the scope of this doc.
This page outlines the basics of configuring +Nginx to proxy your application. Be sure to read its documentation to +understand what features are available. + +.. _nginx: https://nginx.org/ + + +Domain Name +----------- + +Acquiring and configuring a domain name is outside the scope of this +doc. In general, you will buy a domain name from a registrar, pay for +server space with a hosting provider, and then point your registrar +at the hosting provider's name servers. + +To simulate this, you can also edit your ``hosts`` file, located at +``/etc/hosts`` on Linux. Add a line that associates a name with the +local IP. + +Modern Linux systems may be configured to treat any domain name that +ends with ``.localhost`` like this without adding it to the ``hosts`` +file. + +.. code-block:: python + :caption: ``/etc/hosts`` + + 127.0.0.1 hello.localhost + + +Configuration +------------- + +The nginx configuration is located at ``/etc/nginx/nginx.conf`` on +Linux. It may be different depending on your operating system. Check the +docs and look for ``nginx.conf``. + +Remove or comment out any existing ``server`` section. Add a ``server`` +section and use the ``proxy_pass`` directive to point to the address the +WSGI server is listening on. We'll assume the WSGI server is listening +locally at ``http://127.0.0.1:8000``. + +.. code-block:: nginx + :caption: ``/etc/nginx/nginx.conf`` + + server { + listen 80; + server_name _; + + location / { + proxy_pass http://127.0.0.1:8000/; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Prefix /; + } + } + +Then :doc:`proxy_fix` so that your application uses these headers. + + +Static Files +------------ + +If your application has static files such as JavaScript, CSS, and +images, it will be more efficient to let Nginx serve them directly +rather than going through the Python application.
+ +Assuming the static files are expected to be available under the +``/static/`` URL, and are stored at ``/home/project/static/``, add the +following to the ``server`` block above. + +.. code-block:: nginx + + location /static { + alias /home/project/static; + } diff --git a/docs/deployment/proxy_fix.rst b/docs/deployment/proxy_fix.rst new file mode 100644 index 0000000..7b163b1 --- /dev/null +++ b/docs/deployment/proxy_fix.rst @@ -0,0 +1,33 @@ +Tell Werkzeug it is Behind a Proxy +================================== + +When using a reverse proxy, or many Python hosting platforms, the proxy +will intercept and forward all external requests to the local WSGI +server. + +From the WSGI server and application's perspectives, requests are now +coming from the HTTP server to the local address, rather than from +the remote address to the external server address. + +HTTP servers should set ``X-Forwarded-`` headers to pass on the real +values to the application. The application can then be told to trust and +use those values by wrapping it with the +:doc:`../middleware/proxy_fix` middleware provided by Werkzeug. + +This middleware should only be used if the application is actually +behind a proxy, and should be configured with the number of proxies that +are chained in front of it. Not all proxies set all the headers. Since +incoming headers can be faked, you must set how many proxies are setting +each header so the middleware knows what to trust. + +.. code-block:: python + + from werkzeug.middleware.proxy_fix import ProxyFix + + app.wsgi_app = ProxyFix( + app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1 + ) + +Remember, only apply this middleware if you are behind a proxy, and set +the correct number of proxies that set each header. It can be a security +issue if you get this configuration wrong. 
diff --git a/docs/deployment/uwsgi.rst b/docs/deployment/uwsgi.rst new file mode 100644 index 0000000..2da5efe --- /dev/null +++ b/docs/deployment/uwsgi.rst @@ -0,0 +1,145 @@ +uWSGI +===== + +`uWSGI`_ is a fast, compiled server suite with extensive configuration +and capabilities beyond a basic server. + +* It can be very performant due to being a compiled program. +* It is complex to configure beyond the basic application, and has so + many options that it can be difficult for beginners to understand. +* It does not support Windows (but does run on WSL). +* It requires a compiler to install in some cases. + +This page outlines the basics of running uWSGI. Be sure to read its +documentation to understand what features are available. + +.. _uWSGI: https://uwsgi-docs.readthedocs.io/en/latest/ + + +Installing +---------- + +uWSGI has multiple ways to install it. The most straightforward is to +install the ``pyuwsgi`` package, which provides precompiled wheels for +common platforms. However, it does not provide SSL support, which can be +provided with a reverse proxy instead. + +Create a virtualenv, install your application, then install ``pyuwsgi``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install pyuwsgi + +If you have a compiler available, you can install the ``uwsgi`` package +instead. Or install the ``pyuwsgi`` package from sdist instead of wheel. +Either method will include SSL support. + +.. code-block:: text + + $ pip install uwsgi + + # or + $ pip install --no-binary pyuwsgi pyuwsgi + + +Running +------- + +The most basic way to run uWSGI is to tell it to start an HTTP server +and import your application. + +.. 
code-block:: text + + $ uwsgi --http 127.0.0.1:8000 --master -p 4 -w hello:app + + *** Starting uWSGI 2.0.20 (64bit) on [x] *** + *** Operational MODE: preforking *** + mounting hello:app on / + spawned uWSGI master process (pid: x) + spawned uWSGI worker 1 (pid: x, cores: 1) + spawned uWSGI worker 2 (pid: x, cores: 1) + spawned uWSGI worker 3 (pid: x, cores: 1) + spawned uWSGI worker 4 (pid: x, cores: 1) + spawned uWSGI http 1 (pid: x) + +If you're using the app factory pattern, you'll need to create a small +Python file to create the app, then point uWSGI at that. + +.. code-block:: python + :caption: ``wsgi.py`` + + from hello import create_app + + app = create_app() + +.. code-block:: text + + $ uwsgi --http 127.0.0.1:8000 --master -p 4 -w wsgi:app + +The ``--http`` option starts an HTTP server at 127.0.0.1 port 8000. The +``--master`` option specifies the standard worker manager. The ``-p`` +option starts 4 worker processes; a starting value could be ``CPU * 2``. +The ``-w`` option tells uWSGI how to import your application. + + +Binding Externally +------------------ + +uWSGI should not be run as root with the configuration shown in this doc +because it would cause your application code to run as root, which is +not secure. However, this means it will not be possible to bind to port +80 or 443. Instead, a reverse proxy such as :doc:`nginx` or +:doc:`apache-httpd` should be used in front of uWSGI. It is possible to +run uWSGI as root securely, but that is beyond the scope of this doc. + +uWSGI has optimized integration with `Nginx uWSGI`_ and +`Apache mod_proxy_uwsgi`_, and possibly other servers, instead of using +a standard HTTP proxy. That configuration is beyond the scope of this +doc, see the links for more information. + +.. _Nginx uWSGI: https://uwsgi-docs.readthedocs.io/en/latest/Nginx.html +..
_Apache mod_proxy_uwsgi: https://uwsgi-docs.readthedocs.io/en/latest/Apache.html#mod-proxy-uwsgi + +You can bind to all external IPs on a non-privileged port using the +``--http 0.0.0.0:8000`` option. Don't do this when using a reverse proxy +setup, otherwise it will be possible to bypass the proxy. + +.. code-block:: text + + $ uwsgi --http 0.0.0.0:8000 --master -p 4 -w wsgi:app + +``0.0.0.0`` is not a valid address to navigate to, you'd use a specific +IP address in your browser. + + +Async with gevent +----------------- + +The default sync worker is appropriate for many use cases. If you need +asynchronous support, uWSGI provides a `gevent`_ worker. This is not the +same as Python's ``async/await``, or the ASGI server spec. You must +actually use gevent in your own code to see any benefit to using the +worker. + +When using gevent, greenlet>=1.0 is required, otherwise context locals +such as ``request`` will not work as expected. When using PyPy, +PyPy>=7.3.7 is required. + +.. code-block:: text + + $ uwsgi --http 127.0.0.1:8000 --master --gevent 100 -w wsgi:app + + *** Starting uWSGI 2.0.20 (64bit) on [x] *** + *** Operational MODE: async *** + mounting hello:app on / + spawned uWSGI master process (pid: x) + spawned uWSGI worker 1 (pid: x, cores: 100) + spawned uWSGI http 1 (pid: x) + *** running gevent loop engine [addr:x] *** + + +.. _gevent: https://www.gevent.org/ diff --git a/docs/deployment/waitress.rst b/docs/deployment/waitress.rst new file mode 100644 index 0000000..b44223d --- /dev/null +++ b/docs/deployment/waitress.rst @@ -0,0 +1,75 @@ +Waitress +======== + +`Waitress`_ is a pure Python WSGI server. + +* It is easy to configure. +* It supports Windows directly. +* It is easy to install as it does not require additional dependencies + or compilation. +* It does not support streaming requests, full request data is always + buffered. +* It uses a single process with multiple thread workers. + +This page outlines the basics of running Waitress. 
Be sure to read its +documentation and ``waitress-serve --help`` to understand what features +are available. + +.. _Waitress: https://docs.pylonsproject.org/projects/waitress/ + + +Installing +---------- + +Create a virtualenv, install your application, then install +``waitress``. + +.. code-block:: text + + $ cd hello-app + $ python -m venv venv + $ . venv/bin/activate + $ pip install . # install your application + $ pip install waitress + + +Running +------- + +The only required argument to ``waitress-serve`` tells it how to load +your application. The syntax is ``{module}:{app}``. ``module`` is +the dotted import name to the module with your application. ``app`` is +the variable with the application. If you're using the app factory +pattern, use ``--call {module}:{factory}`` instead. + +.. code-block:: text + + # equivalent to 'from hello import app' + $ waitress-serve hello:app --host 127.0.0.1 + + # equivalent to 'from hello import create_app; create_app()' + $ waitress-serve --call hello:create_app --host 127.0.0.1 + + Serving on http://127.0.0.1:8080 + +The ``--host`` option binds the server to local ``127.0.0.1`` only. + +Logs for each request aren't shown, only errors are shown. Logging can +be configured through the Python interface instead of the command line. + + +Binding Externally +------------------ + +Waitress should not be run as root because it would cause your +application code to run as root, which is not secure. However, this +means it will not be possible to bind to port 80 or 443. Instead, a +reverse proxy such as :doc:`nginx` or :doc:`apache-httpd` should be used +in front of Waitress. + +You can bind to all external IPs on a non-privileged port by not +specifying the ``--host`` option. Don't do this when using a reverse +proxy setup, otherwise it will be possible to bypass the proxy. + +``0.0.0.0`` is not a valid address to navigate to, you'd use a specific +IP address in your browser.
diff --git a/docs/exceptions.rst b/docs/exceptions.rst new file mode 100644 index 0000000..88a309d --- /dev/null +++ b/docs/exceptions.rst @@ -0,0 +1,155 @@ +=============== +HTTP Exceptions +=============== + +.. automodule:: werkzeug.exceptions + + +Error Classes +============= + +The following error classes exist in Werkzeug: + +.. autoexception:: BadRequest + +.. autoexception:: Unauthorized + +.. autoexception:: Forbidden + +.. autoexception:: NotFound + +.. autoexception:: MethodNotAllowed + +.. autoexception:: NotAcceptable + +.. autoexception:: RequestTimeout + +.. autoexception:: Conflict + +.. autoexception:: Gone + +.. autoexception:: LengthRequired + +.. autoexception:: PreconditionFailed + +.. autoexception:: RequestEntityTooLarge + +.. autoexception:: RequestURITooLarge + +.. autoexception:: UnsupportedMediaType + +.. autoexception:: RequestedRangeNotSatisfiable + +.. autoexception:: ExpectationFailed + +.. autoexception:: ImATeapot + +.. autoexception:: UnprocessableEntity + +.. autoexception:: Locked + +.. autoexception:: FailedDependency + +.. autoexception:: PreconditionRequired + +.. autoexception:: TooManyRequests + +.. autoexception:: RequestHeaderFieldsTooLarge + +.. autoexception:: UnavailableForLegalReasons + +.. autoexception:: InternalServerError + :members: + +.. autoexception:: NotImplemented + +.. autoexception:: BadGateway + +.. autoexception:: ServiceUnavailable + +.. autoexception:: GatewayTimeout + +.. autoexception:: HTTPVersionNotSupported + +.. autoexception:: ClientDisconnected + +.. autoexception:: SecurityError + + +Baseclass +========= + +All the exceptions implement this common interface: + +.. autoexception:: HTTPException + :members: get_response, __call__ + + +Special HTTP Exceptions +======================= + +Starting with Werkzeug 0.3 some of the builtin classes raise exceptions that +look like regular python exceptions (eg :exc:`KeyError`) but are +:exc:`BadRequest` HTTP exceptions at the same time. 
This decision was made +to simplify a common pattern where you want to abort if the client tampered +with the submitted form data in a way that the application can't recover +properly and should abort with ``400 BAD REQUEST``. + +Assuming the application catches all HTTP exceptions and reacts to them +properly a view function could do the following safely and doesn't have to +check if the keys exist:: + + def new_post(request): + post = Post(title=request.form['title'], body=request.form['body']) + post.save() + return redirect(post.url) + +If `title` or `body` are missing in the form, a special key error will be +raised which behaves like a :exc:`KeyError` but also a :exc:`BadRequest` +exception. + +.. autoexception:: BadRequestKeyError + + +Simple Aborting +=============== + +Sometimes it's convenient to just raise an exception by the error code, +without importing the exception and looking up the name etc. For this +purpose there is the :func:`abort` function. + +.. autofunction:: abort + +If you want to use this functionality with custom exceptions you can +create an instance of the aborter class: + +.. autoclass:: Aborter + + +Custom Errors +============= + +As you can see from the list above not all status codes are available as +errors. Especially redirects and other non 200 status codes that do not +represent errors are missing. For redirects you can use the :func:`redirect` +function from the utilities. + +If you want to add an error yourself you can subclass :exc:`HTTPException`:: + + from werkzeug.exceptions import HTTPException + + class PaymentRequired(HTTPException): + code = 402 + description = '

<p>Payment required.</p>

' + +This is the minimal code you need for your own exception. If you want to +add more logic to the errors you can override the +:meth:`~HTTPException.get_description`, :meth:`~HTTPException.get_body`, +:meth:`~HTTPException.get_headers` and :meth:`~HTTPException.get_response` +methods. In any case you should have a look at the sourcecode of the +exceptions module. + +You can override the default description in the constructor with the +``description`` parameter:: + + raise BadRequest(description='Request failed because X was not present') diff --git a/docs/http.rst b/docs/http.rst new file mode 100644 index 0000000..cbf4e04 --- /dev/null +++ b/docs/http.rst @@ -0,0 +1,169 @@ +============== +HTTP Utilities +============== + +.. module:: werkzeug.http + +Werkzeug provides a couple of functions to parse and generate HTTP headers +that are useful when implementing WSGI middlewares or whenever you are +operating on a lower level layer. All this functionality is also exposed +from request and response objects. + + +Datetime Functions +================== + +These functions simplify working with times in an HTTP context. Werkzeug +produces timezone-aware :class:`~datetime.datetime` objects in UTC. When +passing datetime objects to Werkzeug, it assumes any naive datetime is +in UTC. + +When comparing datetime values from Werkzeug, your own datetime objects +must also be timezone-aware, or you must make the values from Werkzeug +naive. + +* ``dt = datetime.now(timezone.utc)`` gets the current time in UTC. +* ``dt = datetime(..., tzinfo=timezone.utc)`` creates a time in UTC. +* ``dt = dt.replace(tzinfo=timezone.utc)`` makes a naive object aware + by assuming it's in UTC. +* ``dt = dt.replace(tzinfo=None)`` makes an aware object naive. + +.. autofunction:: parse_date + +.. autofunction:: http_date + + +Header Parsing +============== + +The following functions can be used to parse incoming HTTP headers. 
+Because Python does not provide data structures with the semantics required +by :rfc:`2616`, Werkzeug implements some custom data structures that are +:ref:`documented separately <http-datastructures>`. + +.. autofunction:: parse_options_header + +.. autofunction:: parse_set_header + +.. autofunction:: parse_list_header + +.. autofunction:: parse_dict_header + +.. autofunction:: parse_accept_header(value, [class]) + +.. autofunction:: parse_cache_control_header + +.. autofunction:: parse_authorization_header + +.. autofunction:: parse_www_authenticate_header + +.. autofunction:: parse_if_range_header + +.. autofunction:: parse_range_header + +.. autofunction:: parse_content_range_header + +Header Utilities +================ + +The following utilities operate on HTTP headers well but do not parse +them. They are useful if you're dealing with conditional responses or if +you want to proxy arbitrary requests but want to remove WSGI-unsupported +hop-by-hop headers. Also there is a function to create HTTP header +strings from the parsed data. + +.. autofunction:: is_entity_header + +.. autofunction:: is_hop_by_hop_header + +.. autofunction:: remove_entity_headers + +.. autofunction:: remove_hop_by_hop_headers + +.. autofunction:: is_byte_range_valid + +.. autofunction:: quote_header_value + +.. autofunction:: unquote_header_value + +.. autofunction:: dump_header + + +Cookies +======= + +.. autofunction:: parse_cookie + +.. autofunction:: dump_cookie + + +Conditional Response Helpers +============================ + +For conditional responses the following functions might be useful: + +.. autofunction:: parse_etags + +.. autofunction:: quote_etag + +.. autofunction:: unquote_etag + +.. autofunction:: generate_etag + +.. autofunction:: is_resource_modified + +Constants +========= + +.. data:: HTTP_STATUS_CODES + + A dict of status code -> default status message pairs. This is used + by the wrappers and other places where an integer status code is expanded + to a string throughout Werkzeug.
+ +Form Data Parsing +================= + +.. module:: werkzeug.formparser + +Werkzeug provides the form parsing functions separately from the request +object so that you can access form data from a plain WSGI environment. + +The following formats are currently supported by the form data parser: + +- `application/x-www-form-urlencoded` +- `multipart/form-data` + +Nested multipart is not currently supported (Werkzeug 0.9), but it isn't used +by any of the modern web browsers. + +Usage example: + +>>> from io import BytesIO +>>> from werkzeug.formparser import parse_form_data +>>> data = ( +... b'--foo\r\nContent-Disposition: form-data; name="test"\r\n' +... b"\r\nHello World!\r\n--foo--" +... ) +>>> environ = { +... "wsgi.input": BytesIO(data), +... "CONTENT_LENGTH": str(len(data)), +... "CONTENT_TYPE": "multipart/form-data; boundary=foo", +... "REQUEST_METHOD": "POST", +... } +>>> stream, form, files = parse_form_data(environ) +>>> stream.read() +b'' +>>> form['test'] +'Hello World!' +>>> not files +True + +Normally the WSGI environment is provided by the WSGI gateway with the +incoming data as part of it. If you want to generate such fake-WSGI +environments for unittesting you might want to use the +:func:`create_environ` function or the :class:`EnvironBuilder` instead. + +.. autoclass:: FormDataParser + +.. autofunction:: parse_form_data diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..c4f0019 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,78 @@ +Werkzeug +======== + +*werkzeug* German noun: "tool". +Etymology: *werk* ("work"), *zeug* ("stuff") + +Werkzeug is a comprehensive `WSGI`_ web application library. It began as +a simple collection of various utilities for WSGI applications and has +become one of the most advanced WSGI utility libraries. + +Werkzeug doesn't enforce any dependencies. It is up to the developer to +choose a template engine, database adapter, and even how to handle +requests. + +.. 
_WSGI: https://wsgi.readthedocs.io/en/latest/ + + +Getting Started +--------------- + +.. toctree:: + :maxdepth: 2 + + installation + tutorial + levels + quickstart + + +Serving and Testing +------------------- + +.. toctree:: + :maxdepth: 2 + + serving + test + debug + + +Reference +--------- + +.. toctree:: + :maxdepth: 2 + + wrappers + routing + wsgi + http + datastructures + utils + urls + local + middleware/index + exceptions + + +Deployment +---------- + +.. toctree:: + :maxdepth: 3 + + deployment/index + + +Additional Information +---------------------- + +.. toctree:: + :maxdepth: 2 + + terms + unicode + request_data + license + changes diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..9c5aa7f --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,111 @@ +Installation +============ + + +Python Version +-------------- + +We recommend using the latest version of Python. Werkzeug supports +Python 3.7 and newer. + + +Dependencies +------------ + +Werkzeug does not have any direct dependencies. + + +Optional dependencies +~~~~~~~~~~~~~~~~~~~~~ + +These distributions will not be installed automatically. Werkzeug will +detect and use them if you install them. + +* `Colorama`_ provides request log highlighting when using the + development server on Windows. This works automatically on other + systems. +* `Watchdog`_ provides a faster, more efficient reloader for the + development server. + +.. _Colorama: https://pypi.org/project/colorama/ +.. _Watchdog: https://pypi.org/project/watchdog/ + + +greenlet +~~~~~~~~ + +You may choose to use gevent or eventlet with your application. In this +case, greenlet>=1.0 is required. When using PyPy, PyPy>=7.3.7 is +required. + +These are not minimum supported versions, they only indicate the first +versions that added necessary features. You should use the latest +versions of each. 
+ + +Virtual environments +-------------------- + +Use a virtual environment to manage the dependencies for your project, +both in development and in production. + +What problem does a virtual environment solve? The more Python +projects you have, the more likely it is that you need to work with +different versions of Python libraries, or even Python itself. Newer +versions of libraries for one project can break compatibility in +another project. + +Virtual environments are independent groups of Python libraries, one for +each project. Packages installed for one project will not affect other +projects or the operating system's packages. + +Python comes bundled with the :mod:`venv` module to create virtual +environments. + + +Create an environment +~~~~~~~~~~~~~~~~~~~~~ + +Create a project folder and a :file:`venv` folder within: + +.. code-block:: sh + + mkdir myproject + cd myproject + python3 -m venv venv + +On Windows: + +.. code-block:: bat + + py -3 -m venv venv + + +Activate the environment +~~~~~~~~~~~~~~~~~~~~~~~~ + +Before you work on your project, activate the corresponding environment: + +.. code-block:: sh + + . venv/bin/activate + +On Windows: + +.. code-block:: bat + + venv\Scripts\activate + +Your shell prompt will change to show the name of the activated +environment. + + +Install Werkzeug +---------------- + +Within the activated environment, use the following command to install +Werkzeug: + +.. code-block:: sh + + pip install Werkzeug diff --git a/docs/levels.rst b/docs/levels.rst new file mode 100644 index 0000000..a07fd86 --- /dev/null +++ b/docs/levels.rst @@ -0,0 +1,72 @@ +========== +API Levels +========== + +.. currentmodule:: werkzeug + +Werkzeug is intended to be a utility rather than a framework. Because of that +the user-friendly API is separated from the lower-level API so that Werkzeug +can easily be used to extend another system. 
+ +All the functionality the :class:`Request` and :class:`Response` objects (aka +the "wrappers") provide is also available in small utility functions. + +Example +======= + +This example implements a small `Hello World` application that greets the +user with the name entered. + +.. code-block:: python + + from markupsafe import escape + from werkzeug.wrappers import Request, Response + + @Request.application + def hello_world(request): + result = ['Greeter'] + if request.method == 'POST': + result.append(f"

<h1>Hello {escape(request.form['name'])}!</h1>

") + result.append(''' +
+

Name: + +

+ ''') + return Response(''.join(result), mimetype='text/html') + +Alternatively the same application could be used without request and response +objects but by taking advantage of the parsing functions werkzeug provides:: + + from markupsafe import escape + from werkzeug.formparser import parse_form_data + + def hello_world(environ, start_response): + result = ['Greeter'] + if environ['REQUEST_METHOD'] == 'POST': + form = parse_form_data(environ)[1] + result.append(f"

<h1>Hello {escape(form['name'])}!</h1>

") + result.append(''' +
+

Name: + +

+ ''') + start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')]) + return [''.join(result).encode('utf-8')] + +High or Low? +============ + +Usually you want to use the high-level layer (the request and response +objects). But there are situations where this might not be what you want. + +For example you might be maintaining code for an application written in +Django or another framework and you have to parse HTTP headers. You can +utilize Werkzeug for that by accessing the lower-level HTTP header parsing +functions. + +Another situation where the low level parsing functions can be useful are +custom WSGI frameworks, unit-testing or modernizing an old CGI/mod_python +application to WSGI as well as WSGI middlewares where you want to keep the +overhead low. diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 0000000..a53a98c --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,4 @@ +BSD-3-Clause License +==================== + +.. include:: ../LICENSE.rst diff --git a/docs/local.rst b/docs/local.rst new file mode 100644 index 0000000..015b0e3 --- /dev/null +++ b/docs/local.rst @@ -0,0 +1,110 @@ +Context Locals +============== + +.. module:: werkzeug.local + +You may find that you have some data during each request that you want +to use across functions. Instead of passing these as arguments between +every function, you may want to access them as global data. However, +using global variables in Python web applications is not thread safe; +different workers might interfere with each others' data. + +Instead of storing common data during a request using global variables, +you must use context-local variables instead. A context local is +defined/imported globally, but the data it contains is specific to the +current thread, asyncio task, or greenlet. You won't accidentally get +or overwrite another worker's data. + +The current approach for storing per-context data in Python is the +:class:`contextvars` module. 
Context vars store data per thread, async +task, or greenlet. This replaces the older :class:`threading.local` +which only handled threads. + +Werkzeug provides wrappers around :class:`~contextvars.ContextVar` to +make it easier to work with. + + +Proxy Objects +============= + +:class:`LocalProxy` allows treating a context var as an object directly +instead of needing to use and check +:meth:`ContextVar.get() <contextvars.ContextVar.get>`. If the context +var is set, the local proxy will look and behave like the object the var +is set to. If it's not set, a ``RuntimeError`` is raised for most +operations. + +.. code-block:: python + + from contextvars import ContextVar + from werkzeug.local import LocalProxy + + _request_var = ContextVar("request") + request = LocalProxy(_request_var) + + from werkzeug.wrappers import Request + + @Request.application + def app(r): + _request_var.set(r) + check_auth() + ... + + from werkzeug.exceptions import Unauthorized + + def check_auth(): + if request.form["username"] != "admin": + raise Unauthorized() + +Accessing ``request`` will point to the specific request that each +server worker is handling. You can treat ``request`` just like an actual +``Request`` object. + +``bool(proxy)`` will always return ``False`` if the var is not set. If +you need access to the object directly instead of the proxy, you can get +it with the :meth:`~LocalProxy._get_current_object` method. + +.. autoclass:: LocalProxy + :members: _get_current_object + + +Stacks and Namespaces +===================== + +:class:`~contextvars.ContextVar` stores one value at a time. You may +find that you need to store a stack of items, or a namespace with +multiple attributes. A list or dict can be used for these, but using +them as context var values requires some extra care. Werkzeug provides +:class:`LocalStack` which wraps a list, and :class:`Local` which wraps a +dict. + +There is some amount of performance penalty associated with these +objects.
Because lists and dicts are mutable, :class:`LocalStack` and +:class:`Local` need to do extra work to ensure data isn't shared between +nested contexts. If possible, design your application to use +:class:`LocalProxy` around a context var directly. + +.. autoclass:: LocalStack + :members: push, pop, top, __call__ + +.. autoclass:: Local + :members: __call__ + + +Releasing Data +============== + +A previous implementation of ``Local`` used internal data structures +which could not be cleaned up automatically when each context ended. +Instead, the following utilities could be used to release the data. + +.. warning:: + + This should not be needed with the modern implementation, as the + data in context vars is automatically managed by Python. It is kept + for compatibility for now, but may be removed in the future. + +.. autoclass:: LocalManager + :members: cleanup, make_middleware, middleware + +.. autofunction:: release_local diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..7893348 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/middleware/dispatcher.rst b/docs/middleware/dispatcher.rst new file mode 100644 index 0000000..cc1cb3b --- /dev/null +++ b/docs/middleware/dispatcher.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware.dispatcher diff --git a/docs/middleware/http_proxy.rst b/docs/middleware/http_proxy.rst new file mode 100644 index 0000000..dcda2e8 --- /dev/null +++ b/docs/middleware/http_proxy.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware.http_proxy diff --git a/docs/middleware/index.rst b/docs/middleware/index.rst new file mode 100644 index 0000000..70cddee --- /dev/null +++ b/docs/middleware/index.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware diff --git a/docs/middleware/lint.rst b/docs/middleware/lint.rst new file mode 100644 index 0000000..e831572 --- /dev/null +++ b/docs/middleware/lint.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware.lint diff --git a/docs/middleware/profiler.rst b/docs/middleware/profiler.rst new file mode 100644 index 0000000..472a63a --- /dev/null +++ b/docs/middleware/profiler.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware.profiler diff --git a/docs/middleware/proxy_fix.rst b/docs/middleware/proxy_fix.rst new file mode 100644 index 0000000..6c6d22e --- /dev/null +++ b/docs/middleware/proxy_fix.rst @@ -0,0 +1 @@ +.. automodule:: werkzeug.middleware.proxy_fix diff --git a/docs/middleware/shared_data.rst b/docs/middleware/shared_data.rst new file mode 100644 index 0000000..4d56743 --- /dev/null +++ b/docs/middleware/shared_data.rst @@ -0,0 +1 @@ +.. 
automodule:: werkzeug.middleware.shared_data diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000..1568892 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,301 @@ +Quickstart +========== + +.. currentmodule:: werkzeug + +This part of the documentation shows how to use the most important parts of +Werkzeug. It's intended as a starting point for developers with basic +understanding of :pep:`3333` (WSGI) and :rfc:`2616` (HTTP). + + +WSGI Environment +================ + +The WSGI environment contains all the information the user request transmits +to the application. It is passed to the WSGI application but you can also +create a WSGI environ dict using the :func:`create_environ` helper: + +>>> from werkzeug.test import create_environ +>>> environ = create_environ('/foo', 'http://localhost:8080/') + +Now we have an environment to play around: + +>>> environ['PATH_INFO'] +'/foo' +>>> environ['SCRIPT_NAME'] +'' +>>> environ['SERVER_NAME'] +'localhost' + +Usually nobody wants to work with the environ directly because it uses a +confusing string encoding scheme, and it does not provide any way to +access the form data besides parsing that data by hand. + + +Enter Request +============= + +For access to the request data the :class:`Request` object is much more fun. +It wraps the `environ` and provides a read-only access to the data from +there: + +>>> from werkzeug.wrappers import Request +>>> request = Request(environ) + +Now you can access the important variables and Werkzeug will parse them +for you and decode them where it makes sense. The default charset for +requests is set to `utf-8` but you can change that by subclassing +:class:`Request`. 
+ +>>> request.path +'/foo' +>>> request.script_root +'' +>>> request.host +'localhost:8080' +>>> request.url +'http://localhost:8080/foo' + +We can also find out which HTTP method was used for the request: + +>>> request.method +'GET' + +This way we can also access URL arguments (the query string) and data that +was transmitted in a POST/PUT request. + +For testing purposes we can create a request object from supplied data +using the :meth:`~Request.from_values` method: + +>>> from io import StringIO +>>> data = "name=this+is+encoded+form+data&another_key=another+one" +>>> request = Request.from_values(query_string='foo=bar&blah=blafasel', +... content_length=len(data), input_stream=StringIO(data), +... content_type='application/x-www-form-urlencoded', +... method='POST') +... +>>> request.method +'POST' + +Now we can access the URL parameters easily: + +>>> request.args.keys() +['blah', 'foo'] +>>> request.args['blah'] +'blafasel' + +Same for the supplied form data: + +>>> request.form['name'] +'this is encoded form data' + +Handling for uploaded files is not much harder as you can see from this +example:: + + def store_file(request): + file = request.files.get('my_file') + if file: + file.save('/where/to/store/the/file.txt') + else: + handle_the_error() + +The files are represented as :class:`FileStorage` objects which provide +some common operations to work with them. + +Request headers can be accessed by using the :class:`~Request.headers` +attribute: + +>>> request.headers['Content-Length'] +'54' +>>> request.headers['Content-Type'] +'application/x-www-form-urlencoded' + +The keys for the headers are of course case insensitive. + + +Header Parsing +============== + +There is more. Werkzeug provides convenient access to often used HTTP headers +and other request data. + +Let's create a request object with all the data a typical web browser transmits +so that we can play with it: + +>>> environ = create_environ() +>>> environ.update( +... 
HTTP_ACCEPT='text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +... HTTP_ACCEPT_LANGUAGE='de-at,en-us;q=0.8,en;q=0.5', +... HTTP_ACCEPT_ENCODING='gzip,deflate', +... HTTP_ACCEPT_CHARSET='ISO-8859-1,utf-8;q=0.7,*;q=0.7', +... HTTP_IF_MODIFIED_SINCE='Fri, 20 Feb 2009 10:10:25 GMT', +... HTTP_IF_NONE_MATCH='"e51c9-1e5d-46356dc86c640"', +... HTTP_CACHE_CONTROL='max-age=0' +... ) +... +>>> request = Request(environ) + +With the accept header the browser informs the web application what +mimetypes it can handle and how well. All accept headers are sorted by +the quality, the best item being the first: + +>>> request.accept_mimetypes.best +'text/html' +>>> 'application/xhtml+xml' in request.accept_mimetypes +True +>>> print(request.accept_mimetypes["application/json"]) +0.8 + +The same works for languages: + +>>> request.accept_languages.best +'de-at' +>>> request.accept_languages.values() +['de-at', 'en-us', 'en'] + +And of course encodings and charsets: + +>>> 'gzip' in request.accept_encodings +True +>>> request.accept_charsets.best +'ISO-8859-1' +>>> 'utf-8' in request.accept_charsets +True + +Normalization is available, so you can safely use alternative forms to perform +containment checking: + +>>> 'UTF8' in request.accept_charsets +True +>>> 'de_AT' in request.accept_languages +True + +E-tags and other conditional headers are available in parsed form as well: + +>>> request.if_modified_since +datetime.datetime(2009, 2, 20, 10, 10, 25, tzinfo=datetime.timezone.utc) +>>> request.if_none_match +<ETags '"e51c9-1e5d-46356dc86c640"'> +>>> request.cache_control +<RequestCacheControl 'max-age=0'> +>>> request.cache_control.max_age +0 +>>> 'e51c9-1e5d-46356dc86c640' in request.if_none_match +True + + +Responses +========= + +Response objects are the opposite of request objects. They are used to send +data back to the client. In reality, response objects are nothing more than +glorified WSGI applications.
+ +So what you are doing is not *returning* the response objects from your WSGI +application but *calling* it as WSGI application inside your WSGI application +and returning the return value of that call. + +So imagine your standard WSGI "Hello World" application:: + + def application(environ, start_response): + start_response('200 OK', [('Content-Type', 'text/plain')]) + return ['Hello World!'] + +With response objects it would look like this:: + + from werkzeug.wrappers import Response + + def application(environ, start_response): + response = Response('Hello World!') + return response(environ, start_response) + +Also, unlike request objects, response objects are designed to be modified. +So here is what you can do with them: + +>>> from werkzeug.wrappers import Response +>>> response = Response("Hello World!") +>>> response.headers['content-type'] +'text/plain; charset=utf-8' +>>> response.data +'Hello World!' +>>> response.headers['content-length'] = len(response.data) + +You can modify the status of the response in the same way. Either just the +code or provide a message as well: + +>>> response.status +'200 OK' +>>> response.status = '404 Not Found' +>>> response.status_code +404 +>>> response.status_code = 400 +>>> response.status +'400 BAD REQUEST' + +As you can see attributes work in both directions. So you can set both +:attr:`~Response.status` and :attr:`~Response.status_code` and the +change will be reflected to the other. 
+ +Also common headers are exposed as attributes or with methods to set / +retrieve them: + +>>> response.content_length +12 +>>> from datetime import datetime, timezone +>>> response.date = datetime(2009, 2, 20, 17, 42, 51, tzinfo=timezone.utc) +>>> response.headers['Date'] +'Fri, 20 Feb 2009 17:42:51 GMT' + +Because etags can be weak or strong there are methods to set them: + +>>> response.set_etag("12345-abcd") +>>> response.headers['etag'] +'"12345-abcd"' +>>> response.get_etag() +('12345-abcd', False) +>>> response.set_etag("12345-abcd", weak=True) +>>> response.get_etag() +('12345-abcd', True) + +Some headers are available as mutable structures. For example most +of the `Content-` headers are sets of values: + +>>> response.content_language.add('en-us') +>>> response.content_language.add('en') +>>> response.headers['Content-Language'] +'en-us, en' + +Also here this works in both directions: + +>>> response.headers['Content-Language'] = 'de-AT, de' +>>> response.content_language +HeaderSet(['de-AT', 'de']) + +Authentication headers can be set that way as well: + +>>> response.www_authenticate.set_basic("My protected resource") +>>> response.headers['www-authenticate'] +'Basic realm="My protected resource"' + +Cookies can be set as well: + +>>> response.set_cookie('name', 'value') +>>> response.headers['Set-Cookie'] +'name=value; Path=/' +>>> response.set_cookie('name2', 'value2') + +If headers appear multiple times you can use the :meth:`~Headers.getlist` +method to get all values for a header: + +>>> response.headers.getlist('Set-Cookie') +['name=value; Path=/', 'name2=value2; Path=/'] + +Finally if you have set all the conditional values, you can make the +response conditional against a request. Which means that if the request +can assure that it has the information already, no data besides the headers +is sent over the network which saves traffic. 
For that you should set at +least an etag (which is used for comparison) and the date header and then +call :meth:`~Response.make_conditional` with the request object. + +The response is modified accordingly (status code changed, response body +removed, entity headers removed, etc.) diff --git a/docs/request_data.rst b/docs/request_data.rst new file mode 100644 index 0000000..83c6278 --- /dev/null +++ b/docs/request_data.rst @@ -0,0 +1,100 @@ +Dealing with Request Data +========================= + +.. currentmodule:: werkzeug + +The most important rule about web development is "Do not trust the user". +This is especially true for incoming request data on the input stream. +With WSGI this is actually a bit harder than you would expect. Because +of that Werkzeug wraps the request stream for you to save you from the +most prominent problems with it. + + +Missing EOF Marker on Input Stream +---------------------------------- + +The input stream has no end-of-file marker. If you were to call the +:meth:`~file.read` method on the `wsgi.input` stream you would cause your +application to hang on conforming servers. This is actually intentional, +however painful. Werkzeug solves that problem by wrapping the input +stream in a special :class:`LimitedStream`. The input stream is exposed +on the request objects as :attr:`~Request.stream`. This one is either +an empty stream (if the form data was parsed) or a limited stream with +the contents of the input stream. + + +When does Werkzeug Parse? +------------------------- + +Werkzeug parses the incoming data in the following situations: + +- you access either :attr:`~Request.form`, :attr:`~Request.files`, + or :attr:`~Request.stream` and the request method was + `POST` or `PUT`. +- you call :func:`parse_form_data`. + +These calls are not interchangeable. If you invoke :func:`parse_form_data` +you must not use the request object or at least not the attributes that +trigger the parsing process.
+ +This is also true if you read from the `wsgi.input` stream before the +parsing. + +**General rule:** Leave the WSGI input stream alone. Especially in +WSGI middlewares. Use either the parsing functions or the request +object. Do not mix multiple WSGI utility libraries for form data +parsing or anything else that works on the input stream. + + +How does it Parse? +------------------ + +The standard Werkzeug parsing behavior handles three cases: + +- input content type was `multipart/form-data`. In this situation the + :attr:`~Request.stream` will be empty and + :attr:`~Request.form` will contain the regular `POST` / `PUT` + data, :attr:`~Request.files` will contain the uploaded + files as :class:`FileStorage` objects. +- input content type was `application/x-www-form-urlencoded`. Then the + :attr:`~Request.stream` will be empty and + :attr:`~Request.form` will contain the regular `POST` / `PUT` + data and :attr:`~Request.files` will be empty. +- the input content type was neither of them, :attr:`~Request.stream` + points to a :class:`LimitedStream` with the input data for further + processing. + +Special note on the :meth:`~Request.get_data` method: Calling this +loads the full request data into memory. This is only safe to do if the +:attr:`~Request.max_content_length` is set. Also you can *either* +read the stream *or* call :meth:`~Request.get_data`. + + +Limiting Request Data +--------------------- + +To avoid being the victim of a DoS attack you can set the maximum +accepted content length and request field sizes. The :class:`Request` +class has two attributes for that: :attr:`~Request.max_content_length` +and :attr:`~Request.max_form_memory_size`. + +The first one can be used to limit the total content length. For example +by setting it to ``1024 * 1024 * 16`` the request won't accept more than +16MB of transmitted data.
+ +Because certain data can't be moved to the hard disk (regular post data) +whereas temporary files can, there is a second limit you can set. The +:attr:`~Request.max_form_memory_size` limits the size of `POST` +transmitted form data. By setting it to ``1024 * 1024 * 2`` you can make +sure that all in-memory stored fields are not more than 2MB in size. + +This however does *not* affect in-memory stored files if the +`stream_factory` used returns an in-memory file. + + +How to extend Parsing? +---------------------- + +Modern web applications transmit a lot more than multipart form data or +url encoded data. To extend the capabilities, subclass :class:`Request` +and add or extend methods. diff --git a/docs/routing.rst b/docs/routing.rst new file mode 100644 index 0000000..8c04a23 --- /dev/null +++ b/docs/routing.rst @@ -0,0 +1,307 @@ +=========== +URL Routing +=========== + +.. module:: werkzeug.routing + +When it comes to combining multiple controller or view functions (however +you want to call them), you need a dispatcher. A simple way would be +applying regular expression tests on ``PATH_INFO`` and calling registered +callback functions that return the value. + +Werkzeug provides a much more powerful system, similar to `Routes`_. All the +objects mentioned on this page must be imported from :mod:`werkzeug.routing`, not +from :mod:`werkzeug`! + +..
_Routes: https://routes.readthedocs.io/en/latest/ + + +Quickstart +========== + +Here is a simple example which could be the URL definition for a blog:: + + from werkzeug.routing import Map, Rule, NotFound, RequestRedirect + + url_map = Map([ + Rule('/', endpoint='blog/index'), + Rule('/<int:year>/', endpoint='blog/archive'), + Rule('/<int:year>/<int:month>/', endpoint='blog/archive'), + Rule('/<int:year>/<int:month>/<int:day>/', endpoint='blog/archive'), + Rule('/<int:year>/<int:month>/<int:day>/<slug>', + endpoint='blog/show_post'), + Rule('/about', endpoint='blog/about_me'), + Rule('/feeds/', endpoint='blog/feeds'), + Rule('/feeds/<feed_name>.rss', endpoint='blog/show_feed') + ]) + + def application(environ, start_response): + urls = url_map.bind_to_environ(environ) + try: + endpoint, args = urls.match() + except HTTPException as e: + return e(environ, start_response) + start_response('200 OK', [('Content-Type', 'text/plain')]) + return [f'Rule points to {endpoint!r} with arguments {args!r}'.encode()] + +So what does that do? First of all we create a new :class:`Map` which stores +a bunch of URL rules. Then we pass it a list of :class:`Rule` objects. + +Each :class:`Rule` object is instantiated with a string that represents a rule +and an endpoint which will be the alias for what view the rule represents. +Multiple rules can have the same endpoint, but should have different arguments +to allow URL construction. + +The format for the URL rules is straightforward, but explained in detail below. + +Inside the WSGI application we bind the url_map to the current request which will +return a new :class:`MapAdapter`. This url_map adapter can then be used to match +or build domains for the current request. + +The :meth:`MapAdapter.match` method can then either return a tuple in the form +``(endpoint, args)`` or raise one of the three exceptions +:exc:`~werkzeug.exceptions.NotFound`, :exc:`~werkzeug.exceptions.MethodNotAllowed`, +or :exc:`~werkzeug.routing.RequestRedirect`.
For more details about those +exceptions have a look at the documentation of the :meth:`MapAdapter.match` method. + + +Rule Format +=========== + +Rule strings are URL paths with placeholders for variable parts in the +format ``<converter(arguments):name>``. ``converter`` and ``arguments`` +(with parentheses) are optional. If no converter is given, the +``default`` converter is used (``string`` by default). The available +converters are discussed below. + +Rules that end with a slash are "branches", others are "leaves". If +``strict_slashes`` is enabled (the default), visiting a branch URL +without a trailing slash will redirect to the URL with a slash appended. + +Many HTTP servers merge consecutive slashes into one when receiving +requests. If ``merge_slashes`` is enabled (the default), rules will +merge slashes in non-variable parts when matching and building. Visiting +a URL with consecutive slashes will redirect to the URL with slashes +merged. If you want to disable ``merge_slashes`` for a :class:`Rule` or +:class:`Map`, you'll also need to configure your web server +appropriately. + + +Built-in Converters +=================== + +Converters for common types of URL variables are built-in. The available +converters can be overridden or extended through :attr:`Map.converters`. + +.. autoclass:: UnicodeConverter + +.. autoclass:: PathConverter + +.. autoclass:: AnyConverter + +.. autoclass:: IntegerConverter + +.. autoclass:: FloatConverter + +.. autoclass:: UUIDConverter + + +Maps, Rules and Adapters +======================== + +.. autoclass:: Map + :members: + + .. attribute:: converters + + The dictionary of converters. This can be modified after the class + was created, but will only affect rules added after the + modification. If the rules are defined with the list passed to the + class, the `converters` parameter to the constructor has to be used + instead. + +.. autoclass:: MapAdapter + :members: + +.. autoclass:: Rule + :members: empty + + +Matchers +======== + +..
autoclass:: StateMachineMatcher + :members: + + +Rule Factories +============== + +.. autoclass:: RuleFactory + :members: get_rules + +.. autoclass:: Subdomain + +.. autoclass:: Submount + +.. autoclass:: EndpointPrefix + + +Rule Templates +============== + +.. autoclass:: RuleTemplate + + +Custom Converters +================= + +You can add custom converters that add behaviors not provided by the +built-in converters. To make a custom converter, subclass +:class:`BaseConverter` then pass the new class to the :class:`Map` +``converters`` parameter, or add it to +:attr:`url_map.converters `. + +The converter should have a ``regex`` attribute with a regular +expression to match with. If the converter can take arguments in a URL +rule, it should accept them in its ``__init__`` method. The entire +regex expression will be matched as a group and used as the value for +conversion. + +If a custom converter can match a forward slash, ``/``, it should have +the attribute ``part_isolating`` set to ``False``. This will ensure +that rules using the custom converter are correctly matched. + +It can implement a ``to_python`` method to convert the matched string to +some other object. This can also do extra validation that wasn't +possible with the ``regex`` attribute, and should raise a +:exc:`werkzeug.routing.ValidationError` in that case. Raising any other +errors will cause a 500 error. + +It can implement a ``to_url`` method to convert a Python object to a +string when building a URL. Any error raised here will be converted to a +:exc:`werkzeug.routing.BuildError` and eventually cause a 500 error. + +This example implements a ``BooleanConverter`` that will match the +strings ``"yes"``, ``"no"``, and ``"maybe"``, returning a random value +for ``"maybe"``. 
:: + + from random import randrange + from werkzeug.routing import BaseConverter, ValidationError + + class BooleanConverter(BaseConverter): + regex = r"(?:yes|no|maybe)" + + def __init__(self, url_map, maybe=False): + super().__init__(url_map) + self.maybe = maybe + + def to_python(self, value): + if value == "maybe": + if self.maybe: + return not randrange(2) + raise ValidationError + return value == 'yes' + + def to_url(self, value): + return "yes" if value else "no" + + from werkzeug.routing import Map, Rule + + url_map = Map([ + Rule("/vote/<bool:werkzeug_rocks>", endpoint="vote"), + Rule("/guess/<bool(maybe=True):foo>", endpoint="guess") + ], converters={'bool': BooleanConverter}) + +If you want to change the default converter, assign a different +converter to the ``"default"`` key. + + +Host Matching +============= + +.. versionadded:: 0.7 + +Starting with Werkzeug 0.7 it's also possible to do matching on the whole +host names instead of just the subdomain. To enable this feature you need +to pass ``host_matching=True`` to the :class:`Map` constructor and provide +the `host` argument to all routes:: + + url_map = Map([ + Rule('/', endpoint='www_index', host='www.example.com'), + Rule('/', endpoint='help_index', host='help.example.com') + ], host_matching=True) + +Variable parts are of course also possible in the host section:: + + url_map = Map([ + Rule('/', endpoint='www_index', host='www.example.com'), + Rule('/', endpoint='user_index', host='<user>.example.com') + ], host_matching=True) + + +WebSockets +========== + +.. versionadded:: 1.0 + +If a :class:`Rule` is created with ``websocket=True``, it will only +match if the :class:`Map` is bound to a request with a ``url_scheme`` of +``ws`` or ``wss``. + +.. note:: + + Werkzeug has no further WebSocket support beyond routing. This + functionality is mostly of use to ASGI projects. + +..
code-block:: python + + url_map = Map([ + Rule("/ws", endpoint="comm", websocket=True), + ]) + adapter = url_map.bind("example.org", "/ws", url_scheme="ws") + assert adapter.match() == ("comm", {}) + +If the only match is a WebSocket rule and the bind is HTTP (or the +only match is HTTP and the bind is WebSocket) a +:exc:`WebsocketMismatch` (derives from +:exc:`~werkzeug.exceptions.BadRequest`) exception is raised. + +As WebSocket URLs have a different scheme, rules are always built with a +scheme and host, ``force_external=True`` is implied. + +.. code-block:: python + + url = adapter.build("comm") + assert url == "ws://example.org/ws" + + +State Machine Matching +====================== + +The default matching algorithm uses a state machine that transitions +between parts of the request path to find a match. To understand how +this works consider this rule:: + + /resource/<id> + +Firstly this rule is decomposed into two ``RulePart`` objects. The first is a +static part with a content equal to ``resource``, the second is +dynamic and requires a regex match to ``[^/]+``. + +A state machine is then created with an initial state that represents +the rule's first ``/``. This initial state has a single, static +transition to the next state which represents the rule's second +``/``. This second state has a single dynamic transition to the final +state which includes the rule. + +To match a path the matcher starts at the initial state and follows +transitions that work. Clearly a trial path of ``/resource/2`` has the +parts ``""``, ``resource``, and ``2`` which match the transitions and +hence a rule will match. Whereas ``/other/2`` will not match as there +is no transition for the ``other`` part from the initial state. + +The only diversion from this rule is if a ``RulePart`` is not +part-isolating i.e. it will match ``/``. In this case the ``RulePart`` +is considered final and represents a transition that must include all +the subsequent parts of the trial path.
diff --git a/docs/serving.rst b/docs/serving.rst new file mode 100644 index 0000000..693774c --- /dev/null +++ b/docs/serving.rst @@ -0,0 +1,267 @@ +========================= +Serving WSGI Applications +========================= + +.. module:: werkzeug.serving + +There are many ways to serve a WSGI application. While you're developing it, +you usually don't want to have a full-blown webserver like Apache up and +running, but instead a simple standalone one. Because of that Werkzeug comes +with a builtin development server. + +The easiest way is creating a small ``start-myproject.py`` file that runs the +application using the builtin server:: + + from werkzeug.serving import run_simple + from myproject import make_app + + app = make_app(...) + run_simple('localhost', 8080, app, use_reloader=True) + +You can also pass it the `extra_files` keyword argument with a list of +additional files (like configuration files) you want to observe. + +.. autofunction:: run_simple + +.. autofunction:: is_running_from_reloader + +.. autofunction:: make_ssl_devcert + +.. admonition:: Information + + The development server is not intended to be used on production systems. + It was designed especially for development purposes and performs poorly + under high load. For deployment setups have a look at the + :doc:`/deployment/index` pages. + +.. _reloader: + +Reloader +-------- + +.. versionchanged:: 0.10 + +The Werkzeug reloader constantly monitors modules and paths of your web +application, and restarts the server if any of the observed files change. + +Since version 0.10, there are two backends the reloader supports: ``stat`` and +``watchdog``. + +- The default ``stat`` backend simply checks the ``mtime`` of all files in a + regular interval. This is sufficient for most cases, however, it is known to + drain a laptop's battery. + +- The ``watchdog`` backend uses filesystem events, and is much faster than + ``stat``. It requires the `watchdog `_ + module to be installed. 
The recommended way to achieve this is to add + ``Werkzeug[watchdog]`` to your requirements file. + +If ``watchdog`` is installed and available it will automatically be used +instead of the builtin ``stat`` reloader. + +To switch between the backends you can use the `reloader_type` parameter of the +:func:`run_simple` function. ``'stat'`` sets it to the default stat based +polling and ``'watchdog'`` forces it to the watchdog backend. + +.. note:: + + Some edge cases, like modules that failed to import correctly, are not + handled by the stat reloader for performance reasons. The watchdog reloader + monitors such files too. + + +Colored Logging +--------------- + +The development server highlights the request logs in different colors +based on the status code. On Windows, `Colorama`_ must be installed as +well to enable this. + +.. _Colorama: https://pypi.org/project/colorama/ + + +Virtual Hosts +------------- + +Many web applications utilize multiple subdomains. This can be a bit tricky +to simulate locally. Fortunately there is the `hosts file`_ that can be used +to assign the local computer multiple names. + +This allows you to call your local computer `yourapplication.local` and +`api.yourapplication.local` (or anything else) in addition to `localhost`. + +You can find the hosts file on the following location: + + =============== ============================================== + Windows ``%SystemRoot%\system32\drivers\etc\hosts`` + Linux / OS X ``/etc/hosts`` + =============== ============================================== + +You can open the file with your favorite text editor and add a new name after +`localhost`:: + + 127.0.0.1 localhost yourapplication.local api.yourapplication.local + +Save the changes and after a while you should be able to access the +development server on these host names as well. You can use the +:doc:`/routing` system to dispatch between different hosts or parse +:attr:`request.host` yourself. 
+ + +Shutting Down The Server +------------------------ + +In some cases it can be useful to shut down a server after handling a +request. For example, a local command line tool that needs OAuth +authentication could temporarily start a server to listen for a +response, record the user's token, then stop the server. + +One method to do this could be to start a server in a +:mod:`multiprocessing` process, then terminate the process after a value +is passed back to the parent. + +.. code-block:: python + + import multiprocessing + from werkzeug import Request, Response, run_simple + + def get_token(q: multiprocessing.Queue) -> None: + @Request.application + def app(request: Request) -> Response: + q.put(request.args["token"]) + return Response("", 204) + + run_simple("localhost", 5000, app) + + if __name__ == "__main__": + q = multiprocessing.Queue() + p = multiprocessing.Process(target=get_token, args=(q,)) + p.start() + print("waiting") + token = q.get(block=True) + p.terminate() + print(token) + +That example uses Werkzeug's development server, but any production +server that can be started as a Python process could use the same +technique and should be preferred for security. Another method could be +to start a :mod:`subprocess` process and send the value back over +``stdout``. + + +Troubleshooting +--------------- + +On operating systems that support ipv6 and have it configured such as modern +Linux systems, OS X 10.4 or higher as well as Windows Vista some browsers can +be painfully slow if accessing your local server. The reason for this is that +sometimes "localhost" is configured to be available on both ipv4 and ipv6 sockets +and some browsers will try to access ipv6 first and then ipv4. + +At the current time the integrated webserver does not support ipv6 and ipv4 at +the same time and for better portability ipv4 is the default. + +If you notice that the web browser takes ages to load the page there are two ways +around this issue. 
If you don't need ipv6 support you can disable the ipv6 entry +in the `hosts file`_ by removing this line:: + + ::1 localhost + +Alternatively you can also disable ipv6 support in your browser. For example +if Firefox shows this behavior you can disable it by going to ``about:config`` +and disabling the `network.dns.disableIPv6` key. This however is not +recommended as of Werkzeug 0.6.1! + +Starting with Werkzeug 0.6.1, the server will now switch between ipv4 and +ipv6 based on your operating system's configuration. This means that if +you disabled ipv6 support in your browser but your operating system is +preferring ipv6, you will be unable to connect to your server. In that +situation, you can either remove the localhost entry for ``::1`` or +explicitly bind the hostname to an ipv4 address (`127.0.0.1`). + +.. _hosts file: https://en.wikipedia.org/wiki/Hosts_file + +SSL +--- + +.. versionadded:: 0.6 + +The builtin server supports SSL for testing purposes. If an SSL context is +provided it will be used. That means a server can either run in HTTP or HTTPS +mode, but not both. + +Quickstart +`````````` + +The easiest way to do SSL based development with Werkzeug is by using it +to generate an SSL certificate and private key and storing that somewhere +and to then put it there. For the certificate you need to provide the +name of your server on generation or a `CN`. + +1. Generate an SSL key and store it somewhere: + + >>> from werkzeug.serving import make_ssl_devcert + >>> make_ssl_devcert('/path/to/the/key', host='localhost') + ('/path/to/the/key.crt', '/path/to/the/key.key') + +2. Now this tuple can be passed as ``ssl_context`` to the + :func:`run_simple` method:: + + run_simple('localhost', 4000, application, + ssl_context=('/path/to/the/key.crt', + '/path/to/the/key.key')) + +You will have to acknowledge the certificate in your browser once then.
+ +Loading Contexts by Hand +```````````````````````` + +You can use an ``ssl.SSLContext`` object instead of a tuple for full +control over the TLS configuration. + +.. code-block:: python + + import ssl + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ctx.load_cert_chain('ssl.cert', 'ssl.key') + run_simple('localhost', 4000, application, ssl_context=ctx) + + +.. versionchanged:: 0.10 ``OpenSSL`` contexts are not supported anymore. + +Generating Certificates +``````````````````````` + +A key and certificate can be created in advance using the openssl tool +instead of :func:`make_ssl_devcert`. This requires that you have +the `openssl` command installed on your system:: + + $ openssl genrsa 1024 > ssl.key + $ openssl req -new -x509 -nodes -sha1 -days 365 -key ssl.key > ssl.cert + +Adhoc Certificates +`````````````````` + +The easiest way to enable SSL is to start the server in adhoc-mode. In +that case Werkzeug will generate an SSL certificate for you:: + + run_simple('localhost', 4000, application, + ssl_context='adhoc') + +The downside of this of course is that you will have to acknowledge the +certificate each time the server is reloaded. Adhoc certificates are +discouraged because modern browsers do a bad job at supporting them for +security reasons. + +This feature requires the cryptography library to be installed. + + +Unix Sockets +------------ + +The dev server can bind to a Unix socket instead of a TCP socket. +:func:`run_simple` will bind to a Unix socket if the ``hostname`` +parameter starts with ``'unix://'``. :: + + from werkzeug.serving import run_simple + run_simple('unix://example.sock', 0, app) diff --git a/docs/terms.rst b/docs/terms.rst new file mode 100644 index 0000000..088aaa4 --- /dev/null +++ b/docs/terms.rst @@ -0,0 +1,44 @@ +=============== +Important Terms +=============== + +.. currentmodule:: werkzeug + +This page covers important terms used in the documentation and Werkzeug +itself.
+ + +WSGI +---- + +WSGI is a specification for Python web applications that Werkzeug follows. It was +specified in :pep:`3333` and is widely supported. Unlike previous solutions +it guarantees that web applications, servers and utilities can work together. + +Response Object +--------------- + +For Werkzeug, a response object is an object that works like a WSGI +application but does not do any request processing. Usually you have a view +function or controller method that processes the request and assembles a +response object. + +A response object is *not* necessarily the :class:`Response` class or a +subclass thereof. + +For example Pylons/webob provide a very similar response class that can +be used as well (:class:`webob.Response`). + +View Function +------------- + +Often people speak of MVC (Model, View, Controller) when developing web +applications. However, the Django framework coined MTV (Model, Template, +View) which basically means the same but reduces the concept to the data +model, a function that processes data from the request and the database and +renders a template. + +Werkzeug itself does not tell you how you should develop applications, but the +documentation often speaks of view functions that work roughly the same. The +idea of a view function is that it's called with a request object (and +optionally some parameters from a URL rule) and returns a response object. diff --git a/docs/test.rst b/docs/test.rst new file mode 100644 index 0000000..efb449a --- /dev/null +++ b/docs/test.rst @@ -0,0 +1,111 @@ +.. module:: werkzeug.test + +Testing WSGI Applications +========================= + + +Test Client +----------- + +Werkzeug provides a :class:`Client` to simulate requests to a WSGI +application without starting a server. The client has methods for making +different types of requests, as well as managing cookies across +requests.
+ +>>> from werkzeug.test import Client +>>> from werkzeug.testapp import test_app +>>> c = Client(test_app) +>>> response = c.get("/") +>>> response.status_code +200 +>>> response.headers +Headers([('Content-Type', 'text/html; charset=utf-8'), ('Content-Length', '6658')]) +>>> response.get_data(as_text=True) +'...' + +The client's request methods return instances of :class:`TestResponse`. +This provides extra attributes and methods on top of +:class:`~werkzeug.wrappers.Response` that are useful for testing. + + +Request Body +------------ + +By passing a dict to ``data``, the client will construct a request body +with file and form data. It will set the content type to +``application/x-www-form-urlencoded`` if there are no files, or +``multipart/form-data`` if there are. + +.. code-block:: python + + from io import BytesIO + + response = client.post(data={ + "name": "test", + "file": (BytesIO("file contents".encode("utf8")), "test.txt") + }) + +Pass a string, bytes, or file-like object to ``data`` to use that as the +raw request body. In that case, you should set the content type +appropriately. For example, to post YAML: + +.. code-block:: python + + response = client.post( + data="a: value\nb: 1\n", content_type="application/yaml" + ) + +A shortcut when testing JSON APIs is to pass a dict to ``json`` instead +of using ``data``. This will automatically call ``json.dumps()`` and +set the content type to ``application/json``. Additionally, if the +app returns JSON, ``response.json`` will automatically call +``json.loads()``. + +.. code-block:: python + + response = client.post("/api", json={"a": "value", "b": 1}) + obj = response.json + + +Environment Builder +------------------- + +:class:`EnvironBuilder` is used to construct a WSGI environ dict. The +test client uses this internally to prepare its requests. The arguments +passed to the client request methods are the same as the builder. + +Sometimes, it can be useful to construct a WSGI environment manually.
+An environ builder or dict can be passed to the test client request +methods in place of other arguments to use a custom environ. + +.. code-block:: Python + + from werkzeug.test import EnvironBuilder + builder = EnvironBuilder(...) + # build an environ dict + environ = builder.get_environ() + # build an environ dict wrapped in a request + request = builder.get_request() + +The test client responses make this available through +:attr:`TestResponse.request` and ``response.request.environ``. + + +API +--- + +.. autoclass:: Client + :members: + :member-order: bysource + +.. autoclass:: TestResponse + :members: + :member-order: bysource + +.. autoclass:: EnvironBuilder + :members: + :member-order: bysource + +.. autofunction:: create_environ + +.. autofunction:: run_wsgi_app diff --git a/docs/tutorial.rst b/docs/tutorial.rst new file mode 100644 index 0000000..943787a --- /dev/null +++ b/docs/tutorial.rst @@ -0,0 +1,479 @@ +================= +Werkzeug Tutorial +================= + +.. currentmodule:: werkzeug + +Welcome to the Werkzeug tutorial in which we will create a `TinyURL`_ clone +that stores URLs in a redis instance. The libraries we will use for this +applications are `Jinja`_ 2 for the templates, `redis`_ for the database +layer and, of course, Werkzeug for the WSGI layer. + +You can use `pip` to install the required libraries:: + + pip install Jinja2 redis Werkzeug + +Also make sure to have a redis server running on your local machine. If +you are on OS X, you can use `brew` to install it:: + + brew install redis + +If you are on Ubuntu or Debian, you can use apt-get:: + + sudo apt-get install redis-server + +Redis was developed for UNIX systems and was never really designed to +work on Windows. For development purposes, the unofficial ports however +work well enough. You can get them from `github +`_. + +Introducing Shortly +------------------- + +In this tutorial, we will together create a simple URL shortener service +with Werkzeug. 
Please keep in mind that Werkzeug is not a framework, it's +a library with utilities to create your own framework or application and +as such is very flexible. The approach we use here is just one of many you +can use. + +As data store, we will use `redis`_ here instead of a relational database +to keep this simple and because that's the kind of job that `redis`_ +excels at. + +The final result will look something like this: + +.. image:: _static/shortly.png + :alt: a screenshot of shortly + +.. _TinyURL: https://tinyurl.com/ +.. _Jinja: http://jinja.pocoo.org/ +.. _redis: https://redis.io/ + +Step 0: A Basic WSGI Introduction +--------------------------------- + +Werkzeug is a utility library for WSGI. WSGI itself is a protocol or +convention that ensures that your web application can speak with the +webserver and more importantly that web applications work nicely together. + +A basic “Hello World” application in WSGI without the help of Werkzeug +looks like this:: + + def application(environ, start_response): + start_response('200 OK', [('Content-Type', 'text/plain')]) + return ['Hello World!'.encode('utf-8')] + +A WSGI application is something you can call and pass an environ dict +and a ``start_response`` callable. The environ contains all incoming +information, the ``start_response`` function can be used to indicate the +start of the response. With Werkzeug you don't have to deal directly with +either as request and response objects are provided to work with them. + +The request data takes the environ object and allows you to access the +data from that environ in a nice manner. The response object is a WSGI +application in itself and provides a much nicer way to create responses. 
+ +Here is how you would write that application with response objects:: + + from werkzeug.wrappers import Response + + def application(environ, start_response): + response = Response('Hello World!', mimetype='text/plain') + return response(environ, start_response) + +And here an expanded version that looks at the query string in the URL +(more importantly at the `name` parameter in the URL to substitute “World” +against another word):: + + from werkzeug.wrappers import Request, Response + + def application(environ, start_response): + request = Request(environ) + text = f"Hello {request.args.get('name', 'World')}!" + response = Response(text, mimetype='text/plain') + return response(environ, start_response) + +And that's all you need to know about WSGI. + + +Step 1: Creating the Folders +---------------------------- + +Before we get started, let’s create the folders needed for this application:: + + /shortly + /static + /templates + +The shortly folder is not a python package, but just something where we +drop our files. Directly into this folder we will then put our main +module in the following steps. The files inside the static folder are +available to users of the application via HTTP. This is the place where +CSS and JavaScript files go. Inside the templates folder we will make +Jinja2 look for templates. The templates you create later in the tutorial +will go in this directory. + +Step 2: The Base Structure +-------------------------- + +Now let's get right into it and create a module for our application. Let's +create a file called `shortly.py` in the `shortly` folder. At first we +will need a bunch of imports. 
I will pull in all the imports here, even +if they are not used right away, to keep it from being confusing:: + + import os + import redis + from werkzeug.urls import url_parse + from werkzeug.wrappers import Request, Response + from werkzeug.routing import Map, Rule + from werkzeug.exceptions import HTTPException, NotFound + from werkzeug.middleware.shared_data import SharedDataMiddleware + from werkzeug.utils import redirect + from jinja2 import Environment, FileSystemLoader + +Then we can create the basic structure for our application and a function +to create a new instance of it, optionally with a piece of WSGI middleware +that exports all the files on the `static` folder on the web:: + + class Shortly(object): + + def __init__(self, config): + self.redis = redis.Redis( + config['redis_host'], config['redis_port'], decode_responses=True + ) + + def dispatch_request(self, request): + return Response('Hello World!') + + def wsgi_app(self, environ, start_response): + request = Request(environ) + response = self.dispatch_request(request) + return response(environ, start_response) + + def __call__(self, environ, start_response): + return self.wsgi_app(environ, start_response) + + + def create_app(redis_host='localhost', redis_port=6379, with_static=True): + app = Shortly({ + 'redis_host': redis_host, + 'redis_port': redis_port + }) + if with_static: + app.wsgi_app = SharedDataMiddleware(app.wsgi_app, { + '/static': os.path.join(os.path.dirname(__file__), 'static') + }) + return app + +Lastly we can add a piece of code that will start a local development +server with automatic code reloading and a debugger:: + + if __name__ == '__main__': + from werkzeug.serving import run_simple + app = create_app() + run_simple('127.0.0.1', 5000, app, use_debugger=True, use_reloader=True) + +The basic idea here is that our ``Shortly`` class is an actual WSGI +application. The ``__call__`` method directly dispatches to ``wsgi_app``. 
+This is done so that we can wrap ``wsgi_app`` to apply middlewares like we +do in the ``create_app`` function. The actual ``wsgi_app`` method then +creates a :class:`Request` object and calls the ``dispatch_request`` +method which then has to return a :class:`Response` object which is then +evaluated as a WSGI application again. As you can see: turtles all the way +down. Both the ``Shortly`` class we create, as well as any response object +in Werkzeug implements the WSGI interface. As a result of that you could +even return another WSGI application from the ``dispatch_request`` method. + +The ``create_app`` factory function can be used to create a new instance +of our application. Not only will it pass some parameters as +configuration to the application but also optionally add a WSGI middleware +that exports static files. This way we have access to the files from the +static folder even when we are not configuring our server to provide them +which is very helpful for development. + +Intermezzo: Running the Application +----------------------------------- + +Now you should be able to execute the file with `python` and see a server +on your local machine:: + + $ python shortly.py + * Running on http://127.0.0.1:5000/ + * Restarting with reloader: stat() polling + +It also tells you that the reloader is active. It will use various +techniques to figure out if any file changed on the disk and then +automatically restart. + +Just go to the URL and you should see “Hello World!”. + +Step 3: The Environment +----------------------- + +Now that we have the basic application class, we can make the constructor +do something useful and provide a few helpers on there that can come in +handy. 
We will need to be able to render templates and connect to redis, +so let's extend the class a bit:: + + def __init__(self, config): + self.redis = redis.Redis(config['redis_host'], config['redis_port']) + template_path = os.path.join(os.path.dirname(__file__), 'templates') + self.jinja_env = Environment(loader=FileSystemLoader(template_path), + autoescape=True) + + def render_template(self, template_name, **context): + t = self.jinja_env.get_template(template_name) + return Response(t.render(context), mimetype='text/html') + +Step 4: The Routing +------------------- + +Next up is routing. Routing is the process of matching and parsing the URL to +something we can use. Werkzeug provides a flexible integrated routing +system which we can use for that. The way it works is that you create a +:class:`~werkzeug.routing.Map` instance and add a bunch of +:class:`~werkzeug.routing.Rule` objects. Each rule has a pattern it will +try to match the URL against and an “endpoint”. The endpoint is typically +a string and can be used to uniquely identify the URL. We could also use +this to automatically reverse the URL, but that's not what we will do in this +tutorial. + +Just put this into the constructor:: + + self.url_map = Map([ + Rule('/', endpoint='new_url'), + Rule('/<short_id>', endpoint='follow_short_link'), + Rule('/<short_id>+', endpoint='short_link_details') + ]) + +Here we create a URL map with three rules. ``/`` for the root of the URL +space where we will just dispatch to a function that implements the logic +to create a new URL. And then one that follows the short link to the +target URL and another one with the same rule but a plus (``+``) at the +end to show the link details. + +So how do we find our way from the endpoint to a function? That's up to you. +The way we will do it in this tutorial is by calling the method ``on_`` ++ endpoint on the class itself. 
Here is how this works:: + + def dispatch_request(self, request): + adapter = self.url_map.bind_to_environ(request.environ) + try: + endpoint, values = adapter.match() + return getattr(self, f'on_{endpoint}')(request, **values) + except HTTPException as e: + return e + +We bind the URL map to the current environment and get back a +:class:`~werkzeug.routing.URLAdapter`. The adapter can be used to match +the request but also to reverse URLs. The match method will return the +endpoint and a dictionary of values in the URL. For instance the rule for +``follow_short_link`` has a variable part called ``short_id``. When we go +to ``http://localhost:5000/foo`` we will get the following values back:: + + endpoint = 'follow_short_link' + values = {'short_id': 'foo'} + +If it does not match anything, it will raise a +:exc:`~werkzeug.exceptions.NotFound` exception, which is an +:exc:`~werkzeug.exceptions.HTTPException`. All HTTP exceptions are also +WSGI applications by themselves which render a default error page. So we +just catch all of them down and return the error itself. + +If all works well, we call the function ``on_`` + endpoint and pass it the +request as argument as well as all the URL arguments as keyword arguments +and return the response object that method returns. + +Step 5: The First View +---------------------- + +Let's start with the first view: the one for new URLs:: + + def on_new_url(self, request): + error = None + url = '' + if request.method == 'POST': + url = request.form['url'] + if not is_valid_url(url): + error = 'Please enter a valid URL' + else: + short_id = self.insert_url(url) + return redirect(f"/{short_id}+") + return self.render_template('new_url.html', error=error, url=url) + +This logic should be easy to understand. Basically we are checking that +the request method is POST, in which case we validate the URL and add a +new entry to the database, then redirect to the detail page. This means +we need to write a function and a helper method. 
For URL validation this +is good enough:: + + def is_valid_url(url): + parts = url_parse(url) + return parts.scheme in ('http', 'https') + +For inserting the URL, all we need is this little method on our class:: + + def insert_url(self, url): + short_id = self.redis.get(f'reverse-url:{url}') + if short_id is not None: + return short_id + url_num = self.redis.incr('last-url-id') + short_id = base36_encode(url_num) + self.redis.set(f'url-target:{short_id}', url) + self.redis.set(f'reverse-url:{url}', short_id) + return short_id + +``reverse-url:`` + the URL will store the short id. If the URL was +already submitted this won't be None and we can just return that value +which will be the short ID. Otherwise we increment the ``last-url-id`` +key and convert it to base36. Then we store the link and the reverse +entry in redis. And here the function to convert to base 36:: + + def base36_encode(number): + assert number >= 0, 'positive integer required' + if number == 0: + return '0' + base36 = [] + while number != 0: + number, i = divmod(number, 36) + base36.append('0123456789abcdefghijklmnopqrstuvwxyz'[i]) + return ''.join(reversed(base36)) + +So what is missing for this view to work is the template. We will create +this later, let's first also write the other views and then do the +templates in one go. + +Step 6: Redirect View +--------------------- + +The redirect view is easy. All it has to do is to look for the link in +redis and redirect to it. 
Additionally we will also increment a counter +so that we know how often a link was clicked:: + + def on_follow_short_link(self, request, short_id): + link_target = self.redis.get(f'url-target:{short_id}') + if link_target is None: + raise NotFound() + self.redis.incr(f'click-count:{short_id}') + return redirect(link_target) + +In this case we will raise a :exc:`~werkzeug.exceptions.NotFound` exception +by hand if the URL does not exist, which will bubble up to the +``dispatch_request`` function and be converted into a default 404 +response. + +Step 7: Detail View +------------------- + +The link detail view is very similar, we just render a template +again. In addition to looking up the target, we also ask redis for the +number of times the link was clicked and let it default to zero if such +a key does not yet exist:: + + def on_short_link_details(self, request, short_id): + link_target = self.redis.get(f'url-target:{short_id}') + if link_target is None: + raise NotFound() + click_count = int(self.redis.get(f'click-count:{short_id}') or 0) + return self.render_template('short_link_details.html', + link_target=link_target, + short_id=short_id, + click_count=click_count + ) + +Please be aware that redis always works with strings, so you have to convert +the click count to :class:`int` by hand. + +Step 8: Templates +----------------- + +And here are all the templates. Just drop them into the `templates` +folder. Jinja2 supports template inheritance, so the first thing we will +do is create a layout template with blocks that act as placeholders. We +also set up Jinja2 so that it automatically escapes strings with HTML +rules, so we don't have to spend time on that ourselves. This prevents +XSS attacks and rendering errors. + +*layout.html*: + +.. sourcecode:: html+jinja + + + {% block title %}{% endblock %} | shortly + +
+

shortly

+

Shortly is a URL shortener written with Werkzeug + {% block body %}{% endblock %} +

+ +*new_url.html*: + +.. sourcecode:: html+jinja + + {% extends "layout.html" %} + {% block title %}Create New Short URL{% endblock %} + {% block body %} +

Submit URL

+
+ {% if error %} +

Error: {{ error }} + {% endif %} +

URL: + + +

+ {% endblock %} + +*short_link_details.html*: + +.. sourcecode:: html+jinja + + {% extends "layout.html" %} + {% block title %}Details about /{{ short_id }}{% endblock %} + {% block body %} +

/{{ short_id }}

+
+
Full link +
Click count: +
{{ click_count }} +
+ {% endblock %} + +Step 9: The Style +----------------- + +For this to look better than ugly black and white, here is a simple +stylesheet that goes along: + +*static/style.css*: + +.. sourcecode:: css + + body { background: #E8EFF0; margin: 0; padding: 0; } + body, input { font-family: 'Helvetica Neue', Arial, + sans-serif; font-weight: 300; font-size: 18px; } + .box { width: 500px; margin: 60px auto; padding: 20px; + background: white; box-shadow: 0 1px 4px #BED1D4; + border-radius: 2px; } + a { color: #11557C; } + h1, h2 { margin: 0; color: #11557C; } + h1 a { text-decoration: none; } + h2 { font-weight: normal; font-size: 24px; } + .tagline { color: #888; font-style: italic; margin: 0 0 20px 0; } + .link div { overflow: auto; font-size: 0.8em; white-space: pre; + padding: 4px 10px; margin: 5px 0; background: #E5EAF1; } + dt { font-weight: normal; } + .error { background: #E8EFF0; padding: 3px 8px; color: #11557C; + font-size: 0.9em; border-radius: 2px; } + .urlinput { width: 300px; } + +Bonus: Refinements +------------------ + +Look at the implementation in the example directory in the Werkzeug +repository to see a version of this tutorial with some small refinements +such as a custom 404 page. + +- `shortly in the example folder `_ diff --git a/docs/unicode.rst b/docs/unicode.rst new file mode 100644 index 0000000..30f76f5 --- /dev/null +++ b/docs/unicode.rst @@ -0,0 +1,76 @@ +Unicode +======= + +.. currentmodule:: werkzeug + +Werkzeug uses strings internally everywhere text data is assumed, even if +the HTTP standard is not Unicode aware. Basically all incoming data is +decoded from the charset (UTF-8 by default) so that you don't work with +bytes directly. Outgoing data is encoded into the target charset. + + +Unicode in Python +----------------- + +Imagine you have the German Umlaut ``ö``. 
In ASCII you cannot represent +that character, but in the ``latin-1`` and ``utf-8`` character sets you +can represent it, but they look different when encoded: + +>>> "ö".encode("latin1") +b'\xf6' +>>> "ö".encode("utf-8") +b'\xc3\xb6' + +An ``ö`` looks different depending on the encoding which makes it hard +to work with it as bytes. Instead, Python treats strings as Unicode text +and stores the information ``LATIN SMALL LETTER O WITH DIAERESIS`` +instead of the bytes for ``ö`` in a specific encoding. The length of a +string with 1 character will be 1, where the length of the bytes might +be some other value. + + +Unicode in HTTP +--------------- + +However, the HTTP spec was written in a time where ASCII bytes were the +common way data was represented. To work around this for the modern +web, Werkzeug decodes and encodes incoming and outgoing data +automatically. Data sent from the browser to the web application is +decoded from UTF-8 bytes into a string. Data sent from the application +back to the browser is encoded back to UTF-8. + + +Error Handling +-------------- + +Functions that do internal encoding or decoding accept an ``errors`` +keyword argument that is passed to :meth:`bytes.decode` and +:meth:`str.encode`. The default is ``'replace'`` so that errors are easy +to spot. It might be useful to set it to ``'strict'`` in order to catch +the error and report the bad data to the client. + + +Request and Response Objects +---------------------------- + +In most cases, you should stick with Werkzeug's default encoding of +UTF-8. If you have a specific reason to, you can subclass +:class:`wrappers.Request` and :class:`wrappers.Response` to change the +encoding and error handling. + +.. 
code-block:: python + + from werkzeug.wrappers.request import Request + from werkzeug.wrappers.response import Response + + class Latin1Request(Request): + charset = "latin1" + encoding_errors = "strict" + + class Latin1Response(Response): + charset = "latin1" + +The error handling can only be changed for the request. Werkzeug will +always raise errors when encoding to bytes in the response. It's your +responsibility to not create data that is not present in the target +charset. This is not an issue for UTF-8. diff --git a/docs/urls.rst b/docs/urls.rst new file mode 100644 index 0000000..f97e54b --- /dev/null +++ b/docs/urls.rst @@ -0,0 +1,6 @@ +=========== +URL Helpers +=========== + +.. automodule:: werkzeug.urls + :members: diff --git a/docs/utils.rst b/docs/utils.rst new file mode 100644 index 0000000..0d4e339 --- /dev/null +++ b/docs/utils.rst @@ -0,0 +1,74 @@ +========= +Utilities +========= + +Various utility functions shipped with Werkzeug. + +.. module:: werkzeug.utils + + +General Helpers +=============== + +.. autoclass:: cached_property + :members: + +.. autoclass:: environ_property + +.. autoclass:: header_property + +.. autofunction:: redirect + +.. autofunction:: append_slash_redirect + +.. autofunction:: send_file + +.. autofunction:: import_string + +.. autofunction:: find_modules + +.. autofunction:: secure_filename + + +URL Helpers +=========== + +Please refer to :doc:`urls`. + + +User Agent API +============== + +.. module:: werkzeug.user_agent + +.. autoclass:: UserAgent + :members: + :member-order: bysource + + +Security Helpers +================ + +.. module:: werkzeug.security + +.. autofunction:: generate_password_hash + +.. autofunction:: check_password_hash + +.. autofunction:: safe_join + + +Logging +======= + +Werkzeug uses standard Python :mod:`logging`. The logger is named +``"werkzeug"``. + +.. 
code-block:: python + + import logging + logger = logging.getLogger("werkzeug") + +If the logger level is not set, it will be set to :data:`~logging.INFO` +on first use. If there is no handler for that level, a +:class:`~logging.StreamHandler` is added. diff --git a/docs/wrappers.rst b/docs/wrappers.rst new file mode 100644 index 0000000..1f0d5aa --- /dev/null +++ b/docs/wrappers.rst @@ -0,0 +1,92 @@ +========================== +Request / Response Objects +========================== + +.. module:: werkzeug.wrappers + +The request and response objects wrap the WSGI environment or the return +value from a WSGI application so that it is another WSGI application +(wraps a whole application). + +How they Work +============= + +Your WSGI application is always passed two arguments. The WSGI "environment" +and the WSGI `start_response` function that is used to start the response +phase. The :class:`Request` class wraps the `environ` for easier access to +request variables (form data, request headers etc.). + +The :class:`Response` on the other hand is a standard WSGI application that +you can create. The simple hello world in Werkzeug looks like this:: + + from werkzeug.wrappers import Response + application = Response('Hello World!') + +To make it more useful you can replace it with a function and do some +processing:: + + from werkzeug.wrappers import Request, Response + + def application(environ, start_response): + request = Request(environ) + response = Response(f"Hello {request.args.get('name', 'World')}!") + return response(environ, start_response) + +Because this is a very common task the :class:`~Request` object provides +a helper for that. 
The above code can be rewritten like this:: + + from werkzeug.wrappers import Request, Response + + @Request.application + def application(request): + return Response(f"Hello {request.args.get('name', 'World')}!") + +The `application` is still a valid WSGI application that accepts the +environment and `start_response` callable. + + +Mutability and Reusability of Wrappers +====================================== + +The implementations of the Werkzeug request and response objects try +to guard you from common pitfalls by disallowing certain things as much as +possible. This serves two purposes: high performance and avoiding +pitfalls. + +For the request object the following rules apply: + +1. The request object is immutable. Modifications are not supported by + default, you may however replace the immutable attributes with mutable + attributes if you need to modify it. +2. The request object may be shared in the same thread, but is not thread + safe itself. If you need to access it from multiple threads, use + locks around calls. +3. It's not possible to pickle the request object. + +For the response object the following rules apply: + +1. The response object is mutable. +2. The response object can be pickled or copied after `freeze()` was + called. +3. Since Werkzeug 0.6 it's safe to use the same response object for + multiple WSGI responses. +4. It's possible to create copies using `copy.deepcopy`. + + +Wrapper Classes +=============== + +.. autoclass:: Request + :members: + :inherited-members: + + .. automethod:: _get_file_stream + + +.. autoclass:: Response + :members: + :inherited-members: + + .. automethod:: __call__ + + .. automethod:: _ensure_sequence diff --git a/docs/wsgi.rst b/docs/wsgi.rst new file mode 100644 index 0000000..a96916b --- /dev/null +++ b/docs/wsgi.rst @@ -0,0 +1,115 @@ +WSGI Helpers +============ + +.. 
module:: werkzeug.wsgi + +The following classes and functions are designed to make working with +the WSGI specification easier or operate on the WSGI layer. All the +functionality from this module is available on the high-level +:doc:`/wrappers`. + + +Iterator / Stream Helpers +------------------------- + +These classes and functions simplify working with the WSGI application +iterator and the input stream. + +.. autoclass:: ClosingIterator + +.. autoclass:: FileWrapper + +.. autoclass:: LimitedStream + :members: + +.. autofunction:: make_line_iter + +.. autofunction:: make_chunk_iter + +.. autofunction:: wrap_file + + +Environ Helpers +--------------- + +These functions operate on the WSGI environment. They extract useful +information or perform common manipulations: + +.. autofunction:: get_host + +.. autofunction:: get_content_length + +.. autofunction:: get_input_stream + +.. autofunction:: get_current_url + +.. autofunction:: get_query_string + +.. autofunction:: get_script_name + +.. autofunction:: get_path_info + +.. autofunction:: pop_path_info + +.. autofunction:: peek_path_info + +.. autofunction:: extract_path_info + +.. autofunction:: host_is_trusted + + +Convenience Helpers +------------------- + +.. autofunction:: responder + +.. autofunction:: werkzeug.testapp.test_app + + +Bytes, Strings, and Encodings +----------------------------- + +The values in HTTP requests come in as bytes representing (or encoded +to) ASCII. The WSGI specification (:pep:`3333`) decided to always use +the ``str`` type to represent values. To accomplish this, the raw bytes +are decoded using the ISO-8859-1 charset to produce a string. + +Strings in the WSGI environment are restricted to ISO-8859-1 code +points. If a string read from the environment might contain characters +outside that charset, it must first be encoded to bytes as ISO-8859-1, +then decoded to a string using the proper charset (typically UTF-8). The +reverse is done when writing to the environ. 
This is known as the "WSGI +encoding dance". + +Werkzeug provides functions to deal with this automatically so that you +don't need to be aware of the inner workings. Use the functions on this +page as well as :func:`~werkzeug.datastructures.EnvironHeaders` to read +data out of the WSGI environment. + +Applications should avoid manually creating or modifying a WSGI +environment unless they take care of the proper encoding or decoding +step. All high level interfaces in Werkzeug will apply the encoding and +decoding as necessary. + + +Raw Request URI and Path Encoding +--------------------------------- + +The ``PATH_INFO`` in the environ is the path value after +percent-decoding. For example, the raw path ``/hello%2fworld`` would +show up from the WSGI server to Werkzeug as ``/hello/world``. This loses +the information that the slash was a raw character as opposed to a path +separator. + +The WSGI specification (:pep:`3333`) does not provide a way to get the +original value, so it is impossible to route some types of data in the +path. The most compatible way to work around this is to send problematic +data in the query string instead of the path. + +However, many WSGI servers add a non-standard environ key with the raw +path. To match this behavior, Werkzeug's test client and development +server will add the raw value to both the ``REQUEST_URI`` and +``RAW_URI`` keys. If you want to route based on this value, you can use +middleware to replace ``PATH_INFO`` in the environ before it reaches the +application. However, keep in mind that these keys are non-standard and +not guaranteed to be present. diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 0000000..31b50ef --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,113 @@ +================= +Werkzeug Examples +================= + +This directory contains various example applications and example code of +Werkzeug powered applications. 
+ +Besides the proof of concept applications and code snippets in the partial +folder they all have external dependencies for template engines or database +adapters (SQLAlchemy only so far). Also, every application has click as +external dependency, used to create the command line interface. + + +Full Example Applications +========================= + +The following example applications are application types you would actually +find in real life :-) + + +`simplewiki` + + A simple Wiki implementation. + + Requirements: + + - SQLAlchemy + - Creoleparser >= 0.7 + - genshi + + You can obtain all packages in the Cheeseshop via easy_install. You have + to have at least version 0.7 of Creoleparser. + + Usage:: + + ./manage-simplewiki.py initdb + ./manage-simplewiki.py runserver + + Or of course you can just use the application object + (`simplewiki.SimpleWiki`) and hook that into your favourite WSGI gateway. + The constructor of the application object takes a single argument which is + the SQLAlchemy URI for the database. + + The management script for the devserver looks up an environment variable + called `SIMPLEWIKI_DATABASE_URI` and uses that for the database URI. If + no such variable is provided "sqlite:////tmp/simplewiki.db" is assumed. + +`plnt` + + A planet called plnt, pronounced plant. + + Requirements: + + - SQLAlchemy + - Jinja2 + - feedparser + + You can obtain all packages in the Cheeseshop via easy_install. + + Usage:: + + ./manage-plnt.py initdb + ./manage-plnt.py sync + ./manage-plnt.py runserver + + The WSGI application is called `plnt.Plnt` which, like the simple wiki, + accepts a database URI as first argument. The environment variable for + the database key is called `PLNT_DATABASE_URI` and the default is + "sqlite:////tmp/plnt.db". + + By default a few python related blogs are added to the database, you + can add more in a python shell by playing with the `Blog` model. + +`shorty` + + A tinyurl clone for the Werkzeug tutorial. 
+ + Requirements: + + - SQLAlchemy + - Jinja2 + + You can obtain all packages in the Cheeseshop via easy_install. + + Usage:: + + ./manage-shorty.py initdb + ./manage-shorty.py runserver + + The WSGI application is called `shorty.application.Shorty` which, like the + simple wiki, accepts a database URI as first argument. + + The source code of the application is explained in detail in the Werkzeug + tutorial. + +`couchy` + + Like shorty, but implemented using CouchDB. + + Requirements : + + - werkzeug : http://werkzeug.pocoo.org + - jinja : http://jinja.pocoo.org + - couchdb 0.72 & above : https://couchdb.apache.org/ + +`cupoftee` + + A `Teeworlds `_ server browser. This application + works best in a non forking environment and won't work for CGI. + + Usage:: + + ./manage-cupoftee.py runserver diff --git a/examples/coolmagic/__init__.py b/examples/coolmagic/__init__.py new file mode 100644 index 0000000..2d6c904 --- /dev/null +++ b/examples/coolmagic/__init__.py @@ -0,0 +1 @@ +from .application import make_app diff --git a/examples/coolmagic/application.py b/examples/coolmagic/application.py new file mode 100644 index 0000000..819616b --- /dev/null +++ b/examples/coolmagic/application.py @@ -0,0 +1,77 @@ +"""This module provides the WSGI application. + +The WSGI middlewares are applied in the `make_app` factory function that +automatically wraps the application within the require middlewares. Per +default only the `SharedDataMiddleware` is applied. +""" +from os import listdir +from os import path + +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import NotFound +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.routing import Map +from werkzeug.routing import RequestRedirect +from werkzeug.routing import Rule + +from .utils import local_manager +from .utils import Request + + +class CoolMagicApplication: + """ + The application class. It's passed a directory with configuration values. 
+ """ + + def __init__(self, config): + self.config = config + + for fn in listdir(path.join(path.dirname(__file__), "views")): + if fn.endswith(".py") and fn != "__init__.py": + __import__(f"coolmagic.views.{fn[:-3]}") + + from coolmagic.utils import exported_views + + rules = [ + # url for shared data. this will always be unmatched + # because either the middleware or the webserver + # handles that request first. + Rule("/public/", endpoint="shared_data") + ] + self.views = {} + for endpoint, (func, rule, extra) in exported_views.items(): + if rule is not None: + rules.append(Rule(rule, endpoint=endpoint, **extra)) + self.views[endpoint] = func + self.url_map = Map(rules) + + def __call__(self, environ, start_response): + urls = self.url_map.bind_to_environ(environ) + req = Request(environ, urls) + try: + endpoint, args = urls.match(req.path) + resp = self.views[endpoint](**args) + except NotFound: + resp = self.views["static.not_found"]() + except (HTTPException, RequestRedirect) as e: + resp = e + return resp(environ, start_response) + + +def make_app(config=None): + """ + Factory function that creates a new `CoolmagicApplication` + object. Optional WSGI middlewares should be applied here. 
+ """ + config = config or {} + app = CoolMagicApplication(config) + + # static stuff + app = SharedDataMiddleware( + app, {"/public": path.join(path.dirname(__file__), "public")} + ) + + # clean up locals + app = local_manager.make_middleware(app) + + return app diff --git a/examples/coolmagic/helpers.py b/examples/coolmagic/helpers.py new file mode 100644 index 0000000..9074a24 --- /dev/null +++ b/examples/coolmagic/helpers.py @@ -0,0 +1,5 @@ +from .utils import ThreadedRequest + +#: a thread local proxy request object +request = ThreadedRequest() +del ThreadedRequest diff --git a/examples/coolmagic/public/style.css b/examples/coolmagic/public/style.css new file mode 100644 index 0000000..bf48856 --- /dev/null +++ b/examples/coolmagic/public/style.css @@ -0,0 +1,10 @@ +body { + margin: 0; + padding: 20px; + font-family: sans-serif; + font-size: 15px; +} + +h1, a { + color: #a00; +} diff --git a/examples/coolmagic/templates/layout.html b/examples/coolmagic/templates/layout.html new file mode 100644 index 0000000..a87c954 --- /dev/null +++ b/examples/coolmagic/templates/layout.html @@ -0,0 +1,12 @@ + + + + {{ page_title }} — Cool Magic! + + + +

Cool Magic

+

{{ page_title }}

+ {% block page_body %}{% endblock %} + + diff --git a/examples/coolmagic/templates/static/about.html b/examples/coolmagic/templates/static/about.html new file mode 100644 index 0000000..b814230 --- /dev/null +++ b/examples/coolmagic/templates/static/about.html @@ -0,0 +1,10 @@ +{% extends "layout.html" %} +{% set page_title = 'About the Magic' %} +{% block page_body %} +

+ Nothing to see. It's just magic. +

+

+ back to the index +

+{% endblock %} diff --git a/examples/coolmagic/templates/static/index.html b/examples/coolmagic/templates/static/index.html new file mode 100644 index 0000000..a7d8410 --- /dev/null +++ b/examples/coolmagic/templates/static/index.html @@ -0,0 +1,13 @@ +{% extends "layout.html" %} +{% set page_title = 'Welcome to the Magic' %} +{% block page_body %} +

+ Welcome to the magic! This is a bigger example for the + Werkzeug toolkit. And it contains a lot of magic. +

+

+ about the implementation or + click here if you want to see a broken view. +

+{% endblock %} diff --git a/examples/coolmagic/templates/static/not_found.html b/examples/coolmagic/templates/static/not_found.html new file mode 100644 index 0000000..6f41e8e --- /dev/null +++ b/examples/coolmagic/templates/static/not_found.html @@ -0,0 +1,8 @@ +{% extends "layout.html" %} +{% set page_title = 'Missing Magic' %} +{% block page_body %} +

+ The requested magic really does not exist. Maybe you want + to look for it on the index. +

def export(string, template=None, **extra):
    """Register a view function and optionally render its result in a template.

    The decorated function is stored in ``exported_views`` under an endpoint
    built from its module and function name.

    :param string: value stored next to the view in ``exported_views``
        (the views in this example pass a URL rule, or ``None``).
    :param template: optional template name.  When given, any return value
        of the view that is not a ``Response`` is treated as the template
        context (``None`` means an empty context) and rendered through
        ``TemplateResponse``.
    :param extra: additional keyword arguments stored with the view.
    :return: a decorator returning the (possibly wrapped) view function.
    """
    from functools import wraps

    # The endpoint drops the leading package path, so a view defined in
    # ``coolmagic.views.static`` named ``index`` becomes ``static.index``.
    # The slice is applied blindly; it assumes views live under that package.
    prefix_length = len("coolmagic.views.")

    def wrapped(f):
        endpoint = f"{f.__module__}.{f.__name__}"[prefix_length:]
        view = f
        if template is not None:

            # ``wraps`` keeps the name and docstring of the original view.
            @wraps(f)
            def view(**kwargs):
                rv = f(**kwargs)
                if not isinstance(rv, Response):
                    rv = TemplateResponse(template, **(rv or {}))
                return rv

        exported_views[endpoint] = (view, string, extra)
        return view

    return wrapped
+ """ + + def __getattr__(self, name): + if name == "__members__": + return [x for x in dir(local.request) if not x.startswith("_")] + return getattr(local.request, name) + + def __setattr__(self, name, value): + return setattr(local.request, name, value) + + +class Response(BaseResponse): + """ + The concrete response object for the WSGI application. + """ + + charset = "utf-8" + default_mimetype = "text/html" + + +class TemplateResponse(Response): + """ + Render a template to a response. + """ + + def __init__(self, template_name, **values): + from coolmagic import helpers + + values.update(request=local.request, h=helpers) + template = template_env.get_template(template_name) + Response.__init__(self, template.render(values)) diff --git a/examples/coolmagic/views/__init__.py b/examples/coolmagic/views/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/coolmagic/views/static.py b/examples/coolmagic/views/static.py new file mode 100644 index 0000000..c61e014 --- /dev/null +++ b/examples/coolmagic/views/static.py @@ -0,0 +1,25 @@ +from coolmagic.utils import export + + +@export("/", template="static/index.html") +def index(): + pass + + +@export("/about", template="static/about.html") +def about(): + pass + + +@export("/broken") +def broken(): + raise RuntimeError("that's really broken") + + +@export(None, template="static/not_found.html") +def not_found(): + """ + This function is always executed if an url does not + match or a `NotFound` exception is raised. 
+ """ + pass diff --git a/examples/couchy/README b/examples/couchy/README new file mode 100644 index 0000000..2496044 --- /dev/null +++ b/examples/couchy/README @@ -0,0 +1,7 @@ +couchy README + +Requirements : +- werkzeug : http://werkzeug.pocoo.org +- jinja : http://jinja.pocoo.org +- couchdb 0.72 & above : https://couchdb.apache.org/ +- couchdb-python 0.3 & above : https://github.com/djc/couchdb-python diff --git a/examples/couchy/__init__.py b/examples/couchy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/couchy/application.py b/examples/couchy/application.py new file mode 100644 index 0000000..04ef623 --- /dev/null +++ b/examples/couchy/application.py @@ -0,0 +1,47 @@ +from couchdb.client import Server +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import NotFound +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.wrappers import Request +from werkzeug.wsgi import ClosingIterator + +from . import views +from .models import URL +from .utils import local +from .utils import local_manager +from .utils import STATIC_PATH +from .utils import url_map + + +class Couchy: + def __init__(self, db_uri): + local.application = self + + server = Server(db_uri) + try: + db = server.create("urls") + except Exception: + db = server["urls"] + self.dispatch = SharedDataMiddleware(self.dispatch, {"/static": STATIC_PATH}) + + URL.db = db + + def dispatch(self, environ, start_response): + local.application = self + request = Request(environ) + local.url_adapter = adapter = url_map.bind_to_environ(environ) + try: + endpoint, values = adapter.match() + handler = getattr(views, endpoint) + response = handler(request, **values) + except NotFound: + response = views.not_found(request) + response.status_code = 404 + except HTTPException as e: + response = e + return ClosingIterator( + response(environ, start_response), [local_manager.cleanup] + ) + + def __call__(self, environ, start_response): + return 
class URL(Document):
    """A shortened URL stored as a CouchDB document.

    The database handle is shared by all instances through the ``db`` class
    attribute, which the application assigns at start-up.
    """

    target = TextField()
    public = BooleanField()
    # Pass the callable, not its result: calling ``utcnow()`` here would
    # freeze the timestamp at import time and reuse it for every document.
    added = DateTimeField(default=datetime.utcnow)
    shorty_id = TextField(default=None)
    # Database handle, set from outside before any document is used.
    db = None

    @classmethod
    def load(cls, id):
        """Load the document with the given id from the shared database."""
        return super().load(URL.db, id)

    @classmethod
    def query(cls, code):
        """Run the map function source ``code`` against the shared database."""
        return URL.db.query(code)

    def store(self):
        """Persist the document and return ``self``.

        A new document is written under ``shorty_id`` when one was requested,
        otherwise under a random id.  Random ids are retried a bounded number
        of times; an explicit alias is tried exactly once.

        :raises RuntimeError: if the document could not be written.  The last
            underlying error is chained as the cause.
        """
        # NOTE(review): if ``_data`` is a plain mapping this ``getattr`` is
        # always ``None`` and the ``else`` branch is never taken -- confirm
        # against the couchdb-python version in use.
        if getattr(self._data, "id", None) is None:
            alias = self.shorty_id if self.shorty_id else None
            # Retrying a fixed alias cannot succeed where it just failed, and
            # retrying forever would hang the request when the database is
            # unreachable.
            attempts = 1 if alias else 10
            last_error = None
            for _ in range(attempts):
                doc_key = alias if alias else get_random_uid()
                try:
                    docid = URL.db.resource.put(
                        content=self._data, path=f"/{doc_key}/"
                    )["id"]
                except Exception as exc:
                    last_error = exc
                    continue
                if docid:
                    break
            else:
                raise RuntimeError(
                    f"could not store URL after {attempts} attempt(s)"
                ) from last_error
            self._data = URL.db.get(docid)
        else:
            super().store(URL.db)
        return self

    @property
    def short_url(self):
        """External URL of the ``link`` endpoint for this document."""
        return url_for("link", uid=self.id, _external=True)

    def __repr__(self):
        return f"<URL {self.id!r}>"
background-color: #0E8A96; +} + +div.footer { + margin: 0 auto 0 auto; + font-size: 13px; + text-align: right; + padding: 10px; + width: 480px; + background-color: #004C63; + color: white; +} + +div.footer a { + color: #A0E9FF; +} + +div.body { + margin: 0 auto 0 auto; + padding: 20px; + width: 460px; + background-color: #98CE24; + color: black; +} + +div.body h2 { + margin: 0 0 0.5em 0; + text-align: center; +} + +div.body input { + margin: 0.2em 0 0.2em 0; + font-family: 'Lucida Sans', 'Verdana', sans-serif; + font-size: 20px; + background-color: #CCEB98; + color: black; +} + +div.body #url { + width: 400px; +} + +div.body #alias { + width: 300px; + margin-right: 10px; +} + +div.body #submit { + width: 90px; +} + +div.body p { + margin: 0; + padding: 0.2em 0 0.2em 0; +} + +div.body ul { + margin: 1em 0 1em 0; + padding: 0; + list-style: none; +} + +div.error { + margin: 1em 0 1em 0; + border: 2px solid #AC0202; + background-color: #9E0303; + font-weight: bold; + color: white; +} + +div.pagination { + font-size: 13px; +} diff --git a/examples/couchy/templates/display.html b/examples/couchy/templates/display.html new file mode 100644 index 0000000..83d5685 --- /dev/null +++ b/examples/couchy/templates/display.html @@ -0,0 +1,8 @@ +{% extends 'layout.html' %} +{% block body %} +

Shortened URL

+

+ The URL {{ url.target|urlize(40, true) }} + was shortened to {{ url.short_url|urlize }}. +

+{% endblock %} diff --git a/examples/couchy/templates/layout.html b/examples/couchy/templates/layout.html new file mode 100644 index 0000000..f496806 --- /dev/null +++ b/examples/couchy/templates/layout.html @@ -0,0 +1,16 @@ + + + + Shorty + + + +

Shorty

+
{% block body %}{% endblock %}
+ + + diff --git a/examples/couchy/templates/list.html b/examples/couchy/templates/list.html new file mode 100644 index 0000000..be42f0d --- /dev/null +++ b/examples/couchy/templates/list.html @@ -0,0 +1,19 @@ +{% extends 'layout.html' %} +{% block body %} +

List of URLs

+
    + {%- for url in pagination.entries %} +
  • {{ url.id|e }} » + {{ url.target|urlize(38, true) }}
  • + {%- else %} +
  • no URLs shortened yet
  • + {%- endfor %} +
+ +{% endblock %} diff --git a/examples/couchy/templates/new.html b/examples/couchy/templates/new.html new file mode 100644 index 0000000..3db0802 --- /dev/null +++ b/examples/couchy/templates/new.html @@ -0,0 +1,14 @@ +{% extends 'layout.html' %} +{% block body %} +

Create a Shorty-URL!

+ {% if error %}
{{ error }}
{% endif -%} +
+

Enter the URL you want to shorten

+

+

Optionally you can give the URL a memorable name

+

{# + #}

+

+

+
+{% endblock %} diff --git a/examples/couchy/templates/not_found.html b/examples/couchy/templates/not_found.html new file mode 100644 index 0000000..bd45566 --- /dev/null +++ b/examples/couchy/templates/not_found.html @@ -0,0 +1,8 @@ +{% extends 'layout.html' %} +{% block body %} +

Page Not Found

+

+ The page you have requested does not exist on this server. What about + adding a new URL? +

class Pagination:
    """Split a result sequence into pages and link to the neighbouring ones.

    :param results: sequence supporting ``len()`` and slicing.
    :param per_page: number of entries shown on one page.
    :param page: 1-based number of the current page.
    :param endpoint: endpoint name used to build the previous/next URLs.
    """

    def __init__(self, results, per_page, page, endpoint):
        self.results = results
        self.per_page = per_page
        self.page = page
        self.endpoint = endpoint

    @cached_property
    def count(self):
        """Total number of results."""
        return len(self.results)

    @cached_property
    def entries(self):
        """Return the slice of results that belongs to the current page.

        Page numbers below 1 yield an empty slice.  Without this guard a
        negative page would produce negative slice bounds and silently
        return entries counted from the end of the results.
        """
        if self.page < 1:
            return self.results[:0]
        start = (self.page - 1) * self.per_page
        return self.results[start : start + self.per_page]

    @property
    def has_previous(self):
        """Return True if there are pages before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """Return True if there are pages after the current one."""
        return self.page < self.pages

    @property
    def previous(self):
        """Return the URL for the previous page."""
        return url_for(self.endpoint, page=self.page - 1)

    @property
    def next(self):
        """Return the URL for the next page."""
        return url_for(self.endpoint, page=self.page + 1)

    @property
    def pages(self):
        """Return the number of pages (at least 1, even without results)."""
        return max(0, self.count - 1) // self.per_page + 1
+ elif alias: + if len(alias) > 140: + error = "Your alias is too long" + elif "/" in alias: + error = "Your alias might not include a slash" + elif URL.load(alias): + error = "The alias you have requested exists already" + if not error: + url = URL( + target=url, + public="private" not in request.form, + shorty_id=alias if alias else None, + ) + url.store() + uid = url.id + return redirect(url_for("display", uid=uid)) + return render_template("new.html", error=error, url=url) + + +@expose("/display/") +def display(request, uid): + url = URL.load(uid) + if not url: + raise NotFound() + return render_template("display.html", url=url) + + +@expose("/u/") +def link(request, uid): + url = URL.load(uid) + if not url: + raise NotFound() + return redirect(url.target, 301) + + +@expose("/list/", defaults={"page": 1}) +@expose("/list/") +def list(request, page): + def wrap(doc): + data = doc.value + data["_id"] = doc.id + return URL.wrap(data) + + code = """function(doc) { if (doc.public){ map([doc._id], doc); }}""" + docResults = URL.query(code) + results = [wrap(doc) for doc in docResults] + pagination = Pagination(results, 1, page, "list") + if pagination.page > 1 and not pagination.entries: + raise NotFound() + return render_template("list.html", pagination=pagination) + + +def not_found(request): + return render_template("not_found.html") diff --git a/examples/cupoftee/__init__.py b/examples/cupoftee/__init__.py new file mode 100644 index 0000000..d009f77 --- /dev/null +++ b/examples/cupoftee/__init__.py @@ -0,0 +1,2 @@ +"""Werkzeug powered Teeworlds Server Browser.""" +from .application import make_app diff --git a/examples/cupoftee/application.py b/examples/cupoftee/application.py new file mode 100644 index 0000000..7104cfd --- /dev/null +++ b/examples/cupoftee/application.py @@ -0,0 +1,120 @@ +import time +from os import path +from threading import Thread + +from jinja2 import Environment +from jinja2 import PackageLoader +from werkzeug.exceptions import 
class Cup:
    """WSGI application that serves the server browser pages.

    Creating an instance opens the database, builds the server browser and
    starts a daemon thread that keeps re-synchronising the server list.
    """

    def __init__(self, database, interval=120):
        """Set up templates, storage and the background updater.

        :param database: filename handed to ``Database``.
        :param interval: seconds to wait after a successful sync; a failed
            sync is retried after half of that time.
        """
        self.jinja_env = Environment(loader=PackageLoader("cupoftee"), autoescape=True)
        self.interval = interval
        self.db = Database(database)
        self.server_browser = ServerBrowser(self)
        self.updater = Thread(target=self.update_server_browser, daemon=True)
        self.updater.start()

    def update_server_browser(self):
        """Synchronise the server browser forever (runs in the updater thread)."""
        while True:
            synced = self.server_browser.sync()
            # Retry sooner when the last synchronisation failed.
            time.sleep(self.interval if synced else self.interval // 2)

    def dispatch_request(self, request):
        """Match the request URL to a page and return a WSGI response.

        Unknown URLs are answered by ``MissingPage``; any other HTTP
        exception is returned as the response itself.
        """
        url_adapter = url_map.bind_to_environ(request.environ)
        try:
            endpoint, values = url_adapter.match()
            page = pages[endpoint](self, request, url_adapter)
            response = page.process(**values)
        except NotFound:
            page = MissingPage(self, request, url_adapter)
            response = page.process()
        except HTTPException as e:
            return e
        # A page may return its own response (e.g. a redirect) from
        # ``process``; otherwise its template is rendered.
        return response or page.get_response()

    def __call__(self, environ, start_response):
        request = Request(environ)
        return self.dispatch_request(request)(environ, start_response)

    def render_template(self, name, *args, **context):
        """Render the template ``name`` and return the resulting string.

        The context may be given as one positional mapping, as keyword
        arguments, or both (keywords win).  ``Page.render_template`` passes
        a dict positionally, and that dict contains a ``self`` key, so it
        could never be expanded into keyword arguments of this method.

        :raises TypeError: if more than one positional context is given.
        """
        if len(args) > 1:
            raise TypeError(
                "render_template() accepts at most one positional context mapping"
            )
        values = dict(args[0]) if args else {}
        values.update(context)
        template = self.jinja_env.get_template(name)
        return template.render(values)
class Database:
    """Small pickle-backed object store on top of a ``dbm`` file.

    Objects are cached in memory after the first access and only written
    back to the file by :meth:`sync` (or :meth:`close`).  Values are read
    with ``pickle.loads``, so the database file must come from a trusted
    source.
    """

    def __init__(self, filename):
        self.filename = filename
        # "c" creates the file when it is missing.
        # NOTE(review): the extra "f" flag is not understood by every dbm
        # backend -- confirm the backend in use accepts it.
        self._fs = dbm.open(filename, "cf")
        self._local = {}
        self._lock = Lock()

    def __getitem__(self, key):
        with self._lock:
            return self._load_key(key)

    def _load_key(self, key):
        """Return the object for ``key``; callers must hold the lock.

        :raises KeyError: if the key is neither cached nor stored on disk.
        """
        if key in self._local:
            return self._local[key]
        rv = loads(self._fs[key])
        self._local[key] = rv
        return rv

    def __setitem__(self, key, value):
        # Take the lock like every other accessor: ``sync`` iterates the
        # cache and must not see it change size from another thread.
        with self._lock:
            self._local[key] = value

    def __delitem__(self, key):
        with self._lock:
            self._local.pop(key, None)
            if key in self._fs:
                del self._fs[key]

    def __del__(self):
        self.close()

    def __contains__(self, key):
        with self._lock:
            try:
                self._load_key(key)
            except KeyError:
                return False
            return True

    def setdefault(self, key, factory):
        """Return the object for ``key``, creating it with ``factory()``.

        ``factory`` is called while the lock is held, so it must not access
        this database itself.
        """
        with self._lock:
            try:
                rv = self._load_key(key)
            except KeyError:
                self._local[key] = rv = factory()
            return rv

    def sync(self):
        """Pickle every cached object into the file and flush it."""
        with self._lock:
            for key, value in self._local.items():
                self._fs[key] = dumps(value, 2)
            self._fs.sync()

    def close(self):
        """Write pending data and close the file, ignoring any failure.

        This is deliberately best effort: it also runs from ``__del__``,
        possibly on a half-initialised instance or during interpreter
        shutdown.  The handle is closed even when the final sync fails.
        """
        try:
            try:
                self.sync()
            finally:
                self._fs.close()
        except Exception:
            pass
def _sync_server_browser(self, addr, to_delete):
    """Ask one master server for its game servers and refresh them.

    Every server reported in the reply is synchronised when it is already
    known, or created otherwise.  Servers handled here are removed from
    ``to_delete``; a known server whose sync fails stays in the set.

    :param addr: ``(host, port)`` of the master server to query.
    :param to_delete: set of server ids, updated in place.
    :raises OSError: if the request cannot be sent or times out
        (``socket.timeout`` is a subclass of ``OSError`` since Python 3.3).
    """
    # The context manager closes the socket even when the request times out.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.settimeout(5)
        sock.sendto(b"\x20\x00\x00\x00\x00\x48\xff\xff\xff\xffreqt", addr)
        data = sock.recvfrom(1024)[0][14:]

    # The payload is read as 6-byte records: four address bytes followed
    # by the port, low byte first.  Indexing ``bytes`` already yields
    # integers on Python 3, so no ``ord()`` call is needed (it would raise
    # TypeError there).  A trailing partial record is ignored.
    for offset in range(0, len(data) // 6 * 6, 6):
        record = data[offset : offset + 6]
        host = ".".join(str(octet) for octet in record[:4])
        port = record[5] * 256 + record[4]
        server_addr = (host, port)
        server_id = f"{host}:{port}"
        if server_id in self.servers:
            if not self.servers[server_id].sync():
                continue
        else:
            try:
                self.servers[server_id] = Server(server_addr, server_id)
            except ServerError:
                # The server did not answer; it is simply not added.
                pass
        to_delete.discard(server_id)
class ServerList(Page):
    """Index page: a sortable table of servers plus all players online."""

    url_rule = "/"

    def order_link(self, name, title):
        """Return the column header link that sorts the table by ``name``.

        Clicking the column that is already active toggles the direction;
        the link's CSS class (``up``/``down``) reflects the direction the
        link switches to.
        """
        css_class = ""
        link = f"?order_by={name}"
        desc = False
        if name == self.order_by:
            desc = not self.order_desc
            css_class = f' class="{"down" if desc else "up"}"'
        if desc:
            link += "&dir=desc"
        # The previous body computed ``link`` and the class but returned
        # only the title, so the header could not be clicked.
        # NOTE(review): the environment enables autoescape; confirm the
        # template marks this value as safe so the markup is not escaped.
        return f'<a href="{link}"{css_class}>{title}</a>'

    def process(self):
        """Collect the sorted server and player lists for the template.

        :return: a redirect to the default listing when ``order_by`` names
            an unknown column, otherwise ``None``.
        """
        from functools import cmp_to_key

        # ``sorted`` needs a one-argument key; wrap the comparison helper.
        by_name = cmp_to_key(lambda a, b: unicodecmp(a.name, b.name))

        self.order_by = self.request.args.get("order_by") or "name"
        sort_func = {
            # Server objects only define ``__cmp__``, which Python 3
            # ignores, so order them explicitly by name.
            "name": by_name,
            "map": lambda x: x.map,
            "gametype": lambda x: x.gametype,
            "players": lambda x: x.player_count,
            "progression": lambda x: x.progression,
        }.get(self.order_by)
        if sort_func is None:
            return redirect(self.url_for("serverlist"))

        # ``dict.values()`` is a view without ``sort``; build a list.
        self.servers = sorted(self.cup.server_browser.servers.values(), key=sort_func)
        self.order_desc = self.request.args.get("dir") == "desc"
        if self.order_desc:
            self.servers.reverse()

        self.players = sorted(
            (player for server in self.servers for player in server.players),
            key=by_name,
        )
class Search(Page):
    """Search page listing the servers a given nickname is playing on."""

    url_rule = "/search"

    def process(self):
        """Look up the ``user`` query argument among all known players.

        ``self.results`` is always set, and each server appears at most
        once even when several of its players share the requested name.
        """
        self.user = self.request.args.get("user")
        self.results = []
        if self.user:
            self.results = [
                server
                for server in self.cup.server_browser.servers.values()
                if any(player.name == self.user for player in server.players)
            ]
url(header.png); + color: white; +} + +h1 a { + display: block; + margin: 0 auto 0 auto; + height: 90px; + width: 395px; + background: url(logo.png); +} + +div.contents { + background: white url(content.png) repeat-x; + margin: -8px auto 0 auto; + text-align: left; + padding: 15px; + max-width: 1000px; +} + +div.contents a { + margin: 0 5px 0 5px; +} + +div.footer { + max-width: 1014px; + margin: 0 auto 0 auto; + background: #1a6f96; + padding: 8px; + font-size: 10px; + color: white; +} + +div.footer a { + color: #79b9d7; +} + +a { + color: #1a6f96; + text-decoration: none; +} + +a:hover { + color: #ffb735; +} + +h2 { + margin: 0 0 0.5em 0; + padding: 0 0 0.1em 0; + color: #ffb735; + font-size: 2em; + border-bottom: 1px solid #ccc; +} + +h3 { + margin: 1em 0 0.7em 0; + color: #ffb735; + font-size: 1.5em; +} + +table { + width: 100%; + border-collapse: collapse; + border: 3px solid #79b9d7; +} + +table td, table th { + border: 1px solid #79b9d7; + padding: 3px 6px 3px 6px; + font-weight: normal; + text-align: center; + font-size: 13px; +} + +table th { + background: #f2f8fb; + text-align: left; +} + +table thead th { + font-weight: bold; + background-color: #79b9d7; + text-align: center; +} + +table thead th a { + color: white; +} + +table thead th a.up { + background: url(up.png) no-repeat right; + padding-right: 20px; +} + +table thead th a.down { + background: url(down.png) no-repeat right; + padding-right: 20px; +} + +div.players { + font-size: 11px; +} + +dl dt { + font-weight: bold; + padding: 5px 0 0 0; +} diff --git a/examples/cupoftee/shared/up.png b/examples/cupoftee/shared/up.png new file mode 100755 index 0000000..e0908ca Binary files /dev/null and b/examples/cupoftee/shared/up.png differ diff --git a/examples/cupoftee/templates/layout.html b/examples/cupoftee/templates/layout.html new file mode 100644 index 0000000..b434051 --- /dev/null +++ b/examples/cupoftee/templates/layout.html @@ -0,0 +1,20 @@ + + + + Teeworlds Server Browser + + + + +

Teeworlds Server Browser

+
+ {% block body %}{% endblock %} +
+ + + diff --git a/examples/cupoftee/templates/missingpage.html b/examples/cupoftee/templates/missingpage.html new file mode 100644 index 0000000..2604b08 --- /dev/null +++ b/examples/cupoftee/templates/missingpage.html @@ -0,0 +1,11 @@ +{% extends "layout.html" %} +{% block body %} +

Page Not Found

+

+ The requested page does not exist on this server. If you expect something + here (for example a server) it probably went away after the last update. +

+

+ go back to the server list. +

+{% endblock %} diff --git a/examples/cupoftee/templates/search.html b/examples/cupoftee/templates/search.html new file mode 100644 index 0000000..cbfc488 --- /dev/null +++ b/examples/cupoftee/templates/search.html @@ -0,0 +1,39 @@ +{% extends "layout.html" %} +{% block body %} +

Nick Search

+{% if not user %} +
+

+ You have to enter a nickname. +

+

+ + +

+

+ Take me back to the server list. +

+
+{% else %} +{% if results %} +

+ The nickname "{{ user }}" is currently playing on the + following {{ 'server' if results|length == 1 else 'servers' }}: +

+ +{% else %} +

+ The nickname "{{ user }}" is currently not playing. +

+{% endif %} +

+ You can bookmark this link + to search for "{{ user }}" quickly or return + to the server list. +

+{% endif %} +{% endblock %} diff --git a/examples/cupoftee/templates/server.html b/examples/cupoftee/templates/server.html new file mode 100644 index 0000000..7b92c57 --- /dev/null +++ b/examples/cupoftee/templates/server.html @@ -0,0 +1,32 @@ +{% extends "layout.html" %} +{% block body %} +

{{ server.name }}

+

+ Take me back to the server list. +

+
+
Map
+
{{ server.map }}
+
Gametype
+
{{ server.gametype }}
+
Number of players
+
{{ server.player_count }}
+
Server version
+
{{ server.version }}
+
Maximum number of players
+
{{ server.max_players }}
+ {% if server.progression >= 0 %} +
Game progression
+
{{ server.progression }}%
+ {% endif %} +
+{% if server.players %} +

Players

+
    + {% for player in server.players %} +
  • {{ player.name }} + ({{ player.score }}
  • + {% endfor %} +
+{% endif %} +{% endblock %} diff --git a/examples/cupoftee/templates/serverlist.html b/examples/cupoftee/templates/serverlist.html new file mode 100644 index 0000000..c77d441 --- /dev/null +++ b/examples/cupoftee/templates/serverlist.html @@ -0,0 +1,62 @@ +{% extends "layout.html" %} +{% block body %} +

Server List

+

+ Currently {{ len(players) }} players are playing on + {{ len(servers) }} servers. + {% if cup.server_browser.last_sync %} + This list was last synced on + {{ cup.server_browser.last_sync.strftime('%d %B %Y at %H:%M UTC') }}. + {% else %} + Synchronization with main server in progress. Reload the page in a minute + or two, to see the server list. + {% endif %} +

+ + + + + + + + + + + + {% for server in servers %} + + + + + + + + {% endfor %} + +
{{ self.order_link('name', 'Name') }}{{ self.order_link('map', 'Map') }}{{ self.order_link('gametype', 'Gametype') }}{{ self.order_link('players', 'Players') }}{{ self.order_link('progression', 'Progression') }}
{{ server.name }}{{ server.map }}{{ server.gametype }}{{ server.player_count }} / {{ server.max_players }}{{ '%d%%' % server.progression if server.progression >= 0 else '?' }}
+

Players online

+

+ The following map represents the users playing currently. The bigger their name + the higher their score in the current game. Clicking on the name takes you to + the detail page of the server for some more information. +

+
+{% for player in players %} + {{ player.name }} +{% endfor %} +
+

Find User

+

+ Find a user by username. The result page contains a link you can bookmark to + find your buddy easily. Because there is currently no central user database, + users with overly generic usernames (like the default "nameless tee" user) + can appear on multiple servers. +

+
+

+ + +

+
class Application:
    """WSGI application that guards a single response with HTTP basic auth.

    :param users: mapping of username to the plain-text password expected
        for that user.
    :param realm: realm name sent in the ``WWW-Authenticate`` challenge.
    """

    def __init__(self, users, realm="login required"):
        self.users = users
        self.realm = realm

    def check_auth(self, username, password):
        """Return ``True`` if *password* matches the one stored for *username*.

        Unknown users and missing (``None``) passwords are rejected. The
        comparison is done with :func:`hmac.compare_digest` so that the time
        it takes does not depend on how many leading characters of the
        submitted password are correct.
        """
        # Function-scope import: keeps this class usable without touching
        # the module's import block.
        import hmac

        try:
            expected = self.users[username]
        except KeyError:
            return False

        def as_bytes(value):
            # compare_digest() only accepts ASCII-only str or bytes-like
            # objects; encoding makes non-ASCII passwords comparable too.
            return value.encode("utf-8") if isinstance(value, str) else value

        try:
            return hmac.compare_digest(as_bytes(expected), as_bytes(password))
        except TypeError:
            # One side is not str/bytes (e.g. ``None``): never a match.
            return False

    def auth_required(self, request):
        """Build the 401 response that asks the client for credentials."""
        return Response(
            "Could not verify your access level for that URL.\n"
            "You have to login with proper credentials",
            401,
            {"WWW-Authenticate": f'Basic realm="{self.realm}"'},
        )

    def dispatch_request(self, request):
        """Build the response for a request that carried valid credentials."""
        return Response(f"Logged in as {request.authorization.username}")

    def __call__(self, environ, start_response):
        """WSGI entry point: challenge the client or dispatch the request."""
        request = Request(environ)
        auth = request.authorization
        if auth and self.check_auth(auth.username, auth.password):
            response = self.dispatch_request(request)
        else:
            response = self.auth_required(request)
        return response(environ, start_response)
-0,0 +1 @@ +from .application import Application as make_app diff --git a/examples/i18nurls/application.py b/examples/i18nurls/application.py new file mode 100644 index 0000000..f0c2ca9 --- /dev/null +++ b/examples/i18nurls/application.py @@ -0,0 +1,93 @@ +from os import path + +from jinja2 import Environment +from jinja2 import PackageLoader +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import NotFound +from werkzeug.routing import RequestRedirect +from werkzeug.wrappers import Request as BaseRequest +from werkzeug.wrappers import Response as BaseResponse + +from .urls import map + +TEMPLATES = path.join(path.dirname(__file__), "templates") +views = {} + + +def expose(name): + """Register the function as view.""" + + def wrapped(f): + views[name] = f + return f + + return wrapped + + +class Request(BaseRequest): + def __init__(self, environ, urls): + super().__init__(environ) + self.urls = urls + self.matched_url = None + + def url_for(self, endpoint, **args): + if "lang_code" not in args: + args["lang_code"] = self.language + if endpoint == "this": + endpoint = self.matched_url[0] + tmp = self.matched_url[1].copy() + tmp.update(args) + args = tmp + return self.urls.build(endpoint, args) + + +class Response(BaseResponse): + pass + + +class TemplateResponse(Response): + jinja_env = Environment(loader=PackageLoader("i18nurls"), autoescape=True) + + def __init__(self, template_name, **values): + self.template_name = template_name + self.template_values = values + Response.__init__(self, mimetype="text/html") + + def __call__(self, environ, start_response): + req = environ["werkzeug.request"] + values = self.template_values.copy() + values["req"] = req + self.data = self.render_template(self.template_name, values) + return super().__call__(environ, start_response) + + def render_template(self, name, values): + template = self.jinja_env.get_template(name) + return template.render(values) + + +class Application: + def __init__(self): + from 
i18nurls import views + + self.not_found = views.page_not_found + + def __call__(self, environ, start_response): + urls = map.bind_to_environ(environ) + req = Request(environ, urls) + try: + endpoint, args = urls.match(req.path) + req.matched_url = (endpoint, args) + if endpoint == "#language_select": + lng = req.accept_languages.best + lng = lng.split("-")[0].lower() if lng else "en" + index_url = urls.build("index", {"lang_code": lng}) + resp = Response(f"Moved to {index_url}", status=302) + resp.headers["Location"] = index_url + else: + req.language = args.pop("lang_code", None) + resp = views[endpoint](req, **args) + except NotFound: + resp = self.not_found(req) + except (RequestRedirect, HTTPException) as e: + resp = e + return resp(environ, start_response) diff --git a/examples/i18nurls/templates/about.html b/examples/i18nurls/templates/about.html new file mode 100644 index 0000000..a48d07e --- /dev/null +++ b/examples/i18nurls/templates/about.html @@ -0,0 +1,7 @@ +{% extends "layout.html" %} +{% block body %} +

+ This is just another page. Maybe you want to head over to the + blog. +

+{% endblock %} diff --git a/examples/i18nurls/templates/blog.html b/examples/i18nurls/templates/blog.html new file mode 100644 index 0000000..36e7f7a --- /dev/null +++ b/examples/i18nurls/templates/blog.html @@ -0,0 +1,7 @@ +{% extends "layout.html" %} +{% block body %} +

Blog {% if mode == 'index' %}Index{% else %}Post {{ post_id }}{% endif %}

+

+ How about going to the index. +

+{% endblock %} diff --git a/examples/i18nurls/templates/index.html b/examples/i18nurls/templates/index.html new file mode 100644 index 0000000..bc7d72e --- /dev/null +++ b/examples/i18nurls/templates/index.html @@ -0,0 +1,8 @@ +{% extends "layout.html" %} +{% block body %} +

Hello in the i18n URL example application.

+

Because I'm too lazy to translate, here is just English content.

+ +{% endblock %} diff --git a/examples/i18nurls/templates/layout.html b/examples/i18nurls/templates/layout.html new file mode 100644 index 0000000..742295b --- /dev/null +++ b/examples/i18nurls/templates/layout.html @@ -0,0 +1,21 @@ + + + + {{ title }} | Example Application + + +

Example Application

+

+ Request Language: {{ req.language }} +

+ {% block body %}{% endblock %} +
+

This page in other languages: +

    + {% for lng in ['en', 'de', 'fr'] %} +
  • {{ lng }}
  • + {% endfor %} +
+
+ + diff --git a/examples/i18nurls/urls.py b/examples/i18nurls/urls.py new file mode 100644 index 0000000..3dd54a0 --- /dev/null +++ b/examples/i18nurls/urls.py @@ -0,0 +1,18 @@ +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.routing import Submount + +map = Map( + [ + Rule("/", endpoint="#language_select"), + Submount( + "/", + [ + Rule("/", endpoint="index"), + Rule("/about", endpoint="about"), + Rule("/blog/", endpoint="blog/index"), + Rule("/blog/", endpoint="blog/show"), + ], + ), + ] +) diff --git a/examples/i18nurls/views.py b/examples/i18nurls/views.py new file mode 100644 index 0000000..26ba101 --- /dev/null +++ b/examples/i18nurls/views.py @@ -0,0 +1,29 @@ +from .application import expose +from .application import Response +from .application import TemplateResponse + + +@expose("index") +def index(req): + return TemplateResponse("index.html", title="Index") + + +@expose("about") +def about(req): + return TemplateResponse("about.html", title="About") + + +@expose("blog/index") +def blog_index(req): + return TemplateResponse("blog.html", title="Blog Index", mode="index") + + +@expose("blog/show") +def blog_show(req, post_id): + return TemplateResponse( + "blog.html", title=f"Blog Post #{post_id}", post_id=post_id, mode="show" + ) + + +def page_not_found(req): + return Response("

Page Not Found

", mimetype="text/html") diff --git a/examples/manage-coolmagic.py b/examples/manage-coolmagic.py new file mode 100644 index 0000000..73bbb8f --- /dev/null +++ b/examples/manage-coolmagic.py @@ -0,0 +1,64 @@ +import click +from werkzeug.serving import run_simple + +from coolmagic import make_app + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + app = make_app() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = dict() + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-couchy.py b/examples/manage-couchy.py new file mode 100644 index 0000000..db9d19b --- /dev/null +++ b/examples/manage-couchy.py @@ -0,0 +1,82 @@ +import click +from werkzeug.serving 
import run_simple + + +def make_app(): + from couchy.application import Couchy + + return Couchy("http://localhost:5984") + + +def make_shell(): + from couchy import models, utils + + application = make_app() + return {"application": application, "models": models, "utils": utils} + + +@click.group() +def cli(): + pass + + +@cli.command() +def initdb(): + from couchy.application import Couchy + + Couchy("http://localhost:5984").init_database() + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + app = make_app() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = make_shell() + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-cupoftee.py b/examples/manage-cupoftee.py new file mode 100644 index 
0000000..99f7179 --- /dev/null +++ b/examples/manage-cupoftee.py @@ -0,0 +1,49 @@ +""" + Manage Cup Of Tee + ~~~~~~~~~~~~~~~~~ + + Manage the cup of tee application. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import click +from werkzeug.serving import run_simple + + +def make_app(): + from cupoftee import make_app + + return make_app("/tmp/cupoftee.db") + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, reloader, debugger, evalex, threaded, processes): + """Start a new development server.""" + app = make_app() + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-i18nurls.py b/examples/manage-i18nurls.py new file mode 100644 index 0000000..71ddfc9 --- /dev/null +++ b/examples/manage-i18nurls.py @@ -0,0 +1,64 @@ +import click +from werkzeug.serving import run_simple + +from i18nurls import make_app + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, 
threaded, processes): + """Start a new development server.""" + app = make_app() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = dict() + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-plnt.py b/examples/manage-plnt.py new file mode 100644 index 0000000..d48cb8e --- /dev/null +++ b/examples/manage-plnt.py @@ -0,0 +1,131 @@ +import os + +import click +from werkzeug.serving import run_simple + + +def make_app(): + """Helper function that creates a plnt app.""" + from plnt import Plnt + + database_uri = os.environ.get("PLNT_DATABASE_URI") + app = Plnt(database_uri or "sqlite:////tmp/plnt.db") + app.bind_to_context() + return app + + +@click.group() +def cli(): + pass + + +@cli.command() +def initdb(): + """Initialize the database""" + from plnt.database import Blog, session + + make_app().init_database() + # and now fill in some python blogs everybody should read (shamelessly + # added my own blog too) + blogs = [ + Blog( + "Armin Ronacher", + "https://lucumr.pocoo.org/", + "https://lucumr.pocoo.org/feed.atom", + ), + Blog( + "Georg Brandl", + "https://pyside.blogspot.com/", + "https://pyside.blogspot.com/feeds/posts/default", + ), + Blog( + "Ian Bicking", + "https://blog.ianbicking.org/", + 
"https://blog.ianbicking.org/feed/", + ), + Blog( + "Amir Salihefendic", + "http://amix.dk/", + "https://feeds.feedburner.com/amixdk", + ), + Blog( + "Christopher Lenz", + "https://www.cmlenz.net/blog/", + "https://www.cmlenz.net/blog/atom.xml", + ), + Blog( + "Frederick Lundh", + "https://effbot.org/", + "https://effbot.org/rss.xml", + ), + ] + # okay. got tired here. if someone feels that they are missing, drop me + # a line ;-) + for blog in blogs: + session.add(blog) + session.commit() + click.echo("Initialized database, now run manage-plnt.py sync to get the posts") + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + app = make_app() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = {"app": make_app()} + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) 
+ + +@cli.command() +def sync(): + """Sync the blogs in the planet. Call this from a cronjob.""" + from plnt.sync import sync + + make_app().bind_to_context() + sync() + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-shorty.py b/examples/manage-shorty.py new file mode 100644 index 0000000..1dc4278 --- /dev/null +++ b/examples/manage-shorty.py @@ -0,0 +1,84 @@ +import os +import tempfile + +import click +from werkzeug.serving import run_simple + + +def make_app(): + from shorty.application import Shorty + + filename = os.path.join(tempfile.gettempdir(), "shorty.db") + return Shorty(f"sqlite:///{filename}") + + +def make_shell(): + from shorty import models, utils + + application = make_app() + return {"application": application, "models": models, "utils": utils} + + +@click.group() +def cli(): + pass + + +@cli.command() +def initdb(): + make_app().init_database() + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + app = make_app() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = make_shell() + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import 
InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-simplewiki.py b/examples/manage-simplewiki.py new file mode 100644 index 0000000..9f57818 --- /dev/null +++ b/examples/manage-simplewiki.py @@ -0,0 +1,85 @@ +import os + +import click +from werkzeug.serving import run_simple + + +def make_wiki(): + """Helper function that creates a new wiki instance.""" + from simplewiki import SimpleWiki + + database_uri = os.environ.get("SIMPLEWIKI_DATABASE_URI") + return SimpleWiki(database_uri or "sqlite:////tmp/simplewiki.db") + + +def make_shell(): + from simplewiki import database + + wiki = make_wiki() + wiki.bind_to_context() + return {"wiki": wiki, "db": database} + + +@click.group() +def cli(): + pass + + +@cli.command() +def initdb(): + make_wiki().init_database() + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + app = make_wiki() + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def 
shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = make_shell() + if not no_ipython: + try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/manage-webpylike.py b/examples/manage-webpylike.py new file mode 100644 index 0000000..0cb15aa --- /dev/null +++ b/examples/manage-webpylike.py @@ -0,0 +1,68 @@ +import os +import sys + +import click +from werkzeug.serving import run_simple + +from webpylike.example import app + +sys.path.append(os.path.join(os.path.dirname(__file__), "webpylike")) + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option("-h", "--hostname", type=str, default="localhost", help="localhost") +@click.option("-p", "--port", type=int, default=5000, help="5000") +@click.option("--no-reloader", is_flag=True, default=False) +@click.option("--debugger", is_flag=True) +@click.option("--no-evalex", is_flag=True, default=False) +@click.option("--threaded", is_flag=True) +@click.option("--processes", type=int, default=1, help="1") +def runserver(hostname, port, no_reloader, debugger, no_evalex, threaded, processes): + """Start a new development server.""" + reloader = not no_reloader + evalex = not no_evalex + run_simple( + hostname, + port, + app, + use_reloader=reloader, + use_debugger=debugger, + use_evalex=evalex, + threaded=threaded, + processes=processes, + ) + + +@cli.command() +@click.option("--no-ipython", is_flag=True, default=False) +def shell(no_ipython): + """Start a new interactive python session.""" + banner = "Interactive Werkzeug Shell" + namespace = dict() + if not no_ipython: + 
try: + try: + from IPython.frontend.terminal.embed import InteractiveShellEmbed + + sh = InteractiveShellEmbed.instance(banner1=banner) + except ImportError: + from IPython.Shell import IPShellEmbed + + sh = IPShellEmbed(banner=banner) + except ImportError: + pass + else: + sh(local_ns=namespace) + return + from code import interact + + interact(banner, local=namespace) + + +if __name__ == "__main__": + cli() diff --git a/examples/partial/README b/examples/partial/README new file mode 100644 index 0000000..b3f4976 --- /dev/null +++ b/examples/partial/README @@ -0,0 +1,3 @@ +This directory contains modules that have code but that are +not executable. For example routing definitions to play around +in the python interactive prompt. diff --git a/examples/partial/complex_routing.py b/examples/partial/complex_routing.py new file mode 100644 index 0000000..596d00e --- /dev/null +++ b/examples/partial/complex_routing.py @@ -0,0 +1,43 @@ +from werkzeug.routing import EndpointPrefix +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.routing import Subdomain +from werkzeug.routing import Submount + +m = Map( + [ + # Static URLs + EndpointPrefix( + "static/", + [ + Rule("/", endpoint="index"), + Rule("/about", endpoint="about"), + Rule("/help", endpoint="help"), + ], + ), + # Knowledge Base + Subdomain( + "kb", + [ + EndpointPrefix( + "kb/", + [ + Rule("/", endpoint="index"), + Submount( + "/browse", + [ + Rule("/", endpoint="browse"), + Rule( + "//", + defaults={"page": 1}, + endpoint="browse", + ), + Rule("//", endpoint="browse"), + ], + ), + ], + ) + ], + ), + ] +) diff --git a/examples/plnt/__init__.py b/examples/plnt/__init__.py new file mode 100644 index 0000000..81f2359 --- /dev/null +++ b/examples/plnt/__init__.py @@ -0,0 +1,2 @@ +"""A planet application, pronounced "plant".""" +from .webapp import Plnt diff --git a/examples/plnt/database.py b/examples/plnt/database.py new file mode 100644 index 0000000..c126363 --- /dev/null +++ 
b/examples/plnt/database.py @@ -0,0 +1,74 @@ +from sqlalchemy import Column +from sqlalchemy import DateTime +from sqlalchemy import ForeignKey +from sqlalchemy import Integer +from sqlalchemy import MetaData +from sqlalchemy import String +from sqlalchemy import Table +from sqlalchemy.orm import create_session +from sqlalchemy.orm import dynamic_loader +from sqlalchemy.orm import mapper +from sqlalchemy.orm import scoped_session + +from .utils import application + +try: + from greenlet import getcurrent as get_ident +except ImportError: + from threading import get_ident + + +def new_db_session(): + return create_session(application.database_engine, autoflush=True, autocommit=False) + + +metadata = MetaData() +session = scoped_session(new_db_session, get_ident) + + +blog_table = Table( + "blogs", + metadata, + Column("id", Integer, primary_key=True), + Column("name", String(120)), + Column("description", String), + Column("url", String(200)), + Column("feed_url", String(250)), +) + +entry_table = Table( + "entries", + metadata, + Column("id", Integer, primary_key=True), + Column("blog_id", Integer, ForeignKey("blogs.id")), + Column("guid", String(200), unique=True), + Column("title", String(140)), + Column("url", String(200)), + Column("text", String), + Column("pub_date", DateTime), + Column("last_update", DateTime), +) + + +class Blog: + query = session.query_property() + + def __init__(self, name, url, feed_url, description=""): + self.name = name + self.url = url + self.feed_url = feed_url + self.description = description + + def __repr__(self): + return f"<{type(self).__name__} {self.url!r}>" + + +class Entry: + query = session.query_property() + + def __repr__(self): + return f"<{type(self).__name__} {self.guid!r}>" + + +mapper(Entry, entry_table) +mapper(Blog, blog_table, properties=dict(entries=dynamic_loader(Entry, backref="blog"))) diff --git a/examples/plnt/shared/style.css b/examples/plnt/shared/style.css new file mode 100644 index 0000000..2765f93 --- 
/dev/null +++ b/examples/plnt/shared/style.css @@ -0,0 +1,133 @@ +body { + font-family: 'Luxi Sans', 'Lucida Sans', 'Verdana', sans-serif; + margin: 1em; + padding: 0; + background-color: #BDE1EC; + color: #0B2B35; +} + +a { + color: #50ACC4; +} + +a:hover { + color: #0B2B35; +} + +div.header { + display: block; + margin: -1em -1em 0 -1em; + padding: 1em; + background-color: #0B2B35; + color: white; +} + +div.header h1 { + font-family: 'Georgia', serif; + margin: 0; + font-size: 1.8em; +} + +div.header blockquote { + margin: 0; + padding: 0.5em 0 0 1em; + font-size: 0.9em; +} + +div.footer { + margin: 1em -1em -1em -1em; + padding: 0.5em; + color: #F3F7F8; + background-color: #1F6070; +} + +div.footer p { + margin: 0; + padding: 0; + font-size: 0.8em; + text-align: right; +} + +ul.navigation { + float: right; + padding: 0.7em 1em 0.7em 1em; + background-color: #F3F7F8; + border: 1px solid #85CADB; + border-right-color: #50ACC4; + border-bottom-color: #50ACC4; + list-style: none; +} + +ul.navigation li { + padding: 0.3em 0 0.3em 0; +} + +ul.navigation li a { + color: #0B2B35; +} + +ul.navigation li a:hover { + color: #50ACC4; +} + +div.pagination { + margin: 0.5em 0 0.5em 0; + padding: 0.7em; + text-align: center; + max-width: 50em; + background-color: white; + border: 1px solid #B1CDD4; +} + +div.day, div.page { + max-width: 50em; + background-color: white; + border: 1px solid #50ACC4; + margin: 1em 0 1em 0; + padding: 0.7em; +} + +div.day h2, div.page h2 { + margin: 0 0 0.5em 0; + padding: 0; + color: black; + font-size: 1.7em; +} + +div.page p { + margin: 0.7em 1em 0.7em 1em; + line-height: 1.5em; +} + +div.day div.entry { + margin: 0.5em 0.25em 0.5em 1em; + padding: 1em; + background-color: #F3F7F8; + border: 1px solid #85CADB; + border-left-color: #50ACC4; + border-top-color: #50ACC4; +} + +div.day div.entry h3 { + margin: 0; + padding: 0; +} + +div.day div.entry h3 a { + color: #1C6D81; + text-decoration: none; +} + +div.day div.entry p.meta { + color: #666; + 
# Content types whose value is already markup and must not be HTML-escaped.
HTML_MIMETYPES = {"text/html", "application/xhtml+xml"}


def sync():
    """Synchronize every blog's feed into the database.

    For each ``Blog`` the feed at ``blog.feed_url`` is parsed and every usable
    feed item is stored as an ``Entry``. An item is skipped when it has no
    guid (neither ``id`` nor ``link``), no title, no text, or no date, or when
    an ``Entry`` with the same guid already exists and is at least as recent
    as the feed item. All changes are committed once at the end.
    """
    for blog in Blog.query.all():
        # feedparser.parse will never raise an exception, but the bozo bit
        # might be set on the result.
        feed = feedparser.parse(blog.feed_url)

        for item in feed.entries:
            # The guid is the id if specified, otherwise the link. Without
            # either the item cannot be tracked, so it is skipped.
            guid = item.get("id") or item.get("link")
            if not guid:
                continue

            # An existing row for this guid decides between update and create.
            old_entry = Entry.query.filter_by(guid=guid).first()

            # Title: strip tags from HTML titles, escape everything else.
            if "title_detail" in item:
                title = item.title_detail.get("value") or ""
                if item.title_detail.get("type") in HTML_MIMETYPES:
                    title = strip_tags(title)
                else:
                    title = escape(title)
            else:
                title = item.get("title")

            # If the item has no link, fall back to the blog's own URL. The
            # Blog model stores it as ``url``; there is no ``blog_url``.
            url = item.get("link") or blog.url

            text = item.content[0] if "content" in item else item.get("summary_detail")

            if not title or not text:
                continue

            # HTML/XHTML text is used as is; anything else is converted to
            # paragraphs and escaped.
            # NOTE(review): escape() runs on the output of nl2p(); if nl2p()
            # emits markup it gets escaped as well -- confirm the order.
            if text.get("type") not in HTML_MIMETYPES:
                text = escape(nl2p(text.get("value") or ""))
            else:
                text = text.get("value") or ""

            # Nothing but whitespace left? Skip.
            if not text.strip():
                continue

            # Feeds disagree on which date fields they provide, so try the
            # known candidates in order and let each date default to the other.
            pub_date = (
                item.get("published_parsed")
                or item.get("created_parsed")
                or item.get("date_parsed")
            )
            updated = item.get("updated_parsed") or pub_date
            pub_date = pub_date or updated

            # Still no date at all: skip.
            if not pub_date:
                continue

            # Convert the time tuples to datetime objects.
            pub_date = datetime(*pub_date[:6])
            updated = datetime(*updated[:6])
            if old_entry and updated <= old_entry.last_update:
                continue

            # Update the stored entry, or create a new one. A separate name
            # is used so the feed item being iterated is not rebound.
            db_entry = old_entry or Entry()
            db_entry.blog = blog
            db_entry.guid = guid
            db_entry.title = title
            db_entry.url = url
            db_entry.text = text
            db_entry.pub_date = pub_date
            db_entry.last_update = updated
            session.add(db_entry)

    session.commit()
+

About Plnt

+

+ Plnt is a small example application written using the + Werkzeug WSGI toolkit, + the Jinja template language, + the SQLAlchemy database abstraction + layer and ORM and last but not least the awesome + feedparser library. +

+

+ It's one of the example applications developed to show some of the + features Werkzeug provides and could be the base of a real planet + software. +

+
+{% endblock %} diff --git a/examples/plnt/templates/index.html b/examples/plnt/templates/index.html new file mode 100644 index 0000000..10b4d7a --- /dev/null +++ b/examples/plnt/templates/index.html @@ -0,0 +1,26 @@ +{% extends "layout.html" %} +{% block body %} + {% for day in days %} +
+

{{ day.date.strftime('%d %B %Y') }}

+ {%- for entry in day.entries %} +
+

{{ entry.title }}

+

by {{ entry.blog.name|e }} + at {{ entry.pub_date.strftime('%H:%M') }}

+
{{ entry.text }}
+
+ {%- endfor %} +
+ {%- endfor %} + + {% if pagination.pages > 1 %} + + {% endif %} +{% endblock %} diff --git a/examples/plnt/templates/layout.html b/examples/plnt/templates/layout.html new file mode 100644 index 0000000..2859502 --- /dev/null +++ b/examples/plnt/templates/layout.html @@ -0,0 +1,22 @@ + +Plnt Planet + + +
+

Plnt Planet

+
This is the Plnt Planet Werkzeug Example Application
+
+ + + +
+{% block body %}{% endblock %} +
+ + diff --git a/examples/plnt/utils.py b/examples/plnt/utils.py new file mode 100644 index 0000000..b4e0f60 --- /dev/null +++ b/examples/plnt/utils.py @@ -0,0 +1,147 @@ +import re +from os import path + +from jinja2 import Environment +from jinja2 import FileSystemLoader +from werkzeug.local import Local +from werkzeug.local import LocalManager +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.utils import cached_property +from werkzeug.wrappers import Response + + +# context locals. these two objects are use by the application to +# bind objects to the current context. A context is defined as the +# current thread and the current greenlet if there is greenlet support. +# the `get_request` and `get_application` functions look up the request +# and application objects from this local manager. +local = Local() +local_manager = LocalManager([local]) + + +# proxy objects +request = local("request") +application = local("application") +url_adapter = local("url_adapter") + + +# let's use jinja for templates this time +template_path = path.join(path.dirname(__file__), "templates") +jinja_env = Environment(loader=FileSystemLoader(template_path)) + + +# the collected url patterns +url_map = Map([Rule("/shared/", endpoint="shared")]) +endpoints = {} + + +_par_re = re.compile(r"\n{2,}") +_entity_re = re.compile(r"&([^;]+);") +_striptags_re = re.compile(r"(|<[^>]*>)") + +from html.entities import name2codepoint + +html_entities = name2codepoint.copy() +html_entities["apos"] = 39 +del name2codepoint + + +def expose(url_rule, endpoint=None, **kwargs): + """Expose this function to the web layer.""" + + def decorate(f): + e = endpoint or f.__name__ + endpoints[e] = f + url_map.add(Rule(url_rule, endpoint=e, **kwargs)) + return f + + return decorate + + +def render_template(template_name, **context): + """Render a template into a response.""" + tmpl = jinja_env.get_template(template_name) + context["url_for"] = url_for + return 
Response(tmpl.render(context), mimetype="text/html") + + +def nl2p(s): + """Add paragraphs to a text.""" + return "\n".join(f"

{p}

" for p in _par_re.split(s)) + + +def url_for(endpoint, **kw): + """Simple function for URL generation.""" + return url_adapter.build(endpoint, kw) + + +def strip_tags(s): + """Resolve HTML entities and remove tags from a string.""" + + def handle_match(m): + name = m.group(1) + if name in html_entities: + return chr(html_entities[name]) + if name[:2] in ("#x", "#X"): + try: + return chr(int(name[2:], 16)) + except ValueError: + return "" + elif name.startswith("#"): + try: + return chr(int(name[1:])) + except ValueError: + return "" + return "" + + return _entity_re.sub(handle_match, _striptags_re.sub("", s)) + + +class Pagination: + """ + Paginate a SQLAlchemy query object. + """ + + def __init__(self, query, per_page, page, endpoint): + self.query = query + self.per_page = per_page + self.page = page + self.endpoint = endpoint + + @cached_property + def entries(self): + return ( + self.query.offset((self.page - 1) * self.per_page) + .limit(self.per_page) + .all() + ) + + @cached_property + def count(self): + return self.query.count() + + @property + def has_previous(self): + """Return True if there are pages before the current one.""" + return self.page > 1 + + @property + def has_next(self): + """Return True if there are pages after the current one.""" + return self.page < self.pages + + @property + def previous(self): + """Return the URL for the previous page.""" + return url_for(self.endpoint, page=self.page - 1) + + @property + def next(self): + """Return the URL for the next page.""" + return url_for(self.endpoint, page=self.page + 1) + + @property + def pages(self): + """Return the number of pages.""" + return max(0, self.count - 1) // self.per_page + 1 diff --git a/examples/plnt/views.py b/examples/plnt/views.py new file mode 100644 index 0000000..1729f9a --- /dev/null +++ b/examples/plnt/views.py @@ -0,0 +1,34 @@ +"""Display the aggregated feeds.""" +from datetime import date + +from .database import Entry +from .utils import expose +from .utils import 
Pagination +from .utils import render_template + + +#: number of items per page +PER_PAGE = 30 + + +@expose("/", defaults={"page": 1}) +@expose("/page/") +def index(request, page): + """Show the index page or any an offset of it.""" + days = [] + days_found = set() + query = Entry.query.order_by(Entry.pub_date.desc()) + pagination = Pagination(query, PER_PAGE, page, "index") + for entry in pagination.entries: + day = date(*entry.pub_date.timetuple()[:3]) + if day not in days_found: + days_found.add(day) + days.append({"date": day, "entries": []}) + days[-1]["entries"].append(entry) + return render_template("index.html", days=days, pagination=pagination) + + +@expose("/about") +def about(request): + """Show the about page, so that we have another view func ;-)""" + return render_template("about.html") diff --git a/examples/plnt/webapp.py b/examples/plnt/webapp.py new file mode 100644 index 0000000..eefea13 --- /dev/null +++ b/examples/plnt/webapp.py @@ -0,0 +1,46 @@ +from os import path + +from sqlalchemy import create_engine +from werkzeug.exceptions import HTTPException +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.wrappers import Request +from werkzeug.wsgi import ClosingIterator + +from . 
import views # noqa: F401 +from .database import metadata +from .database import session +from .utils import endpoints +from .utils import local +from .utils import local_manager +from .utils import url_map + +#: path to shared data +SHARED_DATA = path.join(path.dirname(__file__), "shared") + + +class Plnt: + def __init__(self, database_uri): + self.database_engine = create_engine(database_uri) + + self._dispatch = local_manager.middleware(self.dispatch_request) + self._dispatch = SharedDataMiddleware(self._dispatch, {"/shared": SHARED_DATA}) + + def init_database(self): + metadata.create_all(self.database_engine) + + def bind_to_context(self): + local.application = self + + def dispatch_request(self, environ, start_response): + self.bind_to_context() + local.request = request = Request(environ, start_response) + local.url_adapter = adapter = url_map.bind_to_environ(environ) + try: + endpoint, values = adapter.match(request.path) + response = endpoints[endpoint](request, **values) + except HTTPException as e: + response = e + return ClosingIterator(response(environ, start_response), session.remove) + + def __call__(self, environ, start_response): + return self._dispatch(environ, start_response) diff --git a/examples/shortly/shortly.py b/examples/shortly/shortly.py new file mode 100644 index 0000000..10e957e --- /dev/null +++ b/examples/shortly/shortly.py @@ -0,0 +1,139 @@ +"""A simple URL shortener using Werkzeug and redis.""" +import os + +import redis +from jinja2 import Environment +from jinja2 import FileSystemLoader +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import NotFound +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.urls import url_parse +from werkzeug.utils import redirect +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +def base36_encode(number): + assert number >= 0, "positive integer 
required" + if number == 0: + return "0" + base36 = [] + while number != 0: + number, i = divmod(number, 36) + base36.append("0123456789abcdefghijklmnopqrstuvwxyz"[i]) + return "".join(reversed(base36)) + + +def is_valid_url(url): + parts = url_parse(url) + return parts.scheme in ("http", "https") + + +def get_hostname(url): + return url_parse(url).netloc + + +class Shortly: + def __init__(self, config): + self.redis = redis.Redis( + config["redis_host"], config["redis_port"], decode_responses=True + ) + template_path = os.path.join(os.path.dirname(__file__), "templates") + self.jinja_env = Environment( + loader=FileSystemLoader(template_path), autoescape=True + ) + self.jinja_env.filters["hostname"] = get_hostname + + self.url_map = Map( + [ + Rule("/", endpoint="new_url"), + Rule("/", endpoint="follow_short_link"), + Rule("/+", endpoint="short_link_details"), + ] + ) + + def on_new_url(self, request): + error = None + url = "" + if request.method == "POST": + url = request.form["url"] + if not is_valid_url(url): + error = "Please enter a valid URL" + else: + short_id = self.insert_url(url) + return redirect(f"/{short_id}+") + return self.render_template("new_url.html", error=error, url=url) + + def on_follow_short_link(self, request, short_id): + link_target = self.redis.get(f"url-target:{short_id}") + if link_target is None: + raise NotFound() + self.redis.incr(f"click-count:{short_id}") + return redirect(link_target) + + def on_short_link_details(self, request, short_id): + link_target = self.redis.get(f"url-target:{short_id}") + if link_target is None: + raise NotFound() + click_count = int(self.redis.get(f"click-count:{short_id}") or 0) + return self.render_template( + "short_link_details.html", + link_target=link_target, + short_id=short_id, + click_count=click_count, + ) + + def error_404(self): + response = self.render_template("404.html") + response.status_code = 404 + return response + + def insert_url(self, url): + short_id = 
self.redis.get(f"reverse-url:{url}") + if short_id is not None: + return short_id + url_num = self.redis.incr("last-url-id") + short_id = base36_encode(url_num) + self.redis.set(f"url-target:{short_id}", url) + self.redis.set(f"reverse-url:{url}", short_id) + return short_id + + def render_template(self, template_name, **context): + t = self.jinja_env.get_template(template_name) + return Response(t.render(context), mimetype="text/html") + + def dispatch_request(self, request): + adapter = self.url_map.bind_to_environ(request.environ) + try: + endpoint, values = adapter.match() + return getattr(self, f"on_{endpoint}")(request, **values) + except NotFound: + return self.error_404() + except HTTPException as e: + return e + + def wsgi_app(self, environ, start_response): + request = Request(environ) + response = self.dispatch_request(request) + return response(environ, start_response) + + def __call__(self, environ, start_response): + return self.wsgi_app(environ, start_response) + + +def create_app(redis_host="localhost", redis_port=6379, with_static=True): + app = Shortly({"redis_host": redis_host, "redis_port": redis_port}) + if with_static: + app.wsgi_app = SharedDataMiddleware( + app.wsgi_app, {"/static": os.path.join(os.path.dirname(__file__), "static")} + ) + return app + + +if __name__ == "__main__": + from werkzeug.serving import run_simple + + app = create_app() + run_simple("127.0.0.1", 5000, app, use_debugger=True, use_reloader=True) diff --git a/examples/shortly/static/style.css b/examples/shortly/static/style.css new file mode 100644 index 0000000..2b87e57 --- /dev/null +++ b/examples/shortly/static/style.css @@ -0,0 +1,17 @@ +body { background: #E8EFF0; margin: 0; padding: 0; } +body, input { font-family: 'Helvetica Neue', Arial, + sans-serif; font-weight: 300; font-size: 18px; } +.box { width: 500px; margin: 60px auto; padding: 20px; + background: white; box-shadow: 0 1px 4px #BED1D4; + border-radius: 2px; } +a { color: #11557C; } +h1, h2 { margin: 0; 
color: #11557C; } +h1 a { text-decoration: none; } +h2 { font-weight: normal; font-size: 24px; } +.tagline { color: #888; font-style: italic; margin: 0 0 20px 0; } +.link div { overflow: auto; font-size: 0.8em; white-space: pre; + padding: 4px 10px; margin: 5px 0; background: #E5EAF1; } +dt { font-weight: normal; } +.error { background: #E8EFF0; padding: 3px 8px; color: #11557C; + font-size: 0.9em; border-radius: 2px; } +.urlinput { width: 300px; } diff --git a/examples/shortly/templates/404.html b/examples/shortly/templates/404.html new file mode 100644 index 0000000..1df4aca --- /dev/null +++ b/examples/shortly/templates/404.html @@ -0,0 +1,6 @@ +{% extends "layout.html" %} +{% block title %}Page Not Found{% endblock %} +{% block body %} +

Page Not Found

+

I am sorry, but no such page was found here. +{% endblock %} diff --git a/examples/shortly/templates/layout.html b/examples/shortly/templates/layout.html new file mode 100644 index 0000000..26f64dd --- /dev/null +++ b/examples/shortly/templates/layout.html @@ -0,0 +1,8 @@ + +{% block title %}{% endblock %} | shortly + +

+

shortly

+

Shortly is a URL shortener written with Werkzeug + {% block body %}{% endblock %} +

diff --git a/examples/shortly/templates/new_url.html b/examples/shortly/templates/new_url.html new file mode 100644 index 0000000..a1553c5 --- /dev/null +++ b/examples/shortly/templates/new_url.html @@ -0,0 +1,13 @@ +{% extends "layout.html" %} +{% block title %}Create New Short URL{% endblock %} +{% block body %} +

Submit URL

+
+ {% if error %} +

Error: {{ error }} + {% endif %} +

URL: + + +

+{% endblock %} diff --git a/examples/shortly/templates/short_link_details.html b/examples/shortly/templates/short_link_details.html new file mode 100644 index 0000000..98b95e7 --- /dev/null +++ b/examples/shortly/templates/short_link_details.html @@ -0,0 +1,13 @@ +{% extends "layout.html" %} +{% block title %}Details about /{{ short_id }}{% endblock %} +{% block body %} +

/{{ short_id }}

+
+
Target host: +
{{ link_target|hostname }} +
Full link +
Click count: +
{{ click_count }} +
+{% endblock %} diff --git a/examples/shorty/__init__.py b/examples/shorty/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/shorty/application.py b/examples/shorty/application.py new file mode 100644 index 0000000..adb8e97 --- /dev/null +++ b/examples/shorty/application.py @@ -0,0 +1,45 @@ +from sqlalchemy import create_engine +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import NotFound +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.wrappers import Request +from werkzeug.wsgi import ClosingIterator + +from . import views +from .utils import local +from .utils import local_manager +from .utils import metadata +from .utils import session +from .utils import STATIC_PATH +from .utils import url_map + + +class Shorty: + def __init__(self, db_uri): + local.application = self + self.database_engine = create_engine(db_uri, convert_unicode=True) + + self.dispatch = SharedDataMiddleware(self.dispatch, {"/static": STATIC_PATH}) + + def init_database(self): + metadata.create_all(self.database_engine) + + def dispatch(self, environ, start_response): + local.application = self + request = Request(environ) + local.url_adapter = adapter = url_map.bind_to_environ(environ) + try: + endpoint, values = adapter.match() + handler = getattr(views, endpoint) + response = handler(request, **values) + except NotFound: + response = views.not_found(request) + response.status_code = 404 + except HTTPException as e: + response = e + return ClosingIterator( + response(environ, start_response), [session.remove, local_manager.cleanup] + ) + + def __call__(self, environ, start_response): + return self.dispatch(environ, start_response) diff --git a/examples/shorty/models.py b/examples/shorty/models.py new file mode 100644 index 0000000..15e2020 --- /dev/null +++ b/examples/shorty/models.py @@ -0,0 +1,48 @@ +from datetime import datetime + +from sqlalchemy import Boolean +from sqlalchemy import Column +from 
sqlalchemy import DateTime +from sqlalchemy import String +from sqlalchemy import Table +from sqlalchemy.orm import mapper + +from .utils import get_random_uid +from .utils import metadata +from .utils import session +from .utils import url_for + +url_table = Table( + "urls", + metadata, + Column("uid", String(140), primary_key=True), + Column("target", String(500)), + Column("added", DateTime), + Column("public", Boolean), +) + + +class URL: + query = session.query_property() + + def __init__(self, target, public=True, uid=None, added=None): + self.target = target + self.public = public + self.added = added or datetime.utcnow() + if not uid: + while 1: + uid = get_random_uid() + if not URL.query.get(uid): + break + self.uid = uid + session.add(self) + + @property + def short_url(self): + return url_for("link", uid=self.uid, _external=True) + + def __repr__(self): + return f"" + + +mapper(URL, url_table) diff --git a/examples/shorty/static/style.css b/examples/shorty/static/style.css new file mode 100644 index 0000000..787632f --- /dev/null +++ b/examples/shorty/static/style.css @@ -0,0 +1,108 @@ +body { + background-color: #333; + font-family: 'Lucida Sans', 'Verdana', sans-serif; + font-size: 16px; + margin: 3em 0 3em 0; + padding: 0; + text-align: center; +} + +a { + color: #0C4850; +} + +a:hover { + color: #1C818F; +} + +h1 { + width: 500px; + background-color: #24C0CE; + text-align: center; + font-size: 3em; + margin: 0 auto 0 auto; + padding: 0; +} + +h1 a { + display: block; + padding: 0.3em; + color: #fff; + text-decoration: none; +} + +h1 a:hover { + color: #ADEEF7; + background-color: #0E8A96; +} + +div.footer { + margin: 0 auto 0 auto; + font-size: 13px; + text-align: right; + padding: 10px; + width: 480px; + background-color: #004C63; + color: white; +} + +div.footer a { + color: #A0E9FF; +} + +div.body { + margin: 0 auto 0 auto; + padding: 20px; + width: 460px; + background-color: #98CE24; + color: black; +} + +div.body h2 { + margin: 0 0 0.5em 0; + 
text-align: center; +} + +div.body input { + margin: 0.2em 0 0.2em 0; + font-family: 'Lucida Sans', 'Verdana', sans-serif; + font-size: 20px; + background-color: #CCEB98; + color: black; +} + +div.body #url { + width: 400px; +} + +div.body #alias { + width: 300px; + margin-right: 10px; +} + +div.body #submit { + width: 90px; +} + +div.body p { + margin: 0; + padding: 0.2em 0 0.2em 0; +} + +div.body ul { + margin: 1em 0 1em 0; + padding: 0; + list-style: none; +} + +div.error { + margin: 1em 0 1em 0; + border: 2px solid #AC0202; + background-color: #9E0303; + font-weight: bold; + color: white; +} + +div.pagination { + font-size: 13px; +} diff --git a/examples/shorty/templates/display.html b/examples/shorty/templates/display.html new file mode 100644 index 0000000..83d5685 --- /dev/null +++ b/examples/shorty/templates/display.html @@ -0,0 +1,8 @@ +{% extends 'layout.html' %} +{% block body %} +

Shortened URL

+

+ The URL {{ url.target|urlize(40, true) }} + was shortened to {{ url.short_url|urlize }}. +

+{% endblock %} diff --git a/examples/shorty/templates/layout.html b/examples/shorty/templates/layout.html new file mode 100644 index 0000000..f496806 --- /dev/null +++ b/examples/shorty/templates/layout.html @@ -0,0 +1,16 @@ + + + + Shorty + + + +

Shorty

+
{% block body %}{% endblock %}
+ + + diff --git a/examples/shorty/templates/list.html b/examples/shorty/templates/list.html new file mode 100644 index 0000000..6dd7dff --- /dev/null +++ b/examples/shorty/templates/list.html @@ -0,0 +1,19 @@ +{% extends 'layout.html' %} +{% block body %} +

List of URLs

+
    + {%- for url in pagination.entries %} +
  • {{ url.uid|e }} » + {{ url.target|urlize(38, true) }}
  • + {%- else %} +
  • no URLs shortened yet
  • + {%- endfor %} +
+ +{% endblock %} diff --git a/examples/shorty/templates/new.html b/examples/shorty/templates/new.html new file mode 100644 index 0000000..8a5de58 --- /dev/null +++ b/examples/shorty/templates/new.html @@ -0,0 +1,14 @@ +{% extends 'layout.html' %} +{% block body %} +

Create a Shorty-URL!

+ {% if error %}
{{ error }}
{% endif %} +
+

Enter the URL you want to shorten

+

+

Optionally you can give the URL a memorable name

+

{# + #}

+

+

+
+{% endblock %} diff --git a/examples/shorty/templates/not_found.html b/examples/shorty/templates/not_found.html new file mode 100644 index 0000000..bd45566 --- /dev/null +++ b/examples/shorty/templates/not_found.html @@ -0,0 +1,8 @@ +{% extends 'layout.html' %} +{% block body %} +

Page Not Found

+

+ The page you have requested does not exist on this server. What about + adding a new URL? +

+{% endblock %} diff --git a/examples/shorty/utils.py b/examples/shorty/utils.py new file mode 100644 index 0000000..2d9fe0e --- /dev/null +++ b/examples/shorty/utils.py @@ -0,0 +1,111 @@ +from os import path +from random import randrange +from random import sample + +from jinja2 import Environment +from jinja2 import FileSystemLoader +from sqlalchemy import MetaData +from sqlalchemy.orm import create_session +from sqlalchemy.orm import scoped_session +from werkzeug.local import Local +from werkzeug.local import LocalManager +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.urls import url_parse +from werkzeug.utils import cached_property +from werkzeug.wrappers import Response + + +TEMPLATE_PATH = path.join(path.dirname(__file__), "templates") +STATIC_PATH = path.join(path.dirname(__file__), "static") +ALLOWED_SCHEMES = frozenset(["http", "https", "ftp", "ftps"]) +URL_CHARS = "abcdefghijkmpqrstuvwxyzABCDEFGHIJKLMNPQRST23456789" + +local = Local() +local_manager = LocalManager([local]) +application = local("application") + +metadata = MetaData() +url_map = Map([Rule("/static/", endpoint="static", build_only=True)]) + +session = scoped_session( + lambda: create_session( + application.database_engine, autocommit=False, autoflush=False + ) +) +jinja_env = Environment(loader=FileSystemLoader(TEMPLATE_PATH)) + + +def expose(rule, **kw): + def decorate(f): + kw["endpoint"] = f.__name__ + url_map.add(Rule(rule, **kw)) + return f + + return decorate + + +def url_for(endpoint, _external=False, **values): + return local.url_adapter.build(endpoint, values, force_external=_external) + + +jinja_env.globals["url_for"] = url_for + + +def render_template(template, **context): + return Response( + jinja_env.get_template(template).render(**context), mimetype="text/html" + ) + + +def validate_url(url): + return url_parse(url)[0] in ALLOWED_SCHEMES + + +def get_random_uid(): + return "".join(sample(URL_CHARS, randrange(3, 9))) + + +class Pagination: + 
def __init__(self, query, per_page, page, endpoint): + self.query = query + self.per_page = per_page + self.page = page + self.endpoint = endpoint + + @cached_property + def count(self): + return self.query.count() + + @cached_property + def entries(self): + return ( + self.query.offset((self.page - 1) * self.per_page) + .limit(self.per_page) + .all() + ) + + @property + def has_previous(self): + """Return True if there are pages before the current one.""" + return self.page > 1 + + @property + def has_next(self): + """Return True if there are pages after the current one.""" + return self.page < self.pages + + @property + def previous(self): + """Return the URL for the previous page.""" + return url_for(self.endpoint, page=self.page - 1) + + @property + def next(self): + """Return the URL for the next page.""" + return url_for(self.endpoint, page=self.page + 1) + + @property + def pages(self): + """Return the number of pages.""" + return max(0, self.count - 1) // self.per_page + 1 diff --git a/examples/shorty/views.py b/examples/shorty/views.py new file mode 100644 index 0000000..7a1ee20 --- /dev/null +++ b/examples/shorty/views.py @@ -0,0 +1,62 @@ +from werkzeug.exceptions import NotFound +from werkzeug.utils import redirect + +from .models import URL +from .utils import expose +from .utils import Pagination +from .utils import render_template +from .utils import session +from .utils import url_for +from .utils import validate_url + + +@expose("/") +def new(request): + error = url = "" + if request.method == "POST": + url = request.form.get("url") + alias = request.form.get("alias") + if not validate_url(url): + error = "I'm sorry but you cannot shorten this URL." 
+ elif alias: + if len(alias) > 140: + error = "Your alias is too long" + elif "/" in alias: + error = "Your alias might not include a slash" + elif URL.query.get(alias): + error = "The alias you have requested exists already" + if not error: + uid = URL(url, "private" not in request.form, alias).uid + session.commit() + return redirect(url_for("display", uid=uid)) + return render_template("new.html", error=error, url=url) + + +@expose("/display/") +def display(request, uid): + url = URL.query.get(uid) + if not url: + raise NotFound() + return render_template("display.html", url=url) + + +@expose("/u/") +def link(request, uid): + url = URL.query.get(uid) + if not url: + raise NotFound() + return redirect(url.target, 301) + + +@expose("/list/", defaults={"page": 1}) +@expose("/list/") +def list(request, page): + query = URL.query.filter_by(public=True) + pagination = Pagination(query, 30, page, "list") + if pagination.page > 1 and not pagination.entries: + raise NotFound() + return render_template("list.html", pagination=pagination) + + +def not_found(request): + return render_template("not_found.html") diff --git a/examples/simplewiki/__init__.py b/examples/simplewiki/__init__.py new file mode 100644 index 0000000..827ac07 --- /dev/null +++ b/examples/simplewiki/__init__.py @@ -0,0 +1,4 @@ +"""Very simple wiki application based on Genshi, Werkzeug and +SQLAlchemy. Additionally the creoleparser is used for the wiki markup. +""" +from .application import SimpleWiki diff --git a/examples/simplewiki/actions.py b/examples/simplewiki/actions.py new file mode 100644 index 0000000..93237f1 --- /dev/null +++ b/examples/simplewiki/actions.py @@ -0,0 +1,203 @@ +"""The per page actions. The actions are defined in the URL with the +``action`` parameter and directly dispatched to the functions in this +module. 
In the module the actions are prefixed with '`on_`', so be +careful not to name any other objects in the module with the same prefix +unless you want to act them as actions. +""" +from difflib import unified_diff + +from werkzeug.utils import redirect + +from .database import Page +from .database import Revision +from .database import RevisionedPage +from .database import session +from .utils import format_datetime +from .utils import generate_template +from .utils import href +from .utils import Response + + +def on_show(request, page_name): + """Displays the page the user requests.""" + revision_id = request.args.get("rev", type=int) + query = RevisionedPage.query.filter_by(name=page_name) + if revision_id: + query = query.filter_by(revision_id=revision_id) + revision_requested = True + else: + query = query.order_by(RevisionedPage.revision_id.desc()) + revision_requested = False + page = query.first() + if page is None: + return page_missing(request, page_name, revision_requested) + return Response(generate_template("action_show.html", page=page)) + + +def on_edit(request, page_name): + """Edit the current revision of a page.""" + change_note = error = "" + revision = ( + Revision.query.filter( + (Page.name == page_name) & (Page.page_id == Revision.page_id) + ) + .order_by(Revision.revision_id.desc()) + .first() + ) + if revision is None: + page = None + else: + page = revision.page + + if request.method == "POST": + text = request.form.get("text") + if request.form.get("cancel") or revision and revision.text == text: + return redirect(href(page.name)) + elif not text: + error = "You cannot save empty revisions." 
+ else: + change_note = request.form.get("change_note", "") + if page is None: + page = Page(page_name) + session.add(page) + session.add(Revision(page, text, change_note)) + session.commit() + return redirect(href(page.name)) + + return Response( + generate_template( + "action_edit.html", + revision=revision, + page=page, + new=page is None, + page_name=page_name, + change_note=change_note, + error=error, + ) + ) + + +def on_log(request, page_name): + """Show the list of recent changes.""" + page = Page.query.filter_by(name=page_name).first() + if page is None: + return page_missing(request, page_name, False) + return Response(generate_template("action_log.html", page=page)) + + +def on_diff(request, page_name): + """Show the diff between two revisions.""" + old = request.args.get("old", type=int) + new = request.args.get("new", type=int) + error = "" + diff = page = old_rev = new_rev = None + + if not (old and new): + error = "No revisions specified." + else: + revisions = { + x.revision_id: x + for x in Revision.query.filter( + (Revision.revision_id.in_((old, new))) + & (Revision.page_id == Page.page_id) + & (Page.name == page_name) + ) + } + if len(revisions) != 2: + error = "At least one of the revisions requested does not exist." 
+ else: + new_rev = revisions[new] + old_rev = revisions[old] + page = old_rev.page + diff = unified_diff( + f"{old_rev.text}\n".splitlines(True), + f"{new_rev.text}\n".splitlines(True), + page.name, + page.name, + format_datetime(old_rev.timestamp), + format_datetime(new_rev.timestamp), + 3, + ) + + return Response( + generate_template( + "action_diff.html", + error=error, + old_revision=old_rev, + new_revision=new_rev, + page=page, + diff=diff, + ) + ) + + +def on_revert(request, page_name): + """Revert an old revision.""" + rev_id = request.args.get("rev", type=int) + + old_revision = page = None + error = "No such revision" + + if request.method == "POST" and request.form.get("cancel"): + return redirect(href(page_name)) + + if rev_id: + old_revision = Revision.query.filter( + (Revision.revision_id == rev_id) + & (Revision.page_id == Page.page_id) + & (Page.name == page_name) + ).first() + if old_revision: + new_revision = ( + Revision.query.filter( + (Revision.page_id == Page.page_id) & (Page.name == page_name) + ) + .order_by(Revision.revision_id.desc()) + .first() + ) + if old_revision == new_revision: + error = "You tried to revert the current active revision." + elif old_revision.text == new_revision.text: + error = ( + "There are no changes between the current " + "revision and the revision you want to " + "restore." 
+ ) + else: + error = "" + page = old_revision.page + if request.method == "POST": + change_note = request.form.get("change_note", "") + + if change_note: + change_note = f"revert: {change_note}" + else: + change_note = "revert" + + session.add(Revision(page, old_revision.text, change_note)) + session.commit() + return redirect(href(page_name)) + + return Response( + generate_template( + "action_revert.html", error=error, old_revision=old_revision, page=page + ) + ) + + +def page_missing(request, page_name, revision_requested, protected=False): + """Displayed if page or revision does not exist.""" + return Response( + generate_template( + "page_missing.html", + page_name=page_name, + revision_requested=revision_requested, + protected=protected, + ), + status=404, + ) + + +def missing_action(request, action): + """Displayed if a user tried to access an action that does not exist.""" + return Response(generate_template("missing_action.html", action=action), status=404) diff --git a/examples/simplewiki/application.py b/examples/simplewiki/application.py new file mode 100644 index 0000000..22828d2 --- /dev/null +++ b/examples/simplewiki/application.py @@ -0,0 +1,94 @@ +"""Implements the wiki WSGI application which dispatches requests to +specific wiki pages and actions. +""" +from os import path + +from sqlalchemy import create_engine +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.utils import redirect +from werkzeug.wsgi import ClosingIterator + +from . import actions +from .database import metadata +from .database import session +from .specialpages import page_not_found +from .specialpages import pages +from .utils import href +from .utils import local +from .utils import local_manager +from .utils import Request + +#: path to shared data +SHARED_DATA = path.join(path.dirname(__file__), "shared") + + +class SimpleWiki: + """ + Our central WSGI application.
+ """ + + def __init__(self, database_uri): + self.database_engine = create_engine(database_uri) + + # apply our middlewares. we apply the middlewares *inside* the + # application and not outside of it so that we never lose the + # reference to the `SimpleWiki` object. + self._dispatch = SharedDataMiddleware( + self.dispatch_request, {"/_shared": SHARED_DATA} + ) + + # free the context locals at the end of the request + self._dispatch = local_manager.make_middleware(self._dispatch) + + def init_database(self): + """Called from the management script to generate the db.""" + metadata.create_all(bind=self.database_engine) + + def bind_to_context(self): + """ + Useful for the shell. Binds the application to the current active + context. It's automatically called by the shell command. + """ + local.application = self + + def dispatch_request(self, environ, start_response): + """Dispatch an incoming request.""" + # set up all the stuff we want to have for this request. That is + # creating a request object, propagating the application to the + # current context and instantiating the database session. + self.bind_to_context() + request = Request(environ) + request.bind_to_context() + + # get the current action from the url and normalize the page name + # which is just the request path + action_name = request.args.get("action") or "show" + page_name = "_".join([x for x in request.path.strip("/").split() if x]) + + # redirect to the Main_Page if the user requested the index + if not page_name: + response = redirect(href("Main_Page")) + + # check special pages + elif page_name.startswith("Special:"): + if page_name[8:] not in pages: + response = page_not_found(request, page_name) + else: + response = pages[page_name[8:]](request) + + # get the callback function for the requested action from the + # action module. It's "on_" + the action name. If it doesn't + # exist, call the missing_action function from the same module.
+ else: + action = getattr(actions, f"on_{action_name}", None) + if action is None: + response = actions.missing_action(request, action_name) + else: + response = action(request, page_name) + + # make sure the session is removed properly + return ClosingIterator(response(environ, start_response), session.remove) + + def __call__(self, environ, start_response): + """Just forward a WSGI call to the first internal middleware.""" + return self._dispatch(environ, start_response) diff --git a/examples/simplewiki/database.py b/examples/simplewiki/database.py new file mode 100644 index 0000000..f060b9e --- /dev/null +++ b/examples/simplewiki/database.py @@ -0,0 +1,143 @@ +from datetime import datetime + +from sqlalchemy import Column +from sqlalchemy import DateTime +from sqlalchemy import ForeignKey +from sqlalchemy import Integer +from sqlalchemy import join +from sqlalchemy import MetaData +from sqlalchemy import String +from sqlalchemy import Table +from sqlalchemy.orm import create_session +from sqlalchemy.orm import mapper +from sqlalchemy.orm import relation +from sqlalchemy.orm import scoped_session + +from .utils import application +from .utils import parse_creole + +try: + from greenlet import getcurrent as get_ident +except ImportError: + from threading import get_ident + +# create a global metadata +metadata = MetaData() + + +def new_db_session(): + """ + This function creates a new session if there is no session yet for + the current context. It looks up the application and if it finds + one it creates a session bound to the active database engine in that + application. If there is no application bound to the context it + raises an exception. + """ + return create_session(application.database_engine, autoflush=True, autocommit=False) + + +# and create a new global session factory. Calling this object gives +# you the current active session +session = scoped_session(new_db_session, get_ident) + + +# our database tables. 
+page_table = Table( + "pages", + metadata, + Column("page_id", Integer, primary_key=True), + Column("name", String(60), unique=True), +) + +revision_table = Table( + "revisions", + metadata, + Column("revision_id", Integer, primary_key=True), + Column("page_id", Integer, ForeignKey("pages.page_id")), + Column("timestamp", DateTime), + Column("text", String), + Column("change_note", String(200)), +) + + +class Revision: + """ + Represents one revision of a page. + This is useful for editing particular revision of pages or creating + new revisions. It's also used for the diff system and the revision + log. + """ + + query = session.query_property() + + def __init__(self, page, text, change_note="", timestamp=None): + if isinstance(page, int): + self.page_id = page + else: + self.page = page + self.text = text + self.change_note = change_note + self.timestamp = timestamp or datetime.utcnow() + + def render(self): + """Render the page text into a genshi stream.""" + return parse_creole(self.text) + + def __repr__(self): + return f"<{type(self).__name__} {self.page_id!r}:{self.revision_id!r}>" + + +class Page: + """ + Represents a simple page without any revisions. This is for example + used in the page index where the page contents are not relevant. + """ + + query = session.query_property() + + def __init__(self, name): + self.name = name + + @property + def title(self): + return self.name.replace("_", " ") + + def __repr__(self): + return f"<{type(self).__name__} {self.name!r}>" + + +class RevisionedPage(Page, Revision): + """ + Represents a wiki page with a revision. Thanks to multiple inheritance + and the ability of SQLAlchemy to map to joins we can combine `Page` and + `Revision` into one class here. + """ + + query = session.query_property() + + def __init__(self): + raise TypeError( + "cannot create WikiPage instances, use the Page and " + "Revision classes for data manipulation." 
+ ) + + def __repr__(self): + return f"<{type(self).__name__} {self.name!r}:{self.revision_id!r}>" + + +# setup mappers +mapper(Revision, revision_table) +mapper( + Page, + page_table, + properties=dict( + revisions=relation( + Revision, backref="page", order_by=Revision.revision_id.desc() + ) + ), +) +mapper( + RevisionedPage, + join(page_table, revision_table), + properties=dict(page_id=[page_table.c.page_id, revision_table.c.page_id]), +) diff --git a/examples/simplewiki/shared/style.css b/examples/simplewiki/shared/style.css new file mode 100644 index 0000000..be03246 --- /dev/null +++ b/examples/simplewiki/shared/style.css @@ -0,0 +1,210 @@ +body { + font-family: 'Luxi Sans', 'Lucida Sans', 'Trebuchet MS', sans-serif; + margin: 2em 1em 2em 1em; + padding: 0; + background: #1C0424; +} + +a { + color: #6A2F7E; +} + +a:hover { + color: #3D0F4D; +} + +pre { + border: 1px solid #ccc; + background-color: white; + font-family: 'Consolas', 'Monaco', 'Bitstream Vera Sans', monospace; + font-size: 0.9em; + padding: 0.3em; +} + +table { + border: 2px solid #ccc; + border-collapse: collapse; +} + +table td, table th { + border: 1px solid #ccc; + padding: 0.4em; +} + +div.bodywrapper { + margin: 0 auto 0 auto; + max-width: 50em; + background: #F1EBF3; + border: 1px solid #4C1068; + padding: 0; + color: #111; +} + +div.header { + background-color: #320846; + color: white; +} + +div.header h1 { + margin: 0; + padding: 0.4em; + font-size: 1.7em; +} + +div.header h1 a { + text-decoration: none; + color: white; +} + +div.header h1 a:hover { + color: #6A2F7E; +} + +div.contents { + padding: 1em; + margin: 0; + border: 1px solid #3D0F4D; +} + +div.footer { + padding: 0.5em; + background: #15031B; + color: white; + font-size: 0.8em; + text-align: right; + color: white; +} + +div.contents h1, div.contents h2, div.contents h3, div.contents h4, +div.contents h5 { + margin: 0; + padding: 0.3em 0 0.2em 0; + color: #3D0F4D; +} + +div.contents h1 { font-size: 1.7em; } +div.contents h2 { 
font-size: 1.6em; } +div.contents h3 { font-size: 1.4em; } +div.contents h4 { font-size: 1.2em; } +div.contents h5 { font-size: 1em; } + +div.contents p { + margin: 0; + padding: 0.3em 0 0.3em 0; + line-height: 1.5em; +} + +div.contents div.navigation { + padding: 0 0 0.3em 0; + margin: 0 0 0.3em 0; + border-bottom: 1px solid #6A2F7E; + font-size: 0.85em; + color: #555; +} + +div.contents div.navigation a { + padding: 0 0.2em 0 0.2em; + font-weight: bold; + color: #555; +} + +div.contents div.navigation a:hover { + color: #6A2F7E; +} + +div.contents div.navigation a.active { + background-color: #ccc; + text-decoration: none; +} + +div.contents div.page_meta { + font-size: 0.7em; + color: #555; + float: right; +} + +textarea { + width: 99%; + font-family: 'Consolas', 'Monaco', 'Bitstream Vera Sans', monospace; + font-size: 0.9em; + padding: 0.3em; + margin: 0.5em 0 0.5em 0; +} + +input { + font-family: 'Luxi Sans', 'Lucida Sans', 'Trebuchet MS', sans-serif; +} + +table.revisions, table.changes { + border-collapse: collapse; + border: 1px solid #6A2F7E; + background: #fdfdfd; + width: 100%; + margin: 1em 0 0.5em 0; +} + +table.revisions th, table.changes th { + background-color: #6A2F7E; + color: white; + padding: 0.1em 0.6em 0.1em 0.6em; + font-size: 0.8em; + border: none; +} + +table.revisions td, table.changes td { + padding: 0.2em 0.5em 0.2em 0.5em; + font-size: 0.9em; + border: none; +} + +table.revisions .timestamp, table.changes .timestamp { + text-align: left; + width: 10em; +} + +table.revisions td.timestamp, table.changes td.timestamp { + color: #444; +} + +table.revisions .change_note, table.changes .change_note { + text-align: left; +} + +table.revisions td.change_note, table.changes td.change_note { + font-style: italic; +} + +table.revisions th.diff input { + background-color: #3D0F4D; + color: white; + border: 1px solid #1C0424; +} + +table.revisions .diff { + width: 5em; + text-align: right; +} + +table.revisions .actions { + width: 8em; + text-align: 
left; +} + +table.revisions td.actions { + font-size: 0.75em; +} + +table.revisions tr.odd, table.changes tr.odd { + background-color: #f7f7f7; +} + +pre.udiff { + overflow: auto; + font-size: 0.75em; +} + +div.pagination { + font-size: 0.9em; + padding: 0.5em 0 0.5em 0; + text-align: center; +} diff --git a/examples/simplewiki/specialpages.py b/examples/simplewiki/specialpages.py new file mode 100644 index 0000000..2c286f5 --- /dev/null +++ b/examples/simplewiki/specialpages.py @@ -0,0 +1,40 @@ +"""Special pages such as the recent changes page.""" +from .actions import page_missing +from .database import Page +from .database import RevisionedPage +from .utils import generate_template +from .utils import Pagination +from .utils import Response + + +def page_index(request): + """Index of all pages.""" + letters = {} + for page in Page.query.order_by(Page.name): + letters.setdefault(page.name.capitalize()[0], []).append(page) + return Response( + generate_template("page_index.html", letters=sorted(letters.items())) + ) + + +def recent_changes(request): + """Display the recent changes.""" + page = max(1, request.args.get("page", type=int)) + query = RevisionedPage.query.order_by(RevisionedPage.revision_id.desc()) + return Response( + generate_template( + "recent_changes.html", + pagination=Pagination(query, 20, page, "Special:Recent_Changes"), + ) + ) + + +def page_not_found(request, page_name): + """ + Displays an error message if a user tried to access + a not existing special page. + """ + return page_missing(request, page_name, True) + + +pages = {"Index": page_index, "Recent_Changes": recent_changes} diff --git a/examples/simplewiki/templates/action_diff.html b/examples/simplewiki/templates/action_diff.html new file mode 100644 index 0000000..4a8edf8 --- /dev/null +++ b/examples/simplewiki/templates/action_diff.html @@ -0,0 +1,27 @@ + + + + View Diff + + + +

Diff for “${page.title}

+

+ Below you can see the differences between the revision from + ${format_datetime(old_revision.timestamp)} and the + revision from ${format_datetime(new_revision.timestamp)} in unified + diff format. +

+
${diff}
+
+ +

Cannot Display Diff

+

${error}

+
+ + diff --git a/examples/simplewiki/templates/action_edit.html b/examples/simplewiki/templates/action_edit.html new file mode 100644 index 0000000..84d74fa --- /dev/null +++ b/examples/simplewiki/templates/action_edit.html @@ -0,0 +1,26 @@ + + + + ${'Create' if new else 'Edit'} Page + + +

${'Create' if new else 'Edit'} “${page.title or page_name}”

+

+ You can now ${'create' if new else 'modify'} the page contents. To + format your text you can use creole markup. +

+

${error}

+
+

+
+ + + +
+
+ + diff --git a/examples/simplewiki/templates/action_log.html b/examples/simplewiki/templates/action_log.html new file mode 100644 index 0000000..05a97c0 --- /dev/null +++ b/examples/simplewiki/templates/action_log.html @@ -0,0 +1,44 @@ + + + + Revisions for “${page.title}” + + +

Revisions for “${page.title}

+

+ In this list you can see all the revisions of the requested page. +

+
+ + + + + + + + + + + + + + +
DateChange NoteActions
${format_datetime(revision.timestamp)}${revision.change_note} + + + + show + | + revert + +
+
+ + diff --git a/examples/simplewiki/templates/action_revert.html b/examples/simplewiki/templates/action_revert.html new file mode 100644 index 0000000..e7194e7 --- /dev/null +++ b/examples/simplewiki/templates/action_revert.html @@ -0,0 +1,31 @@ + + + + Revert Old Revision + + + +

Revert Old Revision of “${page.title}

+

+ If you want to restore the old revision from + ${format_datetime(old_revision.timestamp)} enter your change + note and click “Revert”. +

+
+
+ + + +
+
+
+ +

Cannot Revert

+

${error}

+
+ + diff --git a/examples/simplewiki/templates/action_show.html b/examples/simplewiki/templates/action_show.html new file mode 100644 index 0000000..caaf8d2 --- /dev/null +++ b/examples/simplewiki/templates/action_show.html @@ -0,0 +1,12 @@ + + + + ${page.title} + + + ${page.render()} + + diff --git a/examples/simplewiki/templates/layout.html b/examples/simplewiki/templates/layout.html new file mode 100644 index 0000000..d63bb65 --- /dev/null +++ b/examples/simplewiki/templates/layout.html @@ -0,0 +1,46 @@ + + + + + <py:if + test="title">${title} — </py:if>SimpleWiki + + ${select('*[local-name()!="title"]')} + + + + +
+ +
+
+ This revision + was created on ${format_datetime(page.timestamp)}. +
+ + ${select('*|text()')} +
+ +
+ +
+ diff --git a/examples/simplewiki/templates/macros.xml b/examples/simplewiki/templates/macros.xml new file mode 100644 index 0000000..28ce06b --- /dev/null +++ b/examples/simplewiki/templates/macros.xml @@ -0,0 +1,18 @@ +
+ + + + + +
diff --git a/examples/simplewiki/templates/missing_action.html b/examples/simplewiki/templates/missing_action.html new file mode 100644 index 0000000..4051317 --- /dev/null +++ b/examples/simplewiki/templates/missing_action.html @@ -0,0 +1,12 @@ + + + + Action Not Found + + +

Action “${action}” Not Found

+

The requested action does not exist.

+

Try to access the same URL without parameters.

+ + diff --git a/examples/simplewiki/templates/page_index.html b/examples/simplewiki/templates/page_index.html new file mode 100644 index 0000000..7c0a463 --- /dev/null +++ b/examples/simplewiki/templates/page_index.html @@ -0,0 +1,18 @@ + + + + Index + + +

Index

+ +

${letter}

+ +
+ + diff --git a/examples/simplewiki/templates/page_missing.html b/examples/simplewiki/templates/page_missing.html new file mode 100644 index 0000000..c352c4b --- /dev/null +++ b/examples/simplewiki/templates/page_missing.html @@ -0,0 +1,24 @@ + + + + Page Not Found + + +

Page Not Found

+

The page you requested does not exist.

+

+ It also could be that there is no such revision of that page. +

+

+ Feel free to create such a page. +

+

+ Although this page does not exist by now you cannot create it because + the system protected the page name for future use. +

+ + diff --git a/examples/simplewiki/templates/recent_changes.html b/examples/simplewiki/templates/recent_changes.html new file mode 100644 index 0000000..46bd7a1 --- /dev/null +++ b/examples/simplewiki/templates/recent_changes.html @@ -0,0 +1,26 @@ + + + + Recent Changes + + +

Recent Changes

+ + + + + + + + + + + +
DatePageChange Note
${format_datetime(entry.timestamp)}${entry.title}${entry.change_note}
+ ${render_pagination(pagination)} + + diff --git a/examples/simplewiki/utils.py b/examples/simplewiki/utils.py new file mode 100644 index 0000000..6cafab4 --- /dev/null +++ b/examples/simplewiki/utils.py @@ -0,0 +1,141 @@ +from os import path + +import creoleparser +from genshi import Stream +from genshi.template import TemplateLoader +from werkzeug.local import Local +from werkzeug.local import LocalManager +from werkzeug.urls import url_encode +from werkzeug.urls import url_quote +from werkzeug.utils import cached_property +from werkzeug.wrappers import Request as BaseRequest +from werkzeug.wrappers import Response as BaseResponse + + +# calculate the path to the templates and create the template loader +TEMPLATE_PATH = path.join(path.dirname(__file__), "templates") +template_loader = TemplateLoader( + TEMPLATE_PATH, auto_reload=True, variable_lookup="lenient" +) + + +# context locals. these two objects are used by the application to +# bind objects to the current context. A context is defined as the +# current thread and the current greenlet if there is greenlet support. +local = Local() +local_manager = LocalManager([local]) +request = local("request") +application = local("application") + +# create a new creole parser +creole_parser = creoleparser.Parser( + dialect=creoleparser.create_dialect( + creoleparser.creole10_base, + wiki_links_base_url="", + wiki_links_path_func=lambda page_name: href(page_name), + wiki_links_space_char="_", + no_wiki_monospace=True, + ), + method="html", +) + + +def generate_template(template_name, **context): + """Load and generate a template.""" + context.update(href=href, format_datetime=format_datetime) + return template_loader.load(template_name).generate(**context) + + +def parse_creole(markup): + """Parse some creole markup and create a genshi stream.""" + return creole_parser.generate(markup) + + +def href(*args, **kw): + """ + Simple function for URL generation.
Position arguments are used for the + URL path and keyword arguments are used for the url parameters. + """ + result = [f"{request.script_root if request else ''}/"] + for idx, arg in enumerate(args): + result.append(f"{'/' if idx else ''}{url_quote(arg)}") + if kw: + result.append(f"?{url_encode(kw)}") + return "".join(result) + + +def format_datetime(obj): + """Format a datetime object.""" + return obj.strftime("%Y-%m-%d %H:%M") + + +class Request(BaseRequest): + """ + Simple request subclass that allows to bind the object to the + current context. + """ + + def bind_to_context(self): + local.request = self + + +class Response(BaseResponse): + """ + Encapsulates a WSGI response. Unlike the default response object werkzeug + provides, this accepts a genshi stream and will automatically render it + to html. This makes it possible to switch to xhtml or html5 easily. + """ + + default_mimetype = "text/html" + + def __init__( + self, response=None, status=200, headers=None, mimetype=None, content_type=None + ): + if isinstance(response, Stream): + response = response.render("html", encoding=None, doctype="html") + super().__init__(response, status, headers, mimetype, content_type) + + +class Pagination: + """ + Paginate a SQLAlchemy query object. 
+ """ + + def __init__(self, query, per_page, page, link): + self.query = query + self.per_page = per_page + self.page = page + self.link = link + self._count = None + + @cached_property + def entries(self): + return ( + self.query.offset((self.page - 1) * self.per_page) + .limit(self.per_page) + .all() + ) + + @property + def has_previous(self): + return self.page > 1 + + @property + def has_next(self): + return self.page < self.pages + + @property + def previous(self): + return href(self.link, page=self.page - 1) + + @property + def next(self): + return href(self.link, page=self.page + 1) + + @cached_property + def count(self): + return self.query.count() + + @property + def pages(self): + return max(0, self.count - 1) // self.per_page + 1 diff --git a/examples/upload.py b/examples/upload.py new file mode 100644 index 0000000..4fa952d --- /dev/null +++ b/examples/upload.py @@ -0,0 +1,38 @@ +"""All uploaded files are directly sent back to the client.""" +from werkzeug.serving import run_simple +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response +from werkzeug.wsgi import wrap_file + + +def view_file(req): + if "uploaded_file" not in req.files: + return Response("no file uploaded") + f = req.files["uploaded_file"] + return Response( + wrap_file(req.environ, f), mimetype=f.content_type, direct_passthrough=True + ) + + +def upload_file(req): + return Response( + """

Upload File

+
+ + +
""", + mimetype="text/html", + ) + + +def application(environ, start_response): + req = Request(environ) + if req.method == "POST": + resp = view_file(req) + else: + resp = upload_file(req) + return resp(environ, start_response) + + +if __name__ == "__main__": + run_simple("localhost", 5000, application, use_debugger=True) diff --git a/examples/webpylike/example.py b/examples/webpylike/example.py new file mode 100644 index 0000000..74534d1 --- /dev/null +++ b/examples/webpylike/example.py @@ -0,0 +1,19 @@ +from .webpylike import Response +from .webpylike import View +from .webpylike import WebPyApp + + +urls = ("/", "index", "/about", "about") + + +class index(View): + def GET(self): + return Response("Hello World") + + +class about(View): + def GET(self): + return Response("This is the about page") + + +app = WebPyApp(urls, globals()) diff --git a/examples/webpylike/webpylike.py b/examples/webpylike/webpylike.py new file mode 100644 index 0000000..e7a9ceb --- /dev/null +++ b/examples/webpylike/webpylike.py @@ -0,0 +1,58 @@ +"""Implements web.py like dispatching. What this module does not +implement is a stream system that hooks into sys.stdout like web.py +provides. +""" +import re + +from werkzeug.exceptions import HTTPException +from werkzeug.exceptions import MethodNotAllowed +from werkzeug.exceptions import NotFound +from werkzeug.exceptions import NotImplemented +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response # noqa: F401 + + +class View: + """Baseclass for our views.""" + + def __init__(self, app, req): + self.app = app + self.req = req + + def GET(self): + raise MethodNotAllowed() + + POST = DELETE = PUT = GET + + def HEAD(self): + return self.GET() + + +class WebPyApp: + """ + An interface to a web.py like application. 
It works like the web.run + function in web.py + """ + + def __init__(self, urls, views): + self.urls = [ + (re.compile(f"^{urls[i]}$"), urls[i + 1]) for i in range(0, len(urls), 2) + ] + self.views = views + + def __call__(self, environ, start_response): + try: + req = Request(environ) + for regex, view in self.urls: + match = regex.match(req.path) + if match is not None: + view = self.views[view](self, req) + if req.method not in ("GET", "HEAD", "POST", "DELETE", "PUT"): + raise NotImplemented() # noqa: F901 + resp = getattr(view, req.method)(*match.groups()) + break + else: + raise NotFound() + except HTTPException as e: + resp = e + return resp(environ, start_response) diff --git a/examples/wsecho.py b/examples/wsecho.py new file mode 100644 index 0000000..23223c9 --- /dev/null +++ b/examples/wsecho.py @@ -0,0 +1,79 @@ +"""Shows how you can implement a simple WebSocket echo server using the +wsproto library. +""" +from werkzeug.exceptions import InternalServerError +from werkzeug.serving import run_simple +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response +from wsproto import ConnectionType +from wsproto import WSConnection +from wsproto.events import AcceptConnection +from wsproto.events import CloseConnection +from wsproto.events import Message +from wsproto.events import Ping +from wsproto.events import Request as WSRequest +from wsproto.events import TextMessage +from wsproto.frame_protocol import CloseReason + + +@Request.application +def websocket(request): + # The underlying socket must be provided by the server. Gunicorn and + # Werkzeug's dev server are known to support this. + stream = request.environ.get("werkzeug.socket") + + if stream is None: + stream = request.environ.get("gunicorn.socket") + + if stream is None: + raise InternalServerError() + + # Initialize the wsproto connection. Need to recreate the request + # data that was read by the WSGI server already. 
+ ws = WSConnection(ConnectionType.SERVER) + in_data = b"GET %s HTTP/1.1\r\n" % request.path.encode("utf8") + + for header, value in request.headers.items(): + in_data += f"{header}: {value}\r\n".encode() + + in_data += b"\r\n" + ws.receive_data(in_data) + running = True + + while True: + out_data = b"" + + for event in ws.events(): + if isinstance(event, WSRequest): + out_data += ws.send(AcceptConnection()) + elif isinstance(event, CloseConnection): + out_data += ws.send(event.response()) + running = False + elif isinstance(event, Ping): + out_data += ws.send(event.response()) + elif isinstance(event, TextMessage): + # echo the incoming message back to the client + if event.data == "quit": + out_data += ws.send( + CloseConnection(CloseReason.NORMAL_CLOSURE, "bye") + ) + running = False + else: + out_data += ws.send(Message(data=event.data)) + + if out_data: + stream.send(out_data) + + if not running: + break + + in_data = stream.recv(4096) + ws.receive_data(in_data) + + # The connection will be closed at this point, but WSGI still + # requires a response. 
+ return Response("", status=204) + + +if __name__ == "__main__": + run_simple("localhost", 5000, websocket) diff --git a/requirements/dev.in b/requirements/dev.in new file mode 100644 index 0000000..99f5942 --- /dev/null +++ b/requirements/dev.in @@ -0,0 +1,6 @@ +-r docs.in +-r tests.in +-r typing.in +pip-compile-multi +pre-commit +tox diff --git a/requirements/dev.txt b/requirements/dev.txt new file mode 100644 index 0000000..50e233e --- /dev/null +++ b/requirements/dev.txt @@ -0,0 +1,64 @@ +# SHA1:54b5b77ec8c7a0064ffa93b2fd16cb0130ba177c +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +-r docs.txt +-r tests.txt +-r typing.txt +build==0.8.0 + # via pip-tools +cfgv==3.3.1 + # via pre-commit +click==8.1.3 + # via + # pip-compile-multi + # pip-tools +distlib==0.3.4 + # via virtualenv +filelock==3.7.1 + # via + # tox + # virtualenv +greenlet==1.1.2 ; python_version < "3.11" + # via -r requirements/tests.in +identify==2.5.1 + # via pre-commit +nodeenv==1.7.0 + # via pre-commit +pep517==0.12.0 + # via build +pip-compile-multi==2.4.5 + # via -r requirements/dev.in +pip-tools==6.8.0 + # via pip-compile-multi +platformdirs==2.5.2 + # via virtualenv +pre-commit==2.20.0 + # via -r requirements/dev.in +pyyaml==6.0 + # via pre-commit +six==1.16.0 + # via + # tox + # virtualenv +toml==0.10.2 + # via + # pre-commit + # tox +toposort==1.7 + # via pip-compile-multi +tox==3.25.1 + # via -r requirements/dev.in +virtualenv==20.15.1 + # via + # pre-commit + # tox +wheel==0.37.1 + # via pip-tools + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/requirements/docs.in b/requirements/docs.in new file mode 100644 index 0000000..7ec501b --- /dev/null +++ b/requirements/docs.in @@ -0,0 +1,4 @@ +Pallets-Sphinx-Themes +Sphinx +sphinx-issues +sphinxcontrib-log-cabinet diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000..8238e78 --- 
/dev/null +++ b/requirements/docs.txt @@ -0,0 +1,65 @@ +# SHA1:45c590f97fe95b8bdc755eef796e91adf5fbe4ea +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +alabaster==0.7.12 + # via sphinx +babel==2.10.3 + # via sphinx +certifi==2022.6.15 + # via requests +charset-normalizer==2.1.0 + # via requests +docutils==0.18.1 + # via sphinx +idna==3.3 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.2 + # via sphinx +markupsafe==2.1.1 + # via jinja2 +packaging==21.3 + # via + # pallets-sphinx-themes + # sphinx +pallets-sphinx-themes==2.0.2 + # via -r requirements/docs.in +pygments==2.12.0 + # via sphinx +pyparsing==3.0.9 + # via packaging +pytz==2022.1 + # via babel +requests==2.28.1 + # via sphinx +snowballstemmer==2.2.0 + # via sphinx +sphinx==5.0.2 + # via + # -r requirements/docs.in + # pallets-sphinx-themes + # sphinx-issues + # sphinxcontrib-log-cabinet +sphinx-issues==3.0.1 + # via -r requirements/docs.in +sphinxcontrib-applehelp==1.0.2 + # via sphinx +sphinxcontrib-devhelp==1.0.2 + # via sphinx +sphinxcontrib-htmlhelp==2.0.0 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-log-cabinet==1.0.1 + # via -r requirements/docs.in +sphinxcontrib-qthelp==1.0.3 + # via sphinx +sphinxcontrib-serializinghtml==1.1.5 + # via sphinx +urllib3==1.26.10 + # via requests diff --git a/requirements/tests.in b/requirements/tests.in new file mode 100644 index 0000000..3ced491 --- /dev/null +++ b/requirements/tests.in @@ -0,0 +1,7 @@ +pytest +pytest-timeout +pytest-xprocess +cryptography +greenlet ; python_version < "3.11" +watchdog +ephemeral-port-reserve diff --git a/requirements/tests.txt b/requirements/tests.txt new file mode 100644 index 0000000..689d8ba --- /dev/null +++ b/requirements/tests.txt @@ -0,0 +1,44 @@ +# SHA1:42b4e3e66395275e048d9a92c294b2c650393866 +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +attrs==21.4.0 + # via pytest 
+cffi==1.15.1 + # via cryptography +cryptography==37.0.4 + # via -r requirements/tests.in +ephemeral-port-reserve==1.1.4 + # via -r requirements/tests.in +greenlet==1.1.2 ; python_version < "3.11" + # via -r requirements/tests.in +iniconfig==1.1.1 + # via pytest +packaging==21.3 + # via pytest +pluggy==1.0.0 + # via pytest +psutil==5.9.1 + # via pytest-xprocess +py==1.11.0 + # via pytest +pycparser==2.21 + # via cffi +pyparsing==3.0.9 + # via packaging +pytest==7.1.2 + # via + # -r requirements/tests.in + # pytest-timeout + # pytest-xprocess +pytest-timeout==2.1.0 + # via -r requirements/tests.in +pytest-xprocess==0.19.0 + # via -r requirements/tests.in +tomli==2.0.1 + # via pytest +watchdog==2.1.9 + # via -r requirements/tests.in diff --git a/requirements/typing.in b/requirements/typing.in new file mode 100644 index 0000000..e17c43d --- /dev/null +++ b/requirements/typing.in @@ -0,0 +1,4 @@ +mypy +types-contextvars +types-dataclasses +types-setuptools diff --git a/requirements/typing.txt b/requirements/typing.txt new file mode 100644 index 0000000..1f6de2c --- /dev/null +++ b/requirements/typing.txt @@ -0,0 +1,21 @@ +# SHA1:95499f7e92b572adde012b13e1ec99dbbb2f7089 +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +mypy==0.961 + # via -r requirements/typing.in +mypy-extensions==0.4.3 + # via mypy +tomli==2.0.1 + # via mypy +types-contextvars==2.4.7 + # via -r requirements/typing.in +types-dataclasses==0.6.6 + # via -r requirements/typing.in +types-setuptools==62.6.1 + # via -r requirements/typing.in +typing-extensions==4.3.0 + # via mypy diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2a1c2e4 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,130 @@ +[metadata] +name = Werkzeug +version = attr: werkzeug.__version__ +url = https://palletsprojects.com/p/werkzeug/ +project_urls = + Donate = https://palletsprojects.com/donate + Documentation = https://werkzeug.palletsprojects.com/ + Changes = 
https://werkzeug.palletsprojects.com/changes/ + Source Code = https://github.com/pallets/werkzeug/ + Issue Tracker = https://github.com/pallets/werkzeug/issues/ + Twitter = https://twitter.com/PalletsTeam + Chat = https://discord.gg/pallets +license = BSD-3-Clause +author = Armin Ronacher +author_email = armin.ronacher@active-4.com +maintainer = Pallets +maintainer_email = contact@palletsprojects.com +description = The comprehensive WSGI web application library. +long_description = file: README.rst +long_description_content_type = text/x-rst +classifiers = + Development Status :: 5 - Production/Stable + Environment :: Web Environment + Intended Audience :: Developers + License :: OSI Approved :: BSD License + Operating System :: OS Independent + Programming Language :: Python + Topic :: Internet :: WWW/HTTP :: Dynamic Content + Topic :: Internet :: WWW/HTTP :: WSGI + Topic :: Internet :: WWW/HTTP :: WSGI :: Application + Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware + Topic :: Software Development :: Libraries :: Application Frameworks + +[options] +packages = find: +package_dir = = src +include_package_data = True +python_requires = >= 3.7 +# Dependencies are in setup.py for GitHub's dependency graph. 
+ +[options.packages.find] +where = src + +[tool:pytest] +testpaths = tests +filterwarnings = + error +markers = + dev_server: tests that start the dev server + +[coverage:run] +branch = True +source = + werkzeug + tests + +[coverage:paths] +source = + src + */site-packages + +[flake8] +# B = bugbear +# E = pycodestyle errors +# F = flake8 pyflakes +# W = pycodestyle warnings +# B9 = bugbear opinions +# ISC = implicit str concat +select = B, E, F, W, B9, ISC +ignore = + # slice notation whitespace, invalid + E203 + # import at top, too many circular import fixes + E402 + # line length, handled by bugbear B950 + E501 + # bare except, handled by bugbear B001 + E722 + # bin op line break, invalid + W503 +# up to 88 allowed by bugbear B950 +max-line-length = 80 +per-file-ignores = + # __init__ exports names + **/__init__.py: F401 + # LocalProxy assigns lambdas + src/werkzeug/local.py: E731 + +[mypy] +files = src/werkzeug +python_version = 3.7 +show_error_codes = True +allow_redefinition = True +disallow_subclassing_any = True +# disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +local_partial_types = True +no_implicit_reexport = True +strict_equality = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_return_any = True +# warn_unreachable = True + +[mypy-werkzeug.wrappers] +no_implicit_reexport = False + +[mypy-colorama.*] +ignore_missing_imports = True + +[mypy-cryptography.*] +ignore_missing_imports = True + +[mypy-eventlet.*] +ignore_missing_imports = True + +[mypy-gevent.*] +ignore_missing_imports = True + +[mypy-greenlet.*] +ignore_missing_imports = True + +[mypy-watchdog.*] +ignore_missing_imports = True + +[mypy-xprocess.*] +ignore_missing_imports = True diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..37d75a5 --- /dev/null +++ b/setup.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +from setuptools import setup + +# Metadata goes 
in setup.cfg. These are here for GitHub's dependency graph. +setup( + name="Werkzeug", + install_requires=["MarkupSafe>=2.1.1"], + extras_require={"watchdog": ["watchdog"]}, +) diff --git a/src/werkzeug/__init__.py b/src/werkzeug/__init__.py new file mode 100644 index 0000000..fd7f8d2 --- /dev/null +++ b/src/werkzeug/__init__.py @@ -0,0 +1,6 @@ +from .serving import run_simple as run_simple +from .test import Client as Client +from .wrappers import Request as Request +from .wrappers import Response as Response + +__version__ = "2.2.2" diff --git a/src/werkzeug/_internal.py b/src/werkzeug/_internal.py new file mode 100644 index 0000000..4636647 --- /dev/null +++ b/src/werkzeug/_internal.py @@ -0,0 +1,548 @@ +import logging +import operator +import re +import string +import sys +import typing +import typing as t +from datetime import date +from datetime import datetime +from datetime import timezone +from itertools import chain +from weakref import WeakKeyDictionary + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + from .wrappers.request import Request # noqa: F401 + +_logger: t.Optional[logging.Logger] = None +_signature_cache = WeakKeyDictionary() # type: ignore +_epoch_ord = date(1970, 1, 1).toordinal() +_legal_cookie_chars = frozenset( + c.encode("ascii") + for c in f"{string.ascii_letters}{string.digits}/=!#$%&'*+-.^_`|~:" +) + +_cookie_quoting_map = {b",": b"\\054", b";": b"\\073", b'"': b'\\"', b"\\": b"\\\\"} +for _i in chain(range(32), range(127, 256)): + _cookie_quoting_map[_i.to_bytes(1, sys.byteorder)] = f"\\{_i:03o}".encode("latin1") + +_octal_re = re.compile(rb"\\[0-3][0-7][0-7]") +_quote_re = re.compile(rb"[\\].") +_legal_cookie_chars_re = rb"[\w\d!#%&\'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" +_cookie_re = re.compile( + rb""" + (?P[^=;]+) + (?:\s*=\s* + (?P + "(?:[^\\"]|\\.)*" | + (?:.*?) + ) + )? 
+ \s*; +""", + flags=re.VERBOSE, +) + + +class _Missing: + def __repr__(self) -> str: + return "no value" + + def __reduce__(self) -> str: + return "_missing" + + +_missing = _Missing() + + +@typing.overload +def _make_encode_wrapper(reference: str) -> t.Callable[[str], str]: + ... + + +@typing.overload +def _make_encode_wrapper(reference: bytes) -> t.Callable[[str], bytes]: + ... + + +def _make_encode_wrapper(reference: t.AnyStr) -> t.Callable[[str], t.AnyStr]: + """Create a function that will be called with a string argument. If + the reference is bytes, values will be encoded to bytes. + """ + if isinstance(reference, str): + return lambda x: x + + return operator.methodcaller("encode", "latin1") + + +def _check_str_tuple(value: t.Tuple[t.AnyStr, ...]) -> None: + """Ensure tuple items are all strings or all bytes.""" + if not value: + return + + item_type = str if isinstance(value[0], str) else bytes + + if any(not isinstance(item, item_type) for item in value): + raise TypeError(f"Cannot mix str and bytes arguments (got {value!r})") + + +_default_encoding = sys.getdefaultencoding() + + +def _to_bytes( + x: t.Union[str, bytes], charset: str = _default_encoding, errors: str = "strict" +) -> bytes: + if x is None or isinstance(x, bytes): + return x + + if isinstance(x, (bytearray, memoryview)): + return bytes(x) + + if isinstance(x, str): + return x.encode(charset, errors) + + raise TypeError("Expected bytes") + + +@typing.overload +def _to_str( # type: ignore + x: None, + charset: t.Optional[str] = ..., + errors: str = ..., + allow_none_charset: bool = ..., +) -> None: + ... + + +@typing.overload +def _to_str( + x: t.Any, + charset: t.Optional[str] = ..., + errors: str = ..., + allow_none_charset: bool = ..., +) -> str: + ... 
+ + +def _to_str( + x: t.Optional[t.Any], + charset: t.Optional[str] = _default_encoding, + errors: str = "strict", + allow_none_charset: bool = False, +) -> t.Optional[t.Union[str, bytes]]: + if x is None or isinstance(x, str): + return x + + if not isinstance(x, (bytes, bytearray)): + return str(x) + + if charset is None: + if allow_none_charset: + return x + + return x.decode(charset, errors) # type: ignore + + +def _wsgi_decoding_dance( + s: str, charset: str = "utf-8", errors: str = "replace" +) -> str: + return s.encode("latin1").decode(charset, errors) + + +def _wsgi_encoding_dance( + s: str, charset: str = "utf-8", errors: str = "replace" +) -> str: + if isinstance(s, bytes): + return s.decode("latin1", errors) + + return s.encode(charset).decode("latin1", errors) + + +def _get_environ(obj: t.Union["WSGIEnvironment", "Request"]) -> "WSGIEnvironment": + env = getattr(obj, "environ", obj) + assert isinstance( + env, dict + ), f"{type(obj).__name__!r} is not a WSGI environment (has to be a dict)" + return env + + +def _has_level_handler(logger: logging.Logger) -> bool: + """Check if there is a handler in the logging chain that will handle + the given logger's effective level. + """ + level = logger.getEffectiveLevel() + current = logger + + while current: + if any(handler.level <= level for handler in current.handlers): + return True + + if not current.propagate: + break + + current = current.parent # type: ignore + + return False + + +class _ColorStreamHandler(logging.StreamHandler): + """On Windows, wrap stream with Colorama for ANSI style support.""" + + def __init__(self) -> None: + try: + import colorama + except ImportError: + stream = None + else: + stream = colorama.AnsiToWin32(sys.stderr) + + super().__init__(stream) + + +def _log(type: str, message: str, *args: t.Any, **kwargs: t.Any) -> None: + """Log a message to the 'werkzeug' logger. + + The logger is created the first time it is needed. 
If there is no + level set, it is set to :data:`logging.INFO`. If there is no handler + for the logger's effective level, a :class:`logging.StreamHandler` + is added. + """ + global _logger + + if _logger is None: + _logger = logging.getLogger("werkzeug") + + if _logger.level == logging.NOTSET: + _logger.setLevel(logging.INFO) + + if not _has_level_handler(_logger): + _logger.addHandler(_ColorStreamHandler()) + + getattr(_logger, type)(message.rstrip(), *args, **kwargs) + + +@typing.overload +def _dt_as_utc(dt: None) -> None: + ... + + +@typing.overload +def _dt_as_utc(dt: datetime) -> datetime: + ... + + +def _dt_as_utc(dt: t.Optional[datetime]) -> t.Optional[datetime]: + if dt is None: + return dt + + if dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + elif dt.tzinfo != timezone.utc: + return dt.astimezone(timezone.utc) + + return dt + + +_TAccessorValue = t.TypeVar("_TAccessorValue") + + +class _DictAccessorProperty(t.Generic[_TAccessorValue]): + """Baseclass for `environ_property` and `header_property`.""" + + read_only = False + + def __init__( + self, + name: str, + default: t.Optional[_TAccessorValue] = None, + load_func: t.Optional[t.Callable[[str], _TAccessorValue]] = None, + dump_func: t.Optional[t.Callable[[_TAccessorValue], str]] = None, + read_only: t.Optional[bool] = None, + doc: t.Optional[str] = None, + ) -> None: + self.name = name + self.default = default + self.load_func = load_func + self.dump_func = dump_func + if read_only is not None: + self.read_only = read_only + self.__doc__ = doc + + def lookup(self, instance: t.Any) -> t.MutableMapping[str, t.Any]: + raise NotImplementedError + + @typing.overload + def __get__( + self, instance: None, owner: type + ) -> "_DictAccessorProperty[_TAccessorValue]": + ... + + @typing.overload + def __get__(self, instance: t.Any, owner: type) -> _TAccessorValue: + ... 
+ + def __get__( + self, instance: t.Optional[t.Any], owner: type + ) -> t.Union[_TAccessorValue, "_DictAccessorProperty[_TAccessorValue]"]: + if instance is None: + return self + + storage = self.lookup(instance) + + if self.name not in storage: + return self.default # type: ignore + + value = storage[self.name] + + if self.load_func is not None: + try: + return self.load_func(value) + except (ValueError, TypeError): + return self.default # type: ignore + + return value # type: ignore + + def __set__(self, instance: t.Any, value: _TAccessorValue) -> None: + if self.read_only: + raise AttributeError("read only property") + + if self.dump_func is not None: + self.lookup(instance)[self.name] = self.dump_func(value) + else: + self.lookup(instance)[self.name] = value + + def __delete__(self, instance: t.Any) -> None: + if self.read_only: + raise AttributeError("read only property") + + self.lookup(instance).pop(self.name, None) + + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.name}>" + + +def _cookie_quote(b: bytes) -> bytes: + buf = bytearray() + all_legal = True + _lookup = _cookie_quoting_map.get + _push = buf.extend + + for char_int in b: + char = char_int.to_bytes(1, sys.byteorder) + if char not in _legal_cookie_chars: + all_legal = False + char = _lookup(char, char) + _push(char) + + if all_legal: + return bytes(buf) + return bytes(b'"' + buf + b'"') + + +def _cookie_unquote(b: bytes) -> bytes: + if len(b) < 2: + return b + if b[:1] != b'"' or b[-1:] != b'"': + return b + + b = b[1:-1] + + i = 0 + n = len(b) + rv = bytearray() + _push = rv.extend + + while 0 <= i < n: + o_match = _octal_re.search(b, i) + q_match = _quote_re.search(b, i) + if not o_match and not q_match: + rv.extend(b[i:]) + break + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): + _push(b[i:k]) + _push(b[k + 1 : k + 2]) + i = k + 2 + else: + _push(b[i:j]) + rv.append(int(b[j + 1 : j + 4], 8)) + i = 
j + 4 + + return bytes(rv) + + +def _cookie_parse_impl(b: bytes) -> t.Iterator[t.Tuple[bytes, bytes]]: + """Lowlevel cookie parsing facility that operates on bytes.""" + i = 0 + n = len(b) + + while i < n: + match = _cookie_re.search(b + b";", i) + if not match: + break + + key = match.group("key").strip() + value = match.group("val") or b"" + i = match.end(0) + + yield key, _cookie_unquote(value) + + +def _encode_idna(domain: str) -> bytes: + # If we're given bytes, make sure they fit into ASCII + if isinstance(domain, bytes): + domain.decode("ascii") + return domain + + # Otherwise check if it's already ascii, then return + try: + return domain.encode("ascii") + except UnicodeError: + pass + + # Otherwise encode each part separately + return b".".join(p.encode("idna") for p in domain.split(".")) + + +def _decode_idna(domain: t.Union[str, bytes]) -> str: + # If the input is a string try to encode it to ascii to do the idna + # decoding. If that fails because of a unicode error, then we + # already have a decoded idna domain. + if isinstance(domain, str): + try: + domain = domain.encode("ascii") + except UnicodeError: + return domain # type: ignore + + # Decode each part separately. If a part fails, try to decode it + # with ascii and silently ignore errors. This makes sense because + # the idna codec does not have error handling. + def decode_part(part: bytes) -> str: + try: + return part.decode("idna") + except UnicodeError: + return part.decode("ascii", "ignore") + + return ".".join(decode_part(p) for p in domain.split(b".")) + + +@typing.overload +def _make_cookie_domain(domain: None) -> None: + ... + + +@typing.overload +def _make_cookie_domain(domain: str) -> bytes: + ... + + +def _make_cookie_domain(domain: t.Optional[str]) -> t.Optional[bytes]: + if domain is None: + return None + domain = _encode_idna(domain) + if b":" in domain: + domain = domain.split(b":", 1)[0] + if b"." 
in domain: + return domain + raise ValueError( + "Setting 'domain' for a cookie on a server running locally (ex: " + "localhost) is not supported by complying browsers. You should " + "have something like: '127.0.0.1 localhost dev.localhost' on " + "your hosts file and then point your server to run on " + "'dev.localhost' and also set 'domain' for 'dev.localhost'" + ) + + +def _easteregg(app: t.Optional["WSGIApplication"] = None) -> "WSGIApplication": + """Like the name says. But who knows how it works?""" + + def bzzzzzzz(gyver: bytes) -> str: + import base64 + import zlib + + return zlib.decompress(base64.b64decode(gyver)).decode("ascii") + + gyver = "\n".join( + [ + x + (77 - len(x)) * " " + for x in bzzzzzzz( + b""" +eJyFlzuOJDkMRP06xRjymKgDJCDQStBYT8BCgK4gTwfQ2fcFs2a2FzvZk+hvlcRvRJD148efHt9m +9Xz94dRY5hGt1nrYcXx7us9qlcP9HHNh28rz8dZj+q4rynVFFPdlY4zH873NKCexrDM6zxxRymzz +4QIxzK4bth1PV7+uHn6WXZ5C4ka/+prFzx3zWLMHAVZb8RRUxtFXI5DTQ2n3Hi2sNI+HK43AOWSY +jmEzE4naFp58PdzhPMdslLVWHTGUVpSxImw+pS/D+JhzLfdS1j7PzUMxij+mc2U0I9zcbZ/HcZxc +q1QjvvcThMYFnp93agEx392ZdLJWXbi/Ca4Oivl4h/Y1ErEqP+lrg7Xa4qnUKu5UE9UUA4xeqLJ5 +jWlPKJvR2yhRI7xFPdzPuc6adXu6ovwXwRPXXnZHxlPtkSkqWHilsOrGrvcVWXgGP3daXomCj317 +8P2UOw/NnA0OOikZyFf3zZ76eN9QXNwYdD8f8/LdBRFg0BO3bB+Pe/+G8er8tDJv83XTkj7WeMBJ +v/rnAfdO51d6sFglfi8U7zbnr0u9tyJHhFZNXYfH8Iafv2Oa+DT6l8u9UYlajV/hcEgk1x8E8L/r +XJXl2SK+GJCxtnyhVKv6GFCEB1OO3f9YWAIEbwcRWv/6RPpsEzOkXURMN37J0PoCSYeBnJQd9Giu +LxYQJNlYPSo/iTQwgaihbART7Fcyem2tTSCcwNCs85MOOpJtXhXDe0E7zgZJkcxWTar/zEjdIVCk +iXy87FW6j5aGZhttDBoAZ3vnmlkx4q4mMmCdLtnHkBXFMCReqthSGkQ+MDXLLCpXwBs0t+sIhsDI +tjBB8MwqYQpLygZ56rRHHpw+OAVyGgaGRHWy2QfXez+ZQQTTBkmRXdV/A9LwH6XGZpEAZU8rs4pE +1R4FQ3Uwt8RKEtRc0/CrANUoes3EzM6WYcFyskGZ6UTHJWenBDS7h163Eo2bpzqxNE9aVgEM2CqI +GAJe9Yra4P5qKmta27VjzYdR04Vc7KHeY4vs61C0nbywFmcSXYjzBHdiEjraS7PGG2jHHTpJUMxN +Jlxr3pUuFvlBWLJGE3GcA1/1xxLcHmlO+LAXbhrXah1tD6Ze+uqFGdZa5FM+3eHcKNaEarutAQ0A +QMAZHV+ve6LxAwWnXbbSXEG2DmCX5ijeLCKj5lhVFBrMm+ryOttCAeFpUdZyQLAQkA06RLs56rzG 
+8MID55vqr/g64Qr/wqwlE0TVxgoiZhHrbY2h1iuuyUVg1nlkpDrQ7Vm1xIkI5XRKLedN9EjzVchu +jQhXcVkjVdgP2O99QShpdvXWoSwkp5uMwyjt3jiWCqWGSiaaPAzohjPanXVLbM3x0dNskJsaCEyz +DTKIs+7WKJD4ZcJGfMhLFBf6hlbnNkLEePF8Cx2o2kwmYF4+MzAxa6i+6xIQkswOqGO+3x9NaZX8 +MrZRaFZpLeVTYI9F/djY6DDVVs340nZGmwrDqTCiiqD5luj3OzwpmQCiQhdRYowUYEA3i1WWGwL4 +GCtSoO4XbIPFeKGU13XPkDf5IdimLpAvi2kVDVQbzOOa4KAXMFlpi/hV8F6IDe0Y2reg3PuNKT3i +RYhZqtkQZqSB2Qm0SGtjAw7RDwaM1roESC8HWiPxkoOy0lLTRFG39kvbLZbU9gFKFRvixDZBJmpi +Xyq3RE5lW00EJjaqwp/v3EByMSpVZYsEIJ4APaHmVtpGSieV5CALOtNUAzTBiw81GLgC0quyzf6c +NlWknzJeCsJ5fup2R4d8CYGN77mu5vnO1UqbfElZ9E6cR6zbHjgsr9ly18fXjZoPeDjPuzlWbFwS +pdvPkhntFvkc13qb9094LL5NrA3NIq3r9eNnop9DizWOqCEbyRBFJTHn6Tt3CG1o8a4HevYh0XiJ +sR0AVVHuGuMOIfbuQ/OKBkGRC6NJ4u7sbPX8bG/n5sNIOQ6/Y/BX3IwRlTSabtZpYLB85lYtkkgm +p1qXK3Du2mnr5INXmT/78KI12n11EFBkJHHp0wJyLe9MvPNUGYsf+170maayRoy2lURGHAIapSpQ +krEDuNoJCHNlZYhKpvw4mspVWxqo415n8cD62N9+EfHrAvqQnINStetek7RY2Urv8nxsnGaZfRr/ +nhXbJ6m/yl1LzYqscDZA9QHLNbdaSTTr+kFg3bC0iYbX/eQy0Bv3h4B50/SGYzKAXkCeOLI3bcAt +mj2Z/FM1vQWgDynsRwNvrWnJHlespkrp8+vO1jNaibm+PhqXPPv30YwDZ6jApe3wUjFQobghvW9p +7f2zLkGNv8b191cD/3vs9Q833z8t""" + ).splitlines() + ] + ) + + def easteregged( + environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + def injecting_start_response( + status: str, headers: t.List[t.Tuple[str, str]], exc_info: t.Any = None + ) -> t.Callable[[bytes], t.Any]: + headers.append(("X-Powered-By", "Werkzeug")) + return start_response(status, headers, exc_info) + + if app is not None and environ.get("QUERY_STRING") != "macgybarchakku": + return app(environ, injecting_start_response) + injecting_start_response("200 OK", [("Content-Type", "text/html")]) + return [ + f"""\ + + + +About Werkzeug + + + +

Werkzeug

+

the Swiss Army knife of Python web development.

+
{gyver}\n\n\n
+ +""".encode( + "latin1" + ) + ] + + return easteregged diff --git a/src/werkzeug/_reloader.py b/src/werkzeug/_reloader.py new file mode 100644 index 0000000..57f3117 --- /dev/null +++ b/src/werkzeug/_reloader.py @@ -0,0 +1,446 @@ +import fnmatch +import os +import subprocess +import sys +import threading +import time +import typing as t +from itertools import chain +from pathlib import PurePath + +from ._internal import _log + +# The various system prefixes where imports are found. Base values are +# different when running in a virtualenv. All reloaders will ignore the +# base paths (usually the system installation). The stat reloader won't +# scan the virtualenv paths, it will only include modules that are +# already imported. +_ignore_always = tuple({sys.base_prefix, sys.base_exec_prefix}) +prefix = {*_ignore_always, sys.prefix, sys.exec_prefix} + +if hasattr(sys, "real_prefix"): + # virtualenv < 20 + prefix.add(sys.real_prefix) # type: ignore[attr-defined] + +_stat_ignore_scan = tuple(prefix) +del prefix +_ignore_common_dirs = { + "__pycache__", + ".git", + ".hg", + ".tox", + ".nox", + ".pytest_cache", + ".mypy_cache", +} + + +def _iter_module_paths() -> t.Iterator[str]: + """Find the filesystem paths associated with imported modules.""" + # List is in case the value is modified by the app while updating. + for module in list(sys.modules.values()): + name = getattr(module, "__file__", None) + + if name is None or name.startswith(_ignore_always): + continue + + while not os.path.isfile(name): + # Zip file, find the base file without the module path. 
+ old = name + name = os.path.dirname(name) + + if name == old: # skip if it was all directories somehow + break + else: + yield name + + +def _remove_by_pattern(paths: t.Set[str], exclude_patterns: t.Set[str]) -> None: + for pattern in exclude_patterns: + paths.difference_update(fnmatch.filter(paths, pattern)) + + +def _find_stat_paths( + extra_files: t.Set[str], exclude_patterns: t.Set[str] +) -> t.Iterable[str]: + """Find paths for the stat reloader to watch. Returns imported + module files, Python files under non-system paths. Extra files and + Python files under extra directories can also be scanned. + + System paths have to be excluded for efficiency. Non-system paths, + such as a project root or ``sys.path.insert``, should be the paths + of interest to the user anyway. + """ + paths = set() + + for path in chain(list(sys.path), extra_files): + path = os.path.abspath(path) + + if os.path.isfile(path): + # zip file on sys.path, or extra file + paths.add(path) + continue + + parent_has_py = {os.path.dirname(path): True} + + for root, dirs, files in os.walk(path): + # Optimizations: ignore system prefixes, __pycache__ will + # have a py or pyc module at the import path, ignore some + # common known dirs such as version control and tool caches. + if ( + root.startswith(_stat_ignore_scan) + or os.path.basename(root) in _ignore_common_dirs + ): + dirs.clear() + continue + + has_py = False + + for name in files: + if name.endswith((".py", ".pyc")): + has_py = True + paths.add(os.path.join(root, name)) + + # Optimization: stop scanning a directory if neither it nor + # its parent contained Python files. 
+ if not (has_py or parent_has_py[os.path.dirname(root)]): + dirs.clear() + continue + + parent_has_py[root] = has_py + + paths.update(_iter_module_paths()) + _remove_by_pattern(paths, exclude_patterns) + return paths + + +def _find_watchdog_paths( + extra_files: t.Set[str], exclude_patterns: t.Set[str] +) -> t.Iterable[str]: + """Find paths for the watchdog reloader to watch. Looks at the same + sources as the stat reloader, but watches everything under + directories instead of individual files. + """ + dirs = set() + + for name in chain(list(sys.path), extra_files): + name = os.path.abspath(name) + + if os.path.isfile(name): + name = os.path.dirname(name) + + dirs.add(name) + + for name in _iter_module_paths(): + dirs.add(os.path.dirname(name)) + + _remove_by_pattern(dirs, exclude_patterns) + return _find_common_roots(dirs) + + +def _find_common_roots(paths: t.Iterable[str]) -> t.Iterable[str]: + root: t.Dict[str, dict] = {} + + for chunks in sorted((PurePath(x).parts for x in paths), key=len, reverse=True): + node = root + + for chunk in chunks: + node = node.setdefault(chunk, {}) + + node.clear() + + rv = set() + + def _walk(node: t.Mapping[str, dict], path: t.Tuple[str, ...]) -> None: + for prefix, child in node.items(): + _walk(child, path + (prefix,)) + + if not node: + rv.add(os.path.join(*path)) + + _walk(root, ()) + return rv + + +def _get_args_for_reloading() -> t.List[str]: + """Determine how the script was executed, and return the args needed + to execute it again in a new process. + """ + rv = [sys.executable] + py_script = sys.argv[0] + args = sys.argv[1:] + # Need to look at main module to determine how it was executed. + __main__ = sys.modules["__main__"] + + # The value of __package__ indicates how Python was called. It may + # not exist if a setuptools script is installed as an egg. It may be + # set incorrectly for entry points created with pip on Windows.
+ if getattr(__main__, "__package__", None) is None or ( + os.name == "nt" + and __main__.__package__ == "" + and not os.path.exists(py_script) + and os.path.exists(f"{py_script}.exe") + ): + # Executed a file, like "python app.py". + py_script = os.path.abspath(py_script) + + if os.name == "nt": + # Windows entry points have ".exe" extension and should be + # called directly. + if not os.path.exists(py_script) and os.path.exists(f"{py_script}.exe"): + py_script += ".exe" + + if ( + os.path.splitext(sys.executable)[1] == ".exe" + and os.path.splitext(py_script)[1] == ".exe" + ): + rv.pop(0) + + rv.append(py_script) + else: + # Executed a module, like "python -m werkzeug.serving". + if os.path.isfile(py_script): + # Rewritten by Python from "-m script" to "/path/to/script.py". + py_module = t.cast(str, __main__.__package__) + name = os.path.splitext(os.path.basename(py_script))[0] + + if name != "__main__": + py_module += f".{name}" + else: + # Incorrectly rewritten by pydevd debugger from "-m script" to "script". + py_module = py_script + + rv.extend(("-m", py_module.lstrip("."))) + + rv.extend(args) + return rv + + +class ReloaderLoop: + name = "" + + def __init__( + self, + extra_files: t.Optional[t.Iterable[str]] = None, + exclude_patterns: t.Optional[t.Iterable[str]] = None, + interval: t.Union[int, float] = 1, + ) -> None: + self.extra_files: t.Set[str] = {os.path.abspath(x) for x in extra_files or ()} + self.exclude_patterns: t.Set[str] = set(exclude_patterns or ()) + self.interval = interval + + def __enter__(self) -> "ReloaderLoop": + """Do any setup, then run one step of the watch to populate the + initial filesystem state. + """ + self.run_step() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): # type: ignore + """Clean up any resources associated with the reloader.""" + pass + + def run(self) -> None: + """Continually run the watch step, sleeping for the configured + interval after each step. 
+ """ + while True: + self.run_step() + time.sleep(self.interval) + + def run_step(self) -> None: + """Run one step for watching the filesystem. Called once to set + up initial state, then repeatedly to update it. + """ + pass + + def restart_with_reloader(self) -> int: + """Spawn a new Python interpreter with the same arguments as the + current one, but running the reloader thread. + """ + while True: + _log("info", f" * Restarting with {self.name}") + args = _get_args_for_reloading() + new_environ = os.environ.copy() + new_environ["WERKZEUG_RUN_MAIN"] = "true" + exit_code = subprocess.call(args, env=new_environ, close_fds=False) + + if exit_code != 3: + return exit_code + + def trigger_reload(self, filename: str) -> None: + self.log_reload(filename) + sys.exit(3) + + def log_reload(self, filename: str) -> None: + filename = os.path.abspath(filename) + _log("info", f" * Detected change in {filename!r}, reloading") + + +class StatReloaderLoop(ReloaderLoop): + name = "stat" + + def __enter__(self) -> ReloaderLoop: + self.mtimes: t.Dict[str, float] = {} + return super().__enter__() + + def run_step(self) -> None: + for name in _find_stat_paths(self.extra_files, self.exclude_patterns): + try: + mtime = os.stat(name).st_mtime + except OSError: + continue + + old_time = self.mtimes.get(name) + + if old_time is None: + self.mtimes[name] = mtime + continue + + if mtime > old_time: + self.trigger_reload(name) + + +class WatchdogReloaderLoop(ReloaderLoop): + def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: + from watchdog.observers import Observer + from watchdog.events import PatternMatchingEventHandler + + super().__init__(*args, **kwargs) + trigger_reload = self.trigger_reload + + class EventHandler(PatternMatchingEventHandler): # type: ignore + def on_any_event(self, event): # type: ignore + trigger_reload(event.src_path) + + reloader_name = Observer.__name__.lower() + + if reloader_name.endswith("observer"): + reloader_name = reloader_name[:-8] + + self.name 
= f"watchdog ({reloader_name})" + self.observer = Observer() + # Extra patterns can be non-Python files, match them in addition + # to all Python files in default and extra directories. Ignore + # __pycache__ since a change there will always have a change to + # the source file (or initial pyc file) as well. Ignore Git and + # Mercurial internal changes. + extra_patterns = [p for p in self.extra_files if not os.path.isdir(p)] + self.event_handler = EventHandler( + patterns=["*.py", "*.pyc", "*.zip", *extra_patterns], + ignore_patterns=[ + *[f"*/{d}/*" for d in _ignore_common_dirs], + *self.exclude_patterns, + ], + ) + self.should_reload = False + + def trigger_reload(self, filename: str) -> None: + # This is called inside an event handler, which means throwing + # SystemExit has no effect. + # https://github.com/gorakhargosh/watchdog/issues/294 + self.should_reload = True + self.log_reload(filename) + + def __enter__(self) -> ReloaderLoop: + self.watches: t.Dict[str, t.Any] = {} + self.observer.start() + return super().__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): # type: ignore + self.observer.stop() + self.observer.join() + + def run(self) -> None: + while not self.should_reload: + self.run_step() + time.sleep(self.interval) + + sys.exit(3) + + def run_step(self) -> None: + to_delete = set(self.watches) + + for path in _find_watchdog_paths(self.extra_files, self.exclude_patterns): + if path not in self.watches: + try: + self.watches[path] = self.observer.schedule( + self.event_handler, path, recursive=True + ) + except OSError: + # Clear this path from list of watches We don't want + # the same error message showing again in the next + # iteration. 
+ self.watches[path] = None + + to_delete.discard(path) + + for path in to_delete: + watch = self.watches.pop(path, None) + + if watch is not None: + self.observer.unschedule(watch) + + +reloader_loops: t.Dict[str, t.Type[ReloaderLoop]] = { + "stat": StatReloaderLoop, + "watchdog": WatchdogReloaderLoop, +} + +try: + __import__("watchdog.observers") +except ImportError: + reloader_loops["auto"] = reloader_loops["stat"] +else: + reloader_loops["auto"] = reloader_loops["watchdog"] + + +def ensure_echo_on() -> None: + """Ensure that echo mode is enabled. Some tools such as PDB disable + it which causes usability issues after a reload.""" + # tcgetattr will fail if stdin isn't a tty + if sys.stdin is None or not sys.stdin.isatty(): + return + + try: + import termios + except ImportError: + return + + attributes = termios.tcgetattr(sys.stdin) + + if not attributes[3] & termios.ECHO: + attributes[3] |= termios.ECHO + termios.tcsetattr(sys.stdin, termios.TCSANOW, attributes) + + +def run_with_reloader( + main_func: t.Callable[[], None], + extra_files: t.Optional[t.Iterable[str]] = None, + exclude_patterns: t.Optional[t.Iterable[str]] = None, + interval: t.Union[int, float] = 1, + reloader_type: str = "auto", +) -> None: + """Run the given function in an independent Python interpreter.""" + import signal + + signal.signal(signal.SIGTERM, lambda *args: sys.exit(0)) + reloader = reloader_loops[reloader_type]( + extra_files=extra_files, exclude_patterns=exclude_patterns, interval=interval + ) + + try: + if os.environ.get("WERKZEUG_RUN_MAIN") == "true": + ensure_echo_on() + t = threading.Thread(target=main_func, args=()) + t.daemon = True + + # Enter the reloader to set up initial state, then start + # the app thread and reloader update loop. 
+ with reloader: + t.start() + reloader.run() + else: + sys.exit(reloader.restart_with_reloader()) + except KeyboardInterrupt: + pass diff --git a/src/werkzeug/datastructures.py b/src/werkzeug/datastructures.py new file mode 100644 index 0000000..43ee8c7 --- /dev/null +++ b/src/werkzeug/datastructures.py @@ -0,0 +1,3040 @@ +import base64 +import codecs +import mimetypes +import os +import re +from collections.abc import Collection +from collections.abc import MutableSet +from copy import deepcopy +from io import BytesIO +from itertools import repeat +from os import fspath + +from . import exceptions +from ._internal import _missing + + +def is_immutable(self): + raise TypeError(f"{type(self).__name__!r} objects are immutable") + + +def iter_multi_items(mapping): + """Iterates over the items of a mapping yielding keys and values + without dropping any from more complex structures. + """ + if isinstance(mapping, MultiDict): + yield from mapping.items(multi=True) + elif isinstance(mapping, dict): + for key, value in mapping.items(): + if isinstance(value, (tuple, list)): + for v in value: + yield key, v + else: + yield key, value + else: + yield from mapping + + +class ImmutableListMixin: + """Makes a :class:`list` immutable. + + .. 
versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(tuple(self)) + return rv + + def __reduce_ex__(self, protocol): + return type(self), (list(self),) + + def __delitem__(self, key): + is_immutable(self) + + def __iadd__(self, other): + is_immutable(self) + + def __imul__(self, other): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + def append(self, item): + is_immutable(self) + + def remove(self, item): + is_immutable(self) + + def extend(self, iterable): + is_immutable(self) + + def insert(self, pos, value): + is_immutable(self) + + def pop(self, index=-1): + is_immutable(self) + + def reverse(self): + is_immutable(self) + + def sort(self, key=None, reverse=False): + is_immutable(self) + + +class ImmutableList(ImmutableListMixin, list): + """An immutable :class:`list`. + + .. versionadded:: 0.5 + + :private: + """ + + def __repr__(self): + return f"{type(self).__name__}({list.__repr__(self)})" + + +class ImmutableDictMixin: + """Makes a :class:`dict` immutable. + + .. 
versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + @classmethod + def fromkeys(cls, keys, value=None): + instance = super().__new__(cls) + instance.__init__(zip(keys, repeat(value))) + return instance + + def __reduce_ex__(self, protocol): + return type(self), (dict(self),) + + def _iter_hashitems(self): + return self.items() + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(frozenset(self._iter_hashitems())) + return rv + + def setdefault(self, key, default=None): + is_immutable(self) + + def update(self, *args, **kwargs): + is_immutable(self) + + def pop(self, key, default=None): + is_immutable(self) + + def popitem(self): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + def __delitem__(self, key): + is_immutable(self) + + def clear(self): + is_immutable(self) + + +class ImmutableMultiDictMixin(ImmutableDictMixin): + """Makes a :class:`MultiDict` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + def __reduce_ex__(self, protocol): + return type(self), (list(self.items(multi=True)),) + + def _iter_hashitems(self): + return self.items(multi=True) + + def add(self, key, value): + is_immutable(self) + + def popitemlist(self): + is_immutable(self) + + def poplist(self, key): + is_immutable(self) + + def setlist(self, key, new_list): + is_immutable(self) + + def setlistdefault(self, key, default_list=None): + is_immutable(self) + + +def _calls_update(name): + def oncall(self, *args, **kw): + rv = getattr(super(UpdateDictMixin, self), name)(*args, **kw) + + if self.on_update is not None: + self.on_update(self) + + return rv + + oncall.__name__ = name + return oncall + + +class UpdateDictMixin(dict): + """Makes dicts call `self.on_update` on modifications. + + .. 
versionadded:: 0.5 + + :private: + """ + + on_update = None + + def setdefault(self, key, default=None): + modified = key not in self + rv = super().setdefault(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + def pop(self, key, default=_missing): + modified = key in self + if default is _missing: + rv = super().pop(key) + else: + rv = super().pop(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + __setitem__ = _calls_update("__setitem__") + __delitem__ = _calls_update("__delitem__") + clear = _calls_update("clear") + popitem = _calls_update("popitem") + update = _calls_update("update") + + +class TypeConversionDict(dict): + """Works like a regular dict but the :meth:`get` method can perform + type conversions. :class:`MultiDict` and :class:`CombinedMultiDict` + are subclasses of this class and provide the same feature. + + .. versionadded:: 0.5 + """ + + def get(self, key, default=None, type=None): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = TypeConversionDict(foo='42', bar='blub') + >>> d.get('foo', type=int) + 42 + >>> d.get('bar', -1, type=int) + -1 + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the default value is returned. 
+ """ + try: + rv = self[key] + except KeyError: + return default + if type is not None: + try: + rv = type(rv) + except ValueError: + rv = default + return rv + + +class ImmutableTypeConversionDict(ImmutableDictMixin, TypeConversionDict): + """Works like a :class:`TypeConversionDict` but does not support + modifications. + + .. versionadded:: 0.5 + """ + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return TypeConversionDict(self) + + def __copy__(self): + return self + + +class MultiDict(TypeConversionDict): + """A :class:`MultiDict` is a dictionary subclass customized to deal with + multiple values for the same key which is for example used by the parsing + functions in the wrappers. This is necessary because some HTML form + elements pass multiple values for the same key. + + :class:`MultiDict` implements all standard dictionary methods. + Internally, it saves all values for a key as a list, but the standard dict + access methods will only return the first value for a key. If you want to + gain access to the other values, too, you have to use the `list` methods as + explained below. + + Basic Usage: + + >>> d = MultiDict([('a', 'b'), ('a', 'c')]) + >>> d + MultiDict([('a', 'b'), ('a', 'c')]) + >>> d['a'] + 'b' + >>> d.getlist('a') + ['b', 'c'] + >>> 'a' in d + True + + It behaves like a normal dict thus all dict functions will only return the + first value when multiple values for one key are found. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. 
+ + A :class:`MultiDict` can be constructed from an iterable of + ``(key, value)`` tuples, a dict, a :class:`MultiDict` or from Werkzeug 0.2 + onwards some keyword parameters. + + :param mapping: the initial value for the :class:`MultiDict`. Either a + regular dict, an iterable of ``(key, value)`` tuples + or `None`. + """ + + def __init__(self, mapping=None): + if isinstance(mapping, MultiDict): + dict.__init__(self, ((k, l[:]) for k, l in mapping.lists())) + elif isinstance(mapping, dict): + tmp = {} + for key, value in mapping.items(): + if isinstance(value, (tuple, list)): + if len(value) == 0: + continue + value = list(value) + else: + value = [value] + tmp[key] = value + dict.__init__(self, tmp) + else: + tmp = {} + for key, value in mapping or (): + tmp.setdefault(key, []).append(value) + dict.__init__(self, tmp) + + def __getstate__(self): + return dict(self.lists()) + + def __setstate__(self, value): + dict.clear(self) + dict.update(self, value) + + def __iter__(self): + # Work around https://bugs.python.org/issue43246. + # (`return super().__iter__()` also works here, which makes this look + # even more like it should be a no-op, yet it isn't.) + return dict.__iter__(self) + + def __getitem__(self, key): + """Return the first data value for this key; + raises KeyError if not found. + + :param key: The key to be looked up. + :raise KeyError: if the key does not exist. + """ + + if key in self: + lst = dict.__getitem__(self, key) + if len(lst) > 0: + return lst[0] + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + """Like :meth:`add` but removes an existing key first. + + :param key: the key for the value. + :param value: the value to set. + """ + dict.__setitem__(self, key, [value]) + + def add(self, key, value): + """Adds a new value for the key. + + .. versionadded:: 0.6 + + :param key: the key for the value. + :param value: the value to add. 
+ """ + dict.setdefault(self, key, []).append(value) + + def getlist(self, key, type=None): + """Return the list of items for a given key. If that key is not in the + `MultiDict`, the return value will be an empty list. Just like `get`, + `getlist` accepts a `type` parameter. All items will be converted + with the callable defined there. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + """ + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return list(rv) + result = [] + for item in rv: + try: + result.append(type(item)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + """Remove the old values for a key and add new ones. Note that the list + you pass the values in will be shallow-copied before it is inserted in + the dictionary. + + >>> d = MultiDict() + >>> d.setlist('foo', ['1', '2']) + >>> d['foo'] + '1' + >>> d.getlist('foo') + ['1', '2'] + + :param key: The key for which the values are set. + :param new_list: An iterable with the new values for the key. Old values + are removed first. + """ + dict.__setitem__(self, key, list(new_list)) + + def setdefault(self, key, default=None): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. + """ + if key not in self: + self[key] = default + else: + default = self[key] + return default + + def setlistdefault(self, key, default_list=None): + """Like `setdefault` but sets multiple values. The list returned + is not a copy, but the list that is actually used internally. 
This + means that you can put new values into the dict by appending items + to the list: + + >>> d = MultiDict({"foo": 1}) + >>> d.setlistdefault("foo").extend([2, 3]) + >>> d.getlist("foo") + [1, 2, 3] + + :param key: The key to be looked up. + :param default_list: An iterable of default values. It is either copied + (in case it was a list) or converted into a list + before returned. + :return: a :class:`list` + """ + if key not in self: + default_list = list(default_list or ()) + dict.__setitem__(self, key, default_list) + else: + default_list = dict.__getitem__(self, key) + return default_list + + def items(self, multi=False): + """Return an iterator of ``(key, value)`` pairs. + + :param multi: If set to `True` the iterator returned will have a pair + for each value of each key. Otherwise it will only + contain pairs for the first value of each key. + """ + for key, values in dict.items(self): + if multi: + for value in values: + yield key, value + else: + yield key, values[0] + + def lists(self): + """Return a iterator of ``(key, values)`` pairs, where values is the list + of all values associated with the key.""" + for key, values in dict.items(self): + yield key, list(values) + + def values(self): + """Returns an iterator of the first value on every key's value list.""" + for values in dict.values(self): + yield values[0] + + def listvalues(self): + """Return an iterator of all values associated with a key. Zipping + :meth:`keys` and this is the same as calling :meth:`lists`: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> zip(d.keys(), d.listvalues()) == d.lists() + True + """ + return dict.values(self) + + def copy(self): + """Return a shallow copy of this object.""" + return self.__class__(self) + + def deepcopy(self, memo=None): + """Return a deep copy of this object.""" + return self.__class__(deepcopy(self.to_dict(flat=False), memo)) + + def to_dict(self, flat=True): + """Return the contents as regular dict. 
If `flat` is `True` the + returned dict will only have the first item present, if `flat` is + `False` all values will be returned as lists. + + :param flat: If set to `False` the dict returned will have lists + with all the values in it. Otherwise it will only + contain the first value for each key. + :return: a :class:`dict` + """ + if flat: + return dict(self.items()) + return dict(self.lists()) + + def update(self, mapping): + """update() extends rather than replaces existing key lists: + + >>> a = MultiDict({'x': 1}) + >>> b = MultiDict({'x': 2, 'y': 3}) + >>> a.update(b) + >>> a + MultiDict([('y', 3), ('x', 1), ('x', 2)]) + + If the value list for a key in ``other_dict`` is empty, no new values + will be added to the dict and the key will not be created: + + >>> x = {'empty_list': []} + >>> y = MultiDict() + >>> y.update(x) + >>> y + MultiDict([]) + """ + for key, value in iter_multi_items(mapping): + MultiDict.add(self, key, value) + + def pop(self, key, default=_missing): + """Pop the first item for a list on the dict. Afterwards the + key is removed from the dict, so additional values are discarded: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> d.pop("foo") + 1 + >>> "foo" in d + False + + :param key: the key to pop. + :param default: if provided the value to return if the key was + not in the dictionary. + """ + try: + lst = dict.pop(self, key) + + if len(lst) == 0: + raise exceptions.BadRequestKeyError(key) + + return lst[0] + except KeyError: + if default is not _missing: + return default + + raise exceptions.BadRequestKeyError(key) from None + + def popitem(self): + """Pop an item from the dict.""" + try: + item = dict.popitem(self) + + if len(item[1]) == 0: + raise exceptions.BadRequestKeyError(item[0]) + + return (item[0], item[1][0]) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) from None + + def poplist(self, key): + """Pop the list for a key from the dict. If the key is not in the dict + an empty list is returned. 
+ + .. versionchanged:: 0.5 + If the key does no longer exist a list is returned instead of + raising an error. + """ + return dict.pop(self, key, []) + + def popitemlist(self): + """Pop a ``(key, list)`` tuple from the dict.""" + try: + return dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) from None + + def __copy__(self): + return self.copy() + + def __deepcopy__(self, memo): + return self.deepcopy(memo=memo) + + def __repr__(self): + return f"{type(self).__name__}({list(self.items(multi=True))!r})" + + +class _omd_bucket: + """Wraps values in the :class:`OrderedMultiDict`. This makes it + possible to keep an order over multiple different keys. It requires + a lot of extra memory and slows down access a lot, but makes it + possible to access elements in O(1) and iterate in O(n). + """ + + __slots__ = ("prev", "key", "value", "next") + + def __init__(self, omd, key, value): + self.prev = omd._last_bucket + self.key = key + self.value = value + self.next = None + + if omd._first_bucket is None: + omd._first_bucket = self + if omd._last_bucket is not None: + omd._last_bucket.next = self + omd._last_bucket = self + + def unlink(self, omd): + if self.prev: + self.prev.next = self.next + if self.next: + self.next.prev = self.prev + if omd._first_bucket is self: + omd._first_bucket = self.next + if omd._last_bucket is self: + omd._last_bucket = self.prev + + +class OrderedMultiDict(MultiDict): + """Works like a regular :class:`MultiDict` but preserves the + order of the fields. To convert the ordered multi dict into a + list you can use the :meth:`items` method and pass it ``multi=True``. + + In general an :class:`OrderedMultiDict` is an order of magnitude + slower than a :class:`MultiDict`. + + .. admonition:: note + + Due to a limitation in Python you cannot convert an ordered + multi dict into a regular dict by using ``dict(multidict)``. 
+ Instead you have to use the :meth:`to_dict` method, otherwise + the internal bucket objects are exposed. + """ + + def __init__(self, mapping=None): + dict.__init__(self) + self._first_bucket = self._last_bucket = None + if mapping is not None: + OrderedMultiDict.update(self, mapping) + + def __eq__(self, other): + if not isinstance(other, MultiDict): + return NotImplemented + if isinstance(other, OrderedMultiDict): + iter1 = iter(self.items(multi=True)) + iter2 = iter(other.items(multi=True)) + try: + for k1, v1 in iter1: + k2, v2 = next(iter2) + if k1 != k2 or v1 != v2: + return False + except StopIteration: + return False + try: + next(iter2) + except StopIteration: + return True + return False + if len(self) != len(other): + return False + for key, values in self.lists(): + if other.getlist(key) != values: + return False + return True + + __hash__ = None + + def __reduce_ex__(self, protocol): + return type(self), (list(self.items(multi=True)),) + + def __getstate__(self): + return list(self.items(multi=True)) + + def __setstate__(self, values): + dict.clear(self) + for key, value in values: + self.add(key, value) + + def __getitem__(self, key): + if key in self: + return dict.__getitem__(self, key)[0].value + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + self.poplist(key) + self.add(key, value) + + def __delitem__(self, key): + self.pop(key) + + def keys(self): + return (key for key, value in self.items()) + + def __iter__(self): + return iter(self.keys()) + + def values(self): + return (value for key, value in self.items()) + + def items(self, multi=False): + ptr = self._first_bucket + if multi: + while ptr is not None: + yield ptr.key, ptr.value + ptr = ptr.next + else: + returned_keys = set() + while ptr is not None: + if ptr.key not in returned_keys: + returned_keys.add(ptr.key) + yield ptr.key, ptr.value + ptr = ptr.next + + def lists(self): + returned_keys = set() + ptr = self._first_bucket + while ptr is not None: + 
if ptr.key not in returned_keys: + yield ptr.key, self.getlist(ptr.key) + returned_keys.add(ptr.key) + ptr = ptr.next + + def listvalues(self): + for _key, values in self.lists(): + yield values + + def add(self, key, value): + dict.setdefault(self, key, []).append(_omd_bucket(self, key, value)) + + def getlist(self, key, type=None): + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return [x.value for x in rv] + result = [] + for item in rv: + try: + result.append(type(item.value)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + self.poplist(key) + for value in new_list: + self.add(key, value) + + def setlistdefault(self, key, default_list=None): + raise TypeError("setlistdefault is unsupported for ordered multi dicts") + + def update(self, mapping): + for key, value in iter_multi_items(mapping): + OrderedMultiDict.add(self, key, value) + + def poplist(self, key): + buckets = dict.pop(self, key, ()) + for bucket in buckets: + bucket.unlink(self) + return [x.value for x in buckets] + + def pop(self, key, default=_missing): + try: + buckets = dict.pop(self, key) + except KeyError: + if default is not _missing: + return default + + raise exceptions.BadRequestKeyError(key) from None + + for bucket in buckets: + bucket.unlink(self) + + return buckets[0].value + + def popitem(self): + try: + key, buckets = dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) from None + + for bucket in buckets: + bucket.unlink(self) + + return key, buckets[0].value + + def popitemlist(self): + try: + key, buckets = dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) from None + + for bucket in buckets: + bucket.unlink(self) + + return key, [x.value for x in buckets] + + +def _options_header_vkw(value, kw): + return http.dump_options_header( + value, {k.replace("_", "-"): v for k, v in kw.items()} + ) + + +def 
class Headers:
    """An object that stores some headers. It has a dict-like interface,
    but is ordered, can store the same key multiple times, and iterating
    yields ``(key, value)`` pairs instead of only keys.

    This data structure is useful if you want a nicer way to handle WSGI
    headers which are stored as tuples in a list.

    From Werkzeug 0.3 onwards, the :exc:`KeyError` raised by this class is
    also a subclass of the :class:`~exceptions.BadRequest` HTTP exception
    and will render a page for a ``400 BAD REQUEST`` if caught in a
    catch-all for HTTP exceptions.

    Headers is mostly compatible with the Python :class:`wsgiref.headers.Headers`
    class, with the exception of `__getitem__`. :mod:`wsgiref` will return
    `None` for ``headers['missing']``, whereas :class:`Headers` will raise
    a :class:`KeyError`.

    To create a new ``Headers`` object, pass it a list, dict, or
    other ``Headers`` object with default values. These values are
    validated the same way values added later are.

    :param defaults: The list of default values for the :class:`Headers`.

    .. versionchanged:: 2.1.0
        Default values are validated the same as values added later.

    .. versionchanged:: 0.9
        This data structure now stores unicode values similar to how the
        multi dicts do it. The main difference is that bytes can be set as
        well which will automatically be latin1 decoded.

    .. versionchanged:: 0.9
        The :meth:`linked` function was removed without replacement as it
        was an API that does not support the changes to the encoding model.
    """

    def __init__(self, defaults=None):
        # Backing store: an ordered list of ``(key, value)`` string tuples.
        self._list = []
        if defaults is not None:
            self.extend(defaults)

    def __getitem__(self, key, _get_mode=False):
        """Look up a header by name, index or slice.

        :param key: An ``int`` returns the ``(key, value)`` tuple stored at
            that position, a ``slice`` returns a new instance of the same
            class built from the sliced items, and a ``str`` returns the
            value of the first header whose name matches case-insensitively.
        :param _get_mode: Internal flag used by :meth:`get` and
            ``__contains__``. Index/slice handling is skipped and a plain
            :exc:`KeyError` is raised for a missing string key.
        :raises exceptions.BadRequestKeyError: If ``key`` is not a string
            (after index/slice handling) or, outside of get mode, if no
            header matches.
        """
        if not _get_mode:
            if isinstance(key, int):
                return self._list[key]
            elif isinstance(key, slice):
                return self.__class__(self._list[key])
        if not isinstance(key, str):
            raise exceptions.BadRequestKeyError(key)
        ikey = key.lower()
        for k, v in self._list:
            if k.lower() == ikey:
                return v
        # micro optimization: if we are in get mode we will catch that
        # exception one stack level down so we can raise a standard
        # key error instead of our special one.
        if _get_mode:
            raise KeyError()
        raise exceptions.BadRequestKeyError(key)

    def __eq__(self, other):
        """Compare two header objects of exactly the same class.

        Keys are compared case-insensitively. The items are compared as
        sets, so ordering and duplicate entries do not affect the result.
        """

        def lowered(item):
            return (item[0].lower(),) + item[1:]

        return other.__class__ is self.__class__ and set(
            map(lowered, other._list)
        ) == set(map(lowered, self._list))

    # Mutable container: explicitly unhashable.
    __hash__ = None

    def get(self, key, default=None, type=None, as_bytes=False):
        """Return the default value if the requested data doesn't exist.
        If `type` is provided and is a callable it should convert the value,
        return it or raise a :exc:`ValueError` if that is not possible. In
        this case the function will return the default as if the value was not
        found:

        >>> d = Headers([('Content-Length', '42')])
        >>> d.get('Content-Length', type=int)
        42

        .. versionadded:: 0.9
            Added support for `as_bytes`.

        :param key: The key to be looked up.
        :param default: The default value to be returned if the key can't
                        be looked up. If not further specified `None` is
                        returned.
        :param type: A callable that is used to cast the value in the
                     :class:`Headers`. If a :exc:`ValueError` is raised
                     by this callable the default value is returned.
        :param as_bytes: return bytes instead of strings.
        """
        try:
            rv = self.__getitem__(key, _get_mode=True)
        except KeyError:
            return default
        if as_bytes:
            rv = rv.encode("latin1")
        if type is None:
            return rv
        try:
            return type(rv)
        except ValueError:
            return default

    def getlist(self, key, type=None, as_bytes=False):
        """Return the list of items for a given key. If that key is not in the
        :class:`Headers`, the return value will be an empty list. Just like
        :meth:`get`, :meth:`getlist` accepts a `type` parameter. All items will
        be converted with the callable defined there.

        .. versionadded:: 0.9
            Added support for `as_bytes`.

        :param key: The key to be looked up.
        :param type: A callable that is used to cast the value in the
                     :class:`Headers`. If a :exc:`ValueError` is raised
                     by this callable the value will be removed from the list.
        :param as_bytes: return bytes instead of strings.
        :return: a :class:`list` of all the values for the key.
        """
        ikey = key.lower()
        result = []
        for k, v in self:
            if k.lower() == ikey:
                if as_bytes:
                    v = v.encode("latin1")
                if type is not None:
                    try:
                        v = type(v)
                    except ValueError:
                        # Values that cannot be converted are skipped.
                        continue
                result.append(v)
        return result

    def get_all(self, name):
        """Return a list of all the values for the named field.

        This method is compatible with the :mod:`wsgiref`
        :meth:`~wsgiref.headers.Headers.get_all` method.
        """
        return self.getlist(name)

    def items(self, lower=False):
        """Yield ``(key, value)`` pairs in stored order.

        :param lower: If true, keys are yielded lowercased.
        """
        for key, value in self:
            if lower:
                key = key.lower()
            yield key, value

    def keys(self, lower=False):
        """Yield every stored key, once per stored header.

        :param lower: If true, keys are yielded lowercased.
        """
        for key, _ in self.items(lower):
            yield key

    def values(self):
        """Yield every stored value in stored order."""
        for _, value in self.items():
            yield value

    def extend(self, *args, **kwargs):
        """Extend headers in this object with items from another object
        containing header items as well as keyword arguments.

        To replace existing keys instead of extending, use
        :meth:`update` instead.

        If provided, the first argument can be another :class:`Headers`
        object, a :class:`MultiDict`, :class:`dict`, or iterable of
        pairs.

        :raises TypeError: If more than one positional argument is given.

        .. versionchanged:: 1.0
            Support :class:`MultiDict`. Allow passing ``kwargs``.
        """
        if len(args) > 1:
            # The message names this method; it previously said "update".
            raise TypeError(f"extend expected at most 1 arguments, got {len(args)}")

        if args:
            for key, value in iter_multi_items(args[0]):
                self.add(key, value)

        for key, value in iter_multi_items(kwargs):
            self.add(key, value)

    def __delitem__(self, key, _index_operation=True):
        """Delete by index/slice, or delete every header matching a name.

        :param key: An ``int``/``slice`` position (only honoured when
            ``_index_operation`` is true) or a header name, which is
            matched case-insensitively.
        :param _index_operation: Internal flag; :meth:`remove` passes
            ``False`` so that the key is always treated as a name.
        """
        if _index_operation and isinstance(key, (int, slice)):
            del self._list[key]
            return
        key = key.lower()
        # Slice assignment mutates the existing list object in place.
        self._list[:] = [(k, v) for k, v in self._list if k.lower() != key]

    def remove(self, key):
        """Remove a key.

        :param key: The key to be removed.
        """
        return self.__delitem__(key, _index_operation=False)

    def pop(self, key=None, default=_missing):
        """Removes and returns a key or index.

        :param key: The key to be popped.  If this is an integer the item at
                    that position is removed, if it's a string the value for
                    that key is.  If the key is omitted or `None` the last
                    item is removed.
        :param default: Returned instead of raising when a string key is
                        not present.
        :return: an item. For `None` and integer keys this is the stored
                 ``(key, value)`` tuple; for string keys it is the first
                 matching value, and every header with that key is removed.
        """
        if key is None:
            return self._list.pop()
        if isinstance(key, int):
            return self._list.pop(key)
        try:
            rv = self[key]
            self.remove(key)
        except KeyError:
            if default is not _missing:
                return default
            raise
        return rv

    def popitem(self):
        """Removes a key or index and returns a (key, value) item."""
        return self.pop()

    def __contains__(self, key):
        """Check if a key is present."""
        try:
            self.__getitem__(key, _get_mode=True)
        except KeyError:
            return False
        return True

    def __iter__(self):
        """Yield ``(key, value)`` tuples."""
        return iter(self._list)

    def __len__(self):
        """Return the number of stored header tuples."""
        return len(self._list)

    def add(self, _key, _value, **kw):
        """Add a new header tuple to the list.

        Keyword arguments can specify additional parameters for the header
        value, with underscores converted to dashes::

        >>> d = Headers()
        >>> d.add('Content-Type', 'text/plain')
        >>> d.add('Content-Disposition', 'attachment', filename='foo.png')

        The keyword argument dumping uses :func:`dump_options_header`
        behind the scenes.

        .. versionadded:: 0.4.1
            keyword arguments were added for :mod:`wsgiref` compatibility.
        """
        if kw:
            _value = _options_header_vkw(_value, kw)
        _key = _unicodify_header_value(_key)
        _value = _unicodify_header_value(_value)
        self._validate_value(_value)
        self._list.append((_key, _value))

    def _validate_value(self, value):
        """Reject values that are not strings or that contain a newline.

        :raises TypeError: If ``value`` is not a :class:`str`.
        :raises ValueError: If ``value`` contains ``\\n`` or ``\\r``.
        """
        if not isinstance(value, str):
            raise TypeError("Value should be a string.")
        if "\n" in value or "\r" in value:
            raise ValueError(
                "Detected newline in header value. This is "
                "a potential security problem"
            )

    def add_header(self, _key, _value, **_kw):
        """Add a new header tuple to the list.

        An alias for :meth:`add` for compatibility with the :mod:`wsgiref`
        :meth:`~wsgiref.headers.Headers.add_header` method.
        """
        self.add(_key, _value, **_kw)

    def clear(self):
        """Clears all headers."""
        del self._list[:]

    def set(self, _key, _value, **kw):
        """Remove all header tuples for `key` and add a new one.  The newly
        added key either appears at the end of the list if there was no
        entry or replaces the first one.

        Keyword arguments can specify additional parameters for the header
        value, with underscores converted to dashes.  See :meth:`add` for
        more information.

        .. versionchanged:: 0.6.1
           :meth:`set` now accepts the same arguments as :meth:`add`.

        :param key: The key to be inserted.
        :param value: The value to be inserted.
        """
        if kw:
            _value = _options_header_vkw(_value, kw)
        _key = _unicodify_header_value(_key)
        _value = _unicodify_header_value(_value)
        self._validate_value(_value)
        if not self._list:
            self._list.append((_key, _value))
            return
        # A single shared iterator: the loop below consumes it up to the
        # first match, and the final comprehension continues from there.
        listiter = iter(self._list)
        ikey = _key.lower()
        for idx, (old_key, _old_value) in enumerate(listiter):
            if old_key.lower() == ikey:
                # replace first occurrence
                self._list[idx] = (_key, _value)
                break
        else:
            self._list.append((_key, _value))
            return
        # Drop every later occurrence of the key, keep everything else.
        self._list[idx + 1 :] = [t for t in listiter if t[0].lower() != ikey]

    def setlist(self, key, values):
        """Remove any existing values for a header and add new ones.

        If ``values`` yields nothing (including ``None``, an empty
        sequence, or an exhausted iterator/generator) the header is
        removed.

        :param key: The header key to set.
        :param values: An iterable of values to set for the key.

        .. versionadded:: 1.0
        """
        # Probe the iterator rather than relying on truthiness: generators
        # are always truthy, so ``if values:`` followed by ``next()`` would
        # leak ``StopIteration`` for an empty one.
        values_iter = iter(values or ())

        try:
            first = next(values_iter)
        except StopIteration:
            self.remove(key)
            return

        self.set(key, first)

        for value in values_iter:
            self.add(key, value)

    def setdefault(self, key, default):
        """Return the first value for the key if it is in the headers,
        otherwise set the header to the value given by ``default`` and
        return that.

        :param key: The header key to get.
        :param default: The value to set for the key if it is not in the
            headers.
        """
        if key in self:
            return self[key]

        self.set(key, default)
        return default

    def setlistdefault(self, key, default):
        """Return the list of values for the key if it is in the
        headers, otherwise set the header to the list of values given
        by ``default`` and return that.

        Unlike :meth:`MultiDict.setlistdefault`, modifying the returned
        list will not affect the headers.

        :param key: The header key to get.
        :param default: An iterable of values to set for the key if it
            is not in the headers.

        .. versionadded:: 1.0
        """
        if key not in self:
            self.setlist(key, default)

        return self.getlist(key)

    def __setitem__(self, key, value):
        """Like :meth:`set` but also supports index/slice based setting.

        For an ``int`` key ``value`` is a single ``(key, value)`` pair; for
        a ``slice`` it is an iterable of such pairs. Every value is
        validated before the list is modified.
        """
        if isinstance(key, (slice, int)):
            if isinstance(key, int):
                value = [value]
            value = [
                (_unicodify_header_value(k), _unicodify_header_value(v))
                for (k, v) in value
            ]
            for (_, v) in value:
                self._validate_value(v)
            if isinstance(key, int):
                self._list[key] = value[0]
            else:
                self._list[key] = value
        else:
            self.set(key, value)

    def update(self, *args, **kwargs):
        """Replace headers in this object with items from another
        headers object and keyword arguments.

        To extend existing keys instead of replacing, use :meth:`extend`
        instead.

        If provided, the first argument can be another :class:`Headers`
        object, a :class:`MultiDict`, :class:`dict`, or iterable of
        pairs.

        :raises TypeError: If more than one positional argument is given.

        .. versionadded:: 1.0
        """
        if len(args) > 1:
            raise TypeError(f"update expected at most 1 arguments, got {len(args)}")

        if args:
            mapping = args[0]

            if isinstance(mapping, (Headers, MultiDict)):
                for key in mapping.keys():
                    self.setlist(key, mapping.getlist(key))
            elif isinstance(mapping, dict):
                for key, value in mapping.items():
                    if isinstance(value, (list, tuple)):
                        self.setlist(key, value)
                    else:
                        self.set(key, value)
            else:
                for key, value in mapping:
                    self.set(key, value)

        for key, value in kwargs.items():
            if isinstance(value, (list, tuple)):
                self.setlist(key, value)
            else:
                self.set(key, value)

    def to_wsgi_list(self):
        """Convert the headers into a list suitable for WSGI.

        :return: list
        """
        return list(self)

    def copy(self):
        """Return a new instance of the same class with the same items."""
        return self.__class__(self._list)

    def __copy__(self):
        return self.copy()

    def __str__(self):
        """Returns formatted headers suitable for HTTP transmission."""
        strs = [f"{key}: {value}" for key, value in self.to_wsgi_list()]
        # The extra element makes the joined text end with a blank line.
        strs.append("\r\n")
        return "\r\n".join(strs)

    def __repr__(self):
        return f"{type(self).__name__}({list(self)!r})"
class CombinedMultiDict(ImmutableMultiDictMixin, MultiDict):
    """A read only :class:`MultiDict` that you can pass multiple :class:`MultiDict`
    instances as sequence and it will combine the return values of all wrapped
    dicts:

    >>> from werkzeug.datastructures import CombinedMultiDict, MultiDict
    >>> post = MultiDict([('foo', 'bar')])
    >>> get = MultiDict([('blub', 'blah')])
    >>> combined = CombinedMultiDict([get, post])
    >>> combined['foo']
    'bar'
    >>> combined['blub']
    'blah'

    This works for all read operations and will raise a `TypeError` for
    methods that usually change data which isn't possible.

    From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a
    subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will
    render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP
    exceptions.
    """

    def __reduce_ex__(self, protocol):
        # Pickle as "rebuild this class from the wrapped dicts".
        return type(self), (self.dicts,)

    def __init__(self, dicts=None):
        """
        :param dicts: An iterable of the multi dicts to combine. Omitting
            it or passing ``None`` creates an empty combination.
        """
        # ``list(None)`` raises ``TypeError``, so substitute an empty tuple
        # for the documented ``None`` default before materialising.
        self.dicts = list(dicts or ())

    @classmethod
    def fromkeys(cls, keys, value=None):
        """Not supported; always raises :exc:`TypeError`."""
        raise TypeError(f"cannot create {cls.__name__!r} instances by fromkeys")

    def __getitem__(self, key):
        """Return the value from the first wrapped dict containing ``key``.

        :raises exceptions.BadRequestKeyError: If no wrapped dict has it.
        """
        for d in self.dicts:
            if key in d:
                return d[key]
        raise exceptions.BadRequestKeyError(key)

    def get(self, key, default=None, type=None):
        """Return the value for ``key`` from the first wrapped dict that
        has it, optionally converted with ``type``.

        :param key: The key to be looked up.
        :param default: Returned when no wrapped dict yields a value.
        :param type: Optional callable used to convert the value. If it
            raises :exc:`ValueError` for one dict, the search continues
            with the next wrapped dict.
        """
        for d in self.dicts:
            if key in d:
                if type is not None:
                    try:
                        return type(d[key])
                    except ValueError:
                        continue
                return d[key]
        return default

    def getlist(self, key, type=None):
        """Return the concatenation of ``getlist(key, type)`` from every
        wrapped dict, in wrapping order.
        """
        rv = []
        for d in self.dicts:
            rv.extend(d.getlist(key, type))
        return rv

    def _keys_impl(self):
        """This function exists so __len__ can be implemented more efficiently,
        saving one list creation from an iterator.
        """
        rv = set()
        rv.update(*self.dicts)
        return rv

    def keys(self):
        """Return the set of keys present in any wrapped dict."""
        return self._keys_impl()

    def __iter__(self):
        return iter(self.keys())

    def items(self, multi=False):
        """Yield ``(key, value)`` pairs from the wrapped dicts.

        :param multi: If true, every pair of every wrapped dict is
            yielded. Otherwise only the first pair seen for each key is.
        """
        found = set()
        for d in self.dicts:
            for key, value in d.items(multi):
                if multi:
                    yield key, value
                elif key not in found:
                    found.add(key)
                    yield key, value

    def values(self):
        """Yield the value of each pair produced by :meth:`items`."""
        for _key, value in self.items():
            yield value

    def lists(self):
        """Return a list of ``(key, values)`` pairs where ``values`` merges
        the value lists of all wrapped dicts for that key.
        """
        rv = {}
        for d in self.dicts:
            for key, values in d.lists():
                rv.setdefault(key, []).extend(values)
        return list(rv.items())

    def listvalues(self):
        """Return an iterator over the merged value lists of :meth:`lists`."""
        return (x[1] for x in self.lists())

    def copy(self):
        """Return a shallow mutable copy of this object.

        This returns a :class:`MultiDict` representing the data at the
        time of copying. The copy will no longer reflect changes to the
        wrapped dicts.

        .. versionchanged:: 0.15
            Return a mutable :class:`MultiDict`.
        """
        return MultiDict(self)

    def to_dict(self, flat=True):
        """Return the contents as regular dict.  If `flat` is `True` the
        returned dict will only have the first item present, if `flat` is
        `False` all values will be returned as lists.

        :param flat: If set to `False` the dict returned will have lists
                     with all the values in it.  Otherwise it will only
                     contain the first item for each key.
        :return: a :class:`dict`
        """
        if flat:
            return dict(self.items())

        return dict(self.lists())

    def __len__(self):
        return len(self._keys_impl())

    def __contains__(self, key):
        return any(key in d for d in self.dicts)

    def __repr__(self):
        return f"{type(self).__name__}({self.dicts!r})"
class Accept(ImmutableList):
    """A list of ``(value, quality)`` tuples, as found in the ``Accept*``
    family of headers. On construction the tuples are ordered by
    specificity and quality, best first.

    Besides the regular list behavior, lookups and containment checks
    follow the matching rules of the header in question:

    >>> a = CharsetAccept([('ISO-8859-1', 1), ('utf-8', 0.7)])
    >>> a.best
    'ISO-8859-1'
    >>> 'iso-8859-1' in a
    True
    >>> 'UTF8' in a
    True
    >>> 'utf7' in a
    False

    Item lookup with a string returns the quality of that value:

    >>> print(a['utf-8'])
    0.7
    >>> a['utf7']
    0

    .. versionchanged:: 0.5
       :class:`Accept` objects are forced immutable now.

    .. versionchanged:: 1.0.0
       :class:`Accept` internal values are no longer ordered
       alphabetically for equal quality tags. Instead the initial
       order is preserved.

    """

    def __init__(self, values=()):
        # ``None`` means the header was not sent at all.
        if values is None:
            list.__init__(self)
            self.provided = False
            return

        # Copying another Accept keeps its order and its ``provided`` flag.
        if isinstance(values, Accept):
            self.provided = values.provided
            list.__init__(self, values)
            return

        self.provided = True

        def rank(pair):
            return (self._specificity(pair[0]), pair[1])

        list.__init__(self, sorted(values, key=rank, reverse=True))

    def _specificity(self, value):
        """Returns a tuple describing the value's specificity."""
        is_concrete = value != "*"
        return (is_concrete,)

    def _value_matches(self, value, item):
        """Check if a value matches a given accept item."""
        if item == "*":
            return True
        return item.lower() == value.lower()

    def __getitem__(self, key):
        """String keys return the quality stored for that value (``0`` if
        it is not accepted); any other key is a regular list index or
        slice lookup.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)
        return self.quality(key)

    def quality(self, key):
        """Returns the quality of the key.

        .. versionadded:: 0.6
           In previous versions you had to use the item-lookup syntax
           (eg: ``obj[key]`` instead of ``obj.quality(key)``)
        """
        qualities = (q for item, q in self if self._value_matches(key, item))
        return next(qualities, 0)

    def __contains__(self, value):
        return any(self._value_matches(value, item) for item, _quality in self)

    def __repr__(self):
        rendered = [f"({value!r}, {quality})" for value, quality in self]
        return f"{type(self).__name__}([{', '.join(rendered)}])"

    def index(self, key):
        """Get the position of an entry or raise :exc:`ValueError`.

        :param key: The key to be looked up.

        .. versionchanged:: 0.5
           This used to raise :exc:`IndexError`, which was inconsistent
           with the list API.
        """
        if not isinstance(key, str):
            return list.index(self, key)

        for position, (item, _quality) in enumerate(self):
            if self._value_matches(key, item):
                return position

        raise ValueError(key)

    def find(self, key):
        """Get the position of an entry or return -1.

        :param key: The key to be looked up.
        """
        try:
            position = self.index(key)
        except ValueError:
            position = -1
        return position

    def values(self):
        """Iterate over all values."""
        for entry in self:
            yield entry[0]

    def to_header(self):
        """Convert the header set into an HTTP header string."""
        # A quality of exactly 1 is implied and therefore not written out.
        parts = [
            value if quality == 1 else f"{value};q={quality}"
            for value, quality in self
        ]
        return ",".join(parts)

    def __str__(self):
        return self.to_header()

    def _best_single_match(self, match):
        """Return the first ``(client_item, quality)`` pair accepting
        ``match``, or ``None`` when nothing does.
        """
        # self is sorted by specificity descending, so the first hit wins.
        hits = (
            (client_item, quality)
            for client_item, quality in self
            if self._value_matches(match, client_item)
        )
        return next(hits, None)

    def best_match(self, matches, default=None):
        """Returns the best match from a list of possible matches based
        on the specificity and quality of the client. If two items have the
        same quality and specificity, the one is returned that comes first.

        :param matches: a list of matches to check for
        :param default: the value that is returned if none match
        """
        chosen = default
        top_quality = -1
        top_specificity = (-1,)

        for candidate in matches:
            found = self._best_single_match(candidate)
            if not found:
                continue

            accepted, quality = found
            specificity = self._specificity(accepted)

            # Explicitly rejected (q <= 0) or strictly worse than the leader.
            if quality <= 0 or quality < top_quality:
                continue

            # better quality or same quality but more specific => better match
            if quality > top_quality or specificity > top_specificity:
                chosen = candidate
                top_quality = quality
                top_specificity = specificity

        return chosen

    @property
    def best(self):
        """The best match as value."""
        if not self:
            return None
        return self[0][0]
class LanguageAccept(Accept):
    """Like :class:`Accept` but with normalization for language tags."""

    def _value_matches(self, value, item):
        """Match on the normalized tags; ``*`` from the client matches
        everything.
        """
        return item == "*" or _normalize_lang(value) == _normalize_lang(item)

    def best_match(self, matches, default=None):
        """Given a list of supported values, finds the best match from
        the list of accepted values.

        Language tags are normalized for the purpose of matching, but
        are returned unchanged.

        If no exact match is found, this will fall back to matching
        the first subtag (primary language only), first with the
        accepted values then with the match values. This partial is not
        applied to any other language subtags.

        The default is returned if no exact or fallback match is found.

        :param matches: A list of supported languages to find a match.
        :param default: The value that is returned if none match.
        """
        # Look for an exact match first. If a client accepts "en-US",
        # "en-US" is a valid match at this point.
        result = super().best_match(matches)

        if result is not None:
            return result

        # Fall back to accepting primary tags. If a client accepts
        # "en-US", "en" is a valid match at this point. Need to use
        # re.split to account for 2 or 3 letter codes.
        fallback = Accept(
            [(_locale_delim_re.split(item[0], 1)[0], item[1]) for item in self]
        )
        result = fallback.best_match(matches)

        if result is not None:
            return result

        # Fall back to matching primary tags. If the client accepts
        # "en", "en-US" is a valid match at this point.
        fallback_matches = [_locale_delim_re.split(item, 1)[0] for item in matches]
        result = super().best_match(fallback_matches)

        # Return a value from the original match list. ``result`` is one
        # of the primary tags computed above, so pick the first original
        # value whose primary tag is exactly that one. A plain prefix
        # test would confuse tags such as "fi" and "fil-PH".
        if result is not None:
            return next(
                item
                for item, primary in zip(matches, fallback_matches)
                if primary == result
            )

        return default
It + has accessors for all the cache-control directives specified in RFC 2616. + The class does not differentiate between request and response directives. + + Because the cache-control directives in the HTTP header use dashes the + python descriptors use underscores for that. + + To get a header of the :class:`CacheControl` object again you can convert + the object into a string or call the :meth:`to_header` method. If you plan + to subclass it and add your own items have a look at the sourcecode for + that class. + + .. versionchanged:: 2.1.0 + Setting int properties such as ``max_age`` will convert the + value to an int. + + .. versionchanged:: 0.4 + + Setting `no_cache` or `private` to boolean `True` will set the implicit + none-value which is ``*``: + + >>> cc = ResponseCacheControl() + >>> cc.no_cache = True + >>> cc + + >>> cc.no_cache + '*' + >>> cc.no_cache = None + >>> cc + + + In versions before 0.5 the behavior documented here affected the now + no longer existing `CacheControl` class. 
class RequestCacheControl(ImmutableDictMixin, _CacheControl):
    """Immutable cache control object for requests, exposing the
    request-relevant cache control directives as properties.

    Converting the object to a string, or calling :meth:`to_header`,
    produces the header value again. Subclasses that need additional
    directives can declare them the same way the properties below are
    declared.

    .. versionchanged:: 2.1.0
        Setting int properties such as ``max_age`` will convert the
        value to an int.

    .. versionadded:: 0.5
       In previous versions a `CacheControl` class existed that was used
       both for request and response.
    """

    # Each accessor maps a dashed directive name to an underscored attribute.
    max_stale = cache_control_property(key="max-stale", empty="*", type=int)
    min_fresh = cache_control_property(key="min-fresh", empty="*", type=int)
    only_if_cached = cache_control_property(
        key="only-if-cached", empty=None, type=bool
    )
+ """ + return property( + lambda x: x._get_value(key), + lambda x, v: x._set_value(key, v), + lambda x: x._del_value(key), + f"accessor for {key!r}", + ) + + +class ContentSecurityPolicy(UpdateDictMixin, dict): + """Subclass of a dict that stores values for a Content Security Policy + header. It has accessors for all the level 3 policies. + + Because the csp directives in the HTTP header use dashes the + python descriptors use underscores for that. + + To get a header of the :class:`ContentSecuirtyPolicy` object again + you can convert the object into a string or call the + :meth:`to_header` method. If you plan to subclass it and add your + own items have a look at the sourcecode for that class. + + .. versionadded:: 1.0.0 + Support for Content Security Policy headers was added. + + """ + + base_uri = csp_property("base-uri") + child_src = csp_property("child-src") + connect_src = csp_property("connect-src") + default_src = csp_property("default-src") + font_src = csp_property("font-src") + form_action = csp_property("form-action") + frame_ancestors = csp_property("frame-ancestors") + frame_src = csp_property("frame-src") + img_src = csp_property("img-src") + manifest_src = csp_property("manifest-src") + media_src = csp_property("media-src") + navigate_to = csp_property("navigate-to") + object_src = csp_property("object-src") + prefetch_src = csp_property("prefetch-src") + plugin_types = csp_property("plugin-types") + report_to = csp_property("report-to") + report_uri = csp_property("report-uri") + sandbox = csp_property("sandbox") + script_src = csp_property("script-src") + script_src_attr = csp_property("script-src-attr") + script_src_elem = csp_property("script-src-elem") + style_src = csp_property("style-src") + style_src_attr = csp_property("style-src-attr") + style_src_elem = csp_property("style-src-elem") + worker_src = csp_property("worker-src") + + def __init__(self, values=(), on_update=None): + dict.__init__(self, values or ()) + self.on_update = 
on_update + self.provided = values is not None + + def _get_value(self, key): + """Used internally by the accessor properties.""" + return self.get(key) + + def _set_value(self, key, value): + """Used internally by the accessor properties.""" + if value is None: + self.pop(key, None) + else: + self[key] = value + + def _del_value(self, key): + """Used internally by the accessor properties.""" + if key in self: + del self[key] + + def to_header(self): + """Convert the stored values into a cache control header.""" + return http.dump_csp_header(self) + + def __str__(self): + return self.to_header() + + def __repr__(self): + kv_str = " ".join(f"{k}={v!r}" for k, v in sorted(self.items())) + return f"<{type(self).__name__} {kv_str}>" + + +class CallbackDict(UpdateDictMixin, dict): + """A dict that calls a function passed every time something is changed. + The function is passed the dict instance. + """ + + def __init__(self, initial=None, on_update=None): + dict.__init__(self, initial or ()) + self.on_update = on_update + + def __repr__(self): + return f"<{type(self).__name__} {dict.__repr__(self)}>" + + +class HeaderSet(MutableSet): + """Similar to the :class:`ETags` class this implements a set-like structure. + Unlike :class:`ETags` this is case insensitive and used for vary, allow, and + content-language headers. + + If not constructed using the :func:`parse_set_header` function the + instantiation works like this: + + >>> hs = HeaderSet(['foo', 'bar', 'baz']) + >>> hs + HeaderSet(['foo', 'bar', 'baz']) + """ + + def __init__(self, headers=None, on_update=None): + self._headers = list(headers or ()) + self._set = {x.lower() for x in self._headers} + self.on_update = on_update + + def add(self, header): + """Add a new header to the set.""" + self.update((header,)) + + def remove(self, header): + """Remove a header from the set. This raises an :exc:`KeyError` if the + header is not in the set. + + .. 
versionchanged:: 0.5 + In older versions a :exc:`IndexError` was raised instead of a + :exc:`KeyError` if the object was missing. + + :param header: the header to be removed. + """ + key = header.lower() + if key not in self._set: + raise KeyError(header) + self._set.remove(key) + for idx, key in enumerate(self._headers): + if key.lower() == header: + del self._headers[idx] + break + if self.on_update is not None: + self.on_update(self) + + def update(self, iterable): + """Add all the headers from the iterable to the set. + + :param iterable: updates the set with the items from the iterable. + """ + inserted_any = False + for header in iterable: + key = header.lower() + if key not in self._set: + self._headers.append(header) + self._set.add(key) + inserted_any = True + if inserted_any and self.on_update is not None: + self.on_update(self) + + def discard(self, header): + """Like :meth:`remove` but ignores errors. + + :param header: the header to be discarded. + """ + try: + self.remove(header) + except KeyError: + pass + + def find(self, header): + """Return the index of the header in the set or return -1 if not found. + + :param header: the header to be looked up. + """ + header = header.lower() + for idx, item in enumerate(self._headers): + if item.lower() == header: + return idx + return -1 + + def index(self, header): + """Return the index of the header in the set or raise an + :exc:`IndexError`. + + :param header: the header to be looked up. + """ + rv = self.find(header) + if rv < 0: + raise IndexError(header) + return rv + + def clear(self): + """Clear the set.""" + self._set.clear() + del self._headers[:] + if self.on_update is not None: + self.on_update(self) + + def as_set(self, preserve_casing=False): + """Return the set as real python set type. When calling this, all + the items are converted to lowercase and the ordering is lost. 
+ + :param preserve_casing: if set to `True` the items in the set returned + will have the original case like in the + :class:`HeaderSet`, otherwise they will + be lowercase. + """ + if preserve_casing: + return set(self._headers) + return set(self._set) + + def to_header(self): + """Convert the header set into an HTTP header string.""" + return ", ".join(map(http.quote_header_value, self._headers)) + + def __getitem__(self, idx): + return self._headers[idx] + + def __delitem__(self, idx): + rv = self._headers.pop(idx) + self._set.remove(rv.lower()) + if self.on_update is not None: + self.on_update(self) + + def __setitem__(self, idx, value): + old = self._headers[idx] + self._set.remove(old.lower()) + self._headers[idx] = value + self._set.add(value.lower()) + if self.on_update is not None: + self.on_update(self) + + def __contains__(self, header): + return header.lower() in self._set + + def __len__(self): + return len(self._set) + + def __iter__(self): + return iter(self._headers) + + def __bool__(self): + return bool(self._set) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return f"{type(self).__name__}({self._headers!r})" + + +class ETags(Collection): + """A set that can be used to check if one etag is present in a collection + of etags. + """ + + def __init__(self, strong_etags=None, weak_etags=None, star_tag=False): + if not star_tag and strong_etags: + self._strong = frozenset(strong_etags) + else: + self._strong = frozenset() + + self._weak = frozenset(weak_etags or ()) + self.star_tag = star_tag + + def as_set(self, include_weak=False): + """Convert the `ETags` object into a python set. 
Per default all the + weak etags are not part of this set.""" + rv = set(self._strong) + if include_weak: + rv.update(self._weak) + return rv + + def is_weak(self, etag): + """Check if an etag is weak.""" + return etag in self._weak + + def is_strong(self, etag): + """Check if an etag is strong.""" + return etag in self._strong + + def contains_weak(self, etag): + """Check if an etag is part of the set including weak and strong tags.""" + return self.is_weak(etag) or self.contains(etag) + + def contains(self, etag): + """Check if an etag is part of the set ignoring weak tags. + It is also possible to use the ``in`` operator. + """ + if self.star_tag: + return True + return self.is_strong(etag) + + def contains_raw(self, etag): + """When passed a quoted tag it will check if this tag is part of the + set. If the tag is weak it is checked against weak and strong tags, + otherwise strong only.""" + etag, weak = http.unquote_etag(etag) + if weak: + return self.contains_weak(etag) + return self.contains(etag) + + def to_header(self): + """Convert the etags set into a HTTP header string.""" + if self.star_tag: + return "*" + return ", ".join( + [f'"{x}"' for x in self._strong] + [f'W/"{x}"' for x in self._weak] + ) + + def __call__(self, etag=None, data=None, include_weak=False): + if [etag, data].count(None) != 1: + raise TypeError("either tag or data required, but at least one") + if etag is None: + etag = http.generate_etag(data) + if include_weak: + if etag in self._weak: + return True + return etag in self._strong + + def __bool__(self): + return bool(self.star_tag or self._strong or self._weak) + + def __str__(self): + return self.to_header() + + def __len__(self): + return len(self._strong) + + def __iter__(self): + return iter(self._strong) + + def __contains__(self, etag): + return self.contains(etag) + + def __repr__(self): + return f"<{type(self).__name__} {str(self)!r}>" + + +class IfRange: + """Very simple object that represents the `If-Range` header in 
parsed + form. It will either have neither a etag or date or one of either but + never both. + + .. versionadded:: 0.7 + """ + + def __init__(self, etag=None, date=None): + #: The etag parsed and unquoted. Ranges always operate on strong + #: etags so the weakness information is not necessary. + self.etag = etag + #: The date in parsed format or `None`. + self.date = date + + def to_header(self): + """Converts the object back into an HTTP header.""" + if self.date is not None: + return http.http_date(self.date) + if self.etag is not None: + return http.quote_etag(self.etag) + return "" + + def __str__(self): + return self.to_header() + + def __repr__(self): + return f"<{type(self).__name__} {str(self)!r}>" + + +class Range: + """Represents a ``Range`` header. All methods only support only + bytes as the unit. Stores a list of ranges if given, but the methods + only work if only one range is provided. + + :raise ValueError: If the ranges provided are invalid. + + .. versionchanged:: 0.15 + The ranges passed in are validated. + + .. versionadded:: 0.7 + """ + + def __init__(self, units, ranges): + #: The units of this range. Usually "bytes". + self.units = units + #: A list of ``(begin, end)`` tuples for the range header provided. + #: The ranges are non-inclusive. + self.ranges = ranges + + for start, end in ranges: + if start is None or (end is not None and (start < 0 or start >= end)): + raise ValueError(f"{(start, end)} is not a valid range.") + + def range_for_length(self, length): + """If the range is for bytes, the length is not None and there is + exactly one range and it is satisfiable it returns a ``(start, stop)`` + tuple, otherwise `None`. 
+ """ + if self.units != "bytes" or length is None or len(self.ranges) != 1: + return None + start, end = self.ranges[0] + if end is None: + end = length + if start < 0: + start += length + if http.is_byte_range_valid(start, end, length): + return start, min(end, length) + return None + + def make_content_range(self, length): + """Creates a :class:`~werkzeug.datastructures.ContentRange` object + from the current range and given content length. + """ + rng = self.range_for_length(length) + if rng is not None: + return ContentRange(self.units, rng[0], rng[1], length) + return None + + def to_header(self): + """Converts the object back into an HTTP header.""" + ranges = [] + for begin, end in self.ranges: + if end is None: + ranges.append(f"{begin}-" if begin >= 0 else str(begin)) + else: + ranges.append(f"{begin}-{end - 1}") + return f"{self.units}={','.join(ranges)}" + + def to_content_range_header(self, length): + """Converts the object into `Content-Range` HTTP header, + based on given length + """ + range = self.range_for_length(length) + if range is not None: + return f"{self.units} {range[0]}-{range[1] - 1}/{length}" + return None + + def __str__(self): + return self.to_header() + + def __repr__(self): + return f"<{type(self).__name__} {str(self)!r}>" + + +def _callback_property(name): + def fget(self): + return getattr(self, name) + + def fset(self, value): + setattr(self, name, value) + if self.on_update is not None: + self.on_update(self) + + return property(fget, fset) + + +class ContentRange: + """Represents the content range header. + + .. versionadded:: 0.7 + """ + + def __init__(self, units, start, stop, length=None, on_update=None): + assert http.is_byte_range_valid(start, stop, length), "Bad range provided" + self.on_update = on_update + self.set(start, stop, length, units) + + #: The units to use, usually "bytes" + units = _callback_property("_units") + #: The start point of the range or `None`. 
+ start = _callback_property("_start") + #: The stop point of the range (non-inclusive) or `None`. Can only be + #: `None` if also start is `None`. + stop = _callback_property("_stop") + #: The length of the range or `None`. + length = _callback_property("_length") + + def set(self, start, stop, length=None, units="bytes"): + """Simple method to update the ranges.""" + assert http.is_byte_range_valid(start, stop, length), "Bad range provided" + self._units = units + self._start = start + self._stop = stop + self._length = length + if self.on_update is not None: + self.on_update(self) + + def unset(self): + """Sets the units to `None` which indicates that the header should + no longer be used. + """ + self.set(None, None, units=None) + + def to_header(self): + if self.units is None: + return "" + if self.length is None: + length = "*" + else: + length = self.length + if self.start is None: + return f"{self.units} */{length}" + return f"{self.units} {self.start}-{self.stop - 1}/{length}" + + def __bool__(self): + return self.units is not None + + def __str__(self): + return self.to_header() + + def __repr__(self): + return f"<{type(self).__name__} {str(self)!r}>" + + +class Authorization(ImmutableDictMixin, dict): + """Represents an ``Authorization`` header sent by the client. + + This is returned by + :func:`~werkzeug.http.parse_authorization_header`. It can be useful + to create the object manually to pass to the test + :class:`~werkzeug.test.Client`. + + .. versionchanged:: 0.5 + This object became immutable. + """ + + def __init__(self, auth_type, data=None): + dict.__init__(self, data or {}) + self.type = auth_type + + @property + def username(self): + """The username transmitted. This is set for both basic and digest + auth all the time. + """ + return self.get("username") + + @property + def password(self): + """When the authentication type is basic this is the password + transmitted by the client, else `None`. 
+ """ + return self.get("password") + + @property + def realm(self): + """This is the server realm sent back for HTTP digest auth.""" + return self.get("realm") + + @property + def nonce(self): + """The nonce the server sent for digest auth, sent back by the client. + A nonce should be unique for every 401 response for HTTP digest auth. + """ + return self.get("nonce") + + @property + def uri(self): + """The URI from Request-URI of the Request-Line; duplicated because + proxies are allowed to change the Request-Line in transit. HTTP + digest auth only. + """ + return self.get("uri") + + @property + def nc(self): + """The nonce count value transmitted by clients if a qop-header is + also transmitted. HTTP digest auth only. + """ + return self.get("nc") + + @property + def cnonce(self): + """If the server sent a qop-header in the ``WWW-Authenticate`` + header, the client has to provide this value for HTTP digest auth. + See the RFC for more details. + """ + return self.get("cnonce") + + @property + def response(self): + """A string of 32 hex digits computed as defined in RFC 2617, which + proves that the user knows a password. Digest auth only. + """ + return self.get("response") + + @property + def opaque(self): + """The opaque header from the server returned unchanged by the client. + It is recommended that this string be base64 or hexadecimal data. + Digest auth only. + """ + return self.get("opaque") + + @property + def qop(self): + """Indicates what "quality of protection" the client has applied to + the message for HTTP digest auth. Note that this is a single token, + not a quoted list of alternatives as in WWW-Authenticate. + """ + return self.get("qop") + + def to_header(self): + """Convert to a string value for an ``Authorization`` header. + + .. versionadded:: 2.0 + Added to support passing authorization to the test client. 
+ """ + if self.type == "basic": + value = base64.b64encode( + f"{self.username}:{self.password}".encode() + ).decode("utf8") + return f"Basic {value}" + + if self.type == "digest": + return f"Digest {http.dump_header(self)}" + + raise ValueError(f"Unsupported type {self.type!r}.") + + +def auth_property(name, doc=None): + """A static helper function for Authentication subclasses to add + extra authentication system properties onto a class:: + + class FooAuthenticate(WWWAuthenticate): + special_realm = auth_property('special_realm') + + For more information have a look at the sourcecode to see how the + regular properties (:attr:`realm` etc.) are implemented. + """ + + def _set_value(self, value): + if value is None: + self.pop(name, None) + else: + self[name] = str(value) + + return property(lambda x: x.get(name), _set_value, doc=doc) + + +def _set_property(name, doc=None): + def fget(self): + def on_update(header_set): + if not header_set and name in self: + del self[name] + elif header_set: + self[name] = header_set.to_header() + + return http.parse_set_header(self.get(name), on_update) + + return property(fget, doc=doc) + + +class WWWAuthenticate(UpdateDictMixin, dict): + """Provides simple access to `WWW-Authenticate` headers.""" + + #: list of keys that require quoting in the generated header + _require_quoting = frozenset(["domain", "nonce", "opaque", "realm", "qop"]) + + def __init__(self, auth_type=None, values=None, on_update=None): + dict.__init__(self, values or ()) + if auth_type: + self["__auth_type__"] = auth_type + self.on_update = on_update + + def set_basic(self, realm="authentication required"): + """Clear the auth info and enable basic auth.""" + dict.clear(self) + dict.update(self, {"__auth_type__": "basic", "realm": realm}) + if self.on_update: + self.on_update(self) + + def set_digest( + self, realm, nonce, qop=("auth",), opaque=None, algorithm=None, stale=False + ): + """Clear the auth info and enable digest auth.""" + d = { + 
"__auth_type__": "digest", + "realm": realm, + "nonce": nonce, + "qop": http.dump_header(qop), + } + if stale: + d["stale"] = "TRUE" + if opaque is not None: + d["opaque"] = opaque + if algorithm is not None: + d["algorithm"] = algorithm + dict.clear(self) + dict.update(self, d) + if self.on_update: + self.on_update(self) + + def to_header(self): + """Convert the stored values into a WWW-Authenticate header.""" + d = dict(self) + auth_type = d.pop("__auth_type__", None) or "basic" + kv_items = ( + (k, http.quote_header_value(v, allow_token=k not in self._require_quoting)) + for k, v in d.items() + ) + kv_string = ", ".join([f"{k}={v}" for k, v in kv_items]) + return f"{auth_type.title()} {kv_string}" + + def __str__(self): + return self.to_header() + + def __repr__(self): + return f"<{type(self).__name__} {self.to_header()!r}>" + + type = auth_property( + "__auth_type__", + doc="""The type of the auth mechanism. HTTP currently specifies + ``Basic`` and ``Digest``.""", + ) + realm = auth_property( + "realm", + doc="""A string to be displayed to users so they know which + username and password to use. This string should contain at + least the name of the host performing the authentication and + might additionally indicate the collection of users who might + have access.""", + ) + domain = _set_property( + "domain", + doc="""A list of URIs that define the protection space. If a URI + is an absolute path, it is relative to the canonical root URL of + the server being accessed.""", + ) + nonce = auth_property( + "nonce", + doc=""" + A server-specified data string which should be uniquely generated + each time a 401 response is made. It is recommended that this + string be base64 or hexadecimal data.""", + ) + opaque = auth_property( + "opaque", + doc="""A string of data, specified by the server, which should + be returned by the client unchanged in the Authorization header + of subsequent requests with URIs in the same protection space. 
+ It is recommended that this string be base64 or hexadecimal + data.""", + ) + algorithm = auth_property( + "algorithm", + doc="""A string indicating a pair of algorithms used to produce + the digest and a checksum. If this is not present it is assumed + to be "MD5". If the algorithm is not understood, the challenge + should be ignored (and a different one used, if there is more + than one).""", + ) + qop = _set_property( + "qop", + doc="""A set of quality-of-privacy directives such as auth and + auth-int.""", + ) + + @property + def stale(self): + """A flag, indicating that the previous request from the client + was rejected because the nonce value was stale. + """ + val = self.get("stale") + if val is not None: + return val.lower() == "true" + + @stale.setter + def stale(self, value): + if value is None: + self.pop("stale", None) + else: + self["stale"] = "TRUE" if value else "FALSE" + + auth_property = staticmethod(auth_property) + + +class FileStorage: + """The :class:`FileStorage` class is a thin wrapper over incoming files. + It is used by the request object to represent uploaded files. All the + attributes of the wrapper stream are proxied by the file storage so + it's possible to do ``storage.read()`` instead of the long form + ``storage.stream.read()``. + """ + + def __init__( + self, + stream=None, + filename=None, + name=None, + content_type=None, + content_length=None, + headers=None, + ): + self.name = name + self.stream = stream or BytesIO() + + # If no filename is provided, attempt to get the filename from + # the stream object. Python names special streams like + # ```` with angular brackets, skip these streams. 
+ if filename is None: + filename = getattr(stream, "name", None) + + if filename is not None: + filename = os.fsdecode(filename) + + if filename and filename[0] == "<" and filename[-1] == ">": + filename = None + else: + filename = os.fsdecode(filename) + + self.filename = filename + + if headers is None: + headers = Headers() + self.headers = headers + if content_type is not None: + headers["Content-Type"] = content_type + if content_length is not None: + headers["Content-Length"] = str(content_length) + + def _parse_content_type(self): + if not hasattr(self, "_parsed_content_type"): + self._parsed_content_type = http.parse_options_header(self.content_type) + + @property + def content_type(self): + """The content-type sent in the header. Usually not available""" + return self.headers.get("content-type") + + @property + def content_length(self): + """The content-length sent in the header. Usually not available""" + try: + return int(self.headers.get("content-length") or 0) + except ValueError: + return 0 + + @property + def mimetype(self): + """Like :attr:`content_type`, but without parameters (eg, without + charset, type etc.) and always lowercase. For example if the content + type is ``text/HTML; charset=utf-8`` the mimetype would be + ``'text/html'``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[0].lower() + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the content + type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[1] + + def save(self, dst, buffer_size=16384): + """Save the file to a destination path or file object. If the + destination is a file object you have to close it yourself after the + call. The buffer size is the number of bytes held in memory during + the copy process. It defaults to 16KB. 
+ + For secure file saving also have a look at :func:`secure_filename`. + + :param dst: a filename, :class:`os.PathLike`, or open file + object to write to. + :param buffer_size: Passed as the ``length`` parameter of + :func:`shutil.copyfileobj`. + + .. versionchanged:: 1.0 + Supports :mod:`pathlib`. + """ + from shutil import copyfileobj + + close_dst = False + + if hasattr(dst, "__fspath__"): + dst = fspath(dst) + + if isinstance(dst, str): + dst = open(dst, "wb") + close_dst = True + + try: + copyfileobj(self.stream, dst, buffer_size) + finally: + if close_dst: + dst.close() + + def close(self): + """Close the underlying file if possible.""" + try: + self.stream.close() + except Exception: + pass + + def __bool__(self): + return bool(self.filename) + + def __getattr__(self, name): + try: + return getattr(self.stream, name) + except AttributeError: + # SpooledTemporaryFile doesn't implement IOBase, get the + # attribute from its backing file instead. + # https://github.com/python/cpython/pull/3249 + if hasattr(self.stream, "_file"): + return getattr(self.stream._file, name) + raise + + def __iter__(self): + return iter(self.stream) + + def __repr__(self): + return f"<{type(self).__name__}: {self.filename!r} ({self.content_type!r})>" + + +# circular dependencies +from . 
import http diff --git a/src/werkzeug/datastructures.pyi b/src/werkzeug/datastructures.pyi new file mode 100644 index 0000000..7bf7297 --- /dev/null +++ b/src/werkzeug/datastructures.pyi @@ -0,0 +1,921 @@ +from datetime import datetime +from os import PathLike +from typing import Any +from typing import Callable +from typing import Collection +from typing import Dict +from typing import FrozenSet +from typing import Generic +from typing import Hashable +from typing import IO +from typing import Iterable +from typing import Iterator +from typing import List +from typing import Mapping +from typing import NoReturn +from typing import Optional +from typing import overload +from typing import Set +from typing import Tuple +from typing import Type +from typing import TypeVar +from typing import Union +from _typeshed import SupportsKeysAndGetItem +from _typeshed.wsgi import WSGIEnvironment + +from typing_extensions import Literal +from typing_extensions import SupportsIndex + +K = TypeVar("K") +V = TypeVar("V") +T = TypeVar("T") +D = TypeVar("D") +_CD = TypeVar("_CD", bound="CallbackDict") + +def is_immutable(self: object) -> NoReturn: ... +def iter_multi_items( + mapping: Union[Mapping[K, Union[V, Iterable[V]]], Iterable[Tuple[K, V]]] +) -> Iterator[Tuple[K, V]]: ... + +class ImmutableListMixin(List[V]): + _hash_cache: Optional[int] + def __hash__(self) -> int: ... # type: ignore + def __delitem__(self, key: Union[SupportsIndex, slice]) -> NoReturn: ... + def __iadd__(self, other: t.Any) -> NoReturn: ... # type: ignore + def __imul__(self, other: SupportsIndex) -> NoReturn: ... + def __setitem__( # type: ignore + self, key: Union[int, slice], value: V + ) -> NoReturn: ... + def append(self, value: V) -> NoReturn: ... + def remove(self, value: V) -> NoReturn: ... + def extend(self, values: Iterable[V]) -> NoReturn: ... + def insert(self, pos: SupportsIndex, value: V) -> NoReturn: ... + def pop(self, index: SupportsIndex = -1) -> NoReturn: ... 
+ def reverse(self) -> NoReturn: ... + def sort( + self, key: Optional[Callable[[V], Any]] = None, reverse: bool = False + ) -> NoReturn: ... + +class ImmutableList(ImmutableListMixin[V]): ... + +class ImmutableDictMixin(Dict[K, V]): + _hash_cache: Optional[int] + @classmethod + def fromkeys( # type: ignore + cls, keys: Iterable[K], value: Optional[V] = None + ) -> ImmutableDictMixin[K, V]: ... + def _iter_hashitems(self) -> Iterable[Hashable]: ... + def __hash__(self) -> int: ... # type: ignore + def setdefault(self, key: K, default: Optional[V] = None) -> NoReturn: ... + def update(self, *args: Any, **kwargs: V) -> NoReturn: ... + def pop(self, key: K, default: Optional[V] = None) -> NoReturn: ... # type: ignore + def popitem(self) -> NoReturn: ... + def __setitem__(self, key: K, value: V) -> NoReturn: ... + def __delitem__(self, key: K) -> NoReturn: ... + def clear(self) -> NoReturn: ... + +class ImmutableMultiDictMixin(ImmutableDictMixin[K, V]): + def _iter_hashitems(self) -> Iterable[Hashable]: ... + def add(self, key: K, value: V) -> NoReturn: ... + def popitemlist(self) -> NoReturn: ... + def poplist(self, key: K) -> NoReturn: ... + def setlist(self, key: K, new_list: Iterable[V]) -> NoReturn: ... + def setlistdefault( + self, key: K, default_list: Optional[Iterable[V]] = None + ) -> NoReturn: ... + +def _calls_update(name: str) -> Callable[[UpdateDictMixin[K, V]], Any]: ... + +class UpdateDictMixin(Dict[K, V]): + on_update: Optional[Callable[[UpdateDictMixin[K, V]], None]] + def setdefault(self, key: K, default: Optional[V] = None) -> V: ... + @overload + def pop(self, key: K) -> V: ... + @overload + def pop(self, key: K, default: Union[V, T] = ...) -> Union[V, T]: ... + def __setitem__(self, key: K, value: V) -> None: ... + def __delitem__(self, key: K) -> None: ... + def clear(self) -> None: ... + def popitem(self) -> Tuple[K, V]: ... + @overload + def update(self, __m: SupportsKeysAndGetItem[K, V], **kwargs: V) -> None: ... 
+ @overload + def update(self, __m: Iterable[Tuple[K, V]], **kwargs: V) -> None: ... + @overload + def update(self, **kwargs: V) -> None: ... + +class TypeConversionDict(Dict[K, V]): + @overload + def get(self, key: K, default: None = ..., type: None = ...) -> Optional[V]: ... + @overload + def get(self, key: K, default: D, type: None = ...) -> Union[D, V]: ... + @overload + def get(self, key: K, default: D, type: Callable[[V], T]) -> Union[D, T]: ... + @overload + def get(self, key: K, type: Callable[[V], T]) -> Optional[T]: ... + +class ImmutableTypeConversionDict(ImmutableDictMixin[K, V], TypeConversionDict[K, V]): + def copy(self) -> TypeConversionDict[K, V]: ... + def __copy__(self) -> ImmutableTypeConversionDict: ... + +class MultiDict(TypeConversionDict[K, V]): + def __init__( + self, + mapping: Optional[ + Union[Mapping[K, Union[Iterable[V], V]], Iterable[Tuple[K, V]]] + ] = None, + ) -> None: ... + def __getitem__(self, item: K) -> V: ... + def __setitem__(self, key: K, value: V) -> None: ... + def add(self, key: K, value: V) -> None: ... + @overload + def getlist(self, key: K) -> List[V]: ... + @overload + def getlist(self, key: K, type: Callable[[V], T] = ...) -> List[T]: ... + def setlist(self, key: K, new_list: Iterable[V]) -> None: ... + def setdefault(self, key: K, default: Optional[V] = None) -> V: ... + def setlistdefault( + self, key: K, default_list: Optional[Iterable[V]] = None + ) -> List[V]: ... + def items(self, multi: bool = False) -> Iterator[Tuple[K, V]]: ... # type: ignore + def lists(self) -> Iterator[Tuple[K, List[V]]]: ... + def values(self) -> Iterator[V]: ... # type: ignore + def listvalues(self) -> Iterator[List[V]]: ... + def copy(self) -> MultiDict[K, V]: ... + def deepcopy(self, memo: Any = None) -> MultiDict[K, V]: ... + @overload + def to_dict(self) -> Dict[K, V]: ... + @overload + def to_dict(self, flat: Literal[False]) -> Dict[K, List[V]]: ... 
+ def update( # type: ignore + self, mapping: Union[Mapping[K, Union[Iterable[V], V]], Iterable[Tuple[K, V]]] + ) -> None: ... + @overload + def pop(self, key: K) -> V: ... + @overload + def pop(self, key: K, default: Union[V, T] = ...) -> Union[V, T]: ... + def popitem(self) -> Tuple[K, V]: ... + def poplist(self, key: K) -> List[V]: ... + def popitemlist(self) -> Tuple[K, List[V]]: ... + def __copy__(self) -> MultiDict[K, V]: ... + def __deepcopy__(self, memo: Any) -> MultiDict[K, V]: ... + +class _omd_bucket(Generic[K, V]): + prev: Optional[_omd_bucket] + next: Optional[_omd_bucket] + key: K + value: V + def __init__(self, omd: OrderedMultiDict, key: K, value: V) -> None: ... + def unlink(self, omd: OrderedMultiDict) -> None: ... + +class OrderedMultiDict(MultiDict[K, V]): + _first_bucket: Optional[_omd_bucket] + _last_bucket: Optional[_omd_bucket] + def __init__(self, mapping: Optional[Mapping[K, V]] = None) -> None: ... + def __eq__(self, other: object) -> bool: ... + def __getitem__(self, key: K) -> V: ... + def __setitem__(self, key: K, value: V) -> None: ... + def __delitem__(self, key: K) -> None: ... + def keys(self) -> Iterator[K]: ... # type: ignore + def __iter__(self) -> Iterator[K]: ... + def values(self) -> Iterator[V]: ... # type: ignore + def items(self, multi: bool = False) -> Iterator[Tuple[K, V]]: ... # type: ignore + def lists(self) -> Iterator[Tuple[K, List[V]]]: ... + def listvalues(self) -> Iterator[List[V]]: ... + def add(self, key: K, value: V) -> None: ... + @overload + def getlist(self, key: K) -> List[V]: ... + @overload + def getlist(self, key: K, type: Callable[[V], T] = ...) -> List[T]: ... + def setlist(self, key: K, new_list: Iterable[V]) -> None: ... + def setlistdefault( + self, key: K, default_list: Optional[Iterable[V]] = None + ) -> List[V]: ... + def update( # type: ignore + self, mapping: Union[Mapping[K, V], Iterable[Tuple[K, V]]] + ) -> None: ... + def poplist(self, key: K) -> List[V]: ... 
+ @overload + def pop(self, key: K) -> V: ... + @overload + def pop(self, key: K, default: Union[V, T] = ...) -> Union[V, T]: ... + def popitem(self) -> Tuple[K, V]: ... + def popitemlist(self) -> Tuple[K, List[V]]: ... + +def _options_header_vkw( + value: str, kw: Mapping[str, Optional[Union[str, int]]] +) -> str: ... +def _unicodify_header_value(value: Union[str, int]) -> str: ... + +HV = Union[str, int] + +class Headers(Dict[str, str]): + _list: List[Tuple[str, str]] + def __init__( + self, + defaults: Optional[ + Union[Mapping[str, Union[HV, Iterable[HV]]], Iterable[Tuple[str, HV]]] + ] = None, + ) -> None: ... + @overload + def __getitem__(self, key: str) -> str: ... + @overload + def __getitem__(self, key: int) -> Tuple[str, str]: ... + @overload + def __getitem__(self, key: slice) -> Headers: ... + @overload + def __getitem__(self, key: str, _get_mode: Literal[True] = ...) -> str: ... + def __eq__(self, other: object) -> bool: ... + @overload # type: ignore + def get(self, key: str, default: str) -> str: ... + @overload + def get(self, key: str, default: Optional[str] = None) -> Optional[str]: ... + @overload + def get( + self, key: str, default: Optional[T] = None, type: Callable[[str], T] = ... + ) -> Optional[T]: ... + @overload + def getlist(self, key: str) -> List[str]: ... + @overload + def getlist(self, key: str, type: Callable[[str], T]) -> List[T]: ... + def get_all(self, name: str) -> List[str]: ... + def items( # type: ignore + self, lower: bool = False + ) -> Iterator[Tuple[str, str]]: ... + def keys(self, lower: bool = False) -> Iterator[str]: ... # type: ignore + def values(self) -> Iterator[str]: ... # type: ignore + def extend( + self, + *args: Union[Mapping[str, Union[HV, Iterable[HV]]], Iterable[Tuple[str, HV]]], + **kwargs: Union[HV, Iterable[HV]], + ) -> None: ... + @overload + def __delitem__(self, key: Union[str, int, slice]) -> None: ... + @overload + def __delitem__(self, key: str, _index_operation: Literal[False]) -> None: ... 
+ def remove(self, key: str) -> None: ... + @overload # type: ignore + def pop(self, key: str, default: Optional[str] = None) -> str: ... + @overload + def pop( + self, key: Optional[int] = None, default: Optional[Tuple[str, str]] = None + ) -> Tuple[str, str]: ... + def popitem(self) -> Tuple[str, str]: ... + def __contains__(self, key: str) -> bool: ... # type: ignore + def has_key(self, key: str) -> bool: ... + def __iter__(self) -> Iterator[Tuple[str, str]]: ... # type: ignore + def add(self, _key: str, _value: HV, **kw: HV) -> None: ... + def _validate_value(self, value: str) -> None: ... + def add_header(self, _key: str, _value: HV, **_kw: HV) -> None: ... + def clear(self) -> None: ... + def set(self, _key: str, _value: HV, **kw: HV) -> None: ... + def setlist(self, key: str, values: Iterable[HV]) -> None: ... + def setdefault(self, key: str, default: HV) -> str: ... # type: ignore + def setlistdefault(self, key: str, default: Iterable[HV]) -> None: ... + @overload + def __setitem__(self, key: str, value: HV) -> None: ... + @overload + def __setitem__(self, key: int, value: Tuple[str, HV]) -> None: ... + @overload + def __setitem__(self, key: slice, value: Iterable[Tuple[str, HV]]) -> None: ... + @overload + def update( + self, __m: SupportsKeysAndGetItem[str, HV], **kwargs: Union[HV, Iterable[HV]] + ) -> None: ... + @overload + def update( + self, __m: Iterable[Tuple[str, HV]], **kwargs: Union[HV, Iterable[HV]] + ) -> None: ... + @overload + def update(self, **kwargs: Union[HV, Iterable[HV]]) -> None: ... + def to_wsgi_list(self) -> List[Tuple[str, str]]: ... + def copy(self) -> Headers: ... + def __copy__(self) -> Headers: ... + +class ImmutableHeadersMixin(Headers): + def __delitem__(self, key: Any, _index_operation: bool = True) -> NoReturn: ... + def __setitem__(self, key: Any, value: Any) -> NoReturn: ... + def set(self, _key: Any, _value: Any, **kw: Any) -> NoReturn: ... + def setlist(self, key: Any, values: Any) -> NoReturn: ... 
+ def add(self, _key: Any, _value: Any, **kw: Any) -> NoReturn: ... + def add_header(self, _key: Any, _value: Any, **_kw: Any) -> NoReturn: ... + def remove(self, key: Any) -> NoReturn: ... + def extend(self, *args: Any, **kwargs: Any) -> NoReturn: ... + def update(self, *args: Any, **kwargs: Any) -> NoReturn: ... + def insert(self, pos: Any, value: Any) -> NoReturn: ... + def pop(self, key: Any = None, default: Any = ...) -> NoReturn: ... + def popitem(self) -> NoReturn: ... + def setdefault(self, key: Any, default: Any) -> NoReturn: ... # type: ignore + def setlistdefault(self, key: Any, default: Any) -> NoReturn: ... + +class EnvironHeaders(ImmutableHeadersMixin, Headers): + environ: WSGIEnvironment + def __init__(self, environ: WSGIEnvironment) -> None: ... + def __eq__(self, other: object) -> bool: ... + def __getitem__( # type: ignore + self, key: str, _get_mode: Literal[False] = False + ) -> str: ... + def __iter__(self) -> Iterator[Tuple[str, str]]: ... # type: ignore + def copy(self) -> NoReturn: ... + +class CombinedMultiDict(ImmutableMultiDictMixin[K, V], MultiDict[K, V]): # type: ignore + dicts: List[MultiDict[K, V]] + def __init__(self, dicts: Optional[Iterable[MultiDict[K, V]]]) -> None: ... + @classmethod + def fromkeys(cls, keys: Any, value: Any = None) -> NoReturn: ... + def __getitem__(self, key: K) -> V: ... + @overload # type: ignore + def get(self, key: K) -> Optional[V]: ... + @overload + def get(self, key: K, default: Union[V, T] = ...) -> Union[V, T]: ... + @overload + def get( + self, key: K, default: Optional[T] = None, type: Callable[[V], T] = ... + ) -> Optional[T]: ... + @overload + def getlist(self, key: K) -> List[V]: ... + @overload + def getlist(self, key: K, type: Callable[[V], T] = ...) -> List[T]: ... + def _keys_impl(self) -> Set[K]: ... + def keys(self) -> Set[K]: ... # type: ignore + def __iter__(self) -> Set[K]: ... # type: ignore + def items(self, multi: bool = False) -> Iterator[Tuple[K, V]]: ... 
# type: ignore + def values(self) -> Iterator[V]: ... # type: ignore + def lists(self) -> Iterator[Tuple[K, List[V]]]: ... + def listvalues(self) -> Iterator[List[V]]: ... + def copy(self) -> MultiDict[K, V]: ... + @overload + def to_dict(self) -> Dict[K, V]: ... + @overload + def to_dict(self, flat: Literal[False]) -> Dict[K, List[V]]: ... + def __contains__(self, key: K) -> bool: ... # type: ignore + def has_key(self, key: K) -> bool: ... + +class FileMultiDict(MultiDict[str, "FileStorage"]): + def add_file( + self, + name: str, + file: Union[FileStorage, str, IO[bytes]], + filename: Optional[str] = None, + content_type: Optional[str] = None, + ) -> None: ... + +class ImmutableDict(ImmutableDictMixin[K, V], Dict[K, V]): + def copy(self) -> Dict[K, V]: ... + def __copy__(self) -> ImmutableDict[K, V]: ... + +class ImmutableMultiDict( # type: ignore + ImmutableMultiDictMixin[K, V], MultiDict[K, V] +): + def copy(self) -> MultiDict[K, V]: ... + def __copy__(self) -> ImmutableMultiDict[K, V]: ... + +class ImmutableOrderedMultiDict( # type: ignore + ImmutableMultiDictMixin[K, V], OrderedMultiDict[K, V] +): + def _iter_hashitems(self) -> Iterator[Tuple[int, Tuple[K, V]]]: ... + def copy(self) -> OrderedMultiDict[K, V]: ... + def __copy__(self) -> ImmutableOrderedMultiDict[K, V]: ... + +class Accept(ImmutableList[Tuple[str, int]]): + provided: bool + def __init__( + self, values: Optional[Union[Accept, Iterable[Tuple[str, float]]]] = None + ) -> None: ... + def _specificity(self, value: str) -> Tuple[bool, ...]: ... + def _value_matches(self, value: str, item: str) -> bool: ... + @overload # type: ignore + def __getitem__(self, key: str) -> int: ... + @overload + def __getitem__(self, key: int) -> Tuple[str, int]: ... + @overload + def __getitem__(self, key: slice) -> Iterable[Tuple[str, int]]: ... + def quality(self, key: str) -> int: ... + def __contains__(self, value: str) -> bool: ... # type: ignore + def index(self, key: str) -> int: ... 
# type: ignore + def find(self, key: str) -> int: ... + def values(self) -> Iterator[str]: ... + def to_header(self) -> str: ... + def _best_single_match(self, match: str) -> Optional[Tuple[str, int]]: ... + def best_match( + self, matches: Iterable[str], default: Optional[str] = None + ) -> Optional[str]: ... + @property + def best(self) -> str: ... + +def _normalize_mime(value: str) -> List[str]: ... + +class MIMEAccept(Accept): + def _specificity(self, value: str) -> Tuple[bool, ...]: ... + def _value_matches(self, value: str, item: str) -> bool: ... + @property + def accept_html(self) -> bool: ... + @property + def accept_xhtml(self) -> bool: ... + @property + def accept_json(self) -> bool: ... + +def _normalize_lang(value: str) -> List[str]: ... + +class LanguageAccept(Accept): + def _value_matches(self, value: str, item: str) -> bool: ... + def best_match( + self, matches: Iterable[str], default: Optional[str] = None + ) -> Optional[str]: ... + +class CharsetAccept(Accept): + def _value_matches(self, value: str, item: str) -> bool: ... + +_CPT = TypeVar("_CPT", str, int, bool) +_OptCPT = Optional[_CPT] + +def cache_control_property(key: str, empty: _OptCPT, type: Type[_CPT]) -> property: ... + +class _CacheControl(UpdateDictMixin[str, _OptCPT], Dict[str, _OptCPT]): + provided: bool + def __init__( + self, + values: Union[Mapping[str, _OptCPT], Iterable[Tuple[str, _OptCPT]]] = (), + on_update: Optional[Callable[[_CacheControl], None]] = None, + ) -> None: ... + @property + def no_cache(self) -> Optional[bool]: ... + @no_cache.setter + def no_cache(self, value: Optional[bool]) -> None: ... + @no_cache.deleter + def no_cache(self) -> None: ... + @property + def no_store(self) -> Optional[bool]: ... + @no_store.setter + def no_store(self, value: Optional[bool]) -> None: ... + @no_store.deleter + def no_store(self) -> None: ... + @property + def max_age(self) -> Optional[int]: ... + @max_age.setter + def max_age(self, value: Optional[int]) -> None: ... 
+ @max_age.deleter + def max_age(self) -> None: ... + @property + def no_transform(self) -> Optional[bool]: ... + @no_transform.setter + def no_transform(self, value: Optional[bool]) -> None: ... + @no_transform.deleter + def no_transform(self) -> None: ... + def _get_cache_value(self, key: str, empty: Optional[T], type: Type[T]) -> T: ... + def _set_cache_value(self, key: str, value: Optional[T], type: Type[T]) -> None: ... + def _del_cache_value(self, key: str) -> None: ... + def to_header(self) -> str: ... + @staticmethod + def cache_property(key: str, empty: _OptCPT, type: Type[_CPT]) -> property: ... + +class RequestCacheControl(ImmutableDictMixin[str, _OptCPT], _CacheControl): + @property + def max_stale(self) -> Optional[int]: ... + @max_stale.setter + def max_stale(self, value: Optional[int]) -> None: ... + @max_stale.deleter + def max_stale(self) -> None: ... + @property + def min_fresh(self) -> Optional[int]: ... + @min_fresh.setter + def min_fresh(self, value: Optional[int]) -> None: ... + @min_fresh.deleter + def min_fresh(self) -> None: ... + @property + def only_if_cached(self) -> Optional[bool]: ... + @only_if_cached.setter + def only_if_cached(self, value: Optional[bool]) -> None: ... + @only_if_cached.deleter + def only_if_cached(self) -> None: ... + +class ResponseCacheControl(_CacheControl): + @property + def public(self) -> Optional[bool]: ... + @public.setter + def public(self, value: Optional[bool]) -> None: ... + @public.deleter + def public(self) -> None: ... + @property + def private(self) -> Optional[bool]: ... + @private.setter + def private(self, value: Optional[bool]) -> None: ... + @private.deleter + def private(self) -> None: ... + @property + def must_revalidate(self) -> Optional[bool]: ... + @must_revalidate.setter + def must_revalidate(self, value: Optional[bool]) -> None: ... + @must_revalidate.deleter + def must_revalidate(self) -> None: ... + @property + def proxy_revalidate(self) -> Optional[bool]: ... 
+ @proxy_revalidate.setter + def proxy_revalidate(self, value: Optional[bool]) -> None: ... + @proxy_revalidate.deleter + def proxy_revalidate(self) -> None: ... + @property + def s_maxage(self) -> Optional[int]: ... + @s_maxage.setter + def s_maxage(self, value: Optional[int]) -> None: ... + @s_maxage.deleter + def s_maxage(self) -> None: ... + @property + def immutable(self) -> Optional[bool]: ... + @immutable.setter + def immutable(self, value: Optional[bool]) -> None: ... + @immutable.deleter + def immutable(self) -> None: ... + +def csp_property(key: str) -> property: ... + +class ContentSecurityPolicy(UpdateDictMixin[str, str], Dict[str, str]): + @property + def base_uri(self) -> Optional[str]: ... + @base_uri.setter + def base_uri(self, value: Optional[str]) -> None: ... + @base_uri.deleter + def base_uri(self) -> None: ... + @property + def child_src(self) -> Optional[str]: ... + @child_src.setter + def child_src(self, value: Optional[str]) -> None: ... + @child_src.deleter + def child_src(self) -> None: ... + @property + def connect_src(self) -> Optional[str]: ... + @connect_src.setter + def connect_src(self, value: Optional[str]) -> None: ... + @connect_src.deleter + def connect_src(self) -> None: ... + @property + def default_src(self) -> Optional[str]: ... + @default_src.setter + def default_src(self, value: Optional[str]) -> None: ... + @default_src.deleter + def default_src(self) -> None: ... + @property + def font_src(self) -> Optional[str]: ... + @font_src.setter + def font_src(self, value: Optional[str]) -> None: ... + @font_src.deleter + def font_src(self) -> None: ... + @property + def form_action(self) -> Optional[str]: ... + @form_action.setter + def form_action(self, value: Optional[str]) -> None: ... + @form_action.deleter + def form_action(self) -> None: ... + @property + def frame_ancestors(self) -> Optional[str]: ... + @frame_ancestors.setter + def frame_ancestors(self, value: Optional[str]) -> None: ... 
+ @frame_ancestors.deleter + def frame_ancestors(self) -> None: ... + @property + def frame_src(self) -> Optional[str]: ... + @frame_src.setter + def frame_src(self, value: Optional[str]) -> None: ... + @frame_src.deleter + def frame_src(self) -> None: ... + @property + def img_src(self) -> Optional[str]: ... + @img_src.setter + def img_src(self, value: Optional[str]) -> None: ... + @img_src.deleter + def img_src(self) -> None: ... + @property + def manifest_src(self) -> Optional[str]: ... + @manifest_src.setter + def manifest_src(self, value: Optional[str]) -> None: ... + @manifest_src.deleter + def manifest_src(self) -> None: ... + @property + def media_src(self) -> Optional[str]: ... + @media_src.setter + def media_src(self, value: Optional[str]) -> None: ... + @media_src.deleter + def media_src(self) -> None: ... + @property + def navigate_to(self) -> Optional[str]: ... + @navigate_to.setter + def navigate_to(self, value: Optional[str]) -> None: ... + @navigate_to.deleter + def navigate_to(self) -> None: ... + @property + def object_src(self) -> Optional[str]: ... + @object_src.setter + def object_src(self, value: Optional[str]) -> None: ... + @object_src.deleter + def object_src(self) -> None: ... + @property + def prefetch_src(self) -> Optional[str]: ... + @prefetch_src.setter + def prefetch_src(self, value: Optional[str]) -> None: ... + @prefetch_src.deleter + def prefetch_src(self) -> None: ... + @property + def plugin_types(self) -> Optional[str]: ... + @plugin_types.setter + def plugin_types(self, value: Optional[str]) -> None: ... + @plugin_types.deleter + def plugin_types(self) -> None: ... + @property + def report_to(self) -> Optional[str]: ... + @report_to.setter + def report_to(self, value: Optional[str]) -> None: ... + @report_to.deleter + def report_to(self) -> None: ... + @property + def report_uri(self) -> Optional[str]: ... + @report_uri.setter + def report_uri(self, value: Optional[str]) -> None: ... 
+ @report_uri.deleter + def report_uri(self) -> None: ... + @property + def sandbox(self) -> Optional[str]: ... + @sandbox.setter + def sandbox(self, value: Optional[str]) -> None: ... + @sandbox.deleter + def sandbox(self) -> None: ... + @property + def script_src(self) -> Optional[str]: ... + @script_src.setter + def script_src(self, value: Optional[str]) -> None: ... + @script_src.deleter + def script_src(self) -> None: ... + @property + def script_src_attr(self) -> Optional[str]: ... + @script_src_attr.setter + def script_src_attr(self, value: Optional[str]) -> None: ... + @script_src_attr.deleter + def script_src_attr(self) -> None: ... + @property + def script_src_elem(self) -> Optional[str]: ... + @script_src_elem.setter + def script_src_elem(self, value: Optional[str]) -> None: ... + @script_src_elem.deleter + def script_src_elem(self) -> None: ... + @property + def style_src(self) -> Optional[str]: ... + @style_src.setter + def style_src(self, value: Optional[str]) -> None: ... + @style_src.deleter + def style_src(self) -> None: ... + @property + def style_src_attr(self) -> Optional[str]: ... + @style_src_attr.setter + def style_src_attr(self, value: Optional[str]) -> None: ... + @style_src_attr.deleter + def style_src_attr(self) -> None: ... + @property + def style_src_elem(self) -> Optional[str]: ... + @style_src_elem.setter + def style_src_elem(self, value: Optional[str]) -> None: ... + @style_src_elem.deleter + def style_src_elem(self) -> None: ... + @property + def worker_src(self) -> Optional[str]: ... + @worker_src.setter + def worker_src(self, value: Optional[str]) -> None: ... + @worker_src.deleter + def worker_src(self) -> None: ... + provided: bool + def __init__( + self, + values: Union[Mapping[str, str], Iterable[Tuple[str, str]]] = (), + on_update: Optional[Callable[[ContentSecurityPolicy], None]] = None, + ) -> None: ... + def _get_value(self, key: str) -> Optional[str]: ... + def _set_value(self, key: str, value: str) -> None: ... 
+ def _del_value(self, key: str) -> None: ... + def to_header(self) -> str: ... + +class CallbackDict(UpdateDictMixin[K, V], Dict[K, V]): + def __init__( + self, + initial: Optional[Union[Mapping[K, V], Iterable[Tuple[K, V]]]] = None, + on_update: Optional[Callable[[_CD], None]] = None, + ) -> None: ... + +class HeaderSet(Set[str]): + _headers: List[str] + _set: Set[str] + on_update: Optional[Callable[[HeaderSet], None]] + def __init__( + self, + headers: Optional[Iterable[str]] = None, + on_update: Optional[Callable[[HeaderSet], None]] = None, + ) -> None: ... + def add(self, header: str) -> None: ... + def remove(self, header: str) -> None: ... + def update(self, iterable: Iterable[str]) -> None: ... # type: ignore + def discard(self, header: str) -> None: ... + def find(self, header: str) -> int: ... + def index(self, header: str) -> int: ... + def clear(self) -> None: ... + def as_set(self, preserve_casing: bool = False) -> Set[str]: ... + def to_header(self) -> str: ... + def __getitem__(self, idx: int) -> str: ... + def __delitem__(self, idx: int) -> None: ... + def __setitem__(self, idx: int, value: str) -> None: ... + def __contains__(self, header: str) -> bool: ... # type: ignore + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[str]: ... + +class ETags(Collection[str]): + _strong: FrozenSet[str] + _weak: FrozenSet[str] + star_tag: bool + def __init__( + self, + strong_etags: Optional[Iterable[str]] = None, + weak_etags: Optional[Iterable[str]] = None, + star_tag: bool = False, + ) -> None: ... + def as_set(self, include_weak: bool = False) -> Set[str]: ... + def is_weak(self, etag: str) -> bool: ... + def is_strong(self, etag: str) -> bool: ... + def contains_weak(self, etag: str) -> bool: ... + def contains(self, etag: str) -> bool: ... + def contains_raw(self, etag: str) -> bool: ... + def to_header(self) -> str: ... 
+ def __call__( + self, + etag: Optional[str] = None, + data: Optional[bytes] = None, + include_weak: bool = False, + ) -> bool: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[str]: ... + def __contains__(self, item: str) -> bool: ... # type: ignore + +class IfRange: + etag: Optional[str] + date: Optional[datetime] + def __init__( + self, etag: Optional[str] = None, date: Optional[datetime] = None + ) -> None: ... + def to_header(self) -> str: ... + +class Range: + units: str + ranges: List[Tuple[int, Optional[int]]] + def __init__(self, units: str, ranges: List[Tuple[int, Optional[int]]]) -> None: ... + def range_for_length(self, length: Optional[int]) -> Optional[Tuple[int, int]]: ... + def make_content_range(self, length: Optional[int]) -> Optional[ContentRange]: ... + def to_header(self) -> str: ... + def to_content_range_header(self, length: Optional[int]) -> Optional[str]: ... + +def _callback_property(name: str) -> property: ... + +class ContentRange: + on_update: Optional[Callable[[ContentRange], None]] + def __init__( + self, + units: Optional[str], + start: Optional[int], + stop: Optional[int], + length: Optional[int] = None, + on_update: Optional[Callable[[ContentRange], None]] = None, + ) -> None: ... + @property + def units(self) -> Optional[str]: ... + @units.setter + def units(self, value: Optional[str]) -> None: ... + @property + def start(self) -> Optional[int]: ... + @start.setter + def start(self, value: Optional[int]) -> None: ... + @property + def stop(self) -> Optional[int]: ... + @stop.setter + def stop(self, value: Optional[int]) -> None: ... + @property + def length(self) -> Optional[int]: ... + @length.setter + def length(self, value: Optional[int]) -> None: ... + def set( + self, + start: Optional[int], + stop: Optional[int], + length: Optional[int] = None, + units: Optional[str] = "bytes", + ) -> None: ... + def unset(self) -> None: ... + def to_header(self) -> str: ... 

# Stub for the parsed request-side authorization value. The class is
# declared as an immutable ``str -> str`` dict (it inherits
# ``ImmutableDictMixin``); every accessor below is a read-only property
# (no setters are declared) that may be ``None``.
class Authorization(ImmutableDictMixin[str, str], Dict[str, str]):
    type: str
    def __init__(
        self,
        auth_type: str,
        data: Optional[Union[Mapping[str, str], Iterable[Tuple[str, str]]]] = None,
    ) -> None: ...
    @property
    def username(self) -> Optional[str]: ...
    @property
    def password(self) -> Optional[str]: ...
    @property
    def realm(self) -> Optional[str]: ...
    @property
    def nonce(self) -> Optional[str]: ...
    @property
    def uri(self) -> Optional[str]: ...
    @property
    def nc(self) -> Optional[str]: ...
    @property
    def cnonce(self) -> Optional[str]: ...
    @property
    def response(self) -> Optional[str]: ...
    @property
    def opaque(self) -> Optional[str]: ...
    @property
    def qop(self) -> Optional[str]: ...
    def to_header(self) -> str: ...

# Module-level property factories; both take a key name plus an optional
# docstring and return a ``property`` object.
def auth_property(name: str, doc: Optional[str] = None) -> property: ...
def _set_property(name: str, doc: Optional[str] = None) -> property: ...

# Stub for the mutable counterpart: a ``str -> str`` dict built on
# ``UpdateDictMixin`` that accepts an optional ``on_update`` callback
# receiving the instance itself.
class WWWAuthenticate(UpdateDictMixin[str, str], Dict[str, str]):
    _require_quoting: FrozenSet[str]
    def __init__(
        self,
        auth_type: Optional[str] = None,
        values: Optional[Union[Mapping[str, str], Iterable[Tuple[str, str]]]] = None,
        on_update: Optional[Callable[[WWWAuthenticate], None]] = None,
    ) -> None: ...
    def set_basic(self, realm: str = ...) -> None: ...
    def set_digest(
        self,
        realm: str,
        nonce: str,
        qop: Iterable[str] = ("auth",),
        opaque: Optional[str] = None,
        algorithm: Optional[str] = None,
        stale: bool = False,
    ) -> None: ...
    def to_header(self) -> str: ...
    # Read/write string properties; each may be ``None``.
    @property
    def type(self) -> Optional[str]: ...
    @type.setter
    def type(self, value: Optional[str]) -> None: ...
    @property
    def realm(self) -> Optional[str]: ...
    @realm.setter
    def realm(self, value: Optional[str]) -> None: ...
    # ``domain`` and ``qop`` are exposed as ``HeaderSet`` objects and have
    # no setter declared.
    @property
    def domain(self) -> HeaderSet: ...
    @property
    def nonce(self) -> Optional[str]: ...
    @nonce.setter
    def nonce(self, value: Optional[str]) -> None: ...
    @property
    def opaque(self) -> Optional[str]: ...
    @opaque.setter
    def opaque(self, value: Optional[str]) -> None: ...
    @property
    def algorithm(self) -> Optional[str]: ...
    @algorithm.setter
    def algorithm(self, value: Optional[str]) -> None: ...
    @property
    def qop(self) -> HeaderSet: ...
    @property
    def stale(self) -> Optional[bool]: ...
    @stale.setter
    def stale(self, value: Optional[bool]) -> None: ...
    @staticmethod
    def auth_property(name: str, doc: Optional[str] = None) -> property: ...

# Stub for the wrapper around an uploaded file's stream. Attributes that
# are not declared here are resolved at runtime by ``__getattr__``, which
# forwards to the wrapped stream (hence its ``Any`` return type).
class FileStorage:
    name: Optional[str]
    stream: IO[bytes]
    # ``None`` when no filename was given and none could be taken from the
    # stream, or when the stream's name looks like ``<...>``.
    filename: Optional[str]
    headers: Headers
    # Created lazily by ``_parse_content_type``; not set by ``__init__``.
    _parsed_content_type: Tuple[str, Dict[str, str]]
    def __init__(
        self,
        stream: Optional[IO[bytes]] = None,
        filename: Union[str, PathLike, None] = None,
        name: Optional[str] = None,
        content_type: Optional[str] = None,
        content_length: Optional[int] = None,
        headers: Optional[Headers] = None,
    ) -> None: ...
    def _parse_content_type(self) -> None: ...
    # The runtime property returns ``self.headers.get("content-type")`` and
    # the constructor only writes that header when ``content_type`` is not
    # ``None``, so the value is ``None`` when the header is absent.
    @property
    def content_type(self) -> Optional[str]: ...
    # Falls back to ``0`` when the header is missing or not an integer.
    @property
    def content_length(self) -> int: ...
    # Lower-cased content type without its parameters.
    @property
    def mimetype(self) -> str: ...
    @property
    def mimetype_params(self) -> Dict[str, str]: ...
    # ``dst`` may be a path (``str`` / ``PathLike``), which is opened and
    # closed by ``save`` itself, or an already-open binary file object,
    # which the caller must close. ``buffer_size`` is forwarded to
    # ``shutil.copyfileobj``.
    def save(
        self, dst: Union[str, PathLike, IO[bytes]], buffer_size: int = ...
    ) -> None: ...
    # Best-effort close of the underlying stream; errors are suppressed.
    def close(self) -> None: ...
    # Truthiness follows ``filename``.
    def __bool__(self) -> bool: ...
    def __getattr__(self, name: str) -> Any: ...
    def __iter__(self) -> Iterator[bytes]: ...
    def __repr__(self) -> str: ...
# Lifetime of a trusted PIN cookie: one week, in seconds.
PIN_TIME = 7 * 24 * 60 * 60


def hash_pin(pin: str) -> str:
    """Return the first 12 hex digits of the salted SHA-1 of *pin*."""
    salted = f"{pin} added salt".encode("utf-8", "replace")
    digest = hashlib.sha1(salted).hexdigest()
    return digest[:12]


# Process-wide cache for :func:`get_machine_id`; ``None`` means "not computed".
_machine_id: t.Optional[t.Union[str, bytes]] = None


def _read_linux_machine_id() -> bytes:
    """Collect the Linux machine identifier, or ``b""`` when unavailable."""
    collected = b""

    # machine-id is stable across boots, boot_id is not; the first file that
    # yields a non-empty first line wins.
    for candidate in ("/etc/machine-id", "/proc/sys/kernel/random/boot_id"):
        try:
            with open(candidate, "rb") as handle:
                first_line = handle.readline().strip()
        except OSError:
            continue

        if first_line:
            collected += first_line
            break

    # Containers share the same machine id, so append the last path segment
    # of the first cgroup line as well.
    try:
        with open("/proc/self/cgroup", "rb") as handle:
            collected += handle.readline().strip().rpartition(b"/")[2]
    except OSError:
        pass

    return collected


def _read_macos_serial() -> t.Optional[bytes]:
    """Ask ``ioreg`` for the serial number; ``None`` if it cannot be read."""
    try:
        # subprocess may not be available, e.g. Google App Engine
        # https://github.com/pallets/werkzeug/issues/925
        from subprocess import Popen, PIPE

        process = Popen(
            ["ioreg", "-c", "IOPlatformExpertDevice", "-d", "2"], stdout=PIPE
        )
        output = process.communicate()[0]
        found = re.search(b'"serial-number" = <([^>]+)', output)
    except (OSError, ImportError):
        return None

    return None if found is None else found.group(1)


def _read_windows_guid() -> t.Optional[t.Union[str, bytes]]:
    """Read ``MachineGuid`` from the registry; ``None`` off Windows."""
    if sys.platform != "win32":
        return None

    import winreg

    try:
        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE,
            "SOFTWARE\\Microsoft\\Cryptography",
            0,
            winreg.KEY_READ | winreg.KEY_WOW64_64KEY,
        ) as key:
            guid: t.Union[str, bytes]
            guid_type: int
            guid, guid_type = winreg.QueryValueEx(key, "MachineGuid")

            # String values are encoded; anything else is returned untouched.
            if guid_type == winreg.REG_SZ:
                return guid.encode("utf-8")

            return guid
    except OSError:
        return None


def get_machine_id() -> t.Optional[t.Union[str, bytes]]:
    """Return an identifier for this machine, or ``None`` if none is found.

    Sources are tried in order: Linux id files, macOS ``ioreg``, the Windows
    registry. A non-``None`` result is cached in the module for later calls.
    """
    global _machine_id

    if _machine_id is None:
        _machine_id = (
            _read_linux_machine_id() or _read_macos_serial() or _read_windows_guid()
        )

    return _machine_id


class _ConsoleFrame:
    """Frame stand-in that lets the standalone console reuse the frame
    console code path.
    """

    def __init__(self, namespace: t.Dict[str, t.Any]):
        # The standalone console is always registered under frame id 0.
        self.id = 0
        self.console = Console(namespace)

    def eval(self, code: str) -> t.Any:
        """Evaluate *code* in the wrapped console and return its output."""
        result = self.console.eval(code)
        return result
def get_pin_and_cookie_name(
    app: "WSGIApplication",
) -> t.Union[t.Tuple[str, str], t.Tuple[None, None]]:
    """Given an application object this returns a semi-stable 9 digit pin
    code and a random key. The hope is that this is stable between
    restarts to not make debugging particularly frustrating. If the pin
    was forcefully disabled this returns `None`.

    Second item in the resulting tuple is the cookie name for remembering.

    The ``WERKZEUG_DEBUG_PIN`` environment variable controls the result:
    ``"off"`` disables the pin (``(None, None)`` is returned), a decimal
    value containing ``-`` is used verbatim, and a plain decimal value is
    used as the digits and grouped with ``-`` for display.

    :param app: the WSGI application the pin is derived for.
    :return: ``(pin, cookie_name)`` or ``(None, None)`` when disabled.
    """
    pin = os.environ.get("WERKZEUG_DEBUG_PIN")
    rv = None
    num = None

    # Pin was explicitly disabled
    if pin == "off":
        return None, None

    # Pin was provided explicitly
    if pin is not None and pin.replace("-", "").isdecimal():
        # If there are separators in the pin, return it directly
        if "-" in pin:
            rv = pin
        else:
            num = pin

    modname = getattr(app, "__module__", t.cast(object, app).__class__.__module__)
    username: t.Optional[str]

    try:
        # getuser imports the pwd module, which does not exist in Google
        # App Engine. It may also raise a KeyError if the UID does not
        # have a username, such as in Docker. Python 3.13+ reports that
        # same situation as OSError instead, so it must be tolerated too;
        # the username is only one of several optional hash inputs.
        username = getpass.getuser()
    except (ImportError, KeyError, OSError):
        username = None

    mod = sys.modules.get(modname)

    # This information only exists to make the cookie unique on the
    # computer, not as a security feature.
    probably_public_bits = [
        username,
        modname,
        getattr(app, "__name__", type(app).__name__),
        getattr(mod, "__file__", None),
    ]

    # This information is here to make it harder for an attacker to
    # guess the cookie name. They are unlikely to be contained anywhere
    # within the unauthenticated debug page.
    private_bits = [str(uuid.getnode()), get_machine_id()]

    h = hashlib.sha1()
    for bit in chain(probably_public_bits, private_bits):
        # Missing pieces (None / empty) simply do not contribute.
        if not bit:
            continue
        if isinstance(bit, str):
            bit = bit.encode("utf-8")
        h.update(bit)
    h.update(b"cookiesalt")

    cookie_name = f"__wzd{h.hexdigest()[:20]}"

    # If we need to generate a pin we salt it a bit more so that we don't
    # end up with the same value and generate out 9 digits
    if num is None:
        h.update(b"pinsalt")
        num = f"{int(h.hexdigest(), 16):09d}"[:9]

    # Format the pincode in groups of digits for easier remembering if
    # we don't have a result yet.
    if rv is None:
        for group_size in 5, 4, 3:
            if len(num) % group_size == 0:
                rv = "-".join(
                    num[x : x + group_size].rjust(group_size, "0")
                    for x in range(0, len(num), group_size)
                )
                break
        else:
            # No group size divides the length evenly: show it ungrouped.
            rv = num

    return rv, cookie_name
class DebuggedApplication:
    """Enables debugging support for a given application::

        from werkzeug.debug import DebuggedApplication
        from myapp import app
        app = DebuggedApplication(app, evalex=True)

    The ``evalex`` argument allows evaluating expressions in any frame
    of a traceback. This works by preserving each frame with its local
    state. Some state, such as :doc:`local`, cannot be restored with the
    frame by default. When ``evalex`` is enabled,
    ``environ["werkzeug.debug.preserve_context"]`` will be a callable
    that takes a context manager, and can be called multiple times.
    Each context manager will be entered before evaluating code in the
    frame, then exited again, so they can perform setup and cleanup for
    each call.

    :param app: the WSGI application to run debugged.
    :param evalex: enable exception evaluation feature (interactive
                   debugging).  This requires a non-forking server.
    :param request_key: The key that points to the request object in this
                        environment.  This parameter is ignored in current
                        versions.
    :param console_path: the URL for a general purpose console.
    :param console_init_func: the function that is executed before starting
                              the general purpose console.  The return value
                              is used as initial namespace.
    :param show_hidden_frames: by default hidden traceback frames are skipped.
                               You can show them by setting this parameter
                               to `True`.
    :param pin_security: can be used to disable the pin based security system.
    :param pin_logging: enables the logging of the pin system.

    .. versionchanged:: 2.2
        Added the ``werkzeug.debug.preserve_context`` environ key.
    """

    # Lazily filled caches behind the ``pin`` / ``pin_cookie_name`` properties.
    _pin: str
    _pin_cookie: str

    def __init__(
        self,
        app: "WSGIApplication",
        evalex: bool = False,
        request_key: str = "werkzeug.request",
        console_path: str = "/console",
        console_init_func: t.Optional[t.Callable[[], t.Dict[str, t.Any]]] = None,
        show_hidden_frames: bool = False,
        pin_security: bool = True,
        pin_logging: bool = True,
    ) -> None:
        # Normalize any falsy value to None so later ``is None`` checks work.
        if not console_init_func:
            console_init_func = None
        self.app = app
        self.evalex = evalex
        # Preserved frames, keyed by ``id(frame)``; key 0 is the standalone
        # console (see ``display_console``).
        self.frames: t.Dict[int, t.Union[DebugFrameSummary, _ConsoleFrame]] = {}
        # Context managers to enter around evaluation, keyed like ``frames``.
        self.frame_contexts: t.Dict[int, t.List[t.ContextManager[None]]] = {}
        self.request_key = request_key
        self.console_path = console_path
        self.console_init_func = console_init_func
        self.show_hidden_frames = show_hidden_frames
        # Per-instance secret; debugger commands must echo it back as ``s``.
        self.secret = gen_salt(20)
        self._failed_pin_auth = 0

        self.pin_logging = pin_logging
        if pin_security:
            # Print out the pin for the debugger on standard out.
            if os.environ.get("WERKZEUG_RUN_MAIN") == "true" and pin_logging:
                _log("warning", " * Debugger is active!")
                if self.pin is None:
                    _log("warning", " * Debugger PIN disabled. DEBUGGER UNSECURED!")
                else:
                    _log("info", " * Debugger PIN: %s", self.pin)
        else:
            # A ``None`` pin makes ``check_pin_trust`` trust every request.
            self.pin = None

    @property
    def pin(self) -> t.Optional[str]:
        """The debugger pin, computed on first access; ``None`` if disabled."""
        if not hasattr(self, "_pin"):
            pin_cookie = get_pin_and_cookie_name(self.app)
            self._pin, self._pin_cookie = pin_cookie  # type: ignore
        return self._pin

    @pin.setter
    def pin(self, value: t.Optional[str]) -> None:
        self._pin = value

    @property
    def pin_cookie_name(self) -> str:
        """The name of the pin cookie."""
        if not hasattr(self, "_pin_cookie"):
            pin_cookie = get_pin_and_cookie_name(self.app)
            self._pin, self._pin_cookie = pin_cookie  # type: ignore
        return self._pin_cookie

    def debug_application(
        self, environ: "WSGIEnvironment", start_response: "StartResponse"
    ) -> t.Iterator[bytes]:
        """Run the application and conserve the traceback frames.

        Yields the wrapped application's output. If the application raises,
        the frames of the traceback are stored on ``self.frames`` and a
        500 debugger page is yielded instead.
        """
        contexts: t.List[t.ContextManager[t.Any]] = []

        if self.evalex:
            # The application may register context managers through this key.
            environ["werkzeug.debug.preserve_context"] = contexts.append

        app_iter = None
        try:
            app_iter = self.app(environ, start_response)
            yield from app_iter
            if hasattr(app_iter, "close"):
                app_iter.close()  # type: ignore
        except Exception as e:
            # ``app_iter`` is still None when the call itself raised.
            if hasattr(app_iter, "close"):
                app_iter.close()  # type: ignore

            tb = DebugTraceback(e, skip=1, hide=not self.show_hidden_frames)

            # Keep every frame (and the request's contexts) reachable so
            # later ``execute_command`` requests can find them by id.
            for frame in tb.all_frames:
                self.frames[id(frame)] = frame
                self.frame_contexts[id(frame)] = contexts

            is_trusted = bool(self.check_pin_trust(environ))
            html = tb.render_debugger_html(
                evalex=self.evalex,
                secret=self.secret,
                evalex_trusted=is_trusted,
            )
            response = Response(html, status=500, mimetype="text/html")

            try:
                yield from response(environ, start_response)
            except Exception:
                # if we end up here there has been output but an error
                # occurred.  in that situation we can do nothing fancy any
                # more, better log something into the error log and fall
                # back gracefully.
                environ["wsgi.errors"].write(
                    "Debugging middleware caught exception in streamed "
                    "response at a point where response headers were already "
                    "sent.\n"
                )

            # The plain-text traceback is always written to the error stream.
            environ["wsgi.errors"].write("".join(tb.render_traceback_text()))

    def execute_command(  # type: ignore[return]
        self,
        request: Request,
        command: str,
        frame: t.Union[DebugFrameSummary, _ConsoleFrame],
    ) -> Response:
        """Execute a command in a console.

        The context managers recorded for *frame* are entered for the
        duration of the evaluation and exited afterwards.
        """
        contexts = self.frame_contexts.get(id(frame), [])

        with ExitStack() as exit_stack:
            for cm in contexts:
                exit_stack.enter_context(cm)

            return Response(frame.eval(command), mimetype="text/html")

    def display_console(self, request: Request) -> Response:
        """Display a standalone shell."""
        # Create the console frame once; it lives under key 0.
        if 0 not in self.frames:
            if self.console_init_func is None:
                ns = {}
            else:
                ns = dict(self.console_init_func())
            ns.setdefault("app", self.app)
            self.frames[0] = _ConsoleFrame(ns)
        is_trusted = bool(self.check_pin_trust(request.environ))
        return Response(
            render_console_html(secret=self.secret, evalex_trusted=is_trusted),
            mimetype="text/html",
        )

    def get_resource(self, request: Request, filename: str) -> Response:
        """Return a static resource from the shared folder."""
        # ``basename`` drops any directory part of the requested name.
        path = join("shared", basename(filename))

        try:
            data = pkgutil.get_data(__package__, path)
        except OSError:
            return NotFound()  # type: ignore[return-value]
        else:
            if data is None:
                return NotFound()  # type: ignore[return-value]

            etag = str(adler32(data) & 0xFFFFFFFF)
            return send_file(
                BytesIO(data), request.environ, download_name=filename, etag=etag
            )

    def check_pin_trust(self, environ: "WSGIEnvironment") -> t.Optional[bool]:
        """Checks if the request passed the pin test.  This returns `True` if the
        request is trusted on a pin/cookie basis and returns `False` if not.
        Additionally if the cookie's stored pin hash is wrong it will return
        `None` so that appropriate action can be taken.
        """
        if self.pin is None:
            return True
        val = parse_cookie(environ).get(self.pin_cookie_name)
        # The cookie value has the form "<timestamp>|<pin hash>".
        if not val or "|" not in val:
            return False
        ts_str, pin_hash = val.split("|", 1)

        try:
            ts = int(ts_str)
        except ValueError:
            return False

        if pin_hash != hash_pin(self.pin):
            return None
        # Trusted only while the cookie is younger than PIN_TIME seconds.
        return (time.time() - PIN_TIME) < ts

    def _fail_pin_auth(self) -> None:
        """Record a failed attempt, sleeping longer after more than 5."""
        time.sleep(5.0 if self._failed_pin_auth > 5 else 0.5)
        self._failed_pin_auth += 1

    def pin_auth(self, request: Request) -> Response:
        """Authenticates with the pin.

        Returns a JSON response with the ``auth`` and ``exhausted`` flags
        and sets or deletes the pin cookie accordingly.
        """
        exhausted = False
        auth = False
        trust = self.check_pin_trust(request.environ)
        pin = t.cast(str, self.pin)

        # If the trust return value is `None` it means that the cookie is
        # set but the stored pin hash value is bad.  This means that the
        # pin was changed.  In this case we count a bad auth and unset the
        # cookie.  This way it becomes harder to guess the cookie name
        # instead of the pin as we still count up failures.
        bad_cookie = False
        if trust is None:
            self._fail_pin_auth()
            bad_cookie = True

        # If we're trusted, we're authenticated.
        elif trust:
            auth = True

        # If we failed too many times, then we're locked out.
        elif self._failed_pin_auth > 10:
            exhausted = True

        # Otherwise go through pin based authentication
        else:
            entered_pin = request.args["pin"]

            # Dashes are only separators; compare digits only.
            if entered_pin.strip().replace("-", "") == pin.replace("-", ""):
                self._failed_pin_auth = 0
                auth = True
            else:
                self._fail_pin_auth()

        rv = Response(
            json.dumps({"auth": auth, "exhausted": exhausted}),
            mimetype="application/json",
        )
        if auth:
            rv.set_cookie(
                self.pin_cookie_name,
                f"{int(time.time())}|{hash_pin(pin)}",
                httponly=True,
                samesite="Strict",
                secure=request.is_secure,
            )
        elif bad_cookie:
            rv.delete_cookie(self.pin_cookie_name)
        return rv

    def log_pin_request(self) -> Response:
        """Log the pin if needed."""
        if self.pin_logging and self.pin is not None:
            _log(
                "info", " * To enable the debugger you need to enter the security pin:"
            )
            _log("info", " * Debugger pin code: %s", self.pin)
        return Response("")

    def __call__(
        self, environ: "WSGIEnvironment", start_response: "StartResponse"
    ) -> t.Iterable[bytes]:
        """Dispatch the requests."""
        # important: don't ever access a function here that reads the incoming
        # form data!  Otherwise the application won't have access to that data
        # any more!
        request = Request(environ)
        # Default: run the wrapped application under the debugger.
        response = self.debug_application
        if request.args.get("__debugger__") == "yes":
            cmd = request.args.get("cmd")
            arg = request.args.get("f")
            secret = request.args.get("s")
            frame = self.frames.get(request.args.get("frm", type=int))  # type: ignore
            if cmd == "resource" and arg:
                response = self.get_resource(request, arg)  # type: ignore
            elif cmd == "pinauth" and secret == self.secret:
                response = self.pin_auth(request)  # type: ignore
            elif cmd == "printpin" and secret == self.secret:
                response = self.log_pin_request()  # type: ignore
            elif (
                self.evalex
                and cmd is not None
                and frame is not None
                and self.secret == secret
                and self.check_pin_trust(environ)
            ):
                # Code execution needs evalex, a known frame, the secret
                # and a trusted pin cookie.
                response = self.execute_command(request, cmd, frame)  # type: ignore
        elif (
            self.evalex
            and self.console_path is not None
            and request.path == self.console_path
        ):
            response = self.display_console(request)  # type: ignore
        return response(environ, start_response)
class HTMLStringO:
    """A StringO version that HTML escapes on write."""

    def __init__(self) -> None:
        self._buffer: t.List[str] = []

    def isatty(self) -> bool:
        return False

    def close(self) -> None:
        pass

    def flush(self) -> None:
        pass

    def seek(self, n: int, mode: int = 0) -> None:
        pass

    def readline(self) -> str:
        """Pop and return the oldest buffered chunk, or ``""`` if empty."""
        if not self._buffer:
            return ""
        return self._buffer.pop(0)

    def reset(self) -> str:
        """Return everything written so far and clear the buffer."""
        val = "".join(self._buffer)
        del self._buffer[:]
        return val

    def _write(self, x: str) -> None:
        """Append *x* without escaping; bytes are decoded as UTF-8."""
        if isinstance(x, bytes):
            x = x.decode("utf-8", "replace")
        self._buffer.append(x)

    def write(self, x: str) -> None:
        self._write(escape(x))

    def writelines(self, x: t.Iterable[str]) -> None:
        self._write(escape("".join(x)))


class ThreadedStream:
    """Thread-local wrapper for sys.stdout for the interactive console."""

    @staticmethod
    def push() -> None:
        """Install the wrapper as ``sys.stdout`` (once) and start a fresh
        :class:`HTMLStringO` for the current context.
        """
        if not isinstance(sys.stdout, ThreadedStream):
            sys.stdout = t.cast(t.TextIO, ThreadedStream())

        _stream.set(HTMLStringO())

    @staticmethod
    def fetch() -> str:
        """Return and clear the captured output, ``""`` if nothing was pushed."""
        try:
            stream = _stream.get()
        except LookupError:
            return ""

        return stream.reset()

    @staticmethod
    def displayhook(obj: object) -> None:
        """``sys.displayhook`` replacement that renders results as debug repr."""
        try:
            stream = _stream.get()
        except LookupError:
            # Not inside a console evaluation: defer to the original hook.
            return _displayhook(obj)  # type: ignore

        # stream._write bypasses escaping as debug_repr is
        # already generating HTML for us.
        if obj is not None:
            _ipy.get().locals["_"] = obj
            stream._write(debug_repr(obj))

    def __setattr__(self, name: str, value: t.Any) -> None:
        raise AttributeError(f"read only attribute {name}")

    def __dir__(self) -> t.List[str]:
        return dir(sys.__stdout__)

    def __getattribute__(self, name: str) -> t.Any:
        # Every attribute access is forwarded to the active capture stream,
        # or to the real stdout when no capture is active.
        try:
            stream = _stream.get()
        except LookupError:
            stream = sys.__stdout__  # type: ignore[assignment]

        return getattr(stream, name)

    def __repr__(self) -> str:
        return repr(sys.__stdout__)


# add the threaded stream as display hook
_displayhook = sys.displayhook
sys.displayhook = ThreadedStream.displayhook


class _ConsoleLoader:
    """Maps code objects compiled in the console back to their source text."""

    def __init__(self) -> None:
        self._storage: t.Dict[int, str] = {}

    def register(self, code: CodeType, source: str) -> None:
        """Remember *source* for *code* and every code object nested in it.

        Nested code objects (functions, lambdas, class bodies, at any depth)
        are reachable through ``co_consts``, so they are walked as well.
        """
        pending = [code]

        while pending:
            current = pending.pop()
            self._storage[id(current)] = source
            pending.extend(
                const for const in current.co_consts if isinstance(const, CodeType)
            )

    def get_source_by_code(self, code: CodeType) -> t.Optional[str]:
        """Return the registered source for *code*, or ``None``."""
        return self._storage.get(id(code))


class _InteractiveConsole(code.InteractiveInterpreter):
    """Interpreter that captures output as HTML and keeps a line buffer for
    multi-line input.
    """

    locals: t.Dict[str, t.Any]

    def __init__(self, globals: t.Dict[str, t.Any], locals: t.Dict[str, t.Any]) -> None:
        self.loader = _ConsoleLoader()
        # A single namespace: globals, overridden by locals, plus helpers.
        locals = {
            **globals,
            **locals,
            "dump": dump,
            "help": helper,
            "__loader__": self.loader,
        }
        super().__init__(locals)
        original_compile = self.compile

        def compile(source: str, filename: str, symbol: str) -> t.Optional[CodeType]:
            code = original_compile(source, filename, symbol)

            # ``None`` means the statement is still incomplete.
            if code is not None:
                self.loader.register(code, source)

            return code

        self.compile = compile  # type: ignore[assignment]
        self.more = False
        self.buffer: t.List[str] = []

    def runsource(self, source: str, **kwargs: t.Any) -> str:  # type: ignore
        """Run *source* (joined with any buffered lines) and return the
        prompt, the escaped input and the captured output as one string.
        """
        source = f"{source.rstrip()}\n"
        ThreadedStream.push()
        prompt = "... " if self.more else ">>> "
        try:
            source_to_eval = "".join(self.buffer + [source])
            # NOTE(review): the filename literal is empty in this copy of
            # the source; it may have lost content - confirm.
            if super().runsource(source_to_eval, "", "single"):
                self.more = True
                self.buffer.append(source)
            else:
                self.more = False
                del self.buffer[:]
        finally:
            output = ThreadedStream.fetch()
        return f"{prompt}{escape(source)}{output}"

    def runcode(self, code: CodeType) -> None:
        try:
            exec(code, self.locals)
        except Exception:
            self.showtraceback()

    def showtraceback(self) -> None:
        from .tbtools import DebugTraceback

        exc = t.cast(BaseException, sys.exc_info()[1])
        te = DebugTraceback(exc, skip=1)
        sys.stdout._write(te.render_traceback_html())  # type: ignore

    def showsyntaxerror(
        self, filename: t.Optional[str] = None, **kwargs: t.Any
    ) -> None:
        # ``**kwargs`` is accepted and ignored: newer Python versions call
        # this hook with extra keyword arguments (e.g. ``source=``), which
        # would otherwise raise TypeError on every syntax error.
        from .tbtools import DebugTraceback

        exc = t.cast(BaseException, sys.exc_info()[1])
        te = DebugTraceback(exc, skip=4)
        sys.stdout._write(te.render_traceback_html())  # type: ignore

    def write(self, data: str) -> None:
        sys.stdout.write(data)


class Console:
    """An interactive console."""

    def __init__(
        self,
        globals: t.Optional[t.Dict[str, t.Any]] = None,
        locals: t.Optional[t.Dict[str, t.Any]] = None,
    ) -> None:
        if locals is None:
            locals = {}
        if globals is None:
            globals = {}
        self._ipy = _InteractiveConsole(globals, locals)

    def eval(self, code: str) -> str:
        """Evaluate *code* and return the HTML output.

        ``sys.stdout`` is restored afterwards, whatever the evaluation did.
        """
        _ipy.set(self._ipy)
        old_sys_stdout = sys.stdout
        try:
            return self._ipy.runsource(code)
        finally:
            sys.stdout = old_sys_stdout
+""" +import codecs +import re +import sys +import typing as t +from collections import deque +from traceback import format_exception_only + +from markupsafe import escape + +missing = object() +_paragraph_re = re.compile(r"(?:\r\n|\r|\n){2,}") +RegexType = type(_paragraph_re) + +HELP_HTML = """\ +
+

%(title)s

+
%(text)s
+
\ +""" +OBJECT_DUMP_HTML = """\ +
+

%(title)s

+ %(repr)s + %(items)s
+
def _add_subclass_info(
    inner: str, obj: object, base: t.Union[t.Type, t.Tuple[t.Type, ...]]
) -> str:
    """Wrap *inner* as ``Type(inner)`` when *obj* is not exactly *base*.

    :param inner: the already rendered repr of the object's contents.
    :param obj: the object being represented.
    :param base: the plain type (or tuple of types) that *inner* already
        represents on its own.
    :return: *inner* unchanged if ``type(obj)`` is one of *base*, otherwise
        *inner* wrapped in the type name, prefixed with the defining module
        unless that module is the builtins module.
    """
    bases = base if isinstance(base, tuple) else (base,)

    # Exact type match (not isinstance): subclasses must be labelled.
    if any(type(obj) is cls for cls in bases):
        return inner

    module = ""
    # "builtins" is the Python 3 name of the module; the Python 2 names are
    # kept so the filter stays a superset of the previous behaviour.
    if obj.__class__.__module__ not in ("builtins", "__builtin__", "exceptions"):
        # NOTE(review): this literal may have lost surrounding markup in
        # this copy of the source - confirm.
        module = f'{obj.__class__.__module__}.'
    return f"{module}{type(obj).__name__}({inner})"
t.Dict[str, int], t.Dict[t.Union[str, int], int]], + recursive: bool, + limit: int = 5, + ) -> str: + if recursive: + return _add_subclass_info("{...}", d, dict) + buf = ["{"] + have_extended_section = False + for idx, (key, value) in enumerate(d.items()): + if idx: + buf.append(", ") + if idx == limit - 1: + buf.append('') + have_extended_section = True + buf.append( + f'{self.repr(key)}:' + f' {self.repr(value)}' + ) + if have_extended_section: + buf.append("") + buf.append("}") + return _add_subclass_info("".join(buf), d, dict) + + def object_repr( + self, obj: t.Optional[t.Union[t.Type[dict], t.Callable, t.Type[list]]] + ) -> str: + r = repr(obj) + return f'{escape(r)}' + + def dispatch_repr(self, obj: t.Any, recursive: bool) -> str: + if obj is helper: + return f'{helper!r}' + if isinstance(obj, (int, float, complex)): + return f'{obj!r}' + if isinstance(obj, str) or isinstance(obj, bytes): + return self.string_repr(obj) + if isinstance(obj, RegexType): + return self.regex_repr(obj) + if isinstance(obj, list): + return self.list_repr(obj, recursive) + if isinstance(obj, tuple): + return self.tuple_repr(obj, recursive) + if isinstance(obj, set): + return self.set_repr(obj, recursive) + if isinstance(obj, frozenset): + return self.frozenset_repr(obj, recursive) + if isinstance(obj, dict): + return self.dict_repr(obj, recursive) + if isinstance(obj, deque): + return self.deque_repr(obj, recursive) + return self.object_repr(obj) + + def fallback_repr(self) -> str: + try: + info = "".join(format_exception_only(*sys.exc_info()[:2])) + except Exception: + info = "?" 
+ return ( + '' + f"<broken repr ({escape(info.strip())})>" + ) + + def repr(self, obj: object) -> str: + recursive = False + for item in self._stack: + if item is obj: + recursive = True + break + self._stack.append(obj) + try: + try: + return self.dispatch_repr(obj, recursive) + except Exception: + return self.fallback_repr() + finally: + self._stack.pop() + + def dump_object(self, obj: object) -> str: + repr = None + items: t.Optional[t.List[t.Tuple[str, str]]] = None + + if isinstance(obj, dict): + title = "Contents of" + items = [] + for key, value in obj.items(): + if not isinstance(key, str): + items = None + break + items.append((key, self.repr(value))) + if items is None: + items = [] + repr = self.repr(obj) + for key in dir(obj): + try: + items.append((key, self.repr(getattr(obj, key)))) + except Exception: + pass + title = "Details for" + title += f" {object.__repr__(obj)[1:-1]}" + return self.render_object_dump(items, title, repr) + + def dump_locals(self, d: t.Dict[str, t.Any]) -> str: + items = [(key, self.repr(value)) for key, value in d.items()] + return self.render_object_dump(items, "Local variables in frame") + + def render_object_dump( + self, items: t.List[t.Tuple[str, str]], title: str, repr: t.Optional[str] = None + ) -> str: + html_items = [] + for key, value in items: + html_items.append(f"{escape(key)}
{value}
") + if not html_items: + html_items.append("Nothing") + return OBJECT_DUMP_HTML % { + "title": escape(title), + "repr": f"
{repr if repr else ''}
", + "items": "\n".join(html_items), + } diff --git a/src/werkzeug/debug/shared/ICON_LICENSE.md b/src/werkzeug/debug/shared/ICON_LICENSE.md new file mode 100644 index 0000000..3bdbfc7 --- /dev/null +++ b/src/werkzeug/debug/shared/ICON_LICENSE.md @@ -0,0 +1,6 @@ +Silk icon set 1.3 by Mark James + +http://www.famfamfam.com/lab/icons/silk/ + +License: [CC-BY-2.5](https://creativecommons.org/licenses/by/2.5/) +or [CC-BY-3.0](https://creativecommons.org/licenses/by/3.0/) diff --git a/src/werkzeug/debug/shared/console.png b/src/werkzeug/debug/shared/console.png new file mode 100644 index 0000000..c28dd63 Binary files /dev/null and b/src/werkzeug/debug/shared/console.png differ diff --git a/src/werkzeug/debug/shared/debugger.js b/src/werkzeug/debug/shared/debugger.js new file mode 100644 index 0000000..2354f03 --- /dev/null +++ b/src/werkzeug/debug/shared/debugger.js @@ -0,0 +1,359 @@ +docReady(() => { + if (!EVALEX_TRUSTED) { + initPinBox(); + } + // if we are in console mode, show the console. 
/**
 * Toggle the "expanded" class on a frame's source listing when the frame
 * is clicked.
 */
function addToggleFrameTraceback(frames) {
  frames.forEach((frame) => {
    frame.addEventListener("click", () => {
      frame.getElementsByTagName("pre")[0].parentElement.classList.toggle("expanded");
    });
  });
}

/**
 * Replace the plain-text traceback textarea with a <pre> holding the same
 * text. Does nothing when the page has no such textarea: querySelector
 * returns null in that case and dereferencing it would throw.
 */
function wrapPlainTraceback() {
  const plainTraceback = document.querySelector("div.plain textarea");
  if (plainTraceback === null) {
    return;
  }
  const wrapper = document.createElement("pre");
  const textNode = document.createTextNode(plainTraceback.textContent);
  wrapper.appendChild(textNode);
  plainTraceback.replaceWith(wrapper);
}

/**
 * Wire up the PIN form: submit the entered PIN to the server and hide the
 * prompt once the server reports success.
 */
function initPinBox() {
  document.querySelector(".pin-prompt form").addEventListener(
    "submit",
    function (event) {
      event.preventDefault();
      const pin = encodeURIComponent(this.pin.value);
      const encodedSecret = encodeURIComponent(SECRET);
      const btn = this.btn;
      // Disabled while the request is in flight; re-enabled in finally().
      btn.disabled = true;

      fetch(
        `${document.location.pathname}?__debugger__=yes&cmd=pinauth&pin=${pin}&s=${encodedSecret}`
      )
        .then((res) => res.json())
        .then(({auth, exhausted}) => {
          if (auth) {
            EVALEX_TRUSTED = true;
            fadeOut(document.getElementsByClassName("pin-prompt")[0]);
          } else {
            alert(
              `Error: ${
                exhausted
                  ? "too many attempts. Restart server to retry."
                  : "incorrect pin"
              }`
            );
          }
        })
        .catch((err) => {
          alert("Error: Could not verify PIN. Network error?");
          console.error(err);
        })
        .finally(() => (btn.disabled = false));
    },
    false
  );
}

/**
 * If the page is not trusted yet, ask the server to log the PIN and show
 * the PIN prompt.
 */
function promptForPin() {
  if (!EVALEX_TRUSTED) {
    const encodedSecret = encodeURIComponent(SECRET);
    fetch(
      `${document.location.pathname}?__debugger__=yes&cmd=printpin&s=${encodedSecret}`
    );
    const pinPrompt = document.getElementsByClassName("pin-prompt")[0];
    fadeIn(pinPrompt);
    document.querySelector('.pin-prompt input[name="pin"]').focus();
  }
}

/**
 * Helper function for shell initialization.
 *
 * Toggles an existing console node, or builds a new console next to
 * `target` for the frame `frameID`. Returns the console element.
 */
function openShell(consoleNode, target, frameID) {
  promptForPin();
  if (consoleNode) {
    slideToggle(consoleNode);
    return consoleNode;
  }
  let historyPos = 0;
  // The last entry is always the line currently being edited.
  const history = [""];
  const consoleElement = createConsole();
  const output = createConsoleOutput();
  const form = createConsoleInputForm();
  const command = createConsoleInput();

  target.parentNode.appendChild(consoleElement);
  consoleElement.append(output);
  consoleElement.append(form);
  form.append(command);
  command.focus();
  slideToggle(consoleElement);

  form.addEventListener("submit", (e) => {
    handleConsoleSubmit(e, command, frameID).then((consoleOutput) => {
      output.append(consoleOutput);
      command.focus();
      consoleElement.scrollTo(0, consoleElement.scrollHeight);
      // Insert the submitted command before the trailing edit entry.
      const old = history.pop();
      history.push(command.value);
      if (typeof old !== "undefined") {
        history.push(old);
      }
      historyPos = history.length - 1;
      command.value = "";
    });
  });

  command.addEventListener("keydown", (e) => {
    if (e.key === "l" && e.ctrlKey) {
      output.innerText = "--- screen cleared ---";
    } else if (e.key === "ArrowUp" || e.key === "ArrowDown") {
      // Handle up arrow and down arrow.
      if (e.key === "ArrowUp" && historyPos > 0) {
        e.preventDefault();
        historyPos--;
      } else if (e.key === "ArrowDown" && historyPos < history.length - 1) {
        historyPos++;
      }
      command.value = history[historyPos];
    }
    return false;
  });

  return consoleElement;
}

/** Attach `listener` for `event` to every element of `elements`. */
function addEventListenersToElements(elements, event, listener) {
  elements.forEach((el) => el.addEventListener(event, listener));
}

To switch between the interactive traceback and the plaintext " + + 'one, you can click on the "Traceback" headline. From the text ' + + "traceback you can also create a paste of it. " + + (!EVALEX + ? "" + : "For code execution mouse-over the frame you want to debug and " + + "click on the console icon on the right side." + + "

You can execute arbitrary Python code in the stack frames and " + + "there are some extra helpers available for introspection:" + + "

  • dump() shows all variables in the frame" + + "
  • dump(obj) dumps all that's known about the object
"); + elements[i].classList.remove("nojavascript"); + } +} + +function addConsoleIconToFrames(frames) { + for (let i = 0; i < frames.length; i++) { + let consoleNode = null; + const target = frames[i]; + const frameID = frames[i].id.substring(6); + + for (let j = 0; j < target.getElementsByTagName("pre").length; j++) { + const img = createIconForConsole(); + img.addEventListener("click", (e) => { + e.stopPropagation(); + consoleNode = openShell(consoleNode, target, frameID); + return false; + }); + target.getElementsByTagName("pre")[j].append(img); + } + } +} + +function slideToggle(target) { + target.classList.toggle("active"); +} + +/** + * toggle traceback types on click. + */ +function addToggleTraceTypesOnClick(elements) { + for (let i = 0; i < elements.length; i++) { + elements[i].addEventListener("click", () => { + document.querySelector("div.traceback").classList.toggle("hidden"); + document.querySelector("div.plain").classList.toggle("hidden"); + }); + elements[i].style.cursor = "pointer"; + document.querySelector("div.plain").classList.toggle("hidden"); + } +} + +function createConsole() { + const consoleNode = document.createElement("pre"); + consoleNode.classList.add("console"); + consoleNode.classList.add("active"); + return consoleNode; +} + +function createConsoleOutput() { + const output = document.createElement("div"); + output.classList.add("output"); + output.innerHTML = "[console ready]"; + return output; +} + +function createConsoleInputForm() { + const form = document.createElement("form"); + form.innerHTML = ">>> "; + return form; +} + +function createConsoleInput() { + const command = document.createElement("input"); + command.type = "text"; + command.setAttribute("autocomplete", "off"); + command.setAttribute("spellcheck", false); + command.setAttribute("autocapitalize", "off"); + command.setAttribute("autocorrect", "off"); + return command; +} + +function createIconForConsole() { + const img = document.createElement("img"); + 
img.setAttribute("src", "?__debugger__=yes&cmd=resource&f=console.png"); + img.setAttribute("title", "Open an interactive python shell in this frame"); + return img; +} + +function createExpansionButtonForConsole() { + const expansionButton = document.createElement("a"); + expansionButton.setAttribute("href", "#"); + expansionButton.setAttribute("class", "toggle"); + expansionButton.innerHTML = "  "; + return expansionButton; +} + +function createInteractiveConsole() { + const target = document.querySelector("div.console div.inner"); + while (target.firstChild) { + target.removeChild(target.firstChild); + } + openShell(null, target, 0); +} + +function handleConsoleSubmit(e, command, frameID) { + // Prevent page from refreshing. + e.preventDefault(); + + return new Promise((resolve) => { + // Get input command. + const cmd = command.value; + + // Setup GET request. + const urlPath = ""; + const params = { + __debugger__: "yes", + cmd: cmd, + frm: frameID, + s: SECRET, + }; + const paramString = Object.keys(params) + .map((key) => { + return "&" + encodeURIComponent(key) + "=" + encodeURIComponent(params[key]); + }) + .join(""); + + fetch(urlPath + "?" + paramString) + .then((res) => { + return res.text(); + }) + .then((data) => { + const tmp = document.createElement("div"); + tmp.innerHTML = data; + resolve(tmp); + + // Handle expandable span for long list outputs. 
+ // Example to test: list(range(13)) + let wrapperAdded = false; + const wrapperSpan = document.createElement("span"); + const expansionButton = createExpansionButtonForConsole(); + + tmp.querySelectorAll("span.extended").forEach((spanToWrap) => { + const parentDiv = spanToWrap.parentNode; + if (!wrapperAdded) { + parentDiv.insertBefore(wrapperSpan, spanToWrap); + wrapperAdded = true; + } + parentDiv.removeChild(spanToWrap); + wrapperSpan.append(spanToWrap); + spanToWrap.hidden = true; + + expansionButton.addEventListener("click", () => { + spanToWrap.hidden = !spanToWrap.hidden; + expansionButton.classList.toggle("open"); + return false; + }); + }); + + // Add expansion button at end of wrapper. + if (wrapperAdded) { + wrapperSpan.append(expansionButton); + } + }) + .catch((err) => { + console.error(err); + }); + return false; + }); +} + +function fadeOut(element) { + element.style.opacity = 1; + + (function fade() { + element.style.opacity -= 0.1; + if (element.style.opacity < 0) { + element.style.display = "none"; + } else { + requestAnimationFrame(fade); + } + })(); +} + +function fadeIn(element, display) { + element.style.opacity = 0; + element.style.display = display || "block"; + + (function fade() { + let val = parseFloat(element.style.opacity) + 0.1; + if (val <= 1) { + element.style.opacity = val; + requestAnimationFrame(fade); + } + })(); +} + +function docReady(fn) { + if (document.readyState === "complete" || document.readyState === "interactive") { + setTimeout(fn, 1); + } else { + document.addEventListener("DOMContentLoaded", fn); + } +} diff --git a/src/werkzeug/debug/shared/less.png b/src/werkzeug/debug/shared/less.png new file mode 100644 index 0000000..5efefd6 Binary files /dev/null and b/src/werkzeug/debug/shared/less.png differ diff --git a/src/werkzeug/debug/shared/more.png b/src/werkzeug/debug/shared/more.png new file mode 100644 index 0000000..804fa22 Binary files /dev/null and b/src/werkzeug/debug/shared/more.png differ diff --git 
a/src/werkzeug/debug/shared/style.css b/src/werkzeug/debug/shared/style.css new file mode 100644 index 0000000..e9397ca --- /dev/null +++ b/src/werkzeug/debug/shared/style.css @@ -0,0 +1,150 @@ +body, input { font-family: sans-serif; color: #000; text-align: center; + margin: 1em; padding: 0; font-size: 15px; } +h1, h2, h3 { font-weight: normal; } + +input { background-color: #fff; margin: 0; text-align: left; + outline: none !important; } +input[type="submit"] { padding: 3px 6px; } +a { color: #11557C; } +a:hover { color: #177199; } +pre, code, +textarea { font-family: monospace; font-size: 14px; } + +div.debugger { text-align: left; padding: 12px; margin: auto; + background-color: white; } +h1 { font-size: 36px; margin: 0 0 0.3em 0; } +div.detail { cursor: pointer; } +div.detail p { margin: 0 0 8px 13px; font-size: 14px; white-space: pre-wrap; + font-family: monospace; } +div.explanation { margin: 20px 13px; font-size: 15px; color: #555; } +div.footer { font-size: 13px; text-align: right; margin: 30px 0; + color: #86989B; } + +h2 { font-size: 16px; margin: 1.3em 0 0.0 0; padding: 9px; + background-color: #11557C; color: white; } +h2 em, h3 em { font-style: normal; color: #A5D6D9; font-weight: normal; } + +div.traceback, div.plain { border: 1px solid #ddd; margin: 0 0 1em 0; padding: 10px; } +div.plain p { margin: 0; } +div.plain textarea, +div.plain pre { margin: 10px 0 0 0; padding: 4px; + background-color: #E8EFF0; border: 1px solid #D3E7E9; } +div.plain textarea { width: 99%; height: 300px; } +div.traceback h3 { font-size: 1em; margin: 0 0 0.8em 0; } +div.traceback ul { list-style: none; margin: 0; padding: 0 0 0 1em; } +div.traceback h4 { font-size: 13px; font-weight: normal; margin: 0.7em 0 0.1em 0; } +div.traceback pre { margin: 0; padding: 5px 0 3px 15px; + background-color: #E8EFF0; border: 1px solid #D3E7E9; } +div.traceback .library .current { background: white; color: #555; } +div.traceback .expanded .current { background: #E8EFF0; color: black; } 
+div.traceback pre:hover { background-color: #DDECEE; color: black; cursor: pointer; } +div.traceback div.source.expanded pre + pre { border-top: none; } + +div.traceback span.ws { display: none; } +div.traceback pre.before, div.traceback pre.after { display: none; background: white; } +div.traceback div.source.expanded pre.before, +div.traceback div.source.expanded pre.after { + display: block; +} + +div.traceback div.source.expanded span.ws { + display: inline; +} + +div.traceback blockquote { margin: 1em 0 0 0; padding: 0; white-space: pre-line; } +div.traceback img { float: right; padding: 2px; margin: -3px 2px 0 0; display: none; } +div.traceback img:hover { background-color: #ddd; cursor: pointer; + border-color: #BFDDE0; } +div.traceback pre:hover img { display: block; } +div.traceback cite.filename { font-style: normal; color: #3B666B; } + +pre.console { border: 1px solid #ccc; background: white!important; + color: black; padding: 5px!important; + margin: 3px 0 0 0!important; cursor: default!important; + max-height: 400px; overflow: auto; } +pre.console form { color: #555; } +pre.console input { background-color: transparent; color: #555; + width: 90%; font-family: monospace; font-size: 14px; + border: none!important; } + +span.string { color: #30799B; } +span.number { color: #9C1A1C; } +span.help { color: #3A7734; } +span.object { color: #485F6E; } +span.extended { opacity: 0.5; } +span.extended:hover { opacity: 1; } +a.toggle { text-decoration: none; background-repeat: no-repeat; + background-position: center center; + background-image: url(?__debugger__=yes&cmd=resource&f=more.png); } +a.toggle:hover { background-color: #444; } +a.open { background-image: url(?__debugger__=yes&cmd=resource&f=less.png); } + +pre.console div.traceback, +pre.console div.box { margin: 5px 10px; white-space: normal; + border: 1px solid #11557C; padding: 10px; + font-family: sans-serif; } +pre.console div.box h3, +pre.console div.traceback h3 { margin: -10px -10px 10px -10px; 
padding: 5px; + background: #11557C; color: white; } + +pre.console div.traceback pre:hover { cursor: default; background: #E8EFF0; } +pre.console div.traceback pre.syntaxerror { background: inherit; border: none; + margin: 20px -10px -10px -10px; + padding: 10px; border-top: 1px solid #BFDDE0; + background: #E8EFF0; } +pre.console div.noframe-traceback pre.syntaxerror { margin-top: -10px; border: none; } + +pre.console div.box pre.repr { padding: 0; margin: 0; background-color: white; border: none; } +pre.console div.box table { margin-top: 6px; } +pre.console div.box pre { border: none; } +pre.console div.box pre.help { background-color: white; } +pre.console div.box pre.help:hover { cursor: default; } +pre.console table tr { vertical-align: top; } +div.console { border: 1px solid #ccc; padding: 4px; background-color: #fafafa; } + +div.traceback pre, div.console pre { + white-space: pre-wrap; /* css-3 should we be so lucky... */ + white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ + white-space: -pre-wrap; /* Opera 4-6 ?? */ + white-space: -o-pre-wrap; /* Opera 7 ?? 
*/ + word-wrap: break-word; /* Internet Explorer 5.5+ */ + _white-space: pre; /* IE only hack to re-specify in + addition to word-wrap */ +} + + +div.pin-prompt { + position: absolute; + display: none; + top: 0; + bottom: 0; + left: 0; + right: 0; + background: rgba(255, 255, 255, 0.8); +} + +div.pin-prompt .inner { + background: #eee; + padding: 10px 50px; + width: 350px; + margin: 10% auto 0 auto; + border: 1px solid #ccc; + border-radius: 2px; +} + +div.exc-divider { + margin: 0.7em 0 0 -1em; + padding: 0.5em; + background: #11557C; + color: #ddd; + border: 1px solid #ddd; +} + +.console.active { + max-height: 0!important; + display: none; +} + +.hidden { + display: none; +} diff --git a/src/werkzeug/debug/tbtools.py b/src/werkzeug/debug/tbtools.py new file mode 100644 index 0000000..ea90de9 --- /dev/null +++ b/src/werkzeug/debug/tbtools.py @@ -0,0 +1,435 @@ +import itertools +import linecache +import os +import re +import sys +import sysconfig +import traceback +import typing as t + +from markupsafe import escape + +from ..utils import cached_property +from .console import Console + +HEADER = """\ + + + + %(title)s // Werkzeug Debugger + + + + + + +
+""" + +FOOTER = """\ + +
+ +
+
+

Console Locked

+

+ The console is locked and needs to be unlocked by entering the PIN. + You can find the PIN printed out on the standard output of your + shell that runs the server. +

+

PIN: + + +

+
+
+ + +""" + +PAGE_HTML = ( + HEADER + + """\ +

%(exception_type)s

+
+

%(exception)s

+
+

Traceback (most recent call last)

+%(summary)s +
+

+ This is the Copy/Paste friendly version of the traceback. +

+ +
+
+ The debugger caught an exception in your WSGI application. You can now + look at the traceback which led to the error. + If you enable JavaScript you can also use additional features such as code + execution (if the evalex feature is enabled), automatic pasting of the + exceptions and much more. +
+""" + + FOOTER + + """ + +""" +) + +CONSOLE_HTML = ( + HEADER + + """\ +

Interactive Console

+
+In this console you can execute Python expressions in the context of the +application. The initial namespace was created by the debugger automatically. +
+
The Console requires JavaScript.
+""" + + FOOTER +) + +SUMMARY_HTML = """\ +
+ %(title)s +
    %(frames)s
+ %(description)s +
+""" + +FRAME_HTML = """\ +
+

File "%(filename)s", + line %(lineno)s, + in %(function_name)s

+
%(lines)s
+
+""" + + +def _process_traceback( + exc: BaseException, + te: t.Optional[traceback.TracebackException] = None, + *, + skip: int = 0, + hide: bool = True, +) -> traceback.TracebackException: + if te is None: + te = traceback.TracebackException.from_exception(exc, lookup_lines=False) + + # Get the frames the same way StackSummary.extract did, in order + # to match each frame with the FrameSummary to augment. + frame_gen = traceback.walk_tb(exc.__traceback__) + limit = getattr(sys, "tracebacklimit", None) + + if limit is not None: + if limit < 0: + limit = 0 + + frame_gen = itertools.islice(frame_gen, limit) + + if skip: + frame_gen = itertools.islice(frame_gen, skip, None) + del te.stack[:skip] + + new_stack: t.List[DebugFrameSummary] = [] + hidden = False + + # Match each frame with the FrameSummary that was generated. + # Hide frames using Paste's __traceback_hide__ rules. Replace + # all visible FrameSummary with DebugFrameSummary. + for (f, _), fs in zip(frame_gen, te.stack): + if hide: + hide_value = f.f_locals.get("__traceback_hide__", False) + + if hide_value in {"before", "before_and_this"}: + new_stack = [] + hidden = False + + if hide_value == "before_and_this": + continue + elif hide_value in {"reset", "reset_and_this"}: + hidden = False + + if hide_value == "reset_and_this": + continue + elif hide_value in {"after", "after_and_this"}: + hidden = True + + if hide_value == "after_and_this": + continue + elif hide_value or hidden: + continue + + frame_args: t.Dict[str, t.Any] = { + "filename": fs.filename, + "lineno": fs.lineno, + "name": fs.name, + "locals": f.f_locals, + "globals": f.f_globals, + } + + if hasattr(fs, "colno"): + frame_args["colno"] = fs.colno # type: ignore[attr-defined] + frame_args["end_colno"] = fs.end_colno # type: ignore[attr-defined] + + new_stack.append(DebugFrameSummary(**frame_args)) + + # The codeop module is used to compile code from the interactive + # debugger. Hide any codeop frames from the bottom of the traceback. 
+ while new_stack: + module = new_stack[0].global_ns.get("__name__") + + if module is None: + module = new_stack[0].local_ns.get("__name__") + + if module == "codeop": + del new_stack[0] + else: + break + + te.stack[:] = new_stack + + if te.__context__: + context_exc = t.cast(BaseException, exc.__context__) + te.__context__ = _process_traceback(context_exc, te.__context__, hide=hide) + + if te.__cause__: + cause_exc = t.cast(BaseException, exc.__cause__) + te.__cause__ = _process_traceback(cause_exc, te.__cause__, hide=hide) + + return te + + +class DebugTraceback: + __slots__ = ("_te", "_cache_all_tracebacks", "_cache_all_frames") + + def __init__( + self, + exc: BaseException, + te: t.Optional[traceback.TracebackException] = None, + *, + skip: int = 0, + hide: bool = True, + ) -> None: + self._te = _process_traceback(exc, te, skip=skip, hide=hide) + + def __str__(self) -> str: + return f"<{type(self).__name__} {self._te}>" + + @cached_property + def all_tracebacks( + self, + ) -> t.List[t.Tuple[t.Optional[str], traceback.TracebackException]]: + out = [] + current = self._te + + while current is not None: + if current.__cause__ is not None: + chained_msg = ( + "The above exception was the direct cause of the" + " following exception" + ) + chained_exc = current.__cause__ + elif current.__context__ is not None and not current.__suppress_context__: + chained_msg = ( + "During handling of the above exception, another" + " exception occurred" + ) + chained_exc = current.__context__ + else: + chained_msg = None + chained_exc = None + + out.append((chained_msg, current)) + current = chained_exc + + return out + + @cached_property + def all_frames(self) -> t.List["DebugFrameSummary"]: + return [ + f for _, te in self.all_tracebacks for f in te.stack # type: ignore[misc] + ] + + def render_traceback_text(self) -> str: + return "".join(self._te.format()) + + def render_traceback_html(self, include_title: bool = True) -> str: + library_frames = [f.is_library for f in 
self.all_frames] + mark_library = 0 < sum(library_frames) < len(library_frames) + rows = [] + + if not library_frames: + classes = "traceback noframe-traceback" + else: + classes = "traceback" + + for msg, current in reversed(self.all_tracebacks): + row_parts = [] + + if msg is not None: + row_parts.append(f'
  • {msg}:
    ') + + for frame in current.stack: + frame = t.cast(DebugFrameSummary, frame) + info = f' title="{escape(frame.info)}"' if frame.info else "" + row_parts.append(f"{frame.render_html(mark_library)}") + + rows.append("\n".join(row_parts)) + + is_syntax_error = issubclass(self._te.exc_type, SyntaxError) + + if include_title: + if is_syntax_error: + title = "Syntax Error" + else: + title = "Traceback (most recent call last):" + else: + title = "" + + exc_full = escape("".join(self._te.format_exception_only())) + + if is_syntax_error: + description = f"
    {exc_full}
    " + else: + description = f"
    {exc_full}
    " + + return SUMMARY_HTML % { + "classes": classes, + "title": f"

    {title}

    ", + "frames": "\n".join(rows), + "description": description, + } + + def render_debugger_html( + self, evalex: bool, secret: str, evalex_trusted: bool + ) -> str: + exc_lines = list(self._te.format_exception_only()) + plaintext = "".join(self._te.format()) + return PAGE_HTML % { + "evalex": "true" if evalex else "false", + "evalex_trusted": "true" if evalex_trusted else "false", + "console": "false", + "title": exc_lines[0], + "exception": escape("".join(exc_lines)), + "exception_type": escape(self._te.exc_type.__name__), + "summary": self.render_traceback_html(include_title=False), + "plaintext": escape(plaintext), + "plaintext_cs": re.sub("-{2,}", "-", plaintext), + "secret": secret, + } + + +class DebugFrameSummary(traceback.FrameSummary): + """A :class:`traceback.FrameSummary` that can evaluate code in the + frame's namespace. + """ + + __slots__ = ( + "local_ns", + "global_ns", + "_cache_info", + "_cache_is_library", + "_cache_console", + ) + + def __init__( + self, + *, + locals: t.Dict[str, t.Any], + globals: t.Dict[str, t.Any], + **kwargs: t.Any, + ) -> None: + super().__init__(locals=None, **kwargs) + self.local_ns = locals + self.global_ns = globals + + @cached_property + def info(self) -> t.Optional[str]: + return self.local_ns.get("__traceback_info__") + + @cached_property + def is_library(self) -> bool: + return any( + self.filename.startswith((path, os.path.realpath(path))) + for path in sysconfig.get_paths().values() + ) + + @cached_property + def console(self) -> Console: + return Console(self.global_ns, self.local_ns) + + def eval(self, code: str) -> t.Any: + return self.console.eval(code) + + def render_html(self, mark_library: bool) -> str: + context = 5 + lines = linecache.getlines(self.filename) + line_idx = self.lineno - 1 # type: ignore[operator] + start_idx = max(0, line_idx - context) + stop_idx = min(len(lines), line_idx + context + 1) + rendered_lines = [] + + def render_line(line: str, cls: str) -> None: + line = 
line.expandtabs().rstrip() + stripped_line = line.strip() + prefix = len(line) - len(stripped_line) + colno = getattr(self, "colno", 0) + end_colno = getattr(self, "end_colno", 0) + + if cls == "current" and colno and end_colno: + arrow = ( + f'\n{" " * prefix}' + f'{" " * (colno - prefix)}{"^" * (end_colno - colno)}' + ) + else: + arrow = "" + + rendered_lines.append( + f'
    {" " * prefix}'
    +                f"{escape(stripped_line) if stripped_line else ' '}"
    +                f"{arrow if arrow else ''}
    " + ) + + if lines: + for line in lines[start_idx:line_idx]: + render_line(line, "before") + + render_line(lines[line_idx], "current") + + for line in lines[line_idx + 1 : stop_idx]: + render_line(line, "after") + + return FRAME_HTML % { + "id": id(self), + "filename": escape(self.filename), + "lineno": self.lineno, + "function_name": escape(self.name), + "lines": "\n".join(rendered_lines), + "library": "library" if mark_library and self.is_library else "", + } + + +def render_console_html(secret: str, evalex_trusted: bool) -> str: + return CONSOLE_HTML % { + "evalex": "true", + "evalex_trusted": "true" if evalex_trusted else "false", + "console": "true", + "title": "Console", + "secret": secret, + } diff --git a/src/werkzeug/exceptions.py b/src/werkzeug/exceptions.py new file mode 100644 index 0000000..013df72 --- /dev/null +++ b/src/werkzeug/exceptions.py @@ -0,0 +1,884 @@ +"""Implements a number of Python exceptions which can be raised from within +a view to trigger a standard HTTP non-200 response. + +Usage Example +------------- + +.. code-block:: python + + from werkzeug.wrappers.request import Request + from werkzeug.exceptions import HTTPException, NotFound + + def view(request): + raise NotFound() + + @Request.application + def application(request): + try: + return view(request) + except HTTPException as e: + return e + +As you can see from this example those exceptions are callable WSGI +applications. However, they are not Werkzeug response objects. You +can get a response object by calling ``get_response()`` on a HTTP +exception. + +Keep in mind that you may have to pass an environ (WSGI) or scope +(ASGI) to ``get_response()`` because some errors fetch additional +information relating to the request. + +If you want to hook in a different exception page to say, a 404 status +code, you can add a second except for a specific subclass of an error: + +.. 
code-block:: python + + @Request.application + def application(request): + try: + return view(request) + except NotFound as e: + return not_found(request) + except HTTPException as e: + return e + +""" +import typing as t +from datetime import datetime + +from markupsafe import escape +from markupsafe import Markup + +from ._internal import _get_environ + +if t.TYPE_CHECKING: + import typing_extensions as te + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIEnvironment + from .datastructures import WWWAuthenticate + from .sansio.response import Response + from .wrappers.request import Request as WSGIRequest # noqa: F401 + from .wrappers.response import Response as WSGIResponse # noqa: F401 + + +class HTTPException(Exception): + """The base class for all HTTP exceptions. This exception can be called as a WSGI + application to render a default error page or you can catch the subclasses + of it independently and render nicer error messages. + + .. versionchanged:: 2.1 + Removed the ``wrap`` class method. + """ + + code: t.Optional[int] = None + description: t.Optional[str] = None + + def __init__( + self, + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + ) -> None: + super().__init__() + if description is not None: + self.description = description + self.response = response + + @property + def name(self) -> str: + """The status name.""" + from .http import HTTP_STATUS_CODES + + return HTTP_STATUS_CODES.get(self.code, "Unknown Error") # type: ignore + + def get_description( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> str: + """Get the description.""" + if self.description is None: + description = "" + elif not isinstance(self.description, str): + description = str(self.description) + else: + description = self.description + + description = escape(description).replace("\n", Markup("
    ")) + return f"

    {description}

    " + + def get_body( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> str: + """Get the HTML body.""" + return ( + "\n" + "\n" + f"{self.code} {escape(self.name)}\n" + f"

    {escape(self.name)}

    \n" + f"{self.get_description(environ)}\n" + ) + + def get_headers( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> t.List[t.Tuple[str, str]]: + """Get a list of headers.""" + return [("Content-Type", "text/html; charset=utf-8")] + + def get_response( + self, + environ: t.Optional[t.Union["WSGIEnvironment", "WSGIRequest"]] = None, + scope: t.Optional[dict] = None, + ) -> "Response": + """Get a response object. If one was passed to the exception + it's returned directly. + + :param environ: the optional environ for the request. This + can be used to modify the response depending + on how the request looked like. + :return: a :class:`Response` object or a subclass thereof. + """ + from .wrappers.response import Response as WSGIResponse # noqa: F811 + + if self.response is not None: + return self.response + if environ is not None: + environ = _get_environ(environ) + headers = self.get_headers(environ, scope) + return WSGIResponse(self.get_body(environ, scope), self.code, headers) + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + """Call the exception as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + """ + response = t.cast("WSGIResponse", self.get_response(environ)) + return response(environ, start_response) + + def __str__(self) -> str: + code = self.code if self.code is not None else "???" + return f"{code} {self.name}: {self.description}" + + def __repr__(self) -> str: + code = self.code if self.code is not None else "???" + return f"<{type(self).__name__} '{code}: {self.name}'>" + + +class BadRequest(HTTPException): + """*400* `Bad Request` + + Raise if the browser sends something to the application the application + or server cannot handle. 
+ """ + + code = 400 + description = ( + "The browser (or proxy) sent a request that this server could " + "not understand." + ) + + +class BadRequestKeyError(BadRequest, KeyError): + """An exception that is used to signal both a :exc:`KeyError` and a + :exc:`BadRequest`. Used by many of the datastructures. + """ + + _description = BadRequest.description + #: Show the KeyError along with the HTTP error message in the + #: response. This should be disabled in production, but can be + #: useful in a debug mode. + show_exception = False + + def __init__(self, arg: t.Optional[str] = None, *args: t.Any, **kwargs: t.Any): + super().__init__(*args, **kwargs) + + if arg is None: + KeyError.__init__(self) + else: + KeyError.__init__(self, arg) + + @property # type: ignore + def description(self) -> str: # type: ignore + if self.show_exception: + return ( + f"{self._description}\n" + f"{KeyError.__name__}: {KeyError.__str__(self)}" + ) + + return self._description + + @description.setter + def description(self, value: str) -> None: + self._description = value + + +class ClientDisconnected(BadRequest): + """Internal exception that is raised if Werkzeug detects a disconnected + client. Since the client is already gone at that point attempting to + send the error message to the client might not work and might ultimately + result in another exception in the server. Mainly this is here so that + it is silenced by default as far as Werkzeug is concerned. + + Since disconnections cannot be reliably detected and are unspecified + by WSGI to a large extent this might or might not be raised if a client + is gone. + + .. versionadded:: 0.8 + """ + + +class SecurityError(BadRequest): + """Raised if something triggers a security error. This is otherwise + exactly like a bad request error. + + .. versionadded:: 0.9 + """ + + +class BadHost(BadRequest): + """Raised if the submitted host is badly formatted. + + .. 
versionadded:: 0.11.2 + """ + + +class Unauthorized(HTTPException): + """*401* ``Unauthorized`` + + Raise if the user is not authorized to access a resource. + + The ``www_authenticate`` argument should be used to set the + ``WWW-Authenticate`` header. This is used for HTTP basic auth and + other schemes. Use :class:`~werkzeug.datastructures.WWWAuthenticate` + to create correctly formatted values. Strictly speaking a 401 + response is invalid if it doesn't provide at least one value for + this header, although real clients typically don't care. + + :param description: Override the default message used for the body + of the response. + :param www-authenticate: A single value, or list of values, for the + WWW-Authenticate header(s). + + .. versionchanged:: 2.0 + Serialize multiple ``www_authenticate`` items into multiple + ``WWW-Authenticate`` headers, rather than joining them + into a single value, for better interoperability. + + .. versionchanged:: 0.15.3 + If the ``www_authenticate`` argument is not set, the + ``WWW-Authenticate`` header is not set. + + .. versionchanged:: 0.15.3 + The ``response`` argument was restored. + + .. versionchanged:: 0.15.1 + ``description`` was moved back as the first argument, restoring + its previous position. + + .. versionchanged:: 0.15.0 + ``www_authenticate`` was added as the first argument, ahead of + ``description``. + """ + + code = 401 + description = ( + "The server could not verify that you are authorized to access" + " the URL requested. You either supplied the wrong credentials" + " (e.g. a bad password), or your browser doesn't understand" + " how to supply the credentials required." 
+ ) + + def __init__( + self, + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + www_authenticate: t.Optional[ + t.Union["WWWAuthenticate", t.Iterable["WWWAuthenticate"]] + ] = None, + ) -> None: + super().__init__(description, response) + + from .datastructures import WWWAuthenticate + + if isinstance(www_authenticate, WWWAuthenticate): + www_authenticate = (www_authenticate,) + + self.www_authenticate = www_authenticate + + def get_headers( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> t.List[t.Tuple[str, str]]: + headers = super().get_headers(environ, scope) + if self.www_authenticate: + headers.extend(("WWW-Authenticate", str(x)) for x in self.www_authenticate) + return headers + + +class Forbidden(HTTPException): + """*403* `Forbidden` + + Raise if the user doesn't have the permission for the requested resource + but was authenticated. + """ + + code = 403 + description = ( + "You don't have the permission to access the requested" + " resource. It is either read-protected or not readable by the" + " server." + ) + + +class NotFound(HTTPException): + """*404* `Not Found` + + Raise if a resource does not exist and never existed. + """ + + code = 404 + description = ( + "The requested URL was not found on the server. If you entered" + " the URL manually please check your spelling and try again." + ) + + +class MethodNotAllowed(HTTPException): + """*405* `Method Not Allowed` + + Raise if the server used a method the resource does not handle. For + example `POST` if the resource is view only. Especially useful for REST. + + The first argument for this exception should be a list of allowed methods. + Strictly speaking the response would be invalid if you don't provide valid + methods in the header which you can do with that list. + """ + + code = 405 + description = "The method is not allowed for the requested URL." 
+ + def __init__( + self, + valid_methods: t.Optional[t.Iterable[str]] = None, + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + ) -> None: + """Takes an optional list of valid http methods + starting with werkzeug 0.3 the list will be mandatory.""" + super().__init__(description=description, response=response) + self.valid_methods = valid_methods + + def get_headers( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> t.List[t.Tuple[str, str]]: + headers = super().get_headers(environ, scope) + if self.valid_methods: + headers.append(("Allow", ", ".join(self.valid_methods))) + return headers + + +class NotAcceptable(HTTPException): + """*406* `Not Acceptable` + + Raise if the server can't return any content conforming to the + `Accept` headers of the client. + """ + + code = 406 + description = ( + "The resource identified by the request is only capable of" + " generating response entities which have content" + " characteristics not acceptable according to the accept" + " headers sent in the request." + ) + + +class RequestTimeout(HTTPException): + """*408* `Request Timeout` + + Raise to signalize a timeout. + """ + + code = 408 + description = ( + "The server closed the network connection because the browser" + " didn't finish the request within the specified time." + ) + + +class Conflict(HTTPException): + """*409* `Conflict` + + Raise to signal that a request cannot be completed because it conflicts + with the current state on the server. + + .. versionadded:: 0.7 + """ + + code = 409 + description = ( + "A conflict happened while processing the request. The" + " resource might have been modified while the request was being" + " processed." + ) + + +class Gone(HTTPException): + """*410* `Gone` + + Raise if a resource existed previously and went away without new location. 
+ """ + + code = 410 + description = ( + "The requested URL is no longer available on this server and" + " there is no forwarding address. If you followed a link from a" + " foreign page, please contact the author of this page." + ) + + +class LengthRequired(HTTPException): + """*411* `Length Required` + + Raise if the browser submitted data but no ``Content-Length`` header which + is required for the kind of processing the server does. + """ + + code = 411 + description = ( + "A request with this method requires a valid Content-" + "Length header." + ) + + +class PreconditionFailed(HTTPException): + """*412* `Precondition Failed` + + Status code used in combination with ``If-Match``, ``If-None-Match``, or + ``If-Unmodified-Since``. + """ + + code = 412 + description = ( + "The precondition on the request for the URL failed positive evaluation." + ) + + +class RequestEntityTooLarge(HTTPException): + """*413* `Request Entity Too Large` + + The status code one should return if the data submitted exceeded a given + limit. + """ + + code = 413 + description = "The data value transmitted exceeds the capacity limit." + + +class RequestURITooLarge(HTTPException): + """*414* `Request URI Too Large` + + Like *413* but for too long URLs. + """ + + code = 414 + description = ( + "The length of the requested URL exceeds the capacity limit for" + " this server. The request cannot be processed." + ) + + +class UnsupportedMediaType(HTTPException): + """*415* `Unsupported Media Type` + + The status code returned if the server is unable to handle the media type + the client transmitted. + """ + + code = 415 + description = ( + "The server does not support the media type transmitted in the request." + ) + + +class RequestedRangeNotSatisfiable(HTTPException): + """*416* `Requested Range Not Satisfiable` + + The client asked for an invalid part of the file. + + .. versionadded:: 0.7 + """ + + code = 416 + description = "The server cannot provide the requested range." 
+ + def __init__( + self, + length: t.Optional[int] = None, + units: str = "bytes", + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + ) -> None: + """Takes an optional `Content-Range` header value based on ``length`` + parameter. + """ + super().__init__(description=description, response=response) + self.length = length + self.units = units + + def get_headers( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> t.List[t.Tuple[str, str]]: + headers = super().get_headers(environ, scope) + if self.length is not None: + headers.append(("Content-Range", f"{self.units} */{self.length}")) + return headers + + +class ExpectationFailed(HTTPException): + """*417* `Expectation Failed` + + The server cannot meet the requirements of the Expect request-header. + + .. versionadded:: 0.7 + """ + + code = 417 + description = "The server could not meet the requirements of the Expect header" + + +class ImATeapot(HTTPException): + """*418* `I'm a teapot` + + The server should return this if it is a teapot and someone attempted + to brew coffee with it. + + .. versionadded:: 0.7 + """ + + code = 418 + description = "This server is a teapot, not a coffee machine" + + +class UnprocessableEntity(HTTPException): + """*422* `Unprocessable Entity` + + Used if the request is well formed, but the instructions are otherwise + incorrect. + """ + + code = 422 + description = ( + "The request was well-formed but was unable to be followed due" + " to semantic errors." + ) + + +class Locked(HTTPException): + """*423* `Locked` + + Used if the resource that is being accessed is locked. + """ + + code = 423 + description = "The resource that is being accessed is locked." + + +class FailedDependency(HTTPException): + """*424* `Failed Dependency` + + Used if the method could not be performed on the resource + because the requested action depended on another action and that action failed. 
+ """ + + code = 424 + description = ( + "The method could not be performed on the resource because the" + " requested action depended on another action and that action" + " failed." + ) + + +class PreconditionRequired(HTTPException): + """*428* `Precondition Required` + + The server requires this request to be conditional, typically to prevent + the lost update problem, which is a race condition between two or more + clients attempting to update a resource through PUT or DELETE. By requiring + each client to include a conditional header ("If-Match" or "If-Unmodified- + Since") with the proper value retained from a recent GET request, the + server ensures that each client has at least seen the previous revision of + the resource. + """ + + code = 428 + description = ( + "This request is required to be conditional; try using" + ' "If-Match" or "If-Unmodified-Since".' + ) + + +class _RetryAfter(HTTPException): + """Adds an optional ``retry_after`` parameter which will set the + ``Retry-After`` header. May be an :class:`int` number of seconds or + a :class:`~datetime.datetime`. + """ + + def __init__( + self, + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + retry_after: t.Optional[t.Union[datetime, int]] = None, + ) -> None: + super().__init__(description, response) + self.retry_after = retry_after + + def get_headers( + self, + environ: t.Optional["WSGIEnvironment"] = None, + scope: t.Optional[dict] = None, + ) -> t.List[t.Tuple[str, str]]: + headers = super().get_headers(environ, scope) + + if self.retry_after: + if isinstance(self.retry_after, datetime): + from .http import http_date + + value = http_date(self.retry_after) + else: + value = str(self.retry_after) + + headers.append(("Retry-After", value)) + + return headers + + +class TooManyRequests(_RetryAfter): + """*429* `Too Many Requests` + + The server is limiting the rate at which this user receives + responses, and this request exceeds that rate. 
(The server may use + any convenient method to identify users and their request rates). + The server may include a "Retry-After" header to indicate how long + the user should wait before retrying. + + :param retry_after: If given, set the ``Retry-After`` header to this + value. May be an :class:`int` number of seconds or a + :class:`~datetime.datetime`. + + .. versionchanged:: 1.0 + Added ``retry_after`` parameter. + """ + + code = 429 + description = "This user has exceeded an allotted request count. Try again later." + + +class RequestHeaderFieldsTooLarge(HTTPException): + """*431* `Request Header Fields Too Large` + + The server refuses to process the request because the header fields are too + large. One or more individual fields may be too large, or the set of all + headers is too large. + """ + + code = 431 + description = "One or more header fields exceeds the maximum size." + + +class UnavailableForLegalReasons(HTTPException): + """*451* `Unavailable For Legal Reasons` + + This status code indicates that the server is denying access to the + resource as a consequence of a legal demand. + """ + + code = 451 + description = "Unavailable for legal reasons." + + +class InternalServerError(HTTPException): + """*500* `Internal Server Error` + + Raise if an internal server error occurred. This is a good fallback if an + unknown error occurred in the dispatcher. + + .. versionchanged:: 1.0.0 + Added the :attr:`original_exception` attribute. + """ + + code = 500 + description = ( + "The server encountered an internal error and was unable to" + " complete your request. Either the server is overloaded or" + " there is an error in the application." + ) + + def __init__( + self, + description: t.Optional[str] = None, + response: t.Optional["Response"] = None, + original_exception: t.Optional[BaseException] = None, + ) -> None: + #: The original exception that caused this 500 error. Can be + #: used by frameworks to provide context when handling + #: unexpected errors. 
+ self.original_exception = original_exception + super().__init__(description=description, response=response) + + +class NotImplemented(HTTPException): + """*501* `Not Implemented` + + Raise if the application does not support the action requested by the + browser. + """ + + code = 501 + description = "The server does not support the action requested by the browser." + + +class BadGateway(HTTPException): + """*502* `Bad Gateway` + + If you do proxying in your application you should return this status code + if you received an invalid response from the upstream server it accessed + in attempting to fulfill the request. + """ + + code = 502 + description = ( + "The proxy server received an invalid response from an upstream server." + ) + + +class ServiceUnavailable(_RetryAfter): + """*503* `Service Unavailable` + + Status code you should return if a service is temporarily + unavailable. + + :param retry_after: If given, set the ``Retry-After`` header to this + value. May be an :class:`int` number of seconds or a + :class:`~datetime.datetime`. + + .. versionchanged:: 1.0 + Added ``retry_after`` parameter. + """ + + code = 503 + description = ( + "The server is temporarily unable to service your request due" + " to maintenance downtime or capacity problems. Please try" + " again later." + ) + + +class GatewayTimeout(HTTPException): + """*504* `Gateway Timeout` + + Status code you should return if a connection to an upstream server + times out. + """ + + code = 504 + description = "The connection to an upstream server timed out." + + +class HTTPVersionNotSupported(HTTPException): + """*505* `HTTP Version Not Supported` + + The server does not support the HTTP protocol version used in the request. + """ + + code = 505 + description = ( + "The server does not support the HTTP protocol version used in the request." 
+ ) + + +default_exceptions: t.Dict[int, t.Type[HTTPException]] = {} + + +def _find_exceptions() -> None: + for obj in globals().values(): + try: + is_http_exception = issubclass(obj, HTTPException) + except TypeError: + is_http_exception = False + if not is_http_exception or obj.code is None: + continue + old_obj = default_exceptions.get(obj.code, None) + if old_obj is not None and issubclass(obj, old_obj): + continue + default_exceptions[obj.code] = obj + + +_find_exceptions() +del _find_exceptions + + +class Aborter: + """When passed a dict of code -> exception items it can be used as + callable that raises exceptions. If the first argument to the + callable is an integer it will be looked up in the mapping, if it's + a WSGI application it will be raised in a proxy exception. + + The rest of the arguments are forwarded to the exception constructor. + """ + + def __init__( + self, + mapping: t.Optional[t.Dict[int, t.Type[HTTPException]]] = None, + extra: t.Optional[t.Dict[int, t.Type[HTTPException]]] = None, + ) -> None: + if mapping is None: + mapping = default_exceptions + self.mapping = dict(mapping) + if extra is not None: + self.mapping.update(extra) + + def __call__( + self, code: t.Union[int, "Response"], *args: t.Any, **kwargs: t.Any + ) -> "te.NoReturn": + from .sansio.response import Response + + if isinstance(code, Response): + raise HTTPException(response=code) + + if code not in self.mapping: + raise LookupError(f"no exception for {code!r}") + + raise self.mapping[code](*args, **kwargs) + + +def abort( + status: t.Union[int, "Response"], *args: t.Any, **kwargs: t.Any +) -> "te.NoReturn": + """Raises an :py:exc:`HTTPException` for the given status code or WSGI + application. + + If a status code is given, it will be looked up in the list of + exceptions and will raise that exception. 
If passed a WSGI application, + it will wrap it in a proxy WSGI exception and raise that:: + + abort(404) # 404 Not Found + abort(Response('Hello World')) + + """ + _aborter(status, *args, **kwargs) + + +_aborter: Aborter = Aborter() diff --git a/src/werkzeug/formparser.py b/src/werkzeug/formparser.py new file mode 100644 index 0000000..10d58ca --- /dev/null +++ b/src/werkzeug/formparser.py @@ -0,0 +1,455 @@ +import typing as t +from functools import update_wrapper +from io import BytesIO +from itertools import chain +from typing import Union + +from . import exceptions +from .datastructures import FileStorage +from .datastructures import Headers +from .datastructures import MultiDict +from .http import parse_options_header +from .sansio.multipart import Data +from .sansio.multipart import Epilogue +from .sansio.multipart import Field +from .sansio.multipart import File +from .sansio.multipart import MultipartDecoder +from .sansio.multipart import NeedData +from .urls import url_decode_stream +from .wsgi import _make_chunk_iter +from .wsgi import get_content_length +from .wsgi import get_input_stream + +# there are some platforms where SpooledTemporaryFile is not available. +# In that case we need to provide a fallback. +try: + from tempfile import SpooledTemporaryFile +except ImportError: + from tempfile import TemporaryFile + + SpooledTemporaryFile = None # type: ignore + +if t.TYPE_CHECKING: + import typing as te + from _typeshed.wsgi import WSGIEnvironment + + t_parse_result = t.Tuple[t.IO[bytes], MultiDict, MultiDict] + + class TStreamFactory(te.Protocol): + def __call__( + self, + total_content_length: t.Optional[int], + content_type: t.Optional[str], + filename: t.Optional[str], + content_length: t.Optional[int] = None, + ) -> t.IO[bytes]: + ... 
+ + +F = t.TypeVar("F", bound=t.Callable[..., t.Any]) + + +def _exhaust(stream: t.IO[bytes]) -> None: + bts = stream.read(64 * 1024) + while bts: + bts = stream.read(64 * 1024) + + +def default_stream_factory( + total_content_length: t.Optional[int], + content_type: t.Optional[str], + filename: t.Optional[str], + content_length: t.Optional[int] = None, +) -> t.IO[bytes]: + max_size = 1024 * 500 + + if SpooledTemporaryFile is not None: + return t.cast(t.IO[bytes], SpooledTemporaryFile(max_size=max_size, mode="rb+")) + elif total_content_length is None or total_content_length > max_size: + return t.cast(t.IO[bytes], TemporaryFile("rb+")) + + return BytesIO() + + +def parse_form_data( + environ: "WSGIEnvironment", + stream_factory: t.Optional["TStreamFactory"] = None, + charset: str = "utf-8", + errors: str = "replace", + max_form_memory_size: t.Optional[int] = None, + max_content_length: t.Optional[int] = None, + cls: t.Optional[t.Type[MultiDict]] = None, + silent: bool = True, +) -> "t_parse_result": + """Parse the form data in the environ and return it as tuple in the form + ``(stream, form, files)``. You should only call this method if the + transport method is `POST`, `PUT`, or `PATCH`. + + If the mimetype of the data transmitted is `multipart/form-data` the + files multidict will be filled with `FileStorage` objects. If the + mimetype is unknown the input stream is wrapped and returned as first + argument, else the stream is empty. + + This is a shortcut for the common usage of :class:`FormDataParser`. + + Have a look at :doc:`/request_data` for more details. + + .. versionadded:: 0.5 + The `max_form_memory_size`, `max_content_length` and + `cls` parameters were added. + + .. versionadded:: 0.5.1 + The optional `silent` flag was added. + + :param environ: the WSGI environment to be used for parsing. + :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. 
This callable works + the same as :meth:`Response._get_file_stream`. + :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + :return: A tuple in the form ``(stream, form, files)``. + """ + return FormDataParser( + stream_factory, + charset, + errors, + max_form_memory_size, + max_content_length, + cls, + silent, + ).parse_from_environ(environ) + + +def exhaust_stream(f: F) -> F: + """Helper decorator for methods that exhausts the stream on return.""" + + def wrapper(self, stream, *args, **kwargs): # type: ignore + try: + return f(self, stream, *args, **kwargs) + finally: + exhaust = getattr(stream, "exhaust", None) + + if exhaust is not None: + exhaust() + else: + while True: + chunk = stream.read(1024 * 64) + + if not chunk: + break + + return update_wrapper(t.cast(F, wrapper), f) + + +class FormDataParser: + """This class implements parsing of form data for Werkzeug. By itself + it can parse multipart and url encoded form data. It can be subclassed + and extended but for most mimetypes it is a better idea to use the + untouched stream and expose it as separate attributes on a request + object. + + .. versionadded:: 0.8 + + :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. This callable works + the same as :meth:`Response._get_file_stream`. 
+ :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + """ + + def __init__( + self, + stream_factory: t.Optional["TStreamFactory"] = None, + charset: str = "utf-8", + errors: str = "replace", + max_form_memory_size: t.Optional[int] = None, + max_content_length: t.Optional[int] = None, + cls: t.Optional[t.Type[MultiDict]] = None, + silent: bool = True, + ) -> None: + if stream_factory is None: + stream_factory = default_stream_factory + + self.stream_factory = stream_factory + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + self.max_content_length = max_content_length + + if cls is None: + cls = MultiDict + + self.cls = cls + self.silent = silent + + def get_parse_func( + self, mimetype: str, options: t.Dict[str, str] + ) -> t.Optional[ + t.Callable[ + ["FormDataParser", t.IO[bytes], str, t.Optional[int], t.Dict[str, str]], + "t_parse_result", + ] + ]: + return self.parse_functions.get(mimetype) + + def parse_from_environ(self, environ: "WSGIEnvironment") -> "t_parse_result": + """Parses the information from the environment as form data. + + :param environ: the WSGI environment to be used for parsing. + :return: A tuple in the form ``(stream, form, files)``. 
+ """ + content_type = environ.get("CONTENT_TYPE", "") + content_length = get_content_length(environ) + mimetype, options = parse_options_header(content_type) + return self.parse(get_input_stream(environ), mimetype, content_length, options) + + def parse( + self, + stream: t.IO[bytes], + mimetype: str, + content_length: t.Optional[int], + options: t.Optional[t.Dict[str, str]] = None, + ) -> "t_parse_result": + """Parses the information from the given stream, mimetype, + content length and mimetype parameters. + + :param stream: an input stream + :param mimetype: the mimetype of the data + :param content_length: the content length of the incoming data + :param options: optional mimetype parameters (used for + the multipart boundary for instance) + :return: A tuple in the form ``(stream, form, files)``. + """ + if ( + self.max_content_length is not None + and content_length is not None + and content_length > self.max_content_length + ): + # if the input stream is not exhausted, firefox reports Connection Reset + _exhaust(stream) + raise exceptions.RequestEntityTooLarge() + + if options is None: + options = {} + + parse_func = self.get_parse_func(mimetype, options) + + if parse_func is not None: + try: + return parse_func(self, stream, mimetype, content_length, options) + except ValueError: + if not self.silent: + raise + + return stream, self.cls(), self.cls() + + @exhaust_stream + def _parse_multipart( + self, + stream: t.IO[bytes], + mimetype: str, + content_length: t.Optional[int], + options: t.Dict[str, str], + ) -> "t_parse_result": + parser = MultiPartParser( + self.stream_factory, + self.charset, + self.errors, + max_form_memory_size=self.max_form_memory_size, + cls=self.cls, + ) + boundary = options.get("boundary", "").encode("ascii") + + if not boundary: + raise ValueError("Missing boundary") + + form, files = parser.parse(stream, boundary, content_length) + return stream, form, files + + @exhaust_stream + def _parse_urlencoded( + self, + stream: 
t.IO[bytes], + mimetype: str, + content_length: t.Optional[int], + options: t.Dict[str, str], + ) -> "t_parse_result": + if ( + self.max_form_memory_size is not None + and content_length is not None + and content_length > self.max_form_memory_size + ): + # if the input stream is not exhausted, firefox reports Connection Reset + _exhaust(stream) + raise exceptions.RequestEntityTooLarge() + + form = url_decode_stream(stream, self.charset, errors=self.errors, cls=self.cls) + return stream, form, self.cls() + + #: mapping of mimetypes to parsing functions + parse_functions: t.Dict[ + str, + t.Callable[ + ["FormDataParser", t.IO[bytes], str, t.Optional[int], t.Dict[str, str]], + "t_parse_result", + ], + ] = { + "multipart/form-data": _parse_multipart, + "application/x-www-form-urlencoded": _parse_urlencoded, + "application/x-url-encoded": _parse_urlencoded, + } + + +def _line_parse(line: str) -> t.Tuple[str, bool]: + """Removes line ending characters and returns a tuple (`stripped_line`, + `is_terminated`). 
+ """ + if line[-2:] == "\r\n": + return line[:-2], True + + elif line[-1:] in {"\r", "\n"}: + return line[:-1], True + + return line, False + + +class MultiPartParser: + def __init__( + self, + stream_factory: t.Optional["TStreamFactory"] = None, + charset: str = "utf-8", + errors: str = "replace", + max_form_memory_size: t.Optional[int] = None, + cls: t.Optional[t.Type[MultiDict]] = None, + buffer_size: int = 64 * 1024, + ) -> None: + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + + if stream_factory is None: + stream_factory = default_stream_factory + + self.stream_factory = stream_factory + + if cls is None: + cls = MultiDict + + self.cls = cls + + self.buffer_size = buffer_size + + def fail(self, message: str) -> "te.NoReturn": + raise ValueError(message) + + def get_part_charset(self, headers: Headers) -> str: + # Figure out input charset for current part + content_type = headers.get("content-type") + + if content_type: + mimetype, ct_params = parse_options_header(content_type) + return ct_params.get("charset", self.charset) + + return self.charset + + def start_file_streaming( + self, event: File, total_content_length: t.Optional[int] + ) -> t.IO[bytes]: + content_type = event.headers.get("content-type") + + try: + content_length = int(event.headers["content-length"]) + except (KeyError, ValueError): + content_length = 0 + + container = self.stream_factory( + total_content_length=total_content_length, + filename=event.filename, + content_type=content_type, + content_length=content_length, + ) + return container + + def parse( + self, stream: t.IO[bytes], boundary: bytes, content_length: t.Optional[int] + ) -> t.Tuple[MultiDict, MultiDict]: + container: t.Union[t.IO[bytes], t.List[bytes]] + _write: t.Callable[[bytes], t.Any] + + iterator = chain( + _make_chunk_iter( + stream, + limit=content_length, + buffer_size=self.buffer_size, + ), + [None], + ) + + parser = MultipartDecoder(boundary, 
self.max_form_memory_size) + + fields = [] + files = [] + + current_part: Union[Field, File] + for data in iterator: + parser.receive_data(data) + event = parser.next_event() + while not isinstance(event, (Epilogue, NeedData)): + if isinstance(event, Field): + current_part = event + container = [] + _write = container.append + elif isinstance(event, File): + current_part = event + container = self.start_file_streaming(event, content_length) + _write = container.write + elif isinstance(event, Data): + _write(event.data) + if not event.more_data: + if isinstance(current_part, Field): + value = b"".join(container).decode( + self.get_part_charset(current_part.headers), self.errors + ) + fields.append((current_part.name, value)) + else: + container = t.cast(t.IO[bytes], container) + container.seek(0) + files.append( + ( + current_part.name, + FileStorage( + container, + current_part.filename, + current_part.name, + headers=current_part.headers, + ), + ) + ) + + event = parser.next_event() + + return self.cls(fields), self.cls(files) diff --git a/src/werkzeug/http.py b/src/werkzeug/http.py new file mode 100644 index 0000000..9777685 --- /dev/null +++ b/src/werkzeug/http.py @@ -0,0 +1,1311 @@ +import base64 +import email.utils +import re +import typing +import typing as t +import warnings +from datetime import date +from datetime import datetime +from datetime import time +from datetime import timedelta +from datetime import timezone +from enum import Enum +from hashlib import sha1 +from time import mktime +from time import struct_time +from urllib.parse import unquote_to_bytes as _unquote +from urllib.request import parse_http_list as _parse_list_header + +from ._internal import _cookie_quote +from ._internal import _dt_as_utc +from ._internal import _make_cookie_domain +from ._internal import _to_bytes +from ._internal import _to_str +from ._internal import _wsgi_decoding_dance + +if t.TYPE_CHECKING: + from _typeshed.wsgi import WSGIEnvironment + +# for explanation of
"media-range", etc. see Sections 5.3.{1,2} of RFC 7231 +_accept_re = re.compile( + r""" + ( # media-range capturing-parenthesis + [^\s;,]+ # type/subtype + (?:[ \t]*;[ \t]* # ";" + (?: # parameter non-capturing-parenthesis + [^\s;,q][^\s;,]* # token that doesn't start with "q" + | # or + q[^\s;,=][^\s;,]* # token that is more than just "q" + ) + )* # zero or more parameters + ) # end of media-range + (?:[ \t]*;[ \t]*q= # weight is a "q" parameter + (\d*(?:\.\d+)?) # qvalue capturing-parentheses + [^,]* # "extension" accept params: who cares? + )? # accept params are optional + """, + re.VERBOSE, +) +_token_chars = frozenset( + "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~" +) +_etag_re = re.compile(r'([Ww]/)?(?:"(.*?)"|(.*?))(?:\s*,\s*|$)') +_option_header_piece_re = re.compile( + r""" + ;\s*,?\s* # newlines were replaced with commas + (?P<key> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^\s;,=*]+ # token + ) + (?:\*(?P<count>\d+))? # *1, optional continuation index + \s* + (?: # optionally followed by =value + (?: # equals sign, possibly with encoding + \*\s*=\s* # * indicates extended notation + (?: # optional encoding + (?P<encoding>[^\s]+?) + '(?P<language>[^\s]*?)' + )? + | + =\s* # basic notation + ) + (?P<value> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^;,]+ # token + )? + )?
+ \s* + """, + flags=re.VERBOSE, +) +_option_header_start_mime_type = re.compile(r",\s*([^;,\s]+)([;,]\s*.+)?") +_entity_headers = frozenset( + [ + "allow", + "content-encoding", + "content-language", + "content-length", + "content-location", + "content-md5", + "content-range", + "content-type", + "expires", + "last-modified", + ] +) +_hop_by_hop_headers = frozenset( + [ + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailer", + "transfer-encoding", + "upgrade", + ] +) +HTTP_STATUS_CODES = { + 100: "Continue", + 101: "Switching Protocols", + 102: "Processing", + 103: "Early Hints", # see RFC 8297 + 200: "OK", + 201: "Created", + 202: "Accepted", + 203: "Non Authoritative Information", + 204: "No Content", + 205: "Reset Content", + 206: "Partial Content", + 207: "Multi Status", + 208: "Already Reported", # see RFC 5842 + 226: "IM Used", # see RFC 3229 + 300: "Multiple Choices", + 301: "Moved Permanently", + 302: "Found", + 303: "See Other", + 304: "Not Modified", + 305: "Use Proxy", + 306: "Switch Proxy", # unused + 307: "Temporary Redirect", + 308: "Permanent Redirect", + 400: "Bad Request", + 401: "Unauthorized", + 402: "Payment Required", # unused + 403: "Forbidden", + 404: "Not Found", + 405: "Method Not Allowed", + 406: "Not Acceptable", + 407: "Proxy Authentication Required", + 408: "Request Timeout", + 409: "Conflict", + 410: "Gone", + 411: "Length Required", + 412: "Precondition Failed", + 413: "Request Entity Too Large", + 414: "Request URI Too Long", + 415: "Unsupported Media Type", + 416: "Requested Range Not Satisfiable", + 417: "Expectation Failed", + 418: "I'm a teapot", # see RFC 2324 + 421: "Misdirected Request", # see RFC 7540 + 422: "Unprocessable Entity", + 423: "Locked", + 424: "Failed Dependency", + 425: "Too Early", # see RFC 8470 + 426: "Upgrade Required", + 428: "Precondition Required", # see RFC 6585 + 429: "Too Many Requests", + 431: "Request Header Fields Too Large", + 449: "Retry With", # 
proprietary MS extension + 451: "Unavailable For Legal Reasons", + 500: "Internal Server Error", + 501: "Not Implemented", + 502: "Bad Gateway", + 503: "Service Unavailable", + 504: "Gateway Timeout", + 505: "HTTP Version Not Supported", + 506: "Variant Also Negotiates", # see RFC 2295 + 507: "Insufficient Storage", + 508: "Loop Detected", # see RFC 5842 + 510: "Not Extended", + 511: "Network Authentication Failed", +} + + +class COEP(Enum): + """Cross Origin Embedder Policies""" + + UNSAFE_NONE = "unsafe-none" + REQUIRE_CORP = "require-corp" + + +class COOP(Enum): + """Cross Origin Opener Policies""" + + UNSAFE_NONE = "unsafe-none" + SAME_ORIGIN_ALLOW_POPUPS = "same-origin-allow-popups" + SAME_ORIGIN = "same-origin" + + +def quote_header_value( + value: t.Union[str, int], extra_chars: str = "", allow_token: bool = True +) -> str: + """Quote a header value if necessary. + + .. versionadded:: 0.5 + + :param value: the value to quote. + :param extra_chars: a list of extra characters to skip quoting. + :param allow_token: if this is enabled token values are returned + unchanged. + """ + if isinstance(value, bytes): + value = value.decode("latin1") + value = str(value) + if allow_token: + token_chars = _token_chars | set(extra_chars) + if set(value).issubset(token_chars): + return value + value = value.replace("\\", "\\\\").replace('"', '\\"') + return f'"{value}"' + + +def unquote_header_value(value: str, is_filename: bool = False) -> str: + r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). + This does not use the real unquoting but what browsers are actually + using for quoting. + + .. versionadded:: 0.5 + + :param value: the header value to unquote. + :param is_filename: The value represents a filename or path. + """ + if value and value[0] == value[-1] == '"': + # this is not the real unquoting, but fixing this so that the + # RFC is met will result in bugs with internet explorer and + # probably some other browsers as well. 
def dump_options_header(
    header: t.Optional[str], options: t.Mapping[str, t.Optional[t.Union[str, int]]]
) -> str:
    """Serialize a header value and its options into one string.

    This is the inverse of :func:`parse_options_header`.

    :param header: the main header value; omitted from the output when
        it is ``None``.
    :param options: option names mapped to their values. An option whose
        value is ``None`` is written as a bare name, every other value is
        passed through :func:`quote_header_value`.
    :return: the pieces joined with ``"; "``.
    """
    parts = [] if header is None else [header]
    parts.extend(
        name if option is None else f"{name}={quote_header_value(option)}"
        for name, option in options.items()
    )
    return "; ".join(parts)
def dump_csp_header(header: "ds.ContentSecurityPolicy") -> str:
    """Serialize a Content Security Policy mapping into a header value.

    Each ``directive -> value`` pair becomes ``"directive value"`` and the
    pairs are joined with ``"; "``, e.g.
    ``default-src 'self'; script-src 'self'``.

    .. versionadded:: 1.0.0
       Support for Content Security Policy headers was added.

    :param header: a mapping of directive names to their values.
    :return: the serialized policy string.
    """
    policies = []

    for directive, sources in header.items():
        policies.append(f"{directive} {sources}")

    return "; ".join(policies)
def parse_options_header(value: t.Optional[str]) -> t.Tuple[str, t.Dict[str, str]]:
    """Parse a ``Content-Type``-like header into a tuple with the
    value and any options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This is not for ``Cache-Control``-like headers, which use a
    different format. For those, use :func:`parse_dict_header`.

    :param value: The header value to parse.
    :return: A ``(value, options)`` tuple. ``("", {})`` is returned when
        ``value`` is empty or no leading value can be matched.

    .. versionchanged:: 2.2
        Option names are always converted to lowercase.

    .. versionchanged:: 2.1
        The ``multiple`` parameter is deprecated and will be removed in
        Werkzeug 2.2.

    .. versionchanged:: 0.15
        :rfc:`2231` parameter continuations are handled.

    .. versionadded:: 0.5
    """
    if not value:
        return "", {}

    result: t.List[t.Any] = []

    # The start pattern expects a leading comma, so one is prepended here;
    # embedded newlines are turned into commas as well.
    value = "," + value.replace("\n", ",")
    # NOTE: the loop body returns at the end of its first pass, so this
    # ``while`` runs at most once (or breaks out when nothing matches).
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options: t.Dict[str, str] = {}
        # Parse options
        rest = match.group(2)
        encoding: t.Optional[str]
        continued_encoding: t.Optional[str] = None
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            # ``language`` is unpacked but not used below.
            option, count, encoding, language, option_value = optmatch.groups()
            # Continuations don't have to supply the encoding after the
            # first line. If we're in a continuation, track the current
            # encoding to use for subsequent lines. Reset it when the
            # continuation ends.
            if not count:
                continued_encoding = None
            else:
                if not encoding:
                    encoding = continued_encoding
                continued_encoding = encoding
            option = unquote_header_value(option).lower()

            if option_value is not None:
                # The second argument enables the filename-specific
                # unquoting rules only for the "filename" option.
                option_value = unquote_header_value(option_value, option == "filename")

                if encoding is not None:
                    # presumably ``_unquote`` percent-decodes to bytes, which
                    # are then decoded with the declared charset -- confirm
                    # against the import at the top of the file.
                    option_value = _unquote(option_value).decode(encoding)

            if count:
                # Continuations append to the existing value. For
                # simplicity, this ignores the possibility of
                # out-of-order indices, which shouldn't happen anyway.
                if option_value is not None:
                    options[option] = options.get(option, "") + option_value
            else:
                options[option] = option_value  # type: ignore[assignment]

            # Drop the piece that was just consumed and continue with the tail.
            rest = rest[optmatch.end() :]
        result.append(options)
        return tuple(result)  # type: ignore[return-value]

    # Only reached when the start pattern did not match.
    return tuple(result) if result else ("", {})  # type: ignore[return-value]
+ """ + if cls is None: + cls = t.cast(t.Type[_TAnyAccept], ds.Accept) + + if not value: + return cls(None) + + result = [] + for match in _accept_re.finditer(value): + quality_match = match.group(2) + if not quality_match: + quality: float = 1 + else: + quality = max(min(float(quality_match), 1), 0) + result.append((match.group(1), quality)) + return cls(result) + + +_TAnyCC = t.TypeVar("_TAnyCC", bound="ds._CacheControl") +_t_cc_update = t.Optional[t.Callable[[_TAnyCC], None]] + + +@typing.overload +def parse_cache_control_header( + value: t.Optional[str], on_update: _t_cc_update, cls: None = None +) -> "ds.RequestCacheControl": + ... + + +@typing.overload +def parse_cache_control_header( + value: t.Optional[str], on_update: _t_cc_update, cls: t.Type[_TAnyCC] +) -> _TAnyCC: + ... + + +def parse_cache_control_header( + value: t.Optional[str], + on_update: _t_cc_update = None, + cls: t.Optional[t.Type[_TAnyCC]] = None, +) -> _TAnyCC: + """Parse a cache control header. The RFC differs between response and + request cache control, this method does not. It's your responsibility + to not use the wrong control statements. + + .. versionadded:: 0.5 + The `cls` was added. If not specified an immutable + :class:`~werkzeug.datastructures.RequestCacheControl` is returned. + + :param value: a cache control header to be parsed. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.CacheControl` + object is changed. + :param cls: the class for the returned object. By default + :class:`~werkzeug.datastructures.RequestCacheControl` is used. + :return: a `cls` object. 
_TAnyCSP = t.TypeVar("_TAnyCSP", bound="ds.ContentSecurityPolicy")
_t_csp_update = t.Optional[t.Callable[[_TAnyCSP], None]]


@typing.overload
def parse_csp_header(
    value: t.Optional[str], on_update: _t_csp_update, cls: None = None
) -> "ds.ContentSecurityPolicy":
    ...


@typing.overload
def parse_csp_header(
    value: t.Optional[str], on_update: _t_csp_update, cls: t.Type[_TAnyCSP]
) -> _TAnyCSP:
    ...


def parse_csp_header(
    value: t.Optional[str],
    on_update: _t_csp_update = None,
    cls: t.Optional[t.Type[_TAnyCSP]] = None,
) -> _TAnyCSP:
    """Parse a Content Security Policy header.

    The header is split on ``;`` into policies. Each policy is split into a
    directive name and its value at the first run of whitespace (space, tab,
    ...), so ``"script-src\\t'self'"`` is parsed the same way as
    ``"script-src 'self'"``. Policies without a value are ignored.

    .. versionadded:: 1.0.0
       Support for Content Security Policy headers was added.

    :param value: a csp header to be parsed.
    :param on_update: an optional callable that is called every time a value
                      on the object is changed.
    :param cls: the class for the returned object.  By default
                :class:`~werkzeug.datastructures.ContentSecurityPolicy` is used.
    :return: a `cls` object built from the ``(directive, value)`` pairs.
    """
    if cls is None:
        cls = t.cast(t.Type[_TAnyCSP], ds.ContentSecurityPolicy)

    if value is None:
        return cls((), on_update)

    items = []

    for policy in value.split(";"):
        # ``split(None, 1)`` drops leading whitespace and splits at the first
        # whitespace run; only the remainder can still carry trailing space.
        parts = policy.split(None, 1)

        # Ignore badly formatted policies (directive without a value).
        if len(parts) == 2:
            directive, sources = parts
            items.append((directive, sources.rstrip()))

    return cls(items, on_update)
def parse_www_authenticate_header(
    value: t.Optional[str],
    on_update: t.Optional[t.Callable[["ds.WWWAuthenticate"], None]] = None,
) -> "ds.WWWAuthenticate":
    """Build a :class:`~werkzeug.datastructures.WWWAuthenticate` object from
    an HTTP WWW-Authenticate header.

    :param value: a WWW-Authenticate header to parse.
    :param on_update: an optional callable that is called every time a value
                      on the :class:`~werkzeug.datastructures.WWWAuthenticate`
                      object is changed.
    :return: a :class:`~werkzeug.datastructures.WWWAuthenticate` object. It is
             empty when no header is given, and carries only the lowercased
             scheme when the header has no parameters.
    """
    if not value:
        return ds.WWWAuthenticate(on_update=on_update)

    try:
        scheme, params = value.split(None, 1)
        scheme = scheme.lower()
    except (ValueError, AttributeError):
        # The header could not be split into "<scheme> <params>".
        bare_scheme = value.strip().lower()
        return ds.WWWAuthenticate(bare_scheme, on_update=on_update)
    else:
        return ds.WWWAuthenticate(scheme, parse_dict_header(params), on_update)
def parse_range_header(
    value: t.Optional[str], make_inclusive: bool = True
) -> t.Optional["ds.Range"]:
    """Turn a Range header into a :class:`~werkzeug.datastructures.Range`
    object, or return `None` when the header is missing or malformed.

    The resulting `ranges` are ``(start, stop)`` tuples with a non-inclusive
    stop. A suffix range such as ``-500`` is stored as ``(-500, None)`` and an
    open range such as ``500-`` as ``(500, None)``.

    .. versionadded:: 0.7

    :param value: the range header to parse.
    :param make_inclusive: accepted for compatibility; it is not used by the
        implementation.
    """
    if not value or "=" not in value:
        return None

    unit_part, _, spec_part = value.partition("=")
    units = unit_part.strip().lower()
    ranges = []
    # End of the previous range; negative once an open-ended or suffix range
    # was seen, after which no further range is accepted.
    last_end = 0

    for raw in spec_part.split(","):
        spec = raw.strip()

        if "-" not in spec:
            return None

        if spec.startswith("-"):
            # Suffix form: the whole item, sign included, is the begin value.
            if last_end < 0:
                return None

            try:
                begin = int(spec)
            except ValueError:
                return None

            end = None
        else:
            first, _, last = spec.partition("-")

            try:
                begin = int(first.strip())
            except ValueError:
                return None

            if last_end < 0 or begin < last_end:
                return None

            last = last.strip()

            if last:
                try:
                    end = int(last) + 1
                except ValueError:
                    return None

                if begin >= end:
                    return None
            else:
                end = None

        last_end = -1 if end is None else end
        ranges.append((begin, end))

    return ds.Range(units, ranges)
def unquote_etag(
    etag: t.Optional[str],
) -> t.Union[t.Tuple[str, bool], t.Tuple[None, None]]:
    """Split a single etag into its bare value and its weakness flag:

    >>> unquote_etag('W/"bar"')
    ('bar', True)
    >>> unquote_etag('"bar"')
    ('bar', False)

    :param etag: the etag identifier to unquote.
    :return: a ``(etag, weak)`` tuple, or ``(None, None)`` if no etag
        was given.
    """
    if not etag:
        return None, None

    tag = etag.strip()
    weak = tag[:2] in ("W/", "w/")

    if weak:
        tag = tag[2:]

    # Only strip quotes when the value both starts and ends with one.
    if tag.startswith('"') and tag.endswith('"'):
        tag = tag[1:-1]

    return tag, weak
def parse_date(value: t.Optional[str]) -> t.Optional[datetime]:
    """Convert an :rfc:`2822` date string into a timezone-aware
    :class:`datetime.datetime`, or return ``None`` if it can't be parsed.

    Parsing is delegated to :func:`email.utils.parsedate_to_datetime`.
    Instead of letting its ``TypeError``/``ValueError`` propagate, ``None``
    is returned. A result without timezone information is assumed to be
    UTC.

    :param value: A string with a supported date format.

    .. versionchanged:: 2.0
        Return a timezone-aware datetime object. Use
        ``email.utils.parsedate_to_datetime``.
    """
    if value is None:
        return None

    try:
        parsed = email.utils.parsedate_to_datetime(value)
    except (TypeError, ValueError):
        return None

    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
def parse_age(value: t.Optional[str] = None) -> t.Optional[timedelta]:
    """Convert a base-10 integer count of seconds into a timedelta.

    `None` is returned when the value is missing, is not an integer, is
    negative, or is too large for a :class:`datetime.timedelta`.

    :param value: a string consisting of an integer represented in base-10
    :return: a :class:`datetime.timedelta` object or `None`.
    """
    if not value:
        return None

    try:
        count = int(value)
    except ValueError:
        return None

    if count >= 0:
        try:
            return timedelta(seconds=count)
        except OverflowError:
            # Falls through to the common ``None`` result below.
            pass

    return None
def remove_entity_headers(
    headers: t.Union["ds.Headers", t.List[t.Tuple[str, str]]],
    allowed: t.Iterable[str] = ("expires", "content-location"),
) -> None:
    """Strip entity headers from a list or :class:`Headers` object in place.

    `Expires` and `Content-Location` are kept by default, because
    :rfc:`2616` section 10.3.5 specifies some entity headers that should
    still be sent.

    .. versionchanged:: 0.5
       added `allowed` parameter.

    :param headers: a list or :class:`Headers` object.
    :param allowed: a list of headers that should still be allowed even though
                    they are entity headers. Names are compared
                    case-insensitively.
    """
    keep = {name.lower() for name in allowed}
    retained = []

    for key, value in headers:
        if not is_entity_header(key) or key.lower() in keep:
            retained.append((key, value))

    # Slice assignment so the caller's object is modified, not replaced.
    headers[:] = retained
def parse_cookie(
    header: t.Union["WSGIEnvironment", str, bytes, None],
    charset: str = "utf-8",
    errors: str = "replace",
    cls: t.Optional[t.Type["ds.MultiDict"]] = None,
) -> "ds.MultiDict[str, str]":
    """Parse a cookie header taken from a string or a WSGI environ.

    A key may occur more than once; the values are stored in order. With
    the default :class:`MultiDict` the first value comes first and all of
    them are available through :meth:`MultiDict.getlist`.

    :param header: The cookie header as a string, or a WSGI environ dict
        with a ``HTTP_COOKIE`` key. ``None`` and an environ without that
        key are treated as an empty header.
    :param charset: The charset for the cookie values.
    :param errors: The error behavior for the charset decoding.
    :param cls: A dict-like class to store the parsed cookies in.
        Defaults to :class:`MultiDict`.

    .. versionchanged:: 1.0.0
        Returns a :class:`MultiDict` instead of a
        ``TypeConversionDict``.

    .. versionchanged:: 0.5
        Returns a :class:`TypeConversionDict` instead of a regular dict.
        The ``cls`` parameter was added.
    """
    if header is None:
        raw = ""
    elif isinstance(header, dict):
        raw = header.get("HTTP_COOKIE", "")
    else:
        raw = header

    # The actual parsing lives in the sans-IO layer.
    return _sansio_http.parse_cookie(
        cookie=raw, charset=charset, errors=errors, cls=cls
    )
This is an + extension to the cookie standard and probably not + supported by all browsers. + :param charset: the encoding for string values. + :param sync_expires: automatically set expires if max_age is defined + but expires not. + :param max_size: Warn if the final header value exceeds this size. The + default, 4093, should be safely `supported by most browsers + `_. Set to 0 to disable this check. + :param samesite: Limits the scope of the cookie such that it will + only be attached to requests if those requests are same-site. + + .. _`cookie`: http://browsercookielimits.squawky.net/ + + .. versionchanged:: 1.0.0 + The string ``'None'`` is accepted for ``samesite``. + """ + key = _to_bytes(key, charset) + value = _to_bytes(value, charset) + + if path is not None: + from .urls import iri_to_uri + + path = iri_to_uri(path, charset) + + domain = _make_cookie_domain(domain) + + if isinstance(max_age, timedelta): + max_age = int(max_age.total_seconds()) + + if expires is not None: + if not isinstance(expires, str): + expires = http_date(expires) + elif max_age is not None and sync_expires: + expires = http_date(datetime.now(tz=timezone.utc).timestamp() + max_age) + + if samesite is not None: + samesite = samesite.title() + + if samesite not in {"Strict", "Lax", "None"}: + raise ValueError("SameSite must be 'Strict', 'Lax', or 'None'.") + + buf = [key + b"=" + _cookie_quote(value)] + + # XXX: In theory all of these parameters that are not marked with `None` + # should be quoted. Because stdlib did not quote it before I did not + # want to introduce quoting there now. 
def is_byte_range_valid(
    start: t.Optional[int], stop: t.Optional[int], length: t.Optional[int]
) -> bool:
    """Tell whether a byte content range is valid for the given length.

    ``start`` and ``stop`` must be given together or not at all. Without
    them only a non-negative (or unknown) ``length`` is required. With
    them, ``start`` must be non-negative and smaller than ``stop``, and
    also smaller than ``length`` when the length is known.

    .. versionadded:: 0.7
    """
    if start is None or stop is None:
        # A half-specified range is never valid.
        if start is not None or stop is not None:
            return False

        return length is None or length >= 0

    if length is None:
        return 0 <= start < stop

    if start >= stop:
        return False

    return 0 <= start < length
import datastructures as ds +from .sansio import http as _sansio_http diff --git a/src/werkzeug/local.py b/src/werkzeug/local.py new file mode 100644 index 0000000..70e9bf7 --- /dev/null +++ b/src/werkzeug/local.py @@ -0,0 +1,648 @@ +import copy +import math +import operator +import typing as t +from contextvars import ContextVar +from functools import partial +from functools import update_wrapper +from operator import attrgetter + +from .wsgi import ClosingIterator + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + +T = t.TypeVar("T") +F = t.TypeVar("F", bound=t.Callable[..., t.Any]) + + +def release_local(local: t.Union["Local", "LocalStack"]) -> None: + """Release the data for the current context in a :class:`Local` or + :class:`LocalStack` without using a :class:`LocalManager`. + + This should not be needed for modern use cases, and may be removed + in the future. + + .. versionadded:: 0.6.1 + """ + local.__release_local__() + + +class Local: + """Create a namespace of context-local data. This wraps a + :class:`ContextVar` containing a :class:`dict` value. + + This may incur a performance penalty compared to using individual + context vars, as it has to copy data to avoid mutating the dict + between nested contexts. + + :param context_var: The :class:`~contextvars.ContextVar` to use as + storage for this local. If not given, one will be created. + Context vars not created at the global scope may interfere with + garbage collection. + + .. versionchanged:: 2.0 + Uses ``ContextVar`` instead of a custom storage implementation. + """ + + __slots__ = ("__storage",) + + def __init__( + self, context_var: t.Optional[ContextVar[t.Dict[str, t.Any]]] = None + ) -> None: + if context_var is None: + # A ContextVar not created at global scope interferes with + # Python's garbage collection. 
However, a local only makes + # sense defined at the global scope as well, in which case + # the GC issue doesn't seem relevant. + context_var = ContextVar(f"werkzeug.Local<{id(self)}>.storage") + + object.__setattr__(self, "_Local__storage", context_var) + + def __iter__(self) -> t.Iterator[t.Tuple[str, t.Any]]: + return iter(self.__storage.get({}).items()) + + def __call__( + self, name: str, *, unbound_message: t.Optional[str] = None + ) -> "LocalProxy": + """Create a :class:`LocalProxy` that access an attribute on this + local namespace. + + :param name: Proxy this attribute. + :param unbound_message: The error message that the proxy will + show if the attribute isn't set. + """ + return LocalProxy(self, name, unbound_message=unbound_message) + + def __release_local__(self) -> None: + self.__storage.set({}) + + def __getattr__(self, name: str) -> t.Any: + values = self.__storage.get({}) + + if name in values: + return values[name] + + raise AttributeError(name) + + def __setattr__(self, name: str, value: t.Any) -> None: + values = self.__storage.get({}).copy() + values[name] = value + self.__storage.set(values) + + def __delattr__(self, name: str) -> None: + values = self.__storage.get({}) + + if name in values: + values = values.copy() + del values[name] + self.__storage.set(values) + else: + raise AttributeError(name) + + +class LocalStack(t.Generic[T]): + """Create a stack of context-local data. This wraps a + :class:`ContextVar` containing a :class:`list` value. + + This may incur a performance penalty compared to using individual + context vars, as it has to copy data to avoid mutating the list + between nested contexts. + + :param context_var: The :class:`~contextvars.ContextVar` to use as + storage for this local. If not given, one will be created. + Context vars not created at the global scope may interfere with + garbage collection. + + .. versionchanged:: 2.0 + Uses ``ContextVar`` instead of a custom storage implementation. + + .. 
versionadded:: 0.6.1 + """ + + __slots__ = ("_storage",) + + def __init__(self, context_var: t.Optional[ContextVar[t.List[T]]] = None) -> None: + if context_var is None: + # A ContextVar not created at global scope interferes with + # Python's garbage collection. However, a local only makes + # sense defined at the global scope as well, in which case + # the GC issue doesn't seem relevant. + context_var = ContextVar(f"werkzeug.LocalStack<{id(self)}>.storage") + + self._storage = context_var + + def __release_local__(self) -> None: + self._storage.set([]) + + def push(self, obj: T) -> t.List[T]: + """Add a new item to the top of the stack.""" + stack = self._storage.get([]).copy() + stack.append(obj) + self._storage.set(stack) + return stack + + def pop(self) -> t.Optional[T]: + """Remove the top item from the stack and return it. If the + stack is empty, return ``None``. + """ + stack = self._storage.get([]) + + if len(stack) == 0: + return None + + rv = stack[-1] + self._storage.set(stack[:-1]) + return rv + + @property + def top(self) -> t.Optional[T]: + """The topmost item on the stack. If the stack is empty, + `None` is returned. + """ + stack = self._storage.get([]) + + if len(stack) == 0: + return None + + return stack[-1] + + def __call__( + self, name: t.Optional[str] = None, *, unbound_message: t.Optional[str] = None + ) -> "LocalProxy": + """Create a :class:`LocalProxy` that accesses the top of this + local stack. + + :param name: If given, the proxy access this attribute of the + top item, rather than the item itself. + :param unbound_message: The error message that the proxy will + show if the stack is empty. + """ + return LocalProxy(self, name, unbound_message=unbound_message) + + +class LocalManager: + """Manage releasing the data for the current context in one or more + :class:`Local` and :class:`LocalStack` objects. + + This should not be needed for modern use cases, and may be removed + in the future. 
+ + :param locals: A local or list of locals to manage. + + .. versionchanged:: 2.0 + ``ident_func`` is deprecated and will be removed in Werkzeug + 2.1. + + .. versionchanged:: 0.7 + The ``ident_func`` parameter was added. + + .. versionchanged:: 0.6.1 + The :func:`release_local` function can be used instead of a + manager. + """ + + __slots__ = ("locals",) + + def __init__( + self, + locals: t.Optional[ + t.Union[Local, LocalStack, t.Iterable[t.Union[Local, LocalStack]]] + ] = None, + ) -> None: + if locals is None: + self.locals = [] + elif isinstance(locals, Local): + self.locals = [locals] + else: + self.locals = list(locals) # type: ignore[arg-type] + + def cleanup(self) -> None: + """Release the data in the locals for this context. Call this at + the end of each request or use :meth:`make_middleware`. + """ + for local in self.locals: + release_local(local) + + def make_middleware(self, app: "WSGIApplication") -> "WSGIApplication": + """Wrap a WSGI application so that local data is released + automatically after the response has been sent for a request. + """ + + def application( + environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + return ClosingIterator(app(environ, start_response), self.cleanup) + + return application + + def middleware(self, func: "WSGIApplication") -> "WSGIApplication": + """Like :meth:`make_middleware` but used as a decorator on the + WSGI application function. + + .. code-block:: python + + @manager.middleware + def application(environ, start_response): + ... + """ + return update_wrapper(self.make_middleware(func), func) + + def __repr__(self) -> str: + return f"<{type(self).__name__} storages: {len(self.locals)}>" + + +class _ProxyLookup: + """Descriptor that handles proxied attribute lookup for + :class:`LocalProxy`. + + :param f: The built-in function this attribute is accessed through. + Instead of looking up the special method, the function call + is redone on the object. 
+ :param fallback: Return this function if the proxy is unbound + instead of raising a :exc:`RuntimeError`. + :param is_attr: This proxied name is an attribute, not a function. + Call the fallback immediately to get the value. + :param class_value: Value to return when accessed from the + ``LocalProxy`` class directly. Used for ``__doc__`` so building + docs still works. + """ + + __slots__ = ("bind_f", "fallback", "is_attr", "class_value", "name") + + def __init__( + self, + f: t.Optional[t.Callable] = None, + fallback: t.Optional[t.Callable] = None, + class_value: t.Optional[t.Any] = None, + is_attr: bool = False, + ) -> None: + bind_f: t.Optional[t.Callable[["LocalProxy", t.Any], t.Callable]] + + if hasattr(f, "__get__"): + # A Python function, can be turned into a bound method. + + def bind_f(instance: "LocalProxy", obj: t.Any) -> t.Callable: + return f.__get__(obj, type(obj)) # type: ignore + + elif f is not None: + # A C function, use partial to bind the first argument. + + def bind_f(instance: "LocalProxy", obj: t.Any) -> t.Callable: + return partial(f, obj) # type: ignore + + else: + # Use getattr, which will produce a bound method. + bind_f = None + + self.bind_f = bind_f + self.fallback = fallback + self.class_value = class_value + self.is_attr = is_attr + + def __set_name__(self, owner: "LocalProxy", name: str) -> None: + self.name = name + + def __get__(self, instance: "LocalProxy", owner: t.Optional[type] = None) -> t.Any: + if instance is None: + if self.class_value is not None: + return self.class_value + + return self + + try: + obj = instance._get_current_object() # type: ignore[misc] + except RuntimeError: + if self.fallback is None: + raise + + fallback = self.fallback.__get__(instance, owner) + + if self.is_attr: + # __class__ and __doc__ are attributes, not methods. + # Call the fallback to get the value. 
+ return fallback() + + return fallback + + if self.bind_f is not None: + return self.bind_f(instance, obj) + + return getattr(obj, self.name) + + def __repr__(self) -> str: + return f"proxy {self.name}" + + def __call__(self, instance: "LocalProxy", *args: t.Any, **kwargs: t.Any) -> t.Any: + """Support calling unbound methods from the class. For example, + this happens with ``copy.copy``, which does + ``type(x).__copy__(x)``. ``type(x)`` can't be proxied, so it + returns the proxy type and descriptor. + """ + return self.__get__(instance, type(instance))(*args, **kwargs) + + +class _ProxyIOp(_ProxyLookup): + """Look up an augmented assignment method on a proxied object. The + method is wrapped to return the proxy instead of the object. + """ + + __slots__ = () + + def __init__( + self, f: t.Optional[t.Callable] = None, fallback: t.Optional[t.Callable] = None + ) -> None: + super().__init__(f, fallback) + + def bind_f(instance: "LocalProxy", obj: t.Any) -> t.Callable: + def i_op(self: t.Any, other: t.Any) -> "LocalProxy": + f(self, other) # type: ignore + return instance + + return i_op.__get__(obj, type(obj)) # type: ignore + + self.bind_f = bind_f + + +def _l_to_r_op(op: F) -> F: + """Swap the argument order to turn an l-op into an r-op.""" + + def r_op(obj: t.Any, other: t.Any) -> t.Any: + return op(other, obj) + + return t.cast(F, r_op) + + +def _identity(o: T) -> T: + return o + + +class LocalProxy(t.Generic[T]): + """A proxy to the object bound to a context-local object. All + operations on the proxy are forwarded to the bound object. If no + object is bound, a ``RuntimeError`` is raised. + + :param local: The context-local object that provides the proxied + object. + :param name: Proxy this attribute from the proxied object. + :param unbound_message: The error message to show if the + context-local object is unbound. + + Proxy a :class:`~contextvars.ContextVar` to make it easier to + access. Pass a name to proxy that attribute. + + .. 
code-block:: python + + _request_var = ContextVar("request") + request = LocalProxy(_request_var) + session = LocalProxy(_request_var, "session") + + Proxy an attribute on a :class:`Local` namespace by calling the + local with the attribute name: + + .. code-block:: python + + data = Local() + user = data("user") + + Proxy the top item on a :class:`LocalStack` by calling the local. + Pass a name to proxy that attribute. + + .. code-block:: + + app_stack = LocalStack() + current_app = app_stack() + g = app_stack("g") + + Pass a function to proxy the return value from that function. This + was previously used to access attributes of local objects before + that was supported directly. + + .. code-block:: python + + session = LocalProxy(lambda: request.session) + + ``__repr__`` and ``__class__`` are proxied, so ``repr(x)`` and + ``isinstance(x, cls)`` will look like the proxied object. Use + ``issubclass(type(x), LocalProxy)`` to check if an object is a + proxy. + + .. code-block:: python + + repr(user) # + isinstance(user, User) # True + issubclass(type(user), LocalProxy) # True + + .. versionchanged:: 2.2.2 + ``__wrapped__`` is set when wrapping an object, not only when + wrapping a function, to prevent doctest from failing. + + .. versionchanged:: 2.2 + Can proxy a ``ContextVar`` or ``LocalStack`` directly. + + .. versionchanged:: 2.2 + The ``name`` parameter can be used with any proxied object, not + only ``Local``. + + .. versionchanged:: 2.2 + Added the ``unbound_message`` parameter. + + .. versionchanged:: 2.0 + Updated proxied attributes and methods to reflect the current + data model. + + .. versionchanged:: 0.6.1 + The class can be instantiated with a callable. + """ + + __slots__ = ("__wrapped", "_get_current_object") + + _get_current_object: t.Callable[[], T] + """Return the current object this proxy is bound to. If the proxy is + unbound, this raises a ``RuntimeError``. 
+ + This should be used if you need to pass the object to something that + doesn't understand the proxy. It can also be useful for performance + if you are accessing the object multiple times in a function, rather + than going through the proxy multiple times. + """ + + def __init__( + self, + local: t.Union[ContextVar[T], Local, LocalStack[T], t.Callable[[], T]], + name: t.Optional[str] = None, + *, + unbound_message: t.Optional[str] = None, + ) -> None: + if name is None: + get_name = _identity + else: + get_name = attrgetter(name) # type: ignore[assignment] + + if unbound_message is None: + unbound_message = "object is not bound" + + if isinstance(local, Local): + if name is None: + raise TypeError("'name' is required when proxying a 'Local' object.") + + def _get_current_object() -> T: + try: + return get_name(local) # type: ignore[return-value] + except AttributeError: + raise RuntimeError(unbound_message) from None + + elif isinstance(local, LocalStack): + + def _get_current_object() -> T: + obj = local.top # type: ignore[union-attr] + + if obj is None: + raise RuntimeError(unbound_message) + + return get_name(obj) + + elif isinstance(local, ContextVar): + + def _get_current_object() -> T: + try: + obj = local.get() # type: ignore[union-attr] + except LookupError: + raise RuntimeError(unbound_message) from None + + return get_name(obj) + + elif callable(local): + + def _get_current_object() -> T: + return get_name(local()) # type: ignore + + else: + raise TypeError(f"Don't know how to proxy '{type(local)}'.") + + object.__setattr__(self, "_LocalProxy__wrapped", local) + object.__setattr__(self, "_get_current_object", _get_current_object) + + __doc__ = _ProxyLookup( # type: ignore + class_value=__doc__, fallback=lambda self: type(self).__doc__, is_attr=True + ) + __wrapped__ = _ProxyLookup( + fallback=lambda self: self._LocalProxy__wrapped, is_attr=True + ) + # __del__ should only delete the proxy + __repr__ = _ProxyLookup( # type: ignore + repr, 
fallback=lambda self: f"<{type(self).__name__} unbound>" + ) + __str__ = _ProxyLookup(str) # type: ignore + __bytes__ = _ProxyLookup(bytes) + __format__ = _ProxyLookup() # type: ignore + __lt__ = _ProxyLookup(operator.lt) + __le__ = _ProxyLookup(operator.le) + __eq__ = _ProxyLookup(operator.eq) # type: ignore + __ne__ = _ProxyLookup(operator.ne) # type: ignore + __gt__ = _ProxyLookup(operator.gt) + __ge__ = _ProxyLookup(operator.ge) + __hash__ = _ProxyLookup(hash) # type: ignore + __bool__ = _ProxyLookup(bool, fallback=lambda self: False) + __getattr__ = _ProxyLookup(getattr) + # __getattribute__ triggered through __getattr__ + __setattr__ = _ProxyLookup(setattr) # type: ignore + __delattr__ = _ProxyLookup(delattr) # type: ignore + __dir__ = _ProxyLookup(dir, fallback=lambda self: []) # type: ignore + # __get__ (proxying descriptor not supported) + # __set__ (descriptor) + # __delete__ (descriptor) + # __set_name__ (descriptor) + # __objclass__ (descriptor) + # __slots__ used by proxy itself + # __dict__ (__getattr__) + # __weakref__ (__getattr__) + # __init_subclass__ (proxying metaclass not supported) + # __prepare__ (metaclass) + __class__ = _ProxyLookup( + fallback=lambda self: type(self), is_attr=True + ) # type: ignore + __instancecheck__ = _ProxyLookup(lambda self, other: isinstance(other, self)) + __subclasscheck__ = _ProxyLookup(lambda self, other: issubclass(other, self)) + # __class_getitem__ triggered through __getitem__ + __call__ = _ProxyLookup(lambda self, *args, **kwargs: self(*args, **kwargs)) + __len__ = _ProxyLookup(len) + __length_hint__ = _ProxyLookup(operator.length_hint) + __getitem__ = _ProxyLookup(operator.getitem) + __setitem__ = _ProxyLookup(operator.setitem) + __delitem__ = _ProxyLookup(operator.delitem) + # __missing__ triggered through __getitem__ + __iter__ = _ProxyLookup(iter) + __next__ = _ProxyLookup(next) + __reversed__ = _ProxyLookup(reversed) + __contains__ = _ProxyLookup(operator.contains) + __add__ = _ProxyLookup(operator.add) 
+ __sub__ = _ProxyLookup(operator.sub) + __mul__ = _ProxyLookup(operator.mul) + __matmul__ = _ProxyLookup(operator.matmul) + __truediv__ = _ProxyLookup(operator.truediv) + __floordiv__ = _ProxyLookup(operator.floordiv) + __mod__ = _ProxyLookup(operator.mod) + __divmod__ = _ProxyLookup(divmod) + __pow__ = _ProxyLookup(pow) + __lshift__ = _ProxyLookup(operator.lshift) + __rshift__ = _ProxyLookup(operator.rshift) + __and__ = _ProxyLookup(operator.and_) + __xor__ = _ProxyLookup(operator.xor) + __or__ = _ProxyLookup(operator.or_) + __radd__ = _ProxyLookup(_l_to_r_op(operator.add)) + __rsub__ = _ProxyLookup(_l_to_r_op(operator.sub)) + __rmul__ = _ProxyLookup(_l_to_r_op(operator.mul)) + __rmatmul__ = _ProxyLookup(_l_to_r_op(operator.matmul)) + __rtruediv__ = _ProxyLookup(_l_to_r_op(operator.truediv)) + __rfloordiv__ = _ProxyLookup(_l_to_r_op(operator.floordiv)) + __rmod__ = _ProxyLookup(_l_to_r_op(operator.mod)) + __rdivmod__ = _ProxyLookup(_l_to_r_op(divmod)) + __rpow__ = _ProxyLookup(_l_to_r_op(pow)) + __rlshift__ = _ProxyLookup(_l_to_r_op(operator.lshift)) + __rrshift__ = _ProxyLookup(_l_to_r_op(operator.rshift)) + __rand__ = _ProxyLookup(_l_to_r_op(operator.and_)) + __rxor__ = _ProxyLookup(_l_to_r_op(operator.xor)) + __ror__ = _ProxyLookup(_l_to_r_op(operator.or_)) + __iadd__ = _ProxyIOp(operator.iadd) + __isub__ = _ProxyIOp(operator.isub) + __imul__ = _ProxyIOp(operator.imul) + __imatmul__ = _ProxyIOp(operator.imatmul) + __itruediv__ = _ProxyIOp(operator.itruediv) + __ifloordiv__ = _ProxyIOp(operator.ifloordiv) + __imod__ = _ProxyIOp(operator.imod) + __ipow__ = _ProxyIOp(operator.ipow) + __ilshift__ = _ProxyIOp(operator.ilshift) + __irshift__ = _ProxyIOp(operator.irshift) + __iand__ = _ProxyIOp(operator.iand) + __ixor__ = _ProxyIOp(operator.ixor) + __ior__ = _ProxyIOp(operator.ior) + __neg__ = _ProxyLookup(operator.neg) + __pos__ = _ProxyLookup(operator.pos) + __abs__ = _ProxyLookup(abs) + __invert__ = _ProxyLookup(operator.invert) + __complex__ = 
_ProxyLookup(complex) + __int__ = _ProxyLookup(int) + __float__ = _ProxyLookup(float) + __index__ = _ProxyLookup(operator.index) + __round__ = _ProxyLookup(round) + __trunc__ = _ProxyLookup(math.trunc) + __floor__ = _ProxyLookup(math.floor) + __ceil__ = _ProxyLookup(math.ceil) + __enter__ = _ProxyLookup() + __exit__ = _ProxyLookup() + __await__ = _ProxyLookup() + __aiter__ = _ProxyLookup() + __anext__ = _ProxyLookup() + __aenter__ = _ProxyLookup() + __aexit__ = _ProxyLookup() + __copy__ = _ProxyLookup(copy.copy) + __deepcopy__ = _ProxyLookup(copy.deepcopy) + # __getnewargs_ex__ (pickle through proxy not supported) + # __getnewargs__ (pickle) + # __getstate__ (pickle) + # __setstate__ (pickle) + # __reduce__ (pickle) + # __reduce_ex__ (pickle) diff --git a/src/werkzeug/middleware/__init__.py b/src/werkzeug/middleware/__init__.py new file mode 100644 index 0000000..6ddcf7f --- /dev/null +++ b/src/werkzeug/middleware/__init__.py @@ -0,0 +1,22 @@ +""" +Middleware +========== + +A WSGI middleware is a WSGI application that wraps another application +in order to observe or change its behavior. Werkzeug provides some +middleware for common use cases. + +.. toctree:: + :maxdepth: 1 + + proxy_fix + shared_data + dispatcher + http_proxy + lint + profiler + +The :doc:`interactive debugger ` is also a middleware that can +be applied manually, although it is typically used automatically with +the :doc:`development server `. +""" diff --git a/src/werkzeug/middleware/dispatcher.py b/src/werkzeug/middleware/dispatcher.py new file mode 100644 index 0000000..ace1c75 --- /dev/null +++ b/src/werkzeug/middleware/dispatcher.py @@ -0,0 +1,78 @@ +""" +Application Dispatcher +====================== + +This middleware creates a single WSGI application that dispatches to +multiple other WSGI applications mounted at different URL paths. 
+ +A common example is writing a Single Page Application, where you have a +backend API and a frontend written in JavaScript that does the routing +in the browser rather than requesting different pages from the server. +The frontend is a single HTML and JS file that should be served for any +path besides "/api". + +This example dispatches to an API app under "/api", an admin app +under "/admin", and an app that serves frontend files for all other +requests:: + + app = DispatcherMiddleware(serve_frontend, { + '/api': api_app, + '/admin': admin_app, + }) + +In production, you might instead handle this at the HTTP server level, +serving files or proxying to application servers based on location. The +API and admin apps would each be deployed with a separate WSGI server, +and the static files would be served directly by the HTTP server. + +.. autoclass:: DispatcherMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import typing as t + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class DispatcherMiddleware: + """Combine multiple applications as a single WSGI application. + Requests are dispatched to an application based on the path it is + mounted under. + + :param app: The WSGI application to dispatch to if the request + doesn't match a mounted path. + :param mounts: Maps path prefixes to applications for dispatching. 
+ """ + + def __init__( + self, + app: "WSGIApplication", + mounts: t.Optional[t.Dict[str, "WSGIApplication"]] = None, + ) -> None: + self.app = app + self.mounts = mounts or {} + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + script = environ.get("PATH_INFO", "") + path_info = "" + + while "/" in script: + if script in self.mounts: + app = self.mounts[script] + break + + script, last_item = script.rsplit("/", 1) + path_info = f"/{last_item}{path_info}" + else: + app = self.mounts.get(script, self.app) + + original_script_name = environ.get("SCRIPT_NAME", "") + environ["SCRIPT_NAME"] = original_script_name + script + environ["PATH_INFO"] = path_info + return app(environ, start_response) diff --git a/src/werkzeug/middleware/http_proxy.py b/src/werkzeug/middleware/http_proxy.py new file mode 100644 index 0000000..1cde458 --- /dev/null +++ b/src/werkzeug/middleware/http_proxy.py @@ -0,0 +1,230 @@ +""" +Basic HTTP Proxy +================ + +.. autoclass:: ProxyMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import typing as t +from http import client + +from ..datastructures import EnvironHeaders +from ..http import is_hop_by_hop_header +from ..urls import url_parse +from ..urls import url_quote +from ..wsgi import get_input_stream + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class ProxyMiddleware: + """Proxy requests under a path to an external server, routing other + requests to the app. + + This middleware can only proxy HTTP requests, as HTTP is the only + protocol handled by the WSGI server. Other protocols, such as + WebSocket requests, cannot be proxied at this layer. This should + only be used for development, in production a real proxy server + should be used. 
+ + The middleware takes a dict mapping a path prefix to a dict + describing the host to be proxied to:: + + app = ProxyMiddleware(app, { + "/static/": { + "target": "http://127.0.0.1:5001/", + } + }) + + Each host has the following options: + + ``target``: + The target URL to dispatch to. This is required. + ``remove_prefix``: + Whether to remove the prefix from the URL before dispatching it + to the target. The default is ``False``. + ``host``: + ``""`` (default): + The host header is automatically rewritten to the URL of the + target. + ``None``: + The host header is unmodified from the client request. + Any other value: + The host header is overwritten with the value. + ``headers``: + A dictionary of headers to be sent with the request to the + target. The default is ``{}``. + ``ssl_context``: + A :class:`ssl.SSLContext` defining how to verify requests if the + target is HTTPS. The default is ``None``. + + In the example above, everything under ``"/static/"`` is proxied to + the server on port 5001. The host header is rewritten to the target, + and the ``"/static/"`` prefix is removed from the URLs. + + :param app: The WSGI application to wrap. + :param targets: Proxy target configurations. See description above. + :param chunk_size: Size of chunks to read from input stream and + write to target. + :param timeout: Seconds before an operation to a target fails. + + .. 
versionadded:: 0.14 + """ + + def __init__( + self, + app: "WSGIApplication", + targets: t.Mapping[str, t.Dict[str, t.Any]], + chunk_size: int = 2 << 13, + timeout: int = 10, + ) -> None: + def _set_defaults(opts: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]: + opts.setdefault("remove_prefix", False) + opts.setdefault("host", "") + opts.setdefault("headers", {}) + opts.setdefault("ssl_context", None) + return opts + + self.app = app + self.targets = { + f"/{k.strip('/')}/": _set_defaults(v) for k, v in targets.items() + } + self.chunk_size = chunk_size + self.timeout = timeout + + def proxy_to( + self, opts: t.Dict[str, t.Any], path: str, prefix: str + ) -> "WSGIApplication": + target = url_parse(opts["target"]) + host = t.cast(str, target.ascii_host) + + def application( + environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + headers = list(EnvironHeaders(environ).items()) + headers[:] = [ + (k, v) + for k, v in headers + if not is_hop_by_hop_header(k) + and k.lower() not in ("content-length", "host") + ] + headers.append(("Connection", "close")) + + if opts["host"] == "": + headers.append(("Host", host)) + elif opts["host"] is None: + headers.append(("Host", environ["HTTP_HOST"])) + else: + headers.append(("Host", opts["host"])) + + headers.extend(opts["headers"].items()) + remote_path = path + + if opts["remove_prefix"]: + remote_path = remote_path[len(prefix) :].lstrip("/") + remote_path = f"{target.path.rstrip('/')}/{remote_path}" + + content_length = environ.get("CONTENT_LENGTH") + chunked = False + + if content_length not in ("", None): + headers.append(("Content-Length", content_length)) # type: ignore + elif content_length is not None: + headers.append(("Transfer-Encoding", "chunked")) + chunked = True + + try: + if target.scheme == "http": + con = client.HTTPConnection( + host, target.port or 80, timeout=self.timeout + ) + elif target.scheme == "https": + con = client.HTTPSConnection( + host, + target.port or 443, + 
timeout=self.timeout, + context=opts["ssl_context"], + ) + else: + raise RuntimeError( + "Target scheme must be 'http' or 'https', got" + f" {target.scheme!r}." + ) + + con.connect() + remote_url = url_quote(remote_path) + querystring = environ["QUERY_STRING"] + + if querystring: + remote_url = f"{remote_url}?{querystring}" + + con.putrequest(environ["REQUEST_METHOD"], remote_url, skip_host=True) + + for k, v in headers: + if k.lower() == "connection": + v = "close" + + con.putheader(k, v) + + con.endheaders() + stream = get_input_stream(environ) + + while True: + data = stream.read(self.chunk_size) + + if not data: + break + + if chunked: + con.send(b"%x\r\n%s\r\n" % (len(data), data)) + else: + con.send(data) + + resp = con.getresponse() + except OSError: + from ..exceptions import BadGateway + + return BadGateway()(environ, start_response) + + start_response( + f"{resp.status} {resp.reason}", + [ + (k.title(), v) + for k, v in resp.getheaders() + if not is_hop_by_hop_header(k) + ], + ) + + def read() -> t.Iterator[bytes]: + while True: + try: + data = resp.read(self.chunk_size) + except OSError: + break + + if not data: + break + + yield data + + return read() + + return application + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + path = environ["PATH_INFO"] + app = self.app + + for prefix, opts in self.targets.items(): + if path.startswith(prefix): + app = self.proxy_to(opts, path, prefix) + break + + return app(environ, start_response) diff --git a/src/werkzeug/middleware/lint.py b/src/werkzeug/middleware/lint.py new file mode 100644 index 0000000..6b54630 --- /dev/null +++ b/src/werkzeug/middleware/lint.py @@ -0,0 +1,420 @@ +""" +WSGI Protocol Linter +==================== + +This module provides a middleware that performs sanity checks on the +behavior of the WSGI server and application. It checks that the +:pep:`3333` WSGI spec is properly implemented. 
It also warns on some +common HTTP errors such as non-empty responses for 304 status codes. + +.. autoclass:: LintMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import typing as t +from types import TracebackType +from urllib.parse import urlparse +from warnings import warn + +from ..datastructures import Headers +from ..http import is_entity_header +from ..wsgi import FileWrapper + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class WSGIWarning(Warning): + """Warning class for WSGI warnings.""" + + +class HTTPWarning(Warning): + """Warning class for HTTP warnings.""" + + +def check_type(context: str, obj: object, need: t.Type = str) -> None: + if type(obj) is not need: + warn( + f"{context!r} requires {need.__name__!r}, got {type(obj).__name__!r}.", + WSGIWarning, + stacklevel=3, + ) + + +class InputStream: + def __init__(self, stream: t.IO[bytes]) -> None: + self._stream = stream + + def read(self, *args: t.Any) -> bytes: + if len(args) == 0: + warn( + "WSGI does not guarantee an EOF marker on the input stream, thus making" + " calls to 'wsgi.input.read()' unsafe. Conforming servers may never" + " return from this call.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) != 1: + warn( + "Too many parameters passed to 'wsgi.input.read()'.", + WSGIWarning, + stacklevel=2, + ) + return self._stream.read(*args) + + def readline(self, *args: t.Any) -> bytes: + if len(args) == 0: + warn( + "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use" + " 'wsgi.input.read()' instead.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) == 1: + warn( + "'wsgi.input.readline()' was called with a size hint. 
WSGI does not" + " support this, although it's available on all major servers.", + WSGIWarning, + stacklevel=2, + ) + else: + raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.") + return self._stream.readline(*args) + + def __iter__(self) -> t.Iterator[bytes]: + try: + return iter(self._stream) + except TypeError: + warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2) + return iter(()) + + def close(self) -> None: + warn("The application closed the input stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class ErrorStream: + def __init__(self, stream: t.IO[str]) -> None: + self._stream = stream + + def write(self, s: str) -> None: + check_type("wsgi.error.write()", s, str) + self._stream.write(s) + + def flush(self) -> None: + self._stream.flush() + + def writelines(self, seq: t.Iterable[str]) -> None: + for line in seq: + self.write(line) + + def close(self) -> None: + warn("The application closed the error stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class GuardedWrite: + def __init__(self, write: t.Callable[[bytes], object], chunks: t.List[int]) -> None: + self._write = write + self._chunks = chunks + + def __call__(self, s: bytes) -> None: + check_type("write()", s, bytes) + self._write(s) + self._chunks.append(len(s)) + + +class GuardedIterator: + def __init__( + self, + iterator: t.Iterable[bytes], + headers_set: t.Tuple[int, Headers], + chunks: t.List[int], + ) -> None: + self._iterator = iterator + self._next = iter(iterator).__next__ + self.closed = False + self.headers_set = headers_set + self.chunks = chunks + + def __iter__(self) -> "GuardedIterator": + return self + + def __next__(self) -> bytes: + if self.closed: + warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2) + + rv = self._next() + + if not self.headers_set: + warn( + "The application returned before it started the response.", + WSGIWarning, + stacklevel=2, + ) + + check_type("application iterator items", rv, 
bytes) + self.chunks.append(len(rv)) + return rv + + def close(self) -> None: + self.closed = True + + if hasattr(self._iterator, "close"): + self._iterator.close() # type: ignore + + if self.headers_set: + status_code, headers = self.headers_set + bytes_sent = sum(self.chunks) + content_length = headers.get("content-length", type=int) + + if status_code == 304: + for key, _value in headers: + key = key.lower() + if key not in ("expires", "content-location") and is_entity_header( + key + ): + warn( + f"Entity header {key!r} found in 304 response.", HTTPWarning + ) + if bytes_sent: + warn("304 responses must not have a body.", HTTPWarning) + elif 100 <= status_code < 200 or status_code == 204: + if content_length != 0: + warn( + f"{status_code} responses must have an empty content length.", + HTTPWarning, + ) + if bytes_sent: + warn(f"{status_code} responses must not have a body.", HTTPWarning) + elif content_length is not None and content_length != bytes_sent: + warn( + "Content-Length and the number of bytes sent to the" + " client do not match.", + WSGIWarning, + ) + + def __del__(self) -> None: + if not self.closed: + try: + warn( + "Iterator was garbage collected before it was closed.", WSGIWarning + ) + except Exception: + pass + + +class LintMiddleware: + """Warns about common errors in the WSGI and HTTP behavior of the + server and wrapped application. Some of the issues it checks are: + + - invalid status codes + - non-bytes sent to the WSGI server + - strings returned from the WSGI application + - non-empty conditional responses + - unquoted etags + - relative URLs in the Location header + - unsafe calls to wsgi.input + - unclosed iterators + + Error information is emitted using the :mod:`warnings` module. + + :param app: The WSGI application to wrap. + + .. 
code-block:: python + + from werkzeug.middleware.lint import LintMiddleware + app = LintMiddleware(app) + """ + + def __init__(self, app: "WSGIApplication") -> None: + self.app = app + + def check_environ(self, environ: "WSGIEnvironment") -> None: + if type(environ) is not dict: + warn( + "WSGI environment is not a standard Python dict.", + WSGIWarning, + stacklevel=4, + ) + for key in ( + "REQUEST_METHOD", + "SERVER_NAME", + "SERVER_PORT", + "wsgi.version", + "wsgi.input", + "wsgi.errors", + "wsgi.multithread", + "wsgi.multiprocess", + "wsgi.run_once", + ): + if key not in environ: + warn( + f"Required environment key {key!r} not found", + WSGIWarning, + stacklevel=3, + ) + if environ["wsgi.version"] != (1, 0): + warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3) + + script_name = environ.get("SCRIPT_NAME", "") + path_info = environ.get("PATH_INFO", "") + + if script_name and script_name[0] != "/": + warn( + f"'SCRIPT_NAME' does not start with a slash: {script_name!r}", + WSGIWarning, + stacklevel=3, + ) + + if path_info and path_info[0] != "/": + warn( + f"'PATH_INFO' does not start with a slash: {path_info!r}", + WSGIWarning, + stacklevel=3, + ) + + def check_start_response( + self, + status: str, + headers: t.List[t.Tuple[str, str]], + exc_info: t.Optional[ + t.Tuple[t.Type[BaseException], BaseException, TracebackType] + ], + ) -> t.Tuple[int, Headers]: + check_type("status", status, str) + status_code_str = status.split(None, 1)[0] + + if len(status_code_str) != 3 or not status_code_str.isdecimal(): + warn("Status code must be three digits.", WSGIWarning, stacklevel=3) + + if len(status) < 4 or status[3] != " ": + warn( + f"Invalid value for status {status!r}. 
Valid status strings are three" + " digits, a space and a status explanation.", + WSGIWarning, + stacklevel=3, + ) + + status_code = int(status_code_str) + + if status_code < 100: + warn("Status code < 100 detected.", WSGIWarning, stacklevel=3) + + if type(headers) is not list: + warn("Header list is not a list.", WSGIWarning, stacklevel=3) + + for item in headers: + if type(item) is not tuple or len(item) != 2: + warn("Header items must be 2-item tuples.", WSGIWarning, stacklevel=3) + name, value = item + if type(name) is not str or type(value) is not str: + warn( + "Header keys and values must be strings.", WSGIWarning, stacklevel=3 + ) + if name.lower() == "status": + warn( + "The status header is not supported due to" + " conflicts with the CGI spec.", + WSGIWarning, + stacklevel=3, + ) + + if exc_info is not None and not isinstance(exc_info, tuple): + warn("Invalid value for exc_info.", WSGIWarning, stacklevel=3) + + headers = Headers(headers) + self.check_headers(headers) + + return status_code, headers + + def check_headers(self, headers: Headers) -> None: + etag = headers.get("etag") + + if etag is not None: + if etag.startswith(("W/", "w/")): + if etag.startswith("w/"): + warn( + "Weak etag indicator should be upper case.", + HTTPWarning, + stacklevel=4, + ) + + etag = etag[2:] + + if not (etag[:1] == etag[-1:] == '"'): + warn("Unquoted etag emitted.", HTTPWarning, stacklevel=4) + + location = headers.get("location") + + if location is not None: + if not urlparse(location).netloc: + warn( + "Absolute URLs required for location header.", + HTTPWarning, + stacklevel=4, + ) + + def check_iterator(self, app_iter: t.Iterable[bytes]) -> None: + if isinstance(app_iter, bytes): + warn( + "The application returned a bytestring. The response will send one" + " character at a time to the client, which will kill performance." 
+ " Return a list or iterable instead.", + WSGIWarning, + stacklevel=3, + ) + + def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Iterable[bytes]: + if len(args) != 2: + warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2) + + if kwargs: + warn( + "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2 + ) + + environ: "WSGIEnvironment" = args[0] + start_response: "StartResponse" = args[1] + + self.check_environ(environ) + environ["wsgi.input"] = InputStream(environ["wsgi.input"]) + environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"]) + + # Hook our own file wrapper in so that applications will always + # iterate to the end and we can check the content length. + environ["wsgi.file_wrapper"] = FileWrapper + + headers_set: t.List[t.Any] = [] + chunks: t.List[int] = [] + + def checking_start_response( + *args: t.Any, **kwargs: t.Any + ) -> t.Callable[[bytes], None]: + if len(args) not in {2, 3}: + warn( + f"Invalid number of arguments: {len(args)}, expected 2 or 3.", + WSGIWarning, + stacklevel=2, + ) + + if kwargs: + warn("'start_response' does not take keyword arguments.", WSGIWarning) + + status: str = args[0] + headers: t.List[t.Tuple[str, str]] = args[1] + exc_info: t.Optional[ + t.Tuple[t.Type[BaseException], BaseException, TracebackType] + ] = (args[2] if len(args) == 3 else None) + + headers_set[:] = self.check_start_response(status, headers, exc_info) + return GuardedWrite(start_response(status, headers, exc_info), chunks) + + app_iter = self.app(environ, t.cast("StartResponse", checking_start_response)) + self.check_iterator(app_iter) + return GuardedIterator( + app_iter, t.cast(t.Tuple[int, Headers], headers_set), chunks + ) diff --git a/src/werkzeug/middleware/profiler.py b/src/werkzeug/middleware/profiler.py new file mode 100644 index 0000000..200dae0 --- /dev/null +++ b/src/werkzeug/middleware/profiler.py @@ -0,0 +1,139 @@ +""" +Application Profiler +==================== + +This module provides a middleware 
that profiles each request with the +:mod:`cProfile` module. This can help identify bottlenecks in your code +that may be slowing down your application. + +.. autoclass:: ProfilerMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import os.path +import sys +import time +import typing as t +from pstats import Stats + +try: + from cProfile import Profile +except ImportError: + from profile import Profile # type: ignore + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class ProfilerMiddleware: + """Wrap a WSGI application and profile the execution of each + request. Responses are buffered so that timings are more exact. + + If ``stream`` is given, :class:`pstats.Stats` are written to it + after each request. If ``profile_dir`` is given, :mod:`cProfile` + data files are saved to that directory, one file per request. + + The filename can be customized by passing ``filename_format``. If + it is a string, it will be formatted using :meth:`str.format` with + the following fields available: + + - ``{method}`` - The request method; GET, POST, etc. + - ``{path}`` - The request path or 'root' should one not exist. + - ``{elapsed}`` - The elapsed time of the request. + - ``{time}`` - The time of the request. + + If it is a callable, it will be called with the WSGI ``environ`` + dict and should return a filename. + + :param app: The WSGI application to wrap. + :param stream: Write stats to this stream. Disable with ``None``. + :param sort_by: A tuple of columns to sort stats by. See + :meth:`pstats.Stats.sort_stats`. + :param restrictions: A tuple of restrictions to filter stats by. See + :meth:`pstats.Stats.print_stats`. + :param profile_dir: Save profile data files to this directory. + :param filename_format: Format string for profile data file names, + or a callable returning a name. See explanation above. + + .. 
code-block:: python + + from werkzeug.middleware.profiler import ProfilerMiddleware + app = ProfilerMiddleware(app) + + .. versionchanged:: 0.15 + Stats are written even if ``profile_dir`` is given, and can be + disable by passing ``stream=None``. + + .. versionadded:: 0.15 + Added ``filename_format``. + + .. versionadded:: 0.9 + Added ``restrictions`` and ``profile_dir``. + """ + + def __init__( + self, + app: "WSGIApplication", + stream: t.IO[str] = sys.stdout, + sort_by: t.Iterable[str] = ("time", "calls"), + restrictions: t.Iterable[t.Union[str, int, float]] = (), + profile_dir: t.Optional[str] = None, + filename_format: str = "{method}.{path}.{elapsed:.0f}ms.{time:.0f}.prof", + ) -> None: + self._app = app + self._stream = stream + self._sort_by = sort_by + self._restrictions = restrictions + self._profile_dir = profile_dir + self._filename_format = filename_format + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + response_body: t.List[bytes] = [] + + def catching_start_response(status, headers, exc_info=None): # type: ignore + start_response(status, headers, exc_info) + return response_body.append + + def runapp() -> None: + app_iter = self._app( + environ, t.cast("StartResponse", catching_start_response) + ) + response_body.extend(app_iter) + + if hasattr(app_iter, "close"): + app_iter.close() # type: ignore + + profile = Profile() + start = time.time() + profile.runcall(runapp) + body = b"".join(response_body) + elapsed = time.time() - start + + if self._profile_dir is not None: + if callable(self._filename_format): + filename = self._filename_format(environ) + else: + filename = self._filename_format.format( + method=environ["REQUEST_METHOD"], + path=environ["PATH_INFO"].strip("/").replace("/", ".") or "root", + elapsed=elapsed * 1000.0, + time=time.time(), + ) + filename = os.path.join(self._profile_dir, filename) + profile.dump_stats(filename) + + if self._stream is not None: + stats = 
Stats(profile, stream=self._stream) + stats.sort_stats(*self._sort_by) + print("-" * 80, file=self._stream) + path_info = environ.get("PATH_INFO", "") + print(f"PATH: {path_info!r}", file=self._stream) + stats.print_stats(*self._restrictions) + print(f"{'-' * 80}\n", file=self._stream) + + return [body] diff --git a/src/werkzeug/middleware/proxy_fix.py b/src/werkzeug/middleware/proxy_fix.py new file mode 100644 index 0000000..4cef7cc --- /dev/null +++ b/src/werkzeug/middleware/proxy_fix.py @@ -0,0 +1,187 @@ +""" +X-Forwarded-For Proxy Fix +========================= + +This module provides a middleware that adjusts the WSGI environ based on +``X-Forwarded-`` headers that proxies in front of an application may +set. + +When an application is running behind a proxy server, WSGI may see the +request as coming from that server rather than the real client. Proxies +set various headers to track where the request actually came from. + +This middleware should only be used if the application is actually +behind such a proxy, and should be configured with the number of proxies +that are chained in front of it. Not all proxies set all the headers. +Since incoming headers can be faked, you must set how many proxies are +setting each header so the middleware knows what to trust. + +.. autoclass:: ProxyFix + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import typing as t + +from ..http import parse_list_header + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class ProxyFix: + """Adjust the WSGI environ based on ``X-Forwarded-`` that proxies in + front of the application may set. + + - ``X-Forwarded-For`` sets ``REMOTE_ADDR``. + - ``X-Forwarded-Proto`` sets ``wsgi.url_scheme``. + - ``X-Forwarded-Host`` sets ``HTTP_HOST``, ``SERVER_NAME``, and + ``SERVER_PORT``. + - ``X-Forwarded-Port`` sets ``HTTP_HOST`` and ``SERVER_PORT``. 
+ - ``X-Forwarded-Prefix`` sets ``SCRIPT_NAME``. + + You must tell the middleware how many proxies set each header so it + knows what values to trust. It is a security issue to trust values + that came from the client rather than a proxy. + + The original values of the headers are stored in the WSGI + environ as ``werkzeug.proxy_fix.orig``, a dict. + + :param app: The WSGI application to wrap. + :param x_for: Number of values to trust for ``X-Forwarded-For``. + :param x_proto: Number of values to trust for ``X-Forwarded-Proto``. + :param x_host: Number of values to trust for ``X-Forwarded-Host``. + :param x_port: Number of values to trust for ``X-Forwarded-Port``. + :param x_prefix: Number of values to trust for + ``X-Forwarded-Prefix``. + + .. code-block:: python + + from werkzeug.middleware.proxy_fix import ProxyFix + # App is behind one proxy that sets the -For and -Host headers. + app = ProxyFix(app, x_for=1, x_host=1) + + .. versionchanged:: 1.0 + Deprecated code has been removed: + + * The ``num_proxies`` argument and attribute. + * The ``get_remote_addr`` method. + * The environ keys ``orig_remote_addr``, + ``orig_wsgi_url_scheme``, and ``orig_http_host``. + + .. versionchanged:: 0.15 + All headers support multiple values. The ``num_proxies`` + argument is deprecated. Each header is configured with a + separate number of trusted proxies. + + .. versionchanged:: 0.15 + Original WSGI environ values are stored in the + ``werkzeug.proxy_fix.orig`` dict. ``orig_remote_addr``, + ``orig_wsgi_url_scheme``, and ``orig_http_host`` are deprecated + and will be removed in 1.0. + + .. versionchanged:: 0.15 + Support ``X-Forwarded-Port`` and ``X-Forwarded-Prefix``. + + .. versionchanged:: 0.15 + ``X-Forwarded-Host`` and ``X-Forwarded-Port`` modify + ``SERVER_NAME`` and ``SERVER_PORT``. 
+ """ + + def __init__( + self, + app: "WSGIApplication", + x_for: int = 1, + x_proto: int = 1, + x_host: int = 0, + x_port: int = 0, + x_prefix: int = 0, + ) -> None: + self.app = app + self.x_for = x_for + self.x_proto = x_proto + self.x_host = x_host + self.x_port = x_port + self.x_prefix = x_prefix + + def _get_real_value(self, trusted: int, value: t.Optional[str]) -> t.Optional[str]: + """Get the real value from a list header based on the configured + number of trusted proxies. + + :param trusted: Number of values to trust in the header. + :param value: Comma separated list header value to parse. + :return: The real value, or ``None`` if there are fewer values + than the number of trusted proxies. + + .. versionchanged:: 1.0 + Renamed from ``_get_trusted_comma``. + + .. versionadded:: 0.15 + """ + if not (trusted and value): + return None + values = parse_list_header(value) + if len(values) >= trusted: + return values[-trusted] + return None + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + """Modify the WSGI environ based on the various ``Forwarded`` + headers before calling the wrapped application. Store the + original environ values in ``werkzeug.proxy_fix.orig_{key}``. 
+ """ + environ_get = environ.get + orig_remote_addr = environ_get("REMOTE_ADDR") + orig_wsgi_url_scheme = environ_get("wsgi.url_scheme") + orig_http_host = environ_get("HTTP_HOST") + environ.update( + { + "werkzeug.proxy_fix.orig": { + "REMOTE_ADDR": orig_remote_addr, + "wsgi.url_scheme": orig_wsgi_url_scheme, + "HTTP_HOST": orig_http_host, + "SERVER_NAME": environ_get("SERVER_NAME"), + "SERVER_PORT": environ_get("SERVER_PORT"), + "SCRIPT_NAME": environ_get("SCRIPT_NAME"), + } + } + ) + + x_for = self._get_real_value(self.x_for, environ_get("HTTP_X_FORWARDED_FOR")) + if x_for: + environ["REMOTE_ADDR"] = x_for + + x_proto = self._get_real_value( + self.x_proto, environ_get("HTTP_X_FORWARDED_PROTO") + ) + if x_proto: + environ["wsgi.url_scheme"] = x_proto + + x_host = self._get_real_value(self.x_host, environ_get("HTTP_X_FORWARDED_HOST")) + if x_host: + environ["HTTP_HOST"] = environ["SERVER_NAME"] = x_host + # "]" to check for IPv6 address without port + if ":" in x_host and not x_host.endswith("]"): + environ["SERVER_NAME"], environ["SERVER_PORT"] = x_host.rsplit(":", 1) + + x_port = self._get_real_value(self.x_port, environ_get("HTTP_X_FORWARDED_PORT")) + if x_port: + host = environ.get("HTTP_HOST") + if host: + # "]" to check for IPv6 address without port + if ":" in host and not host.endswith("]"): + host = host.rsplit(":", 1)[0] + environ["HTTP_HOST"] = f"{host}:{x_port}" + environ["SERVER_PORT"] = x_port + + x_prefix = self._get_real_value( + self.x_prefix, environ_get("HTTP_X_FORWARDED_PREFIX") + ) + if x_prefix: + environ["SCRIPT_NAME"] = x_prefix + + return self.app(environ, start_response) diff --git a/src/werkzeug/middleware/shared_data.py b/src/werkzeug/middleware/shared_data.py new file mode 100644 index 0000000..2ec396c --- /dev/null +++ b/src/werkzeug/middleware/shared_data.py @@ -0,0 +1,280 @@ +""" +Serve Shared Static Files +========================= + +.. 
autoclass:: SharedDataMiddleware + :members: is_allowed + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import mimetypes +import os +import pkgutil +import posixpath +import typing as t +from datetime import datetime +from datetime import timezone +from io import BytesIO +from time import time +from zlib import adler32 + +from ..http import http_date +from ..http import is_resource_modified +from ..security import safe_join +from ..utils import get_content_type +from ..wsgi import get_path_info +from ..wsgi import wrap_file + +_TOpener = t.Callable[[], t.Tuple[t.IO[bytes], datetime, int]] +_TLoader = t.Callable[[t.Optional[str]], t.Tuple[t.Optional[str], t.Optional[_TOpener]]] + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class SharedDataMiddleware: + + """A WSGI middleware which provides static content for development + environments or simple server setups. Its usage is quite simple:: + + import os + from werkzeug.middleware.shared_data import SharedDataMiddleware + + app = SharedDataMiddleware(app, { + '/shared': os.path.join(os.path.dirname(__file__), 'shared') + }) + + The contents of the folder ``./shared`` will now be available on + ``http://example.com/shared/``. This is pretty useful during development + because a standalone media server is not required. Files can also be + mounted on the root folder and still continue to use the application because + the shared data middleware forwards all unhandled requests to the + application, even if the requests are below one of the shared folders. + + If `pkg_resources` is available you can also tell the middleware to serve + files from package data:: + + app = SharedDataMiddleware(app, { + '/static': ('myapplication', 'static') + }) + + This will then serve the ``static`` folder in the `myapplication` + Python package. 
+ + The optional `disallow` parameter is a :func:`~fnmatch.fnmatch` + pattern for files that are not accessible from the web. If `cache` is set to + `False` no caching headers are sent. + + Currently the middleware does not support non-ASCII filenames. If the + encoding on the file system happens to match the encoding of the URI it may + work but this could also be by accident. We strongly suggest using ASCII + only file names for static files. + + The middleware will guess the mimetype using the Python `mimetypes` + module. If it's unable to figure out the mimetype it will fall back + to `fallback_mimetype`. + + :param app: the application to wrap. If you don't want to wrap an + application you can pass it :exc:`NotFound`. + :param exports: a list or dict of exported files and folders. + :param disallow: a :func:`~fnmatch.fnmatch` pattern. + :param cache: enable or disable caching headers. + :param cache_timeout: the cache timeout in seconds for the headers. + :param fallback_mimetype: The fallback mimetype for unknown files. + + .. versionchanged:: 1.0 + The default ``fallback_mimetype`` is + ``application/octet-stream``. If a filename looks like a text + mimetype, the ``utf-8`` charset is added to it. + + .. versionadded:: 0.6 + Added ``fallback_mimetype``. + + .. versionchanged:: 0.5 + Added ``cache_timeout``.
+ """ + + def __init__( + self, + app: "WSGIApplication", + exports: t.Union[ + t.Dict[str, t.Union[str, t.Tuple[str, str]]], + t.Iterable[t.Tuple[str, t.Union[str, t.Tuple[str, str]]]], + ], + disallow: None = None, + cache: bool = True, + cache_timeout: int = 60 * 60 * 12, + fallback_mimetype: str = "application/octet-stream", + ) -> None: + self.app = app + self.exports: t.List[t.Tuple[str, _TLoader]] = [] + self.cache = cache + self.cache_timeout = cache_timeout + + if isinstance(exports, dict): + exports = exports.items() + + for key, value in exports: + if isinstance(value, tuple): + loader = self.get_package_loader(*value) + elif isinstance(value, str): + if os.path.isfile(value): + loader = self.get_file_loader(value) + else: + loader = self.get_directory_loader(value) + else: + raise TypeError(f"unknown def {value!r}") + + self.exports.append((key, loader)) + + if disallow is not None: + from fnmatch import fnmatch + + self.is_allowed = lambda x: not fnmatch(x, disallow) + + self.fallback_mimetype = fallback_mimetype + + def is_allowed(self, filename: str) -> bool: + """Subclasses can override this method to disallow the access to + certain files. However by providing `disallow` in the constructor + this method is overwritten. 
+ """ + return True + + def _opener(self, filename: str) -> _TOpener: + return lambda: ( + open(filename, "rb"), + datetime.fromtimestamp(os.path.getmtime(filename), tz=timezone.utc), + int(os.path.getsize(filename)), + ) + + def get_file_loader(self, filename: str) -> _TLoader: + return lambda x: (os.path.basename(filename), self._opener(filename)) + + def get_package_loader(self, package: str, package_path: str) -> _TLoader: + load_time = datetime.now(timezone.utc) + provider = pkgutil.get_loader(package) + reader = provider.get_resource_reader(package) # type: ignore + + def loader( + path: t.Optional[str], + ) -> t.Tuple[t.Optional[str], t.Optional[_TOpener]]: + if path is None: + return None, None + + path = safe_join(package_path, path) + + if path is None: + return None, None + + basename = posixpath.basename(path) + + try: + resource = reader.open_resource(path) + except OSError: + return None, None + + if isinstance(resource, BytesIO): + return ( + basename, + lambda: (resource, load_time, len(resource.getvalue())), + ) + + return ( + basename, + lambda: ( + resource, + datetime.fromtimestamp( + os.path.getmtime(resource.name), tz=timezone.utc + ), + os.path.getsize(resource.name), + ), + ) + + return loader + + def get_directory_loader(self, directory: str) -> _TLoader: + def loader( + path: t.Optional[str], + ) -> t.Tuple[t.Optional[str], t.Optional[_TOpener]]: + if path is not None: + path = safe_join(directory, path) + + if path is None: + return None, None + else: + path = directory + + if os.path.isfile(path): + return os.path.basename(path), self._opener(path) + + return None, None + + return loader + + def generate_etag(self, mtime: datetime, file_size: int, real_filename: str) -> str: + real_filename = os.fsencode(real_filename) + timestamp = mtime.timestamp() + checksum = adler32(real_filename) & 0xFFFFFFFF + return f"wzsdm-{timestamp}-{file_size}-{checksum}" + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + 
) -> t.Iterable[bytes]: + path = get_path_info(environ) + file_loader = None + + for search_path, loader in self.exports: + if search_path == path: + real_filename, file_loader = loader(None) + + if file_loader is not None: + break + + if not search_path.endswith("/"): + search_path += "/" + + if path.startswith(search_path): + real_filename, file_loader = loader(path[len(search_path) :]) + + if file_loader is not None: + break + + if file_loader is None or not self.is_allowed(real_filename): # type: ignore + return self.app(environ, start_response) + + guessed_type = mimetypes.guess_type(real_filename) # type: ignore + mime_type = get_content_type(guessed_type[0] or self.fallback_mimetype, "utf-8") + f, mtime, file_size = file_loader() + + headers = [("Date", http_date())] + + if self.cache: + timeout = self.cache_timeout + etag = self.generate_etag(mtime, file_size, real_filename) # type: ignore + headers += [ + ("Etag", f'"{etag}"'), + ("Cache-Control", f"max-age={timeout}, public"), + ] + + if not is_resource_modified(environ, etag, last_modified=mtime): + f.close() + start_response("304 Not Modified", headers) + return [] + + headers.append(("Expires", http_date(time() + timeout))) + else: + headers.append(("Cache-Control", "public")) + + headers.extend( + ( + ("Content-Type", mime_type), + ("Content-Length", str(file_size)), + ("Last-Modified", http_date(mtime)), + ) + ) + start_response("200 OK", headers) + return wrap_file(environ, f) diff --git a/src/werkzeug/py.typed b/src/werkzeug/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/werkzeug/routing/__init__.py b/src/werkzeug/routing/__init__.py new file mode 100644 index 0000000..84b043f --- /dev/null +++ b/src/werkzeug/routing/__init__.py @@ -0,0 +1,133 @@ +"""When it comes to combining multiple controller or view functions +(however you want to call them) you need a dispatcher. 
A simple way +would be applying regular expression tests on the ``PATH_INFO`` and +calling registered callback functions that return the value then. + +This module implements a much more powerful system than simple regular +expression matching because it can also convert values in the URLs and +build URLs. + +Here is a simple example that creates a URL map for an application with +two subdomains (www and kb) and some URL rules: + +.. code-block:: python + + m = Map([ + # Static URLs + Rule('/', endpoint='static/index'), + Rule('/about', endpoint='static/about'), + Rule('/help', endpoint='static/help'), + # Knowledge Base + Subdomain('kb', [ + Rule('/', endpoint='kb/index'), + Rule('/browse/', endpoint='kb/browse'), + Rule('/browse/<int:id>/', endpoint='kb/browse'), + Rule('/browse/<int:id>/<int:page>', endpoint='kb/browse') + ]) + ], default_subdomain='www') + +If the application doesn't use subdomains it's perfectly fine to not set +the default subdomain and not use the `Subdomain` rule factory. The +endpoint in the rules can be anything, for example import paths or +unique identifiers. The WSGI application can use those endpoints to get the +handler for that URL. It doesn't have to be a string at all but it's +recommended. + +Now it's possible to create a URL adapter for one of the subdomains and +build URLs: + +.. code-block:: python + + c = m.bind('example.com') + + c.build("kb/browse", dict(id=42)) + 'http://kb.example.com/browse/42/' + + c.build("kb/browse", dict()) + 'http://kb.example.com/browse/' + + c.build("kb/browse", dict(id=42, page=3)) + 'http://kb.example.com/browse/42/3' + + c.build("static/about") + '/about' + + c.build("static/index", force_external=True) + 'http://www.example.com/' + + c = m.bind('example.com', subdomain='kb') + + c.build("static/about") + 'http://www.example.com/about' + +The first argument to bind is the server name *without* the subdomain.
+By default it will assume that the script is mounted on the root, but +often that's not the case so you can provide the real mount point as +the second argument: + +.. code-block:: python + + c = m.bind('example.com', '/applications/example') + +The third argument can be the subdomain; if not given, the default +subdomain is used. For more details about binding have a look at the +documentation of the `MapAdapter`. + +And here is how you can match URLs: + +.. code-block:: python + + c = m.bind('example.com') + + c.match("/") + ('static/index', {}) + + c.match("/about") + ('static/about', {}) + + c = m.bind('example.com', '/', 'kb') + + c.match("/") + ('kb/index', {}) + + c.match("/browse/42/23") + ('kb/browse', {'id': 42, 'page': 23}) + +If matching fails you get a ``NotFound`` exception. If the rule thinks +it's a good idea to redirect (for example because the URL was defined +to have a slash at the end but the request was missing that slash) it +will raise a ``RequestRedirect`` exception. Both are subclasses of +``HTTPException`` so you can use those errors as responses in the +application. + +If matching succeeded but the URL rule was incompatible with the given +method (for example there were only rules for ``GET`` and ``HEAD`` but +routing tried to match a ``POST`` request) a ``MethodNotAllowed`` +exception is raised.
+""" +from .converters import AnyConverter as AnyConverter +from .converters import BaseConverter as BaseConverter +from .converters import FloatConverter as FloatConverter +from .converters import IntegerConverter as IntegerConverter +from .converters import PathConverter as PathConverter +from .converters import UnicodeConverter as UnicodeConverter +from .converters import UUIDConverter as UUIDConverter +from .converters import ValidationError as ValidationError +from .exceptions import BuildError as BuildError +from .exceptions import NoMatch as NoMatch +from .exceptions import RequestAliasRedirect as RequestAliasRedirect +from .exceptions import RequestPath as RequestPath +from .exceptions import RequestRedirect as RequestRedirect +from .exceptions import RoutingException as RoutingException +from .exceptions import WebsocketMismatch as WebsocketMismatch +from .map import Map as Map +from .map import MapAdapter as MapAdapter +from .matcher import StateMachineMatcher as StateMachineMatcher +from .rules import EndpointPrefix as EndpointPrefix +from .rules import parse_converter_args as parse_converter_args +from .rules import Rule as Rule +from .rules import RuleFactory as RuleFactory +from .rules import RuleTemplate as RuleTemplate +from .rules import RuleTemplateFactory as RuleTemplateFactory +from .rules import Subdomain as Subdomain +from .rules import Submount as Submount diff --git a/src/werkzeug/routing/converters.py b/src/werkzeug/routing/converters.py new file mode 100644 index 0000000..bbad29d --- /dev/null +++ b/src/werkzeug/routing/converters.py @@ -0,0 +1,257 @@ +import re +import typing as t +import uuid + +from ..urls import _fast_url_quote + +if t.TYPE_CHECKING: + from .map import Map + + +class ValidationError(ValueError): + """Validation error. If a rule converter raises this exception the rule + does not match the current URL and the next URL is tried. 
+ """ + + +class BaseConverter: + """Base class for all converters.""" + + regex = "[^/]+" + weight = 100 + part_isolating = True + + def __init__(self, map: "Map", *args: t.Any, **kwargs: t.Any) -> None: + self.map = map + + def to_python(self, value: str) -> t.Any: + return value + + def to_url(self, value: t.Any) -> str: + if isinstance(value, (bytes, bytearray)): + return _fast_url_quote(value) + return _fast_url_quote(str(value).encode(self.map.charset)) + + +class UnicodeConverter(BaseConverter): + """This converter is the default converter and accepts any string but + only one path segment. Thus the string can not include a slash. + + This is the default validator. + + Example:: + + Rule('/pages/'), + Rule('/') + + :param map: the :class:`Map`. + :param minlength: the minimum length of the string. Must be greater + or equal 1. + :param maxlength: the maximum length of the string. + :param length: the exact length of the string. + """ + + part_isolating = True + + def __init__( + self, + map: "Map", + minlength: int = 1, + maxlength: t.Optional[int] = None, + length: t.Optional[int] = None, + ) -> None: + super().__init__(map) + if length is not None: + length_regex = f"{{{int(length)}}}" + else: + if maxlength is None: + maxlength_value = "" + else: + maxlength_value = str(int(maxlength)) + length_regex = f"{{{int(minlength)},{maxlength_value}}}" + self.regex = f"[^/]{length_regex}" + + +class AnyConverter(BaseConverter): + """Matches one of the items provided. Items can either be Python + identifiers or strings:: + + Rule('/') + + :param map: the :class:`Map`. + :param items: this function accepts the possible items as positional + arguments. + + .. versionchanged:: 2.2 + Value is validated when building a URL. 
+ """ + + part_isolating = True + + def __init__(self, map: "Map", *items: str) -> None: + super().__init__(map) + self.items = set(items) + self.regex = f"(?:{'|'.join([re.escape(x) for x in items])})" + + def to_url(self, value: t.Any) -> str: + if value in self.items: + return str(value) + + valid_values = ", ".join(f"'{item}'" for item in sorted(self.items)) + raise ValueError(f"'{value}' is not one of {valid_values}") + + +class PathConverter(BaseConverter): + """Like the default :class:`UnicodeConverter`, but it also matches + slashes. This is useful for wikis and similar applications:: + + Rule('/') + Rule('//edit') + + :param map: the :class:`Map`. + """ + + regex = "[^/].*?" + weight = 200 + part_isolating = False + + +class NumberConverter(BaseConverter): + """Baseclass for `IntegerConverter` and `FloatConverter`. + + :internal: + """ + + weight = 50 + num_convert: t.Callable = int + part_isolating = True + + def __init__( + self, + map: "Map", + fixed_digits: int = 0, + min: t.Optional[int] = None, + max: t.Optional[int] = None, + signed: bool = False, + ) -> None: + if signed: + self.regex = self.signed_regex + super().__init__(map) + self.fixed_digits = fixed_digits + self.min = min + self.max = max + self.signed = signed + + def to_python(self, value: str) -> t.Any: + if self.fixed_digits and len(value) != self.fixed_digits: + raise ValidationError() + value = self.num_convert(value) + if (self.min is not None and value < self.min) or ( + self.max is not None and value > self.max + ): + raise ValidationError() + return value + + def to_url(self, value: t.Any) -> str: + value = str(self.num_convert(value)) + if self.fixed_digits: + value = value.zfill(self.fixed_digits) + return value + + @property + def signed_regex(self) -> str: + return f"-?{self.regex}" + + +class IntegerConverter(NumberConverter): + """This converter only accepts integer values:: + + Rule("/page/") + + By default it only accepts unsigned, positive values. 
The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/page/") + + :param map: The :class:`Map`. + :param fixed_digits: The number of fixed digits in the URL. If you + set this to ``4`` for example, the rule will only match if the + URL looks like ``/0001/``. The default is variable length. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+" + part_isolating = True + + +class FloatConverter(NumberConverter): + """This converter only accepts floating point values:: + + Rule("/probability/") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/offset/") + + :param map: The :class:`Map`. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+\.\d+" + num_convert = float + part_isolating = True + + def __init__( + self, + map: "Map", + min: t.Optional[float] = None, + max: t.Optional[float] = None, + signed: bool = False, + ) -> None: + super().__init__(map, min=min, max=max, signed=signed) # type: ignore + + +class UUIDConverter(BaseConverter): + """This converter only accepts UUID strings:: + + Rule('/object/') + + .. versionadded:: 0.10 + + :param map: the :class:`Map`. + """ + + regex = ( + r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-" + r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}" + ) + part_isolating = True + + def to_python(self, value: str) -> uuid.UUID: + return uuid.UUID(value) + + def to_url(self, value: uuid.UUID) -> str: + return str(value) + + +#: the default converter mapping for the map. 
+DEFAULT_CONVERTERS: t.Mapping[str, t.Type[BaseConverter]] = { + "default": UnicodeConverter, + "string": UnicodeConverter, + "any": AnyConverter, + "path": PathConverter, + "int": IntegerConverter, + "float": FloatConverter, + "uuid": UUIDConverter, +} diff --git a/src/werkzeug/routing/exceptions.py b/src/werkzeug/routing/exceptions.py new file mode 100644 index 0000000..7cbe6e9 --- /dev/null +++ b/src/werkzeug/routing/exceptions.py @@ -0,0 +1,146 @@ +import difflib +import typing as t + +from ..exceptions import BadRequest +from ..exceptions import HTTPException +from ..utils import cached_property +from ..utils import redirect + +if t.TYPE_CHECKING: + from _typeshed.wsgi import WSGIEnvironment + from .map import MapAdapter + from .rules import Rule # noqa: F401 + from ..wrappers.request import Request + from ..wrappers.response import Response + + +class RoutingException(Exception): + """Special exceptions that require the application to redirect, notifying + about missing urls, etc. + + :internal: + """ + + +class RequestRedirect(HTTPException, RoutingException): + """Raise if the map requests a redirect. This is for example the case if + `strict_slashes` are activated and an url that requires a trailing slash. + + The attribute `new_url` contains the absolute destination url. 
+ """ + + code = 308 + + def __init__(self, new_url: str) -> None: + super().__init__(new_url) + self.new_url = new_url + + def get_response( + self, + environ: t.Optional[t.Union["WSGIEnvironment", "Request"]] = None, + scope: t.Optional[dict] = None, + ) -> "Response": + return redirect(self.new_url, self.code) + + +class RequestPath(RoutingException): + """Internal exception.""" + + __slots__ = ("path_info",) + + def __init__(self, path_info: str) -> None: + super().__init__() + self.path_info = path_info + + +class RequestAliasRedirect(RoutingException): # noqa: B903 + """This rule is an alias and wants to redirect to the canonical URL.""" + + def __init__(self, matched_values: t.Mapping[str, t.Any], endpoint: str) -> None: + super().__init__() + self.matched_values = matched_values + self.endpoint = endpoint + + +class BuildError(RoutingException, LookupError): + """Raised if the build system cannot find a URL for an endpoint with the + values provided. + """ + + def __init__( + self, + endpoint: str, + values: t.Mapping[str, t.Any], + method: t.Optional[str], + adapter: t.Optional["MapAdapter"] = None, + ) -> None: + super().__init__(endpoint, values, method) + self.endpoint = endpoint + self.values = values + self.method = method + self.adapter = adapter + + @cached_property + def suggested(self) -> t.Optional["Rule"]: + return self.closest_rule(self.adapter) + + def closest_rule(self, adapter: t.Optional["MapAdapter"]) -> t.Optional["Rule"]: + def _score_rule(rule: "Rule") -> float: + return sum( + [ + 0.98 + * difflib.SequenceMatcher( + None, rule.endpoint, self.endpoint + ).ratio(), + 0.01 * bool(set(self.values or ()).issubset(rule.arguments)), + 0.01 * bool(rule.methods and self.method in rule.methods), + ] + ) + + if adapter and adapter.map._rules: + return max(adapter.map._rules, key=_score_rule) + + return None + + def __str__(self) -> str: + message = [f"Could not build url for endpoint {self.endpoint!r}"] + if self.method: + message.append(f" 
({self.method!r})") + if self.values: + message.append(f" with values {sorted(self.values)!r}") + message.append(".") + if self.suggested: + if self.endpoint == self.suggested.endpoint: + if ( + self.method + and self.suggested.methods is not None + and self.method not in self.suggested.methods + ): + message.append( + " Did you mean to use methods" + f" {sorted(self.suggested.methods)!r}?" + ) + missing_values = self.suggested.arguments.union( + set(self.suggested.defaults or ()) + ) - set(self.values.keys()) + if missing_values: + message.append( + f" Did you forget to specify values {sorted(missing_values)!r}?" + ) + else: + message.append(f" Did you mean {self.suggested.endpoint!r} instead?") + return "".join(message) + + +class WebsocketMismatch(BadRequest): + """The only matched rule is either a WebSocket and the request is + HTTP, or the rule is HTTP and the request is a WebSocket. + """ + + +class NoMatch(Exception): + __slots__ = ("have_match_for", "websocket_mismatch") + + def __init__(self, have_match_for: t.Set[str], websocket_mismatch: bool) -> None: + self.have_match_for = have_match_for + self.websocket_mismatch = websocket_mismatch diff --git a/src/werkzeug/routing/map.py b/src/werkzeug/routing/map.py new file mode 100644 index 0000000..daf94b6 --- /dev/null +++ b/src/werkzeug/routing/map.py @@ -0,0 +1,944 @@ +import posixpath +import typing as t +import warnings +from pprint import pformat +from threading import Lock + +from .._internal import _encode_idna +from .._internal import _get_environ +from .._internal import _to_str +from .._internal import _wsgi_decoding_dance +from ..datastructures import ImmutableDict +from ..datastructures import MultiDict +from ..exceptions import BadHost +from ..exceptions import HTTPException +from ..exceptions import MethodNotAllowed +from ..exceptions import NotFound +from ..urls import url_encode +from ..urls import url_join +from ..urls import url_quote +from ..wsgi import get_host +from .converters import 
DEFAULT_CONVERTERS +from .exceptions import BuildError +from .exceptions import NoMatch +from .exceptions import RequestAliasRedirect +from .exceptions import RequestPath +from .exceptions import RequestRedirect +from .exceptions import WebsocketMismatch +from .matcher import StateMachineMatcher +from .rules import _simple_rule_re +from .rules import Rule + +if t.TYPE_CHECKING: + import typing_extensions as te + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + from .converters import BaseConverter + from .rules import RuleFactory + from ..wrappers.request import Request + + +class Map: + """The map class stores all the URL rules and some configuration + parameters. Some of the configuration values are only stored on the + `Map` instance since those affect all rules, others are just defaults + and can be overridden for each rule. Note that you have to specify all + arguments besides the `rules` as keyword arguments! + + :param rules: sequence of url rules for this map. + :param default_subdomain: The default subdomain for rules without a + subdomain defined. + :param charset: charset of the url. defaults to ``"utf-8"`` + :param strict_slashes: If a rule ends with a slash but the matched + URL does not, redirect to the URL with a trailing slash. + :param merge_slashes: Merge consecutive slashes when matching or + building URLs. Matches will redirect to the normalized URL. + Slashes in variable parts are not merged. + :param redirect_defaults: This will redirect to the default rule if it + wasn't visited that way. This helps creating + unique URLs. + :param converters: A dict of converters that adds additional converters + to the list of converters. If you redefine one + converter this will override the original one. + :param sort_parameters: If set to `True` the url parameters are sorted. + See `url_encode` for more details. + :param sort_key: The sort key function for `url_encode`. 
+ :param encoding_errors: the error method to use for decoding + :param host_matching: if set to `True` it enables the host matching + feature and disables the subdomain one. If + enabled the `host` parameter to rules is used + instead of the `subdomain` one. + + .. versionchanged:: 1.0 + If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules + will match. + + .. versionchanged:: 1.0 + Added ``merge_slashes``. + + .. versionchanged:: 0.7 + Added ``encoding_errors`` and ``host_matching``. + + .. versionchanged:: 0.5 + Added ``sort_parameters`` and ``sort_key``. + """ + + #: A dict of default converters to be used. + default_converters = ImmutableDict(DEFAULT_CONVERTERS) + + #: The type of lock to use when updating. + #: + #: .. versionadded:: 1.0 + lock_class = Lock + + def __init__( + self, + rules: t.Optional[t.Iterable["RuleFactory"]] = None, + default_subdomain: str = "", + charset: str = "utf-8", + strict_slashes: bool = True, + merge_slashes: bool = True, + redirect_defaults: bool = True, + converters: t.Optional[t.Mapping[str, t.Type["BaseConverter"]]] = None, + sort_parameters: bool = False, + sort_key: t.Optional[t.Callable[[t.Any], t.Any]] = None, + encoding_errors: str = "replace", + host_matching: bool = False, + ) -> None: + self._matcher = StateMachineMatcher(merge_slashes) + self._rules_by_endpoint: t.Dict[str, t.List[Rule]] = {} + self._remap = True + self._remap_lock = self.lock_class() + + self.default_subdomain = default_subdomain + self.charset = charset + self.encoding_errors = encoding_errors + self.strict_slashes = strict_slashes + self.merge_slashes = merge_slashes + self.redirect_defaults = redirect_defaults + self.host_matching = host_matching + + self.converters = self.default_converters.copy() + if converters: + self.converters.update(converters) + + self.sort_parameters = sort_parameters + self.sort_key = sort_key + + for rulefactory in rules or (): + self.add(rulefactory) + + def is_endpoint_expecting(self, endpoint: str, 
*arguments: str) -> bool: + """Iterate over all rules and check if the endpoint expects + the arguments provided. This is for example useful if you have + some URLs that expect a language code and others that do not and + you want to wrap the builder a bit so that the current language + code is automatically added if not provided but endpoints expect + it. + + :param endpoint: the endpoint to check. + :param arguments: this function accepts one or more arguments + as positional arguments. Each one of them is + checked. + """ + self.update() + arguments = set(arguments) + for rule in self._rules_by_endpoint[endpoint]: + if arguments.issubset(rule.arguments): + return True + return False + + @property + def _rules(self) -> t.List[Rule]: + return [rule for rules in self._rules_by_endpoint.values() for rule in rules] + + def iter_rules(self, endpoint: t.Optional[str] = None) -> t.Iterator[Rule]: + """Iterate over all rules or the rules of an endpoint. + + :param endpoint: if provided only the rules for that endpoint + are returned. + :return: an iterator + """ + self.update() + if endpoint is not None: + return iter(self._rules_by_endpoint[endpoint]) + return iter(self._rules) + + def add(self, rulefactory: "RuleFactory") -> None: + """Add a new rule or factory to the map and bind it. Requires that the + rule is not bound to another map. 
+ + :param rulefactory: a :class:`Rule` or :class:`RuleFactory` + """ + for rule in rulefactory.get_rules(self): + rule.bind(self) + if not rule.build_only: + self._matcher.add(rule) + self._rules_by_endpoint.setdefault(rule.endpoint, []).append(rule) + self._remap = True + + def bind( + self, + server_name: str, + script_name: t.Optional[str] = None, + subdomain: t.Optional[str] = None, + url_scheme: str = "http", + default_method: str = "GET", + path_info: t.Optional[str] = None, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + ) -> "MapAdapter": + """Return a new :class:`MapAdapter` with the details specified to the + call. Note that `script_name` will default to ``'/'`` if not further + specified or `None`. The `server_name` at least is a requirement + because the HTTP RFC requires absolute URLs for redirects and so all + redirect exceptions raised by Werkzeug will contain the full canonical + URL. + + If no path_info is passed to :meth:`match` it will use the default path + info passed to bind. While this doesn't really make sense for + manual bind calls, it's useful if you bind a map to a WSGI + environment which already contains the path info. + + `subdomain` will default to the `default_subdomain` for this map if + no defined. If there is no `default_subdomain` you cannot use the + subdomain feature. + + .. versionchanged:: 1.0 + If ``url_scheme`` is ``ws`` or ``wss``, only WebSocket rules + will match. + + .. versionchanged:: 0.15 + ``path_info`` defaults to ``'/'`` if ``None``. + + .. versionchanged:: 0.8 + ``query_args`` can be a string. + + .. versionchanged:: 0.7 + Added ``query_args``. 
+ """ + server_name = server_name.lower() + if self.host_matching: + if subdomain is not None: + raise RuntimeError("host matching enabled and a subdomain was provided") + elif subdomain is None: + subdomain = self.default_subdomain + if script_name is None: + script_name = "/" + if path_info is None: + path_info = "/" + + try: + server_name = _encode_idna(server_name) # type: ignore + except UnicodeError as e: + raise BadHost() from e + + return MapAdapter( + self, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args, + ) + + def bind_to_environ( + self, + environ: t.Union["WSGIEnvironment", "Request"], + server_name: t.Optional[str] = None, + subdomain: t.Optional[str] = None, + ) -> "MapAdapter": + """Like :meth:`bind` but you can pass it an WSGI environment and it + will fetch the information from that dictionary. Note that because of + limitations in the protocol there is no way to get the current + subdomain and real `server_name` from the environment. If you don't + provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or + `HTTP_HOST` if provided) as used `server_name` with disabled subdomain + feature. + + If `subdomain` is `None` but an environment and a server name is + provided it will calculate the current subdomain automatically. + Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` + in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated + subdomain will be ``'staging.dev'``. + + If the object passed as environ has an environ attribute, the value of + this attribute is used instead. This allows you to pass request + objects. Additionally `PATH_INFO` added as a default of the + :class:`MapAdapter` so that you don't have to pass the path info to + the match method. + + .. versionchanged:: 1.0.0 + If the passed server name specifies port 443, it will match + if the incoming scheme is ``https`` without a port. + + .. 
versionchanged:: 1.0.0 + A warning is shown when the passed server name does not + match the incoming WSGI server name. + + .. versionchanged:: 0.8 + This will no longer raise a ValueError when an unexpected server + name was passed. + + .. versionchanged:: 0.5 + previously this method accepted a bogus `calculate_subdomain` + parameter that did not have any effect. It was removed because + of that. + + :param environ: a WSGI environment. + :param server_name: an optional server name hint (see above). + :param subdomain: optionally the current subdomain (see above). + """ + env = _get_environ(environ) + wsgi_server_name = get_host(env).lower() + scheme = env["wsgi.url_scheme"] + upgrade = any( + v.strip() == "upgrade" + for v in env.get("HTTP_CONNECTION", "").lower().split(",") + ) + + if upgrade and env.get("HTTP_UPGRADE", "").lower() == "websocket": + scheme = "wss" if scheme == "https" else "ws" + + if server_name is None: + server_name = wsgi_server_name + else: + server_name = server_name.lower() + + # strip standard port to match get_host() + if scheme in {"http", "ws"} and server_name.endswith(":80"): + server_name = server_name[:-3] + elif scheme in {"https", "wss"} and server_name.endswith(":443"): + server_name = server_name[:-4] + + if subdomain is None and not self.host_matching: + cur_server_name = wsgi_server_name.split(".") + real_server_name = server_name.split(".") + offset = -len(real_server_name) + + if cur_server_name[offset:] != real_server_name: + # This can happen even with valid configs if the server was + # accessed directly by IP address under some situations. + # Instead of raising an exception like in Werkzeug 0.7 or + # earlier we go by an invalid subdomain which will result + # in a 404 error on matching. 
+ warnings.warn( + f"Current server name {wsgi_server_name!r} doesn't match configured" + f" server name {server_name!r}", + stacklevel=2, + ) + subdomain = "" + else: + subdomain = ".".join(filter(None, cur_server_name[:offset])) + + def _get_wsgi_string(name: str) -> t.Optional[str]: + val = env.get(name) + if val is not None: + return _wsgi_decoding_dance(val, self.charset) + return None + + script_name = _get_wsgi_string("SCRIPT_NAME") + path_info = _get_wsgi_string("PATH_INFO") + query_args = _get_wsgi_string("QUERY_STRING") + return Map.bind( + self, + server_name, + script_name, + subdomain, + scheme, + env["REQUEST_METHOD"], + path_info, + query_args=query_args, + ) + + def update(self) -> None: + """Called before matching and building to keep the compiled rules + in the correct order after things changed. + """ + if not self._remap: + return + + with self._remap_lock: + if not self._remap: + return + + self._matcher.update() + for rules in self._rules_by_endpoint.values(): + rules.sort(key=lambda x: x.build_compare_key()) + self._remap = False + + def __repr__(self) -> str: + rules = self.iter_rules() + return f"{type(self).__name__}({pformat(list(rules))})" + + +class MapAdapter: + + """Returned by :meth:`Map.bind` or :meth:`Map.bind_to_environ` and does + the URL matching and building based on runtime information. 
+ """ + + def __init__( + self, + map: Map, + server_name: str, + script_name: str, + subdomain: t.Optional[str], + url_scheme: str, + path_info: str, + default_method: str, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + ): + self.map = map + self.server_name = _to_str(server_name) + script_name = _to_str(script_name) + if not script_name.endswith("/"): + script_name += "/" + self.script_name = script_name + self.subdomain = _to_str(subdomain) + self.url_scheme = _to_str(url_scheme) + self.path_info = _to_str(path_info) + self.default_method = _to_str(default_method) + self.query_args = query_args + self.websocket = self.url_scheme in {"ws", "wss"} + + def dispatch( + self, + view_func: t.Callable[[str, t.Mapping[str, t.Any]], "WSGIApplication"], + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + catch_http_exceptions: bool = False, + ) -> "WSGIApplication": + """Does the complete dispatching process. `view_func` is called with + the endpoint and a dict with the values for the view. It should + look up the view function, call it, and return a response object + or WSGI application. http exceptions are not caught by default + so that applications can display nicer error messages by just + catching them by hand. If you want to stick with the default + error messages you can pass it ``catch_http_exceptions=True`` and + it will catch the http exceptions. 
+ + Here a small example for the dispatch usage:: + + from werkzeug.wrappers import Request, Response + from werkzeug.wsgi import responder + from werkzeug.routing import Map, Rule + + def on_index(request): + return Response('Hello from the index') + + url_map = Map([Rule('/', endpoint='index')]) + views = {'index': on_index} + + @responder + def application(environ, start_response): + request = Request(environ) + urls = url_map.bind_to_environ(environ) + return urls.dispatch(lambda e, v: views[e](request, **v), + catch_http_exceptions=True) + + Keep in mind that this method might return exception objects, too, so + use :class:`Response.force_type` to get a response object. + + :param view_func: a function that is called with the endpoint as + first argument and the value dict as second. Has + to dispatch to the actual view function with this + information. (see above) + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param catch_http_exceptions: set to `True` to catch any of the + werkzeug :class:`HTTPException`\\s. + """ + try: + try: + endpoint, args = self.match(path_info, method) + except RequestRedirect as e: + return e + return view_func(endpoint, args) + except HTTPException as e: + if catch_http_exceptions: + return e + raise + + @t.overload + def match( # type: ignore + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: "te.Literal[False]" = False, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[str, t.Mapping[str, t.Any]]: + ... 
+ + @t.overload + def match( + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: "te.Literal[True]" = True, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[Rule, t.Mapping[str, t.Any]]: + ... + + def match( + self, + path_info: t.Optional[str] = None, + method: t.Optional[str] = None, + return_rule: bool = False, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + websocket: t.Optional[bool] = None, + ) -> t.Tuple[t.Union[str, Rule], t.Mapping[str, t.Any]]: + """The usage is simple: you just pass the match method the current + path info as well as the method (which defaults to `GET`). The + following things can then happen: + + - you receive a `NotFound` exception that indicates that no URL is + matching. A `NotFound` exception is also a WSGI application you + can call to get a default page not found page (happens to be the + same object as `werkzeug.exceptions.NotFound`) + + - you receive a `MethodNotAllowed` exception that indicates that there + is a match for this URL but not for the current request method. + This is useful for RESTful applications. + + - you receive a `RequestRedirect` exception with a `new_url` + attribute. This exception is used to notify you about a request + Werkzeug requests from your WSGI application. This is for example the + case if you request ``/foo`` although the correct URL is ``/foo/`` + You can use the `RequestRedirect` instance as response-like object + similar to all other subclasses of `HTTPException`. + + - you receive a ``WebsocketMismatch`` exception if the only + match is a WebSocket rule but the bind is an HTTP request, or + if the match is an HTTP rule but the bind is a WebSocket + request. 
+ + - you get a tuple in the form ``(endpoint, arguments)`` if there is + a match (unless `return_rule` is True, in which case you get a tuple + in the form ``(rule, arguments)``) + + If the path info is not passed to the match method the default path + info of the map is used (defaults to the root URL if not defined + explicitly). + + All of the exceptions raised are subclasses of `HTTPException` so they + can be used as WSGI responses. They will all render generic error or + redirect pages. + + Here is a small example for matching: + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.match("/", "GET") + ('index', {}) + >>> urls.match("/downloads/42") + ('downloads/show', {'id': 42}) + + And here is what happens on redirect and missing URLs: + + >>> urls.match("/downloads") + Traceback (most recent call last): + ... + RequestRedirect: http://example.com/downloads/ + >>> urls.match("/missing") + Traceback (most recent call last): + ... + NotFound: 404 Not Found + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param return_rule: return the rule that matched instead of just the + endpoint (defaults to `False`). + :param query_args: optional query arguments that are used for + automatic redirects as string or dictionary. It's + currently not possible to use the query arguments + for URL matching. + :param websocket: Match WebSocket instead of HTTP requests. A + websocket request has a ``ws`` or ``wss`` + :attr:`url_scheme`. This overrides that detection. + + .. versionadded:: 1.0 + Added ``websocket``. + + .. versionchanged:: 0.8 + ``query_args`` can be a string. + + .. versionadded:: 0.7 + Added ``query_args``. + + .. 
versionadded:: 0.6 + Added ``return_rule``. + """ + self.map.update() + if path_info is None: + path_info = self.path_info + else: + path_info = _to_str(path_info, self.map.charset) + if query_args is None: + query_args = self.query_args or {} + method = (method or self.default_method).upper() + + if websocket is None: + websocket = self.websocket + + domain_part = self.server_name if self.map.host_matching else self.subdomain + path_part = f"/{path_info.lstrip('/')}" if path_info else "" + + try: + result = self.map._matcher.match(domain_part, path_part, method, websocket) + except RequestPath as e: + raise RequestRedirect( + self.make_redirect_url( + url_quote(e.path_info, self.map.charset, safe="/:|+"), + query_args, + ) + ) from None + except RequestAliasRedirect as e: + raise RequestRedirect( + self.make_alias_redirect_url( + f"{domain_part}|{path_part}", + e.endpoint, + e.matched_values, + method, + query_args, + ) + ) from None + except NoMatch as e: + if e.have_match_for: + raise MethodNotAllowed(valid_methods=list(e.have_match_for)) from None + + if e.websocket_mismatch: + raise WebsocketMismatch() from None + + raise NotFound() from None + else: + rule, rv = result + + if self.map.redirect_defaults: + redirect_url = self.get_default_redirect(rule, method, rv, query_args) + if redirect_url is not None: + raise RequestRedirect(redirect_url) + + if rule.redirect_to is not None: + if isinstance(rule.redirect_to, str): + + def _handle_match(match: t.Match[str]) -> str: + value = rv[match.group(1)] + return rule._converters[match.group(1)].to_url(value) + + redirect_url = _simple_rule_re.sub(_handle_match, rule.redirect_to) + else: + redirect_url = rule.redirect_to(self, **rv) + + if self.subdomain: + netloc = f"{self.subdomain}.{self.server_name}" + else: + netloc = self.server_name + + raise RequestRedirect( + url_join( + f"{self.url_scheme or 'http'}://{netloc}{self.script_name}", + redirect_url, + ) + ) + + if return_rule: + return rule, rv + else: + return 
rule.endpoint, rv + + def test( + self, path_info: t.Optional[str] = None, method: t.Optional[str] = None + ) -> bool: + """Test if a rule would match. Works like `match` but returns `True` + if the URL matches, or `False` if it does not exist. + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + """ + try: + self.match(path_info, method) + except RequestRedirect: + pass + except HTTPException: + return False + return True + + def allowed_methods(self, path_info: t.Optional[str] = None) -> t.Iterable[str]: + """Returns the valid methods that match for a given path. + + .. versionadded:: 0.7 + """ + try: + self.match(path_info, method="--") + except MethodNotAllowed as e: + return e.valid_methods # type: ignore + except HTTPException: + pass + return [] + + def get_host(self, domain_part: t.Optional[str]) -> str: + """Figures out the full host name for the given domain part. The + domain part is a subdomain in case host matching is disabled or + a full host name. + """ + if self.map.host_matching: + if domain_part is None: + return self.server_name + return _to_str(domain_part, "ascii") + subdomain = domain_part + if subdomain is None: + subdomain = self.subdomain + else: + subdomain = _to_str(subdomain, "ascii") + + if subdomain: + return f"{subdomain}.{self.server_name}" + else: + return self.server_name + + def get_default_redirect( + self, + rule: Rule, + method: str, + values: t.MutableMapping[str, t.Any], + query_args: t.Union[t.Mapping[str, t.Any], str], + ) -> t.Optional[str]: + """A helper that returns the URL to redirect to if it finds one. + This is used for default redirecting only. + + :internal: + """ + assert self.map.redirect_defaults + for r in self.map._rules_by_endpoint[rule.endpoint]: + # every rule that comes after this one, including ourself + # has a lower priority for the defaults. 
We order the ones + # with the highest priority up for building. + if r is rule: + break + if r.provides_defaults_for(rule) and r.suitable_for(values, method): + values.update(r.defaults) # type: ignore + domain_part, path = r.build(values) # type: ignore + return self.make_redirect_url(path, query_args, domain_part=domain_part) + return None + + def encode_query_args(self, query_args: t.Union[t.Mapping[str, t.Any], str]) -> str: + if not isinstance(query_args, str): + return url_encode(query_args, self.map.charset) + return query_args + + def make_redirect_url( + self, + path_info: str, + query_args: t.Optional[t.Union[t.Mapping[str, t.Any], str]] = None, + domain_part: t.Optional[str] = None, + ) -> str: + """Creates a redirect URL. + + :internal: + """ + if query_args: + suffix = f"?{self.encode_query_args(query_args)}" + else: + suffix = "" + + scheme = self.url_scheme or "http" + host = self.get_host(domain_part) + path = posixpath.join(self.script_name.strip("/"), path_info.lstrip("/")) + return f"{scheme}://{host}/{path}{suffix}" + + def make_alias_redirect_url( + self, + path: str, + endpoint: str, + values: t.Mapping[str, t.Any], + method: str, + query_args: t.Union[t.Mapping[str, t.Any], str], + ) -> str: + """Internally called to make an alias redirect URL.""" + url = self.build( + endpoint, values, method, append_unknown=False, force_external=True + ) + if query_args: + url += f"?{self.encode_query_args(query_args)}" + assert url != path, "detected invalid alias setting. No canonical URL found" + return url + + def _partial_build( + self, + endpoint: str, + values: t.Mapping[str, t.Any], + method: t.Optional[str], + append_unknown: bool, + ) -> t.Optional[t.Tuple[str, str, bool]]: + """Helper for :meth:`build`. Returns subdomain and path for the + rule that accepts this endpoint, values and method. 
+ + :internal: + """ + # in case the method is none, try with the default method first + if method is None: + rv = self._partial_build( + endpoint, values, self.default_method, append_unknown + ) + if rv is not None: + return rv + + # Default method did not match or a specific method is passed. + # Check all for first match with matching host. If no matching + # host is found, go with first result. + first_match = None + + for rule in self.map._rules_by_endpoint.get(endpoint, ()): + if rule.suitable_for(values, method): + build_rv = rule.build(values, append_unknown) + + if build_rv is not None: + rv = (build_rv[0], build_rv[1], rule.websocket) + if self.map.host_matching: + if rv[0] == self.server_name: + return rv + elif first_match is None: + first_match = rv + else: + return rv + + return first_match + + def build( + self, + endpoint: str, + values: t.Optional[t.Mapping[str, t.Any]] = None, + method: t.Optional[str] = None, + force_external: bool = False, + append_unknown: bool = True, + url_scheme: t.Optional[str] = None, + ) -> str: + """Building URLs works pretty much the other way round. Instead of + `match` you call `build` and pass it the endpoint and a dict of + arguments for the placeholders. + + The `build` function also accepts an argument called `force_external` + which, if you set it to `True` will force external URLs. Per default + external URLs (include the server name) will only be used if the + target URL is on a different subdomain. + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.build("index", {}) + '/' + >>> urls.build("downloads/show", {'id': 42}) + '/downloads/42' + >>> urls.build("downloads/show", {'id': 42}, force_external=True) + 'http://example.com/downloads/42' + + Because URLs cannot contain non ASCII data you will always get + bytes back. 
Non ASCII characters are urlencoded with the + charset defined on the map instance. + + Additional values are converted to strings and appended to the URL as + URL querystring parameters: + + >>> urls.build("index", {'q': 'My Searchstring'}) + '/?q=My+Searchstring' + + When processing those additional values, lists are furthermore + interpreted as multiple values (as per + :py:class:`werkzeug.datastructures.MultiDict`): + + >>> urls.build("index", {'q': ['a', 'b', 'c']}) + '/?q=a&q=b&q=c' + + Passing a ``MultiDict`` will also add multiple values: + + >>> urls.build("index", MultiDict((('p', 'z'), ('q', 'a'), ('q', 'b')))) + '/?p=z&q=a&q=b' + + If a rule does not exist when building, a `BuildError` exception is + raised. + + The build method accepts an argument called `method` which allows you + to specify the method you want to have a URL built for if you have + different methods for the same endpoint specified. + + :param endpoint: the endpoint of the URL to build. + :param values: the values for the URL to build. Unhandled values are + appended to the URL as query parameters. + :param method: the HTTP method for the rule if there are different + URLs for different methods on the same endpoint. + :param force_external: enforce full canonical external URLs. If the URL + scheme is not provided, this will generate + a protocol-relative URL. + :param append_unknown: unknown parameters are appended to the generated + URL as query string arguments. Disable this + if you want the builder to ignore those. + :param url_scheme: Scheme to use in place of the bound + :attr:`url_scheme`. + + .. versionchanged:: 2.0 + Added the ``url_scheme`` parameter. + + .. versionadded:: 0.6 + Added the ``append_unknown`` parameter. 
+ """ + self.map.update() + + if values: + if isinstance(values, MultiDict): + values = { + k: (v[0] if len(v) == 1 else v) + for k, v in dict.items(values) + if len(v) != 0 + } + else: # plain dict + values = {k: v for k, v in values.items() if v is not None} + else: + values = {} + + rv = self._partial_build(endpoint, values, method, append_unknown) + if rv is None: + raise BuildError(endpoint, values, method, self) + + domain_part, path, websocket = rv + host = self.get_host(domain_part) + + if url_scheme is None: + url_scheme = self.url_scheme + + # Always build WebSocket routes with the scheme (browsers + # require full URLs). If bound to a WebSocket, ensure that HTTP + # routes are built with an HTTP scheme. + secure = url_scheme in {"https", "wss"} + + if websocket: + force_external = True + url_scheme = "wss" if secure else "ws" + elif url_scheme: + url_scheme = "https" if secure else "http" + + # shortcut this. + if not force_external and ( + (self.map.host_matching and host == self.server_name) + or (not self.map.host_matching and domain_part == self.subdomain) + ): + return f"{self.script_name.rstrip('/')}/{path.lstrip('/')}" + + scheme = f"{url_scheme}:" if url_scheme else "" + return f"{scheme}//{host}{self.script_name[:-1]}/{path.lstrip('/')}" diff --git a/src/werkzeug/routing/matcher.py b/src/werkzeug/routing/matcher.py new file mode 100644 index 0000000..d22b05a --- /dev/null +++ b/src/werkzeug/routing/matcher.py @@ -0,0 +1,185 @@ +import re +import typing as t +from dataclasses import dataclass +from dataclasses import field + +from .converters import ValidationError +from .exceptions import NoMatch +from .exceptions import RequestAliasRedirect +from .exceptions import RequestPath +from .rules import Rule +from .rules import RulePart + + +class SlashRequired(Exception): + pass + + +@dataclass +class State: + """A representation of a rule state. 
+ + This includes the *rules* that correspond to the state and the + possible *static* and *dynamic* transitions to the next state. + """ + + dynamic: t.List[t.Tuple[RulePart, "State"]] = field(default_factory=list) + rules: t.List[Rule] = field(default_factory=list) + static: t.Dict[str, "State"] = field(default_factory=dict) + + +class StateMachineMatcher: + def __init__(self, merge_slashes: bool) -> None: + self._root = State() + self.merge_slashes = merge_slashes + + def add(self, rule: Rule) -> None: + state = self._root + for part in rule._parts: + if part.static: + state.static.setdefault(part.content, State()) + state = state.static[part.content] + else: + for test_part, new_state in state.dynamic: + if test_part == part: + state = new_state + break + else: + new_state = State() + state.dynamic.append((part, new_state)) + state = new_state + state.rules.append(rule) + + def update(self) -> None: + # For every state the dynamic transitions should be sorted by + # the weight of the transition + state = self._root + + def _update_state(state: State) -> None: + state.dynamic.sort(key=lambda entry: entry[0].weight) + for new_state in state.static.values(): + _update_state(new_state) + for _, new_state in state.dynamic: + _update_state(new_state) + + _update_state(state) + + def match( + self, domain: str, path: str, method: str, websocket: bool + ) -> t.Tuple[Rule, t.MutableMapping[str, t.Any]]: + # To match to a rule we need to start at the root state and + # try to follow the transitions until we find a match, or find + # there is no transition to follow. + + have_match_for = set() + websocket_mismatch = False + + def _match( + state: State, parts: t.List[str], values: t.List[str] + ) -> t.Optional[t.Tuple[Rule, t.List[str]]]: + # This function is meant to be called recursively, and will attempt + # to match the head part to the state's transitions. 
+ nonlocal have_match_for, websocket_mismatch + + # The base case is when all parts have been matched via + # transitions. Hence if there is a rule with methods & + # websocket that work return it and the dynamic values + # extracted. + if parts == []: + for rule in state.rules: + if rule.methods is not None and method not in rule.methods: + have_match_for.update(rule.methods) + elif rule.websocket != websocket: + websocket_mismatch = True + else: + return rule, values + + # Test if there is a match with this path with a + # trailing slash, if so raise an exception to report + # that matching is possible with an additional slash + if "" in state.static: + for rule in state.static[""].rules: + if websocket == rule.websocket and ( + rule.methods is None or method in rule.methods + ): + if rule.strict_slashes: + raise SlashRequired() + else: + return rule, values + return None + + part = parts[0] + # To match this part try the static transitions first + if part in state.static: + rv = _match(state.static[part], parts[1:], values) + if rv is not None: + return rv + # No match via the static transitions, so try the dynamic + # ones. + for test_part, new_state in state.dynamic: + target = part + remaining = parts[1:] + # A final part indicates a transition that always + # consumes the remaining parts i.e. transitions to a + # final state. + if test_part.final: + target = "/".join(parts) + remaining = [] + match = re.compile(test_part.content).match(target) + if match is not None: + rv = _match(new_state, remaining, values + list(match.groups())) + if rv is not None: + return rv + + # If there is no match and the only part left is a + # trailing slash ("") consider rules that aren't + # strict-slashes as these should match if there is a final + # slash part. 
+ if parts == [""]: + for rule in state.rules: + if rule.strict_slashes: + continue + if rule.methods is not None and method not in rule.methods: + have_match_for.update(rule.methods) + elif rule.websocket != websocket: + websocket_mismatch = True + else: + return rule, values + + return None + + try: + rv = _match(self._root, [domain, *path.split("/")], []) + except SlashRequired: + raise RequestPath(f"{path}/") from None + + if self.merge_slashes and rv is None: + # Try to match again, but with slashes merged + path = re.sub("/{2,}?", "/", path) + try: + rv = _match(self._root, [domain, *path.split("/")], []) + except SlashRequired: + raise RequestPath(f"{path}/") from None + if rv is None: + raise NoMatch(have_match_for, websocket_mismatch) + else: + raise RequestPath(f"{path}") + elif rv is not None: + rule, values = rv + + result = {} + for name, value in zip(rule._converters.keys(), values): + try: + value = rule._converters[name].to_python(value) + except ValidationError: + raise NoMatch(have_match_for, websocket_mismatch) from None + result[str(name)] = value + if rule.defaults: + result.update(rule.defaults) + + if rule.alias and rule.map.redirect_defaults: + raise RequestAliasRedirect(result, rule.endpoint) + + return rule, result + + raise NoMatch(have_match_for, websocket_mismatch) diff --git a/src/werkzeug/routing/rules.py b/src/werkzeug/routing/rules.py new file mode 100644 index 0000000..a61717a --- /dev/null +++ b/src/werkzeug/routing/rules.py @@ -0,0 +1,879 @@ +import ast +import re +import typing as t +from dataclasses import dataclass +from string import Template +from types import CodeType + +from .._internal import _to_bytes +from ..urls import url_encode +from ..urls import url_quote +from .converters import ValidationError + +if t.TYPE_CHECKING: + from .converters import BaseConverter + from .map import Map + + +class Weighting(t.NamedTuple): + number_static_weights: int + static_weights: t.List[t.Tuple[int, int]] + number_argument_weights: 
int + argument_weights: t.List[int] + + +@dataclass +class RulePart: + """A part of a rule. + + Rules can be represented by parts as delimited by `/` with + instances of this class representing those parts. The *content* is + either the raw content if *static* or a regex string to match + against. The *weight* can be used to order parts when matching. + + """ + + content: str + final: bool + static: bool + weight: Weighting + + +_part_re = re.compile( + r""" + (?: + (?P\/) # a slash + | + (?P[^<\/]+) # static rule data + | + (?: + < + (?: + (?P[a-zA-Z_][a-zA-Z0-9_]*) # converter name + (?:\((?P.*?)\))? # converter arguments + \: # variable delimiter + )? + (?P[a-zA-Z_][a-zA-Z0-9_]*) # variable name + > + ) + ) + """, + re.VERBOSE, +) + +_simple_rule_re = re.compile(r"<([^>]+)>") +_converter_args_re = re.compile( + r""" + ((?P\w+)\s*=\s*)? + (?P + True|False| + \d+.\d+| + \d+.| + \d+| + [\w\d_.]+| + [urUR]?(?P"[^"]*?"|'[^']*') + )\s*, + """, + re.VERBOSE, +) + + +_PYTHON_CONSTANTS = {"None": None, "True": True, "False": False} + + +def _find(value: str, target: str, pos: int) -> int: + """Find the *target* in *value* after *pos*. + + Returns the *value* length if *target* isn't found. 
+ """ + try: + return value.index(target, pos) + except ValueError: + return len(value) + + +def _pythonize(value: str) -> t.Union[None, bool, int, float, str]: + if value in _PYTHON_CONSTANTS: + return _PYTHON_CONSTANTS[value] + for convert in int, float: + try: + return convert(value) # type: ignore + except ValueError: + pass + if value[:1] == value[-1:] and value[0] in "\"'": + value = value[1:-1] + return str(value) + + +def parse_converter_args(argstr: str) -> t.Tuple[t.Tuple, t.Dict[str, t.Any]]: + argstr += "," + args = [] + kwargs = {} + + for item in _converter_args_re.finditer(argstr): + value = item.group("stringval") + if value is None: + value = item.group("value") + value = _pythonize(value) + if not item.group("name"): + args.append(value) + else: + name = item.group("name") + kwargs[name] = value + + return tuple(args), kwargs + + +class RuleFactory: + """As soon as you have more complex URL setups it's a good idea to use rule + factories to avoid repetitive tasks. Some of them are builtin, others can + be added by subclassing `RuleFactory` and overriding `get_rules`. + """ + + def get_rules(self, map: "Map") -> t.Iterable["Rule"]: + """Subclasses of `RuleFactory` have to override this method and return + an iterable of rules.""" + raise NotImplementedError() + + +class Subdomain(RuleFactory): + """All URLs provided by this factory have the subdomain set to a + specific domain. For example if you want to use the subdomain for + the current language this can be a good setup:: + + url_map = Map([ + Rule('/', endpoint='#select_language'), + Subdomain('', [ + Rule('/', endpoint='index'), + Rule('/about', endpoint='about'), + Rule('/help', endpoint='help') + ]) + ]) + + All the rules except for the ``'#select_language'`` endpoint will now + listen on a two letter long subdomain that holds the language code + for the current request. 
+ """ + + def __init__(self, subdomain: str, rules: t.Iterable[RuleFactory]) -> None: + self.subdomain = subdomain + self.rules = rules + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.subdomain = self.subdomain + yield rule + + +class Submount(RuleFactory): + """Like `Subdomain` but prefixes the URL rule with a given string:: + + url_map = Map([ + Rule('/', endpoint='index'), + Submount('/blog', [ + Rule('/', endpoint='blog/index'), + Rule('/entry/', endpoint='blog/show') + ]) + ]) + + Now the rule ``'blog/show'`` matches ``/blog/entry/``. + """ + + def __init__(self, path: str, rules: t.Iterable[RuleFactory]) -> None: + self.path = path.rstrip("/") + self.rules = rules + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.rule = self.path + rule.rule + yield rule + + +class EndpointPrefix(RuleFactory): + """Prefixes all endpoints (which must be strings for this factory) with + another string. This can be useful for sub applications:: + + url_map = Map([ + Rule('/', endpoint='index'), + EndpointPrefix('blog/', [Submount('/blog', [ + Rule('/', endpoint='index'), + Rule('/entry/', endpoint='show') + ])]) + ]) + """ + + def __init__(self, prefix: str, rules: t.Iterable[RuleFactory]) -> None: + self.prefix = prefix + self.rules = rules + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.endpoint = self.prefix + rule.endpoint + yield rule + + +class RuleTemplate: + """Returns copies of the rules wrapped and expands string templates in + the endpoint, rule, defaults or subdomain sections. 
+ + Here a small example for such a rule template:: + + from werkzeug.routing import Map, Rule, RuleTemplate + + resource = RuleTemplate([ + Rule('/$name/', endpoint='$name.list'), + Rule('/$name/', endpoint='$name.show') + ]) + + url_map = Map([resource(name='user'), resource(name='page')]) + + When a rule template is called the keyword arguments are used to + replace the placeholders in all the string parameters. + """ + + def __init__(self, rules: t.Iterable["Rule"]) -> None: + self.rules = list(rules) + + def __call__(self, *args: t.Any, **kwargs: t.Any) -> "RuleTemplateFactory": + return RuleTemplateFactory(self.rules, dict(*args, **kwargs)) + + +class RuleTemplateFactory(RuleFactory): + """A factory that fills in template variables into rules. Used by + `RuleTemplate` internally. + + :internal: + """ + + def __init__( + self, rules: t.Iterable[RuleFactory], context: t.Dict[str, t.Any] + ) -> None: + self.rules = rules + self.context = context + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + new_defaults = subdomain = None + if rule.defaults: + new_defaults = {} + for key, value in rule.defaults.items(): + if isinstance(value, str): + value = Template(value).substitute(self.context) + new_defaults[key] = value + if rule.subdomain is not None: + subdomain = Template(rule.subdomain).substitute(self.context) + new_endpoint = rule.endpoint + if isinstance(new_endpoint, str): + new_endpoint = Template(new_endpoint).substitute(self.context) + yield Rule( + Template(rule.rule).substitute(self.context), + new_defaults, + subdomain, + rule.methods, + rule.build_only, + new_endpoint, + rule.strict_slashes, + ) + + +def _prefix_names(src: str) -> ast.stmt: + """ast parse and prefix names with `.` to avoid collision with user vars""" + tree = ast.parse(src).body[0] + if isinstance(tree, ast.Expr): + tree = tree.value # type: ignore + for node in ast.walk(tree): + if isinstance(node, 
ast.Name): + node.id = f".{node.id}" + return tree + + +_CALL_CONVERTER_CODE_FMT = "self._converters[{elem!r}].to_url()" +_IF_KWARGS_URL_ENCODE_CODE = """\ +if kwargs: + params = self._encode_query_vars(kwargs) + q = "?" if params else "" +else: + q = params = "" +""" +_IF_KWARGS_URL_ENCODE_AST = _prefix_names(_IF_KWARGS_URL_ENCODE_CODE) +_URL_ENCODE_AST_NAMES = (_prefix_names("q"), _prefix_names("params")) + + +class Rule(RuleFactory): + """A Rule represents one URL pattern. There are some options for `Rule` + that change the way it behaves and are passed to the `Rule` constructor. + Note that besides the rule-string all arguments *must* be keyword arguments + in order to not break the application on Werkzeug upgrades. + + `string` + Rule strings basically are just normal URL paths with placeholders in + the format ```` where the converter and the + arguments are optional. If no converter is defined the `default` + converter is used which means `string` in the normal configuration. + + URL rules that end with a slash are branch URLs, others are leaves. + If you have `strict_slashes` enabled (which is the default), all + branch URLs that are matched without a trailing slash will trigger a + redirect to the same URL with the missing slash appended. + + The converters are defined on the `Map`. + + `endpoint` + The endpoint for this rule. This can be anything. A reference to a + function, a string, a number etc. The preferred way is using a string + because the endpoint is used for URL generation. + + `defaults` + An optional dict with defaults for other rules with the same endpoint. + This is a bit tricky but useful if you want to have unique URLs:: + + url_map = Map([ + Rule('/all/', defaults={'page': 1}, endpoint='all_entries'), + Rule('/all/page/', endpoint='all_entries') + ]) + + If a user now visits ``http://example.com/all/page/1`` they will be + redirected to ``http://example.com/all/``. 
If `redirect_defaults` is + disabled on the `Map` instance this will only affect the URL + generation. + + `subdomain` + The subdomain rule string for this rule. If not specified the rule + only matches for the `default_subdomain` of the map. If the map is + not bound to a subdomain this feature is disabled. + + Can be useful if you want to have user profiles on different subdomains + and all subdomains are forwarded to your application:: + + url_map = Map([ + Rule('/', subdomain='<username>', endpoint='user/homepage'), + Rule('/stats', subdomain='<username>', endpoint='user/stats') + ]) + + `methods` + A sequence of http methods this rule applies to. If not specified, all + methods are allowed. For example this can be useful if you want different + endpoints for `POST` and `GET`. If methods are defined and the path + matches but the method matched against is not in this list or in the + list of another rule for that path the error raised is of the type + `MethodNotAllowed` rather than `NotFound`. If `GET` is present in the + list of methods and `HEAD` is not, `HEAD` is added automatically. + + `strict_slashes` + Override the `Map` setting for `strict_slashes` only for this rule. If + not specified the `Map` setting is used. + + `merge_slashes` + Override :attr:`Map.merge_slashes` for this rule. + + `build_only` + Set this to True and the rule will never match but will create a URL + that can be built. This is useful if you have resources on a subdomain + or folder that are not handled by the WSGI application (like static data). + + `redirect_to` + If given this must be either a string or callable. In case of a + callable it's called with the url adapter that triggered the match and + the values of the URL as keyword arguments and has to return the target + for the redirect, otherwise it has to be a string with placeholders in + rule syntax:: + + def foo_with_slug(adapter, id): + # ask the database for the slug for the old id. this of + # course has nothing to do with werkzeug. 
+ return f'foo/{Foo.get_slug_for_id(id)}' + + url_map = Map([ + Rule('/foo/<slug>', endpoint='foo'), + Rule('/some/old/url/<slug>', redirect_to='foo/<slug>'), + Rule('/other/old/url/<int:id>', redirect_to=foo_with_slug) + ]) + + When the rule is matched the routing system will raise a + `RequestRedirect` exception with the target for the redirect. + + Keep in mind that the URL will be joined against the URL root of the + script so don't use a leading slash on the target URL unless you + really mean the root of that domain. + + `alias` + If enabled this rule serves as an alias for another rule with the same + endpoint and arguments. + + `host` + If provided and the URL map has host matching enabled this can be + used to provide a match rule for the whole host. This also means + that the subdomain feature is disabled. + + `websocket` + If ``True``, this rule only matches for WebSocket (``ws://``, + ``wss://``) requests. By default, rules will only match for HTTP + requests. + + .. versionchanged:: 2.1 + Percent-encoded newlines (``%0a``), which are decoded by WSGI + servers, are considered when routing instead of terminating the + match early. + + .. versionadded:: 1.0 + Added ``websocket``. + + .. versionadded:: 1.0 + Added ``merge_slashes``. + + .. versionadded:: 0.7 + Added ``alias`` and ``host``. + + .. versionchanged:: 0.6.1 + ``HEAD`` is added to ``methods`` if ``GET`` is present. 
+ """ + + def __init__( + self, + string: str, + defaults: t.Optional[t.Mapping[str, t.Any]] = None, + subdomain: t.Optional[str] = None, + methods: t.Optional[t.Iterable[str]] = None, + build_only: bool = False, + endpoint: t.Optional[str] = None, + strict_slashes: t.Optional[bool] = None, + merge_slashes: t.Optional[bool] = None, + redirect_to: t.Optional[t.Union[str, t.Callable[..., str]]] = None, + alias: bool = False, + host: t.Optional[str] = None, + websocket: bool = False, + ) -> None: + if not string.startswith("/"): + raise ValueError("urls must start with a leading slash") + self.rule = string + self.is_leaf = not string.endswith("/") + self.is_branch = string.endswith("/") + + self.map: "Map" = None # type: ignore + self.strict_slashes = strict_slashes + self.merge_slashes = merge_slashes + self.subdomain = subdomain + self.host = host + self.defaults = defaults + self.build_only = build_only + self.alias = alias + self.websocket = websocket + + if methods is not None: + if isinstance(methods, str): + raise TypeError("'methods' should be a list of strings.") + + methods = {x.upper() for x in methods} + + if "HEAD" not in methods and "GET" in methods: + methods.add("HEAD") + + if websocket and methods - {"GET", "HEAD", "OPTIONS"}: + raise ValueError( + "WebSocket rules can only use 'GET', 'HEAD', and 'OPTIONS' methods." + ) + + self.methods = methods + self.endpoint: str = endpoint # type: ignore + self.redirect_to = redirect_to + + if defaults: + self.arguments = set(map(str, defaults)) + else: + self.arguments = set() + + self._converters: t.Dict[str, "BaseConverter"] = {} + self._trace: t.List[t.Tuple[bool, str]] = [] + self._parts: t.List[RulePart] = [] + + def empty(self) -> "Rule": + """ + Return an unbound copy of this rule. + + This can be useful if want to reuse an already bound URL for another + map. See ``get_empty_kwargs`` to override what keyword arguments are + provided to the new copy. 
+ """ + return type(self)(self.rule, **self.get_empty_kwargs()) + + def get_empty_kwargs(self) -> t.Mapping[str, t.Any]: + """ + Provides kwargs for instantiating empty copy with empty() + + Use this method to provide custom keyword arguments to the subclass of + ``Rule`` when calling ``some_rule.empty()``. Helpful when the subclass + has custom keyword arguments that are needed at instantiation. + + Must return a ``dict`` that will be provided as kwargs to the new + instance of ``Rule``, following the initial ``self.rule`` value which + is always provided as the first, required positional argument. + """ + defaults = None + if self.defaults: + defaults = dict(self.defaults) + return dict( + defaults=defaults, + subdomain=self.subdomain, + methods=self.methods, + build_only=self.build_only, + endpoint=self.endpoint, + strict_slashes=self.strict_slashes, + redirect_to=self.redirect_to, + alias=self.alias, + host=self.host, + ) + + def get_rules(self, map: "Map") -> t.Iterator["Rule"]: + yield self + + def refresh(self) -> None: + """Rebinds and refreshes the URL. Call this if you modified the + rule in place. + + :internal: + """ + self.bind(self.map, rebind=True) + + def bind(self, map: "Map", rebind: bool = False) -> None: + """Bind the url to a map and create a regular expression based on + the information from the rule itself and the defaults from the map. + + :internal: + """ + if self.map is not None and not rebind: + raise RuntimeError(f"url rule {self!r} already bound to map {self.map!r}") + self.map = map + if self.strict_slashes is None: + self.strict_slashes = map.strict_slashes + if self.merge_slashes is None: + self.merge_slashes = map.merge_slashes + if self.subdomain is None: + self.subdomain = map.default_subdomain + self.compile() + + def get_converter( + self, + variable_name: str, + converter_name: str, + args: t.Tuple, + kwargs: t.Mapping[str, t.Any], + ) -> "BaseConverter": + """Looks up the converter for the given parameter. + + .. 
versionadded:: 0.9 + """ + if converter_name not in self.map.converters: + raise LookupError(f"the converter {converter_name!r} does not exist") + return self.map.converters[converter_name](self.map, *args, **kwargs) + + def _encode_query_vars(self, query_vars: t.Mapping[str, t.Any]) -> str: + return url_encode( + query_vars, + charset=self.map.charset, + sort=self.map.sort_parameters, + key=self.map.sort_key, + ) + + def _parse_rule(self, rule: str) -> t.Iterable[RulePart]: + content = "" + static = True + argument_weights = [] + static_weights: t.List[t.Tuple[int, int]] = [] + final = False + + pos = 0 + while pos < len(rule): + match = _part_re.match(rule, pos) + if match is None: + raise ValueError(f"malformed url rule: {rule!r}") + + data = match.groupdict() + if data["static"] is not None: + static_weights.append((len(static_weights), -len(data["static"]))) + self._trace.append((False, data["static"])) + content += data["static"] if static else re.escape(data["static"]) + + if data["variable"] is not None: + if static: + # Switching content to represent regex, hence the need to escape + content = re.escape(content) + static = False + c_args, c_kwargs = parse_converter_args(data["arguments"] or "") + convobj = self.get_converter( + data["variable"], data["converter"] or "default", c_args, c_kwargs + ) + self._converters[data["variable"]] = convobj + self.arguments.add(data["variable"]) + if not convobj.part_isolating: + final = True + content += f"({convobj.regex})" + argument_weights.append(convobj.weight) + self._trace.append((True, data["variable"])) + + if data["slash"] is not None: + self._trace.append((False, "/")) + if final: + content += "/" + else: + if not static: + content += r"\Z" + weight = Weighting( + -len(static_weights), + static_weights, + -len(argument_weights), + argument_weights, + ) + yield RulePart( + content=content, final=final, static=static, weight=weight + ) + content = "" + static = True + argument_weights = [] + static_weights = 
[] + final = False + + pos = match.end() + + if not static: + content += r"\Z" + weight = Weighting( + -len(static_weights), + static_weights, + -len(argument_weights), + argument_weights, + ) + yield RulePart(content=content, final=final, static=static, weight=weight) + + def compile(self) -> None: + """Compiles the regular expression and stores it.""" + assert self.map is not None, "rule not bound" + + if self.map.host_matching: + domain_rule = self.host or "" + else: + domain_rule = self.subdomain or "" + self._parts = [] + self._trace = [] + self._converters = {} + if domain_rule == "": + self._parts = [ + RulePart( + content="", final=False, static=True, weight=Weighting(0, [], 0, []) + ) + ] + else: + self._parts.extend(self._parse_rule(domain_rule)) + self._trace.append((False, "|")) + rule = self.rule + if self.merge_slashes: + rule = re.sub("/{2,}?", "/", self.rule) + self._parts.extend(self._parse_rule(rule)) + + self._build: t.Callable[..., t.Tuple[str, str]] + self._build = self._compile_builder(False).__get__(self, None) + self._build_unknown: t.Callable[..., t.Tuple[str, str]] + self._build_unknown = self._compile_builder(True).__get__(self, None) + + @staticmethod + def _get_func_code(code: CodeType, name: str) -> t.Callable[..., t.Tuple[str, str]]: + globs: t.Dict[str, t.Any] = {} + locs: t.Dict[str, t.Any] = {} + exec(code, globs, locs) + return locs[name] # type: ignore + + def _compile_builder( + self, append_unknown: bool = True + ) -> t.Callable[..., t.Tuple[str, str]]: + defaults = self.defaults or {} + dom_ops: t.List[t.Tuple[bool, str]] = [] + url_ops: t.List[t.Tuple[bool, str]] = [] + + opl = dom_ops + for is_dynamic, data in self._trace: + if data == "|" and opl is dom_ops: + opl = url_ops + continue + # this seems like a silly case to ever come up but: + # if a default is given for a value that appears in the rule, + # resolve it to a constant ahead of time + if is_dynamic and data in defaults: + data = 
self._converters[data].to_url(defaults[data]) + opl.append((False, data)) + elif not is_dynamic: + opl.append( + (False, url_quote(_to_bytes(data, self.map.charset), safe="/:|+")) + ) + else: + opl.append((True, data)) + + def _convert(elem: str) -> ast.stmt: + ret = _prefix_names(_CALL_CONVERTER_CODE_FMT.format(elem=elem)) + ret.args = [ast.Name(str(elem), ast.Load())] # type: ignore # str for py2 + return ret + + def _parts(ops: t.List[t.Tuple[bool, str]]) -> t.List[ast.AST]: + parts = [ + _convert(elem) if is_dynamic else ast.Str(s=elem) + for is_dynamic, elem in ops + ] + parts = parts or [ast.Str("")] + # constant fold + ret = [parts[0]] + for p in parts[1:]: + if isinstance(p, ast.Str) and isinstance(ret[-1], ast.Str): + ret[-1] = ast.Str(ret[-1].s + p.s) + else: + ret.append(p) + return ret + + dom_parts = _parts(dom_ops) + url_parts = _parts(url_ops) + if not append_unknown: + body = [] + else: + body = [_IF_KWARGS_URL_ENCODE_AST] + url_parts.extend(_URL_ENCODE_AST_NAMES) + + def _join(parts: t.List[ast.AST]) -> ast.AST: + if len(parts) == 1: # shortcut + return parts[0] + return ast.JoinedStr(parts) + + body.append( + ast.Return(ast.Tuple([_join(dom_parts), _join(url_parts)], ast.Load())) + ) + + pargs = [ + elem + for is_dynamic, elem in dom_ops + url_ops + if is_dynamic and elem not in defaults + ] + kargs = [str(k) for k in defaults] + + func_ast: ast.FunctionDef = _prefix_names("def _(): pass") # type: ignore + func_ast.name = f"" + func_ast.args.args.append(ast.arg(".self", None)) + for arg in pargs + kargs: + func_ast.args.args.append(ast.arg(arg, None)) + func_ast.args.kwarg = ast.arg(".kwargs", None) + for _ in kargs: + func_ast.args.defaults.append(ast.Str("")) + func_ast.body = body + + # use `ast.parse` instead of `ast.Module` for better portability + # Python 3.8 changes the signature of `ast.Module` + module = ast.parse("") + module.body = [func_ast] + + # mark everything as on line 1, offset 0 + # less error-prone than 
def provides_defaults_for(self, rule: "Rule") -> bool:
    """Tell whether this rule supplies defaults for ``rule``.

    That is the case when this rule is not build-only, has defaults,
    shares the endpoint and the argument set with ``rule``, and is not
    equal to ``rule`` itself.

    :internal:
    """
    if self.build_only:
        return False

    if not self.defaults:
        return False

    same_endpoint = self.endpoint == rule.endpoint
    if not same_endpoint:
        return False

    if not (self != rule):
        return False

    return bool(self.arguments == rule.arguments)
def build_compare_key(self) -> t.Tuple[int, int, int]:
    """Return the sort key used to order rules for URL building.

    The key is ``(alias flag, -number of arguments, -number of
    defaults)``; a rule without defaults counts as having zero.

    :internal:
    """
    alias_rank = int(bool(self.alias))
    argument_count = len(self.arguments)
    default_count = len(self.defaults) if self.defaults else 0
    return alias_rank, -argument_count, -default_count
def is_resource_modified(
    http_range: t.Optional[str] = None,
    http_if_range: t.Optional[str] = None,
    http_if_modified_since: t.Optional[str] = None,
    http_if_none_match: t.Optional[str] = None,
    http_if_match: t.Optional[str] = None,
    etag: t.Optional[str] = None,
    data: t.Optional[bytes] = None,
    last_modified: t.Optional[t.Union[datetime, str]] = None,
    ignore_if_range: bool = True,
) -> bool:
    """Convenience method for conditional requests.

    The date check runs first: the reference date is the date from
    ``If-Range`` when that header is taken into account and carries
    one, otherwise the parsed ``If-Modified-Since`` value. If an etag is
    known, the etag checks run afterwards and overwrite the result of
    the date check: the ``If-Range`` etag is used when there is one,
    otherwise a non-empty ``If-None-Match`` (via ``contains_weak``) and
    then a non-empty ``If-Match`` (via ``is_strong``) are applied in
    that order.

    :param http_range: Range HTTP header
    :param http_if_range: If-Range HTTP header
    :param http_if_modified_since: If-Modified-Since HTTP header
    :param http_if_none_match: If-None-Match HTTP header
    :param http_if_match: If-Match HTTP header
    :param etag: the etag for the response for comparison.
    :param data: or alternatively the data of the response to automatically
        generate an etag using :func:`generate_etag`.
    :param last_modified: an optional date of the last modification.
        A string is parsed with :func:`parse_date`.
    :param ignore_if_range: If `False`, `If-Range` header will be taken into
        account. It is still ignored when ``http_range`` is ``None``.
    :return: `True` if the resource was modified, otherwise `False`.
    :raises TypeError: if both ``etag`` and ``data`` are given.

    .. versionadded:: 2.2
    """
    if etag is None and data is not None:
        etag = generate_etag(data)
    elif data is not None:
        raise TypeError("both data and etag given")

    unmodified = False
    if isinstance(last_modified, str):
        last_modified = parse_date(last_modified)

    # HTTP doesn't use microsecond, remove it to avoid false positive
    # comparisons. Mark naive datetimes as UTC.
    if last_modified is not None:
        last_modified = _dt_as_utc(last_modified.replace(microsecond=0))

    if_range = None
    if not ignore_if_range and http_range is not None:
        # https://tools.ietf.org/html/rfc7233#section-3.2
        # A server MUST ignore an If-Range header field received in a request
        # that does not contain a Range header field.
        if_range = parse_if_range_header(http_if_range)

    # A date carried by If-Range takes the place of If-Modified-Since.
    if if_range is not None and if_range.date is not None:
        modified_since: t.Optional[datetime] = if_range.date
    else:
        modified_since = parse_date(http_if_modified_since)

    if modified_since and last_modified and last_modified <= modified_since:
        unmodified = True

    if etag:
        # unquote_etag returns a pair; only its first item is used here.
        etag, _ = unquote_etag(etag)
        etag = t.cast(str, etag)

        if if_range is not None and if_range.etag is not None:
            # The If-Range etag replaces whatever the date check decided.
            unmodified = parse_etags(if_range.etag).contains(etag)
        else:
            if_none_match = parse_etags(http_if_none_match)
            if if_none_match:
                # https://tools.ietf.org/html/rfc7232#section-3.2
                # "A recipient MUST use the weak comparison function when comparing
                # entity-tags for If-None-Match"
                unmodified = if_none_match.contains_weak(etag)

            # https://tools.ietf.org/html/rfc7232#section-3.1
            # "Origin server MUST use the strong comparison function when
            # comparing entity-tags for If-Match"
            if_match = parse_etags(http_if_match)
            if if_match:
                # Applied last, so it overrides the If-None-Match result.
                unmodified = not if_match.is_strong(etag)

    return not unmodified
+ :param errors: The error behavior for the charset decoding. + :param cls: A dict-like class to store the parsed cookies in. + Defaults to :class:`MultiDict`. + + .. versionadded:: 2.2 + """ + # PEP 3333 sends headers through the environ as latin1 decoded + # strings. Encode strings back to bytes for parsing. + if isinstance(cookie, str): + cookie = cookie.encode("latin1", "replace") + + if cls is None: + cls = ds.MultiDict + + def _parse_pairs() -> t.Iterator[t.Tuple[str, str]]: + for key, val in _cookie_parse_impl(cookie): # type: ignore + key_str = _to_str(key, charset, errors, allow_none_charset=True) + + if not key_str: + continue + + val_str = _to_str(val, charset, errors, allow_none_charset=True) + yield key_str, val_str + + return cls(_parse_pairs()) + + +# circular dependencies +from .. import datastructures as ds diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py new file mode 100644 index 0000000..d8abeb3 --- /dev/null +++ b/src/werkzeug/sansio/multipart.py @@ -0,0 +1,279 @@ +import re +from dataclasses import dataclass +from enum import auto +from enum import Enum +from typing import cast +from typing import List +from typing import Optional +from typing import Tuple + +from .._internal import _to_bytes +from .._internal import _to_str +from ..datastructures import Headers +from ..exceptions import RequestEntityTooLarge +from ..http import parse_options_header + + +class Event: + pass + + +@dataclass(frozen=True) +class Preamble(Event): + data: bytes + + +@dataclass(frozen=True) +class Field(Event): + name: str + headers: Headers + + +@dataclass(frozen=True) +class File(Event): + name: str + filename: str + headers: Headers + + +@dataclass(frozen=True) +class Data(Event): + data: bytes + more_data: bool + + +@dataclass(frozen=True) +class Epilogue(Event): + data: bytes + + +class NeedData(Event): + pass + + +NEED_DATA = NeedData() + + +class State(Enum): + PREAMBLE = auto() + PART = auto() + DATA = auto() + EPILOGUE = 
auto() + COMPLETE = auto() + + +# Multipart line breaks MUST be CRLF (\r\n) by RFC-7578, except that +# many implementations break this and either use CR or LF alone. +LINE_BREAK = b"(?:\r\n|\n|\r)" +BLANK_LINE_RE = re.compile(b"(?:\r\n\r\n|\r\r|\n\n)", re.MULTILINE) +LINE_BREAK_RE = re.compile(LINE_BREAK, re.MULTILINE) +# Header values can be continued via a space or tab after the linebreak, as +# per RFC2231 +HEADER_CONTINUATION_RE = re.compile(b"%s[ \t]" % LINE_BREAK, re.MULTILINE) +# This must be long enough to contain any line breaks plus any +# additional boundary markers (--) such that they will be found in a +# subsequent search +SEARCH_EXTRA_LENGTH = 8 + + +class MultipartDecoder: + """Decodes a multipart message as bytes into Python events. + + The part data is returned as available to allow the caller to save + the data from memory to disk, if desired. + """ + + def __init__( + self, + boundary: bytes, + max_form_memory_size: Optional[int] = None, + ) -> None: + self.buffer = bytearray() + self.complete = False + self.max_form_memory_size = max_form_memory_size + self.state = State.PREAMBLE + self.boundary = boundary + + # Note in the below \h i.e. horizontal whitespace is used + # as [^\S\n\r] as \h isn't supported in python. + + # The preamble must end with a boundary where the boundary is + # prefixed by a line break, RFC2046. Except that many + # implementations including Werkzeug's tests omit the line + # break prefix. In addition the first boundary could be the + # epilogue boundary (for empty form-data) hence the matching + # group to understand if it is an epilogue boundary. + self.preamble_re = re.compile( + rb"%s?--%s(--[^\S\n\r]*%s?|[^\S\n\r]*%s)" + % (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK), + re.MULTILINE, + ) + # A boundary must include a line break prefix and suffix, and + # may include trailing whitespace. 
def last_newline(self) -> int:
    """Return the smaller of the last LF index and the last CR index.

    The indexes refer to ``self.buffer``. A character that does not
    occur in the buffer counts as ``len(self.buffer)``, so a buffer
    without any line break yields its own length.
    """
    buf = self.buffer
    end = len(buf)
    found = (buf.rfind(b"\n"), buf.rfind(b"\r"))
    return min(end if index == -1 else index for index in found)
+ self._search_position = max( + 0, len(self.buffer) - len(self.boundary) - SEARCH_EXTRA_LENGTH + ) + + elif self.state == State.PART: + match = BLANK_LINE_RE.search(self.buffer, self._search_position) + if match is not None: + headers = self._parse_headers(self.buffer[: match.start()]) + del self.buffer[: match.end()] + + if "content-disposition" not in headers: + raise ValueError("Missing Content-Disposition header") + + disposition, extra = parse_options_header( + headers["content-disposition"] + ) + name = cast(str, extra.get("name")) + filename = extra.get("filename") + if filename is not None: + event = File( + filename=filename, + headers=headers, + name=name, + ) + else: + event = Field( + headers=headers, + name=name, + ) + self.state = State.DATA + self._search_position = 0 + else: + # Update the search start position to be equal to the + # current buffer length (already searched) minus a + # safe buffer for part of the search target. + self._search_position = max(0, len(self.buffer) - SEARCH_EXTRA_LENGTH) + + elif self.state == State.DATA: + if self.buffer.find(b"--" + self.boundary) == -1: + # No complete boundary in the buffer, but there may be + # a partial boundary at the end. As the boundary + # starts with either a nl or cr find the earliest and + # return up to that as data. 
class MultipartEncoder:
    """Turn multipart events into the bytes of a multipart message.

    The encoder keeps a ``state`` that starts at ``State.PREAMBLE`` and
    decides which events :meth:`send_event` accepts next.
    """

    def __init__(self, boundary: bytes) -> None:
        self.state = State.PREAMBLE
        self.boundary = boundary

    def send_event(self, event: Event) -> bytes:
        """Encode a single event and advance the encoder state.

        :param event: a ``Preamble``, ``Field``, ``File``, ``Data`` or
            ``Epilogue`` event.
        :return: the bytes that represent the event.
        :raises ValueError: if the event is not accepted in the current
            state.
        """
        current = self.state

        if isinstance(event, Preamble) and current == State.PREAMBLE:
            self.state = State.PART
            return event.data

        part_allowed = current in (State.PREAMBLE, State.PART, State.DATA)

        if isinstance(event, (Field, File)) and part_allowed:
            # The state changes before the header bytes are assembled.
            self.state = State.DATA
            chunks = [
                b"\r\n--",
                self.boundary,
                b"\r\n",
                b'Content-Disposition: form-data; name="%s"' % _to_bytes(event.name),
            ]

            if isinstance(event, File):
                chunks.append(b'; filename="%s"' % _to_bytes(event.filename))

            chunks.append(b"\r\n")
            # Content-Disposition was written above, so any copy of it in
            # the event headers is skipped.
            chunks.extend(
                _to_bytes(f"{name}: {value}\r\n")
                for name, value in cast(Field, event).headers
                if name.lower() != "content-disposition"
            )
            chunks.append(b"\r\n")
            return b"".join(chunks)

        if isinstance(event, Data) and current == State.DATA:
            return event.data

        if isinstance(event, Epilogue):
            self.state = State.COMPLETE
            return b"\r\n--" + self.boundary + b"--\r\n" + event.data

        raise ValueError(f"Cannot generate {event} in state: {self.state}")
+ + This class is not meant for general use. It should only be used when + implementing WSGI, ASGI, or another HTTP application spec. Werkzeug + provides a WSGI implementation at :cls:`werkzeug.wrappers.Request`. + + :param method: The method the request was made with, such as + ``GET``. + :param scheme: The URL scheme of the protocol the request used, such + as ``https`` or ``wss``. + :param server: The address of the server. ``(host, port)``, + ``(path, None)`` for unix sockets, or ``None`` if not known. + :param root_path: The prefix that the application is mounted under. + This is prepended to generated URLs, but is not part of route + matching. + :param path: The path part of the URL after ``root_path``. + :param query_string: The part of the URL after the "?". + :param headers: The headers received with the request. + :param remote_addr: The address of the client sending the request. + + .. versionadded:: 2.0 + """ + + #: The charset used to decode most data in the request. + charset = "utf-8" + + #: the error handling procedure for errors, defaults to 'replace' + encoding_errors = "replace" + + #: the class to use for `args` and `form`. The default is an + #: :class:`~werkzeug.datastructures.ImmutableMultiDict` which supports + #: multiple values per key. alternatively it makes sense to use an + #: :class:`~werkzeug.datastructures.ImmutableOrderedMultiDict` which + #: preserves order or a :class:`~werkzeug.datastructures.ImmutableDict` + #: which is the fastest but only remembers the last key. It is also + #: possible to use mutable structures, but this is not recommended. + #: + #: .. versionadded:: 0.6 + parameter_storage_class: t.Type[MultiDict] = ImmutableMultiDict + + #: The type to be used for dict values from the incoming WSGI + #: environment. (For example for :attr:`cookies`.) By default an + #: :class:`~werkzeug.datastructures.ImmutableMultiDict` is used. + #: + #: .. 
versionchanged:: 1.0.0 + #: Changed to ``ImmutableMultiDict`` to support multiple values. + #: + #: .. versionadded:: 0.6 + dict_storage_class: t.Type[MultiDict] = ImmutableMultiDict + + #: the type to be used for list values from the incoming WSGI environment. + #: By default an :class:`~werkzeug.datastructures.ImmutableList` is used + #: (for example for :attr:`access_list`). + #: + #: .. versionadded:: 0.6 + list_storage_class: t.Type[t.List] = ImmutableList + + user_agent_class: t.Type[UserAgent] = UserAgent + """The class used and returned by the :attr:`user_agent` property to + parse the header. Defaults to + :class:`~werkzeug.user_agent.UserAgent`, which does no parsing. An + extension can provide a subclass that uses a parser to provide other + data. + + .. versionadded:: 2.0 + """ + + #: Valid host names when handling requests. By default all hosts are + #: trusted, which means that whatever the client says the host is + #: will be accepted. + #: + #: Because ``Host`` and ``X-Forwarded-Host`` headers can be set to + #: any value by a malicious client, it is recommended to either set + #: this property or implement similar validation in the proxy (if + #: the application is being run behind one). + #: + #: .. versionadded:: 0.9 + trusted_hosts: t.Optional[t.List[str]] = None + + def __init__( + self, + method: str, + scheme: str, + server: t.Optional[t.Tuple[str, t.Optional[int]]], + root_path: str, + path: str, + query_string: bytes, + headers: Headers, + remote_addr: t.Optional[str], + ) -> None: + #: The method the request was made with, such as ``GET``. + self.method = method.upper() + #: The URL scheme of the protocol the request used, such as + #: ``https`` or ``wss``. + self.scheme = scheme + #: The address of the server. ``(host, port)``, ``(path, None)`` + #: for unix sockets, or ``None`` if not known. + self.server = server + #: The prefix that the application is mounted under, without a + #: trailing slash. :attr:`path` comes after this. 
+ self.root_path = root_path.rstrip("/") + #: The path part of the URL after :attr:`root_path`. This is the + #: path used for routing within the application. + self.path = "/" + path.lstrip("/") + #: The part of the URL after the "?". This is the raw value, use + #: :attr:`args` for the parsed values. + self.query_string = query_string + #: The headers received with the request. + self.headers = headers + #: The address of the client sending the request. + self.remote_addr = remote_addr + + def __repr__(self) -> str: + try: + url = self.url + except Exception as e: + url = f"(invalid URL: {e})" + + return f"<{type(self).__name__} {url!r} [{self.method}]>" + + @property + def url_charset(self) -> str: + """The charset that is assumed for URLs. Defaults to the value + of :attr:`charset`. + + .. versionadded:: 0.6 + """ + return self.charset + + @cached_property + def args(self) -> "MultiDict[str, str]": + """The parsed URL parameters (the part in the URL after the question + mark). + + By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. This might + be necessary if the order of the form data is important. + """ + return url_decode( + self.query_string, + self.url_charset, + errors=self.encoding_errors, + cls=self.parameter_storage_class, + ) + + @cached_property + def access_route(self) -> t.List[str]: + """If a forwarded header exists this is a list of all ip addresses + from the client ip to the last proxy server. 
@property
def is_secure(self) -> bool:
    """Whether the request scheme is a secure one.

    ``True`` when :attr:`scheme` is ``https`` or ``wss``, otherwise
    ``False``.
    """
    secure_schemes = {"https", "wss"}
    return self.scheme in secure_schemes
@cached_property
def content_length(self) -> t.Optional[int]:
    """The Content-Length entity-header field indicates the size of the
    entity-body in bytes or, in the case of the HEAD method, the size of
    the entity-body that would have been sent had the request been a
    GET.

    ``None`` is returned when the header is missing or is not an
    integer, and also when the final coding listed in
    ``Transfer-Encoding`` is ``chunked``. Negative values are clamped
    to ``0``.
    """
    # Transfer-coding names are case-insensitive and the header may list
    # several codings; the body is chunked when the final one is
    # ``chunked``, and Content-Length does not describe it then.
    transfer_encoding = self.headers.get("Transfer-Encoding", "")
    final_coding = transfer_encoding.rpartition(",")[2].strip().lower()

    if final_coding == "chunked":
        return None

    content_length = self.headers.get("Content-Length")

    if content_length is None:
        return None

    try:
        return max(0, int(content_length))
    except (ValueError, TypeError):
        # A malformed header is treated the same as a missing one.
        return None
def _parse_content_type(self) -> None:
    """Parse the ``Content-Type`` header once and keep the result.

    The value returned by ``parse_options_header`` is stored on the
    instance as ``_parsed_content_type``; later calls do nothing while
    that attribute exists. A missing header is parsed as ``""``.
    """
    if hasattr(self, "_parsed_content_type"):
        return

    raw_content_type = self.headers.get("Content-Type", "")
    self._parsed_content_type = parse_options_header(raw_content_type)
@cached_property
def accept_encodings(self) -> Accept:
    """List of encodings this client accepts, parsed from the
    ``Accept-Encoding`` header. Encodings in a HTTP term are
    compression encodings such as gzip. For charsets have a look at
    :attr:`accept_charsets`.
    """
    return parse_accept_header(self.headers.get("Accept-Encoding"))
+ """ + return parse_accept_header(self.headers.get("Accept-Language"), LanguageAccept) + + # ETag + + @cached_property + def cache_control(self) -> RequestCacheControl: + """A :class:`~werkzeug.datastructures.RequestCacheControl` object + for the incoming cache control headers. + """ + cache_control = self.headers.get("Cache-Control") + return parse_cache_control_header(cache_control, None, RequestCacheControl) + + @cached_property + def if_match(self) -> ETags: + """An object containing all the etags in the `If-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.headers.get("If-Match")) + + @cached_property + def if_none_match(self) -> ETags: + """An object containing all the etags in the `If-None-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.headers.get("If-None-Match")) + + @cached_property + def if_modified_since(self) -> t.Optional[datetime]: + """The parsed `If-Modified-Since` header as a datetime object. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. + """ + return parse_date(self.headers.get("If-Modified-Since")) + + @cached_property + def if_unmodified_since(self) -> t.Optional[datetime]: + """The parsed `If-Unmodified-Since` header as a datetime object. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. + """ + return parse_date(self.headers.get("If-Unmodified-Since")) + + @cached_property + def if_range(self) -> IfRange: + """The parsed ``If-Range`` header. + + .. versionchanged:: 2.0 + ``IfRange.date`` is timezone-aware. + + .. versionadded:: 0.7 + """ + return parse_if_range_header(self.headers.get("If-Range")) + + @cached_property + def range(self) -> t.Optional[Range]: + """The parsed `Range` header. + + .. 
versionadded:: 0.7 + + :rtype: :class:`~werkzeug.datastructures.Range` + """ + return parse_range_header(self.headers.get("Range")) + + # User Agent + + @cached_property + def user_agent(self) -> UserAgent: + """The user agent. Use ``user_agent.string`` to get the header + value. Set :attr:`user_agent_class` to a subclass of + :class:`~werkzeug.user_agent.UserAgent` to provide parsing for + the other properties or other extended data. + + .. versionchanged:: 2.0 + The built in parser is deprecated and will be removed in + Werkzeug 2.1. A ``UserAgent`` subclass must be set to parse + data from the string. + """ + return self.user_agent_class(self.headers.get("User-Agent", "")) + + # Authorization + + @cached_property + def authorization(self) -> t.Optional[Authorization]: + """The `Authorization` object in parsed form.""" + return parse_authorization_header(self.headers.get("Authorization")) + + # CORS + + origin = header_property[str]( + "Origin", + doc=( + "The host that the request originated from. Set" + " :attr:`~CORSResponseMixin.access_control_allow_origin` on" + " the response to indicate which origins are allowed." + ), + read_only=True, + ) + + access_control_request_headers = header_property( + "Access-Control-Request-Headers", + load_func=parse_set_header, + doc=( + "Sent with a preflight request to indicate which headers" + " will be sent with the cross origin request. Set" + " :attr:`~CORSResponseMixin.access_control_allow_headers`" + " on the response to indicate which headers are allowed." + ), + read_only=True, + ) + + access_control_request_method = header_property[str]( + "Access-Control-Request-Method", + doc=( + "Sent with a preflight request to indicate which method" + " will be used for the cross origin request. Set" + " :attr:`~CORSResponseMixin.access_control_allow_methods`" + " on the response to indicate which methods are allowed." 
+ ), + read_only=True, + ) + + @property + def is_json(self) -> bool: + """Check if the mimetype indicates JSON data, either + :mimetype:`application/json` or :mimetype:`application/*+json`. + """ + mt = self.mimetype + return ( + mt == "application/json" + or mt.startswith("application/") + and mt.endswith("+json") + ) diff --git a/src/werkzeug/sansio/response.py b/src/werkzeug/sansio/response.py new file mode 100644 index 0000000..de0bec2 --- /dev/null +++ b/src/werkzeug/sansio/response.py @@ -0,0 +1,704 @@ +import typing as t +from datetime import datetime +from datetime import timedelta +from datetime import timezone +from http import HTTPStatus + +from .._internal import _to_str +from ..datastructures import Headers +from ..datastructures import HeaderSet +from ..http import dump_cookie +from ..http import HTTP_STATUS_CODES +from ..utils import get_content_type +from werkzeug.datastructures import CallbackDict +from werkzeug.datastructures import ContentRange +from werkzeug.datastructures import ContentSecurityPolicy +from werkzeug.datastructures import ResponseCacheControl +from werkzeug.datastructures import WWWAuthenticate +from werkzeug.http import COEP +from werkzeug.http import COOP +from werkzeug.http import dump_age +from werkzeug.http import dump_header +from werkzeug.http import dump_options_header +from werkzeug.http import http_date +from werkzeug.http import parse_age +from werkzeug.http import parse_cache_control_header +from werkzeug.http import parse_content_range_header +from werkzeug.http import parse_csp_header +from werkzeug.http import parse_date +from werkzeug.http import parse_options_header +from werkzeug.http import parse_set_header +from werkzeug.http import parse_www_authenticate_header +from werkzeug.http import quote_etag +from werkzeug.http import unquote_etag +from werkzeug.utils import header_property + + +def _set_property(name: str, doc: t.Optional[str] = None) -> property: + def fget(self: "Response") -> HeaderSet: + def 
on_update(header_set: HeaderSet) -> None: + if not header_set and name in self.headers: + del self.headers[name] + elif header_set: + self.headers[name] = header_set.to_header() + + return parse_set_header(self.headers.get(name), on_update) + + def fset( + self: "Response", + value: t.Optional[ + t.Union[str, t.Dict[str, t.Union[str, int]], t.Iterable[str]] + ], + ) -> None: + if not value: + del self.headers[name] + elif isinstance(value, str): + self.headers[name] = value + else: + self.headers[name] = dump_header(value) + + return property(fget, fset, doc=doc) + + +class Response: + """Represents the non-IO parts of an HTTP response, specifically the + status and headers but not the body. + + This class is not meant for general use. It should only be used when + implementing WSGI, ASGI, or another HTTP application spec. Werkzeug + provides a WSGI implementation at :cls:`werkzeug.wrappers.Response`. + + :param status: The status code for the response. Either an int, in + which case the default status message is added, or a string in + the form ``{code} {message}``, like ``404 Not Found``. Defaults + to 200. + :param headers: A :class:`~werkzeug.datastructures.Headers` object, + or a list of ``(key, value)`` tuples that will be converted to a + ``Headers`` object. + :param mimetype: The mime type (content type without charset or + other parameters) of the response. If the value starts with + ``text/`` (or matches some other special cases), the charset + will be added to create the ``content_type``. + :param content_type: The full content type of the response. + Overrides building the value from ``mimetype``. + + .. versionadded:: 2.0 + """ + + #: the charset of the response. + charset = "utf-8" + + #: the default status if none is provided. + default_status = 200 + + #: the default mimetype if none is provided. + default_mimetype: t.Optional[str] = "text/plain" + + #: Warn if a cookie header exceeds this size. 
The default, 4093, should be + #: safely `supported by most browsers `_. A cookie larger than + #: this size will still be sent, but it may be ignored or handled + #: incorrectly by some browsers. Set to 0 to disable this check. + #: + #: .. versionadded:: 0.13 + #: + #: .. _`cookie`: http://browsercookielimits.squawky.net/ + max_cookie_size = 4093 + + # A :class:`Headers` object representing the response headers. + headers: Headers + + def __init__( + self, + status: t.Optional[t.Union[int, str, HTTPStatus]] = None, + headers: t.Optional[ + t.Union[ + t.Mapping[str, t.Union[str, int, t.Iterable[t.Union[str, int]]]], + t.Iterable[t.Tuple[str, t.Union[str, int]]], + ] + ] = None, + mimetype: t.Optional[str] = None, + content_type: t.Optional[str] = None, + ) -> None: + if isinstance(headers, Headers): + self.headers = headers + elif not headers: + self.headers = Headers() + else: + self.headers = Headers(headers) + + if content_type is None: + if mimetype is None and "content-type" not in self.headers: + mimetype = self.default_mimetype + if mimetype is not None: + mimetype = get_content_type(mimetype, self.charset) + content_type = mimetype + if content_type is not None: + self.headers["Content-Type"] = content_type + if status is None: + status = self.default_status + self.status = status # type: ignore + + def __repr__(self) -> str: + return f"<{type(self).__name__} [{self.status}]>" + + @property + def status_code(self) -> int: + """The HTTP status code as a number.""" + return self._status_code + + @status_code.setter + def status_code(self, code: int) -> None: + self.status = code # type: ignore + + @property + def status(self) -> str: + """The HTTP status code as a string.""" + return self._status + + @status.setter + def status(self, value: t.Union[str, int, HTTPStatus]) -> None: + if not isinstance(value, (str, bytes, int, HTTPStatus)): + raise TypeError("Invalid status argument") + + self._status, self._status_code = self._clean_status(value) + + def 
_clean_status(self, value: t.Union[str, int, HTTPStatus]) -> t.Tuple[str, int]: + if isinstance(value, HTTPStatus): + value = int(value) + status = _to_str(value, self.charset) + split_status = status.split(None, 1) + + if len(split_status) == 0: + raise ValueError("Empty status argument") + + try: + status_code = int(split_status[0]) + except ValueError: + # only message + return f"0 {status}", 0 + + if len(split_status) > 1: + # code and message + return status, status_code + + # only code, look up message + try: + status = f"{status_code} {HTTP_STATUS_CODES[status_code].upper()}" + except KeyError: + status = f"{status_code} UNKNOWN" + + return status, status_code + + def set_cookie( + self, + key: str, + value: str = "", + max_age: t.Optional[t.Union[timedelta, int]] = None, + expires: t.Optional[t.Union[str, datetime, int, float]] = None, + path: t.Optional[str] = "/", + domain: t.Optional[str] = None, + secure: bool = False, + httponly: bool = False, + samesite: t.Optional[str] = None, + ) -> None: + """Sets a cookie. + + A warning is raised if the size of the cookie header exceeds + :attr:`max_cookie_size`, but the header will still be set. + + :param key: the key (name) of the cookie to be set. + :param value: the value of the cookie. + :param max_age: should be a number of seconds, or `None` (default) if + the cookie should last only as long as the client's + browser session. + :param expires: should be a `datetime` object or UNIX timestamp. + :param path: limits the cookie to a given path, per default it will + span the whole domain. + :param domain: if you want to set a cross-domain cookie. For example, + ``domain=".example.com"`` will set a cookie that is + readable by the domain ``www.example.com``, + ``foo.example.com`` etc. Otherwise, a cookie will only + be readable by the domain that set it. + :param secure: If ``True``, the cookie will only be available + via HTTPS. + :param httponly: Disallow JavaScript access to the cookie. 
+ :param samesite: Limit the scope of the cookie to only be + attached to requests that are "same-site". + """ + self.headers.add( + "Set-Cookie", + dump_cookie( + key, + value=value, + max_age=max_age, + expires=expires, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + charset=self.charset, + max_size=self.max_cookie_size, + samesite=samesite, + ), + ) + + def delete_cookie( + self, + key: str, + path: str = "/", + domain: t.Optional[str] = None, + secure: bool = False, + httponly: bool = False, + samesite: t.Optional[str] = None, + ) -> None: + """Delete a cookie. Fails silently if key doesn't exist. + + :param key: the key (name) of the cookie to be deleted. + :param path: if the cookie that should be deleted was limited to a + path, the path has to be defined here. + :param domain: if the cookie that should be deleted was limited to a + domain, that domain has to be defined here. + :param secure: If ``True``, the cookie will only be available + via HTTPS. + :param httponly: Disallow JavaScript access to the cookie. + :param samesite: Limit the scope of the cookie to only be + attached to requests that are "same-site". + """ + self.set_cookie( + key, + expires=0, + max_age=0, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + samesite=samesite, + ) + + @property + def is_json(self) -> bool: + """Check if the mimetype indicates JSON data, either + :mimetype:`application/json` or :mimetype:`application/*+json`. 
+ """ + mt = self.mimetype + return mt is not None and ( + mt == "application/json" + or mt.startswith("application/") + and mt.endswith("+json") + ) + + # Common Descriptors + + @property + def mimetype(self) -> t.Optional[str]: + """The mimetype (content type without charset etc.)""" + ct = self.headers.get("content-type") + + if ct: + return ct.split(";")[0].strip() + else: + return None + + @mimetype.setter + def mimetype(self, value: str) -> None: + self.headers["Content-Type"] = get_content_type(value, self.charset) + + @property + def mimetype_params(self) -> t.Dict[str, str]: + """The mimetype parameters as dict. For example if the + content type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.5 + """ + + def on_update(d: CallbackDict) -> None: + self.headers["Content-Type"] = dump_options_header(self.mimetype, d) + + d = parse_options_header(self.headers.get("content-type", ""))[1] + return CallbackDict(d, on_update) + + location = header_property[str]( + "Location", + doc="""The Location response-header field is used to redirect + the recipient to a location other than the Request-URI for + completion of the request or identification of a new + resource.""", + ) + age = header_property( + "Age", + None, + parse_age, + dump_age, # type: ignore + doc="""The Age response-header field conveys the sender's + estimate of the amount of time since the response (or its + revalidation) was generated at the origin server. 
+ + Age values are non-negative decimal integers, representing time + in seconds.""", + ) + content_type = header_property[str]( + "Content-Type", + doc="""The Content-Type entity-header field indicates the media + type of the entity-body sent to the recipient or, in the case of + the HEAD method, the media type that would have been sent had + the request been a GET.""", + ) + content_length = header_property( + "Content-Length", + None, + int, + str, + doc="""The Content-Length entity-header field indicates the size + of the entity-body, in decimal number of OCTETs, sent to the + recipient or, in the case of the HEAD method, the size of the + entity-body that would have been sent had the request been a + GET.""", + ) + content_location = header_property[str]( + "Content-Location", + doc="""The Content-Location entity-header field MAY be used to + supply the resource location for the entity enclosed in the + message when that entity is accessible from a location separate + from the requested resource's URI.""", + ) + content_encoding = header_property[str]( + "Content-Encoding", + doc="""The Content-Encoding entity-header field is used as a + modifier to the media-type. When present, its value indicates + what additional content codings have been applied to the + entity-body, and thus what decoding mechanisms must be applied + in order to obtain the media-type referenced by the Content-Type + header field.""", + ) + content_md5 = header_property[str]( + "Content-MD5", + doc="""The Content-MD5 entity-header field, as defined in + RFC 1864, is an MD5 digest of the entity-body for the purpose of + providing an end-to-end message integrity check (MIC) of the + entity-body. 
(Note: a MIC is good for detecting accidental + modification of the entity-body in transit, but is not proof + against malicious attacks.)""", + ) + date = header_property( + "Date", + None, + parse_date, + http_date, + doc="""The Date general-header field represents the date and + time at which the message was originated, having the same + semantics as orig-date in RFC 822. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. + """, + ) + expires = header_property( + "Expires", + None, + parse_date, + http_date, + doc="""The Expires entity-header field gives the date/time after + which the response is considered stale. A stale cache entry may + not normally be returned by a cache. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. + """, + ) + last_modified = header_property( + "Last-Modified", + None, + parse_date, + http_date, + doc="""The Last-Modified entity-header field indicates the date + and time at which the origin server believes the variant was + last modified. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. + """, + ) + + @property + def retry_after(self) -> t.Optional[datetime]: + """The Retry-After response-header field can be used with a + 503 (Service Unavailable) response to indicate how long the + service is expected to be unavailable to the requesting client. + + Time in seconds until expiration or date. + + .. versionchanged:: 2.0 + The datetime object is timezone-aware. 
+ """ + value = self.headers.get("retry-after") + if value is None: + return None + + try: + seconds = int(value) + except ValueError: + return parse_date(value) + + return datetime.now(timezone.utc) + timedelta(seconds=seconds) + + @retry_after.setter + def retry_after(self, value: t.Optional[t.Union[datetime, int, str]]) -> None: + if value is None: + if "retry-after" in self.headers: + del self.headers["retry-after"] + return + elif isinstance(value, datetime): + value = http_date(value) + else: + value = str(value) + self.headers["Retry-After"] = value + + vary = _set_property( + "Vary", + doc="""The Vary field value indicates the set of request-header + fields that fully determines, while the response is fresh, + whether a cache is permitted to use the response to reply to a + subsequent request without revalidation.""", + ) + content_language = _set_property( + "Content-Language", + doc="""The Content-Language entity-header field describes the + natural language(s) of the intended audience for the enclosed + entity. Note that this might not be equivalent to all the + languages used within the entity-body.""", + ) + allow = _set_property( + "Allow", + doc="""The Allow entity-header field lists the set of methods + supported by the resource identified by the Request-URI. The + purpose of this field is strictly to inform the recipient of + valid methods associated with the resource. An Allow header + field MUST be present in a 405 (Method Not Allowed) + response.""", + ) + + # ETag + + @property + def cache_control(self) -> ResponseCacheControl: + """The Cache-Control general-header field is used to specify + directives that MUST be obeyed by all caching mechanisms along the + request/response chain. 
+ """ + + def on_update(cache_control: ResponseCacheControl) -> None: + if not cache_control and "cache-control" in self.headers: + del self.headers["cache-control"] + elif cache_control: + self.headers["Cache-Control"] = cache_control.to_header() + + return parse_cache_control_header( + self.headers.get("cache-control"), on_update, ResponseCacheControl + ) + + def set_etag(self, etag: str, weak: bool = False) -> None: + """Set the etag, and override the old one if there was one.""" + self.headers["ETag"] = quote_etag(etag, weak) + + def get_etag(self) -> t.Union[t.Tuple[str, bool], t.Tuple[None, None]]: + """Return a tuple in the form ``(etag, is_weak)``. If there is no + ETag the return value is ``(None, None)``. + """ + return unquote_etag(self.headers.get("ETag")) + + accept_ranges = header_property[str]( + "Accept-Ranges", + doc="""The `Accept-Ranges` header. Even though the name would + indicate that multiple values are supported, it must be one + string token only. + + The values ``'bytes'`` and ``'none'`` are common. + + .. versionadded:: 0.7""", + ) + + @property + def content_range(self) -> ContentRange: + """The ``Content-Range`` header as a + :class:`~werkzeug.datastructures.ContentRange` object. Available + even if the header is not set. + + .. versionadded:: 0.7 + """ + + def on_update(rng: ContentRange) -> None: + if not rng: + del self.headers["content-range"] + else: + self.headers["Content-Range"] = rng.to_header() + + rv = parse_content_range_header(self.headers.get("content-range"), on_update) + # always provide a content range object to make the descriptor + # more user friendly. It provides an unset() method that can be + # used to remove the header quickly. 
+ if rv is None: + rv = ContentRange(None, None, None, on_update=on_update) + return rv + + @content_range.setter + def content_range(self, value: t.Optional[t.Union[ContentRange, str]]) -> None: + if not value: + del self.headers["content-range"] + elif isinstance(value, str): + self.headers["Content-Range"] = value + else: + self.headers["Content-Range"] = value.to_header() + + # Authorization + + @property + def www_authenticate(self) -> WWWAuthenticate: + """The ``WWW-Authenticate`` header in a parsed form.""" + + def on_update(www_auth: WWWAuthenticate) -> None: + if not www_auth and "www-authenticate" in self.headers: + del self.headers["www-authenticate"] + elif www_auth: + self.headers["WWW-Authenticate"] = www_auth.to_header() + + header = self.headers.get("www-authenticate") + return parse_www_authenticate_header(header, on_update) + + # CSP + + @property + def content_security_policy(self) -> ContentSecurityPolicy: + """The ``Content-Security-Policy`` header as a + :class:`~werkzeug.datastructures.ContentSecurityPolicy` object. Available + even if the header is not set. + + The Content-Security-Policy header adds an additional layer of + security to help detect and mitigate certain types of attacks. 
+ """ + + def on_update(csp: ContentSecurityPolicy) -> None: + if not csp: + del self.headers["content-security-policy"] + else: + self.headers["Content-Security-Policy"] = csp.to_header() + + rv = parse_csp_header(self.headers.get("content-security-policy"), on_update) + if rv is None: + rv = ContentSecurityPolicy(None, on_update=on_update) + return rv + + @content_security_policy.setter + def content_security_policy( + self, value: t.Optional[t.Union[ContentSecurityPolicy, str]] + ) -> None: + if not value: + del self.headers["content-security-policy"] + elif isinstance(value, str): + self.headers["Content-Security-Policy"] = value + else: + self.headers["Content-Security-Policy"] = value.to_header() + + @property + def content_security_policy_report_only(self) -> ContentSecurityPolicy: + """The ``Content-Security-policy-report-only`` header as a + :class:`~werkzeug.datastructures.ContentSecurityPolicy` object. Available + even if the header is not set. + + The Content-Security-Policy-Report-Only header adds a csp policy + that is not enforced but is reported thereby helping detect + certain types of attacks. 
+ """ + + def on_update(csp: ContentSecurityPolicy) -> None: + if not csp: + del self.headers["content-security-policy-report-only"] + else: + self.headers["Content-Security-policy-report-only"] = csp.to_header() + + rv = parse_csp_header( + self.headers.get("content-security-policy-report-only"), on_update + ) + if rv is None: + rv = ContentSecurityPolicy(None, on_update=on_update) + return rv + + @content_security_policy_report_only.setter + def content_security_policy_report_only( + self, value: t.Optional[t.Union[ContentSecurityPolicy, str]] + ) -> None: + if not value: + del self.headers["content-security-policy-report-only"] + elif isinstance(value, str): + self.headers["Content-Security-policy-report-only"] = value + else: + self.headers["Content-Security-policy-report-only"] = value.to_header() + + # CORS + + @property + def access_control_allow_credentials(self) -> bool: + """Whether credentials can be shared by the browser to + JavaScript code. As part of the preflight request it indicates + whether credentials can be used on the cross origin request. 
+ """ + return "Access-Control-Allow-Credentials" in self.headers + + @access_control_allow_credentials.setter + def access_control_allow_credentials(self, value: t.Optional[bool]) -> None: + if value is True: + self.headers["Access-Control-Allow-Credentials"] = "true" + else: + self.headers.pop("Access-Control-Allow-Credentials", None) + + access_control_allow_headers = header_property( + "Access-Control-Allow-Headers", + load_func=parse_set_header, + dump_func=dump_header, + doc="Which headers can be sent with the cross origin request.", + ) + + access_control_allow_methods = header_property( + "Access-Control-Allow-Methods", + load_func=parse_set_header, + dump_func=dump_header, + doc="Which methods can be used for the cross origin request.", + ) + + access_control_allow_origin = header_property[str]( + "Access-Control-Allow-Origin", + doc="The origin or '*' for any origin that may make cross origin requests.", + ) + + access_control_expose_headers = header_property( + "Access-Control-Expose-Headers", + load_func=parse_set_header, + dump_func=dump_header, + doc="Which headers can be shared by the browser to JavaScript code.", + ) + + access_control_max_age = header_property( + "Access-Control-Max-Age", + load_func=int, + dump_func=str, + doc="The maximum age in seconds the access control settings can be cached for.", + ) + + cross_origin_opener_policy = header_property[COOP]( + "Cross-Origin-Opener-Policy", + load_func=lambda value: COOP(value), + dump_func=lambda value: value.value, + default=COOP.UNSAFE_NONE, + doc="""Allows control over sharing of browsing context group with cross-origin + documents. 
Values must be a member of the :class:`werkzeug.http.COOP` enum.""", + ) + + cross_origin_embedder_policy = header_property[COEP]( + "Cross-Origin-Embedder-Policy", + load_func=lambda value: COEP(value), + dump_func=lambda value: value.value, + default=COEP.UNSAFE_NONE, + doc="""Prevents a document from loading any cross-origin resources that do not + explicitly grant the document permission. Values must be a member of the + :class:`werkzeug.http.COEP` enum.""", + ) diff --git a/src/werkzeug/sansio/utils.py b/src/werkzeug/sansio/utils.py new file mode 100644 index 0000000..e639dcb --- /dev/null +++ b/src/werkzeug/sansio/utils.py @@ -0,0 +1,165 @@ +import typing as t + +from .._internal import _encode_idna +from ..exceptions import SecurityError +from ..urls import uri_to_iri +from ..urls import url_quote + + +def host_is_trusted(hostname: str, trusted_list: t.Iterable[str]) -> bool: + """Check if a host matches a list of trusted names. + + :param hostname: The name to check. + :param trusted_list: A list of valid names to match. If a name + starts with a dot it will match all subdomains. + + .. versionadded:: 0.9 + """ + if not hostname: + return False + + if isinstance(trusted_list, str): + trusted_list = [trusted_list] + + def _normalize(hostname: str) -> bytes: + if ":" in hostname: + hostname = hostname.rsplit(":", 1)[0] + + return _encode_idna(hostname) + + try: + hostname_bytes = _normalize(hostname) + except UnicodeError: + return False + + for ref in trusted_list: + if ref.startswith("."): + ref = ref[1:] + suffix_match = True + else: + suffix_match = False + + try: + ref_bytes = _normalize(ref) + except UnicodeError: + return False + + if ref_bytes == hostname_bytes: + return True + + if suffix_match and hostname_bytes.endswith(b"." 
def get_host(
    scheme: str,
    host_header: t.Optional[str],
    server: t.Optional[t.Tuple[str, t.Optional[int]]] = None,
    trusted_hosts: t.Optional[t.Iterable[str]] = None,
) -> str:
    """Return the host for the given parameters.

    The ``host_header`` is used when it is given, otherwise the host is
    built from ``server``. A trailing ``:80`` is dropped for ``http`` and
    ``ws``, and a trailing ``:443`` for ``https`` and ``wss``. If neither
    source is available the result is an empty string.

    Optionally, verify that the host is trusted using
    :func:`host_is_trusted` and raise a
    :exc:`~werkzeug.exceptions.SecurityError` if it is not.

    :param scheme: The protocol the request used, like ``"https"``.
    :param host_header: The ``Host`` header value.
    :param server: Address of the server. ``(host, port)``, or
        ``(path, None)`` for unix sockets.
    :param trusted_hosts: A list of trusted host names.

    :return: Host, with port if necessary.
    :raise ~werkzeug.exceptions.SecurityError: If the host is not
        trusted.
    """
    if host_header is not None:
        host = host_header
    elif server is None:
        host = ""
    else:
        address, port = server[0], server[1]
        host = address if port is None else f"{address}:{port}"

    # Suffix that is redundant for the scheme, if the scheme has one.
    default_port_suffix = {
        "http": ":80",
        "ws": ":80",
        "https": ":443",
        "wss": ":443",
    }.get(scheme)

    if default_port_suffix is not None and host.endswith(default_port_suffix):
        host = host[: -len(default_port_suffix)]

    if trusted_hosts is not None and not host_is_trusted(host, trusted_hosts):
        raise SecurityError(f"Host {host!r} is not trusted.")

    return host
def get_content_length(
    http_content_length: t.Union[str, None] = None,
    http_transfer_encoding: t.Union[str, None] = "",
) -> t.Optional[int]:
    """Returns the content length as an integer or ``None`` if
    unavailable or chunked transfer encoding is used.

    A value that cannot be converted to an integer also yields
    ``None``; a negative value is clamped to ``0``.

    :param http_content_length: The Content-Length HTTP header.
    :param http_transfer_encoding: The Transfer-Encoding HTTP header.

    .. versionadded:: 2.2
    """
    if http_transfer_encoding == "chunked" or http_content_length is None:
        return None

    try:
        length = int(http_content_length)
    except (ValueError, TypeError):
        # The header value is not a usable integer.
        return None

    return max(0, length)
+ """ + if method == "plain": + return password, method + + salt = salt.encode("utf-8") + password = password.encode("utf-8") + + if method.startswith("pbkdf2:"): + if not salt: + raise ValueError("Salt is required for PBKDF2") + + args = method[7:].split(":") + + if len(args) not in (1, 2): + raise ValueError("Invalid number of arguments for PBKDF2") + + method = args.pop(0) + iterations = int(args[0] or 0) if args else DEFAULT_PBKDF2_ITERATIONS + return ( + hashlib.pbkdf2_hmac(method, password, salt, iterations).hex(), + f"pbkdf2:{method}:{iterations}", + ) + + if salt: + return hmac.new(salt, password, method).hexdigest(), method + + return hashlib.new(method, password).hexdigest(), method + + +def generate_password_hash( + password: str, method: str = "pbkdf2:sha256", salt_length: int = 16 +) -> str: + """Hash a password with the given method and salt with a string of + the given length. The format of the string returned includes the method + that was used so that :func:`check_password_hash` can check the hash. + + The format for the hashed string looks like this:: + + method$salt$hash + + This method can **not** generate unsalted passwords but it is possible + to set param method='plain' in order to enforce plaintext passwords. + If a salt is used, hmac is used internally to salt the password. + + If PBKDF2 is wanted it can be enabled by setting the method to + ``pbkdf2:method:iterations`` where iterations is optional:: + + pbkdf2:sha256:80000$salt$hash + pbkdf2:sha256$salt$hash + + :param password: the password to hash. + :param method: the hash method to use (one that hashlib supports). Can + optionally be in the format ``pbkdf2:method:iterations`` + to enable PBKDF2. + :param salt_length: the length of the salt in letters. 
+ """ + salt = gen_salt(salt_length) if method != "plain" else "" + h, actual_method = _hash_internal(method, salt, password) + return f"{actual_method}${salt}${h}" + + +def check_password_hash(pwhash: str, password: str) -> bool: + """Check a password against a given salted and hashed password value. + In order to support unsalted legacy passwords this method supports + plain text passwords, md5 and sha1 hashes (both salted and unsalted). + + Returns `True` if the password matched, `False` otherwise. + + :param pwhash: a hashed string like returned by + :func:`generate_password_hash`. + :param password: the plaintext password to compare against the hash. + """ + if pwhash.count("$") < 2: + return False + + method, salt, hashval = pwhash.split("$", 2) + return hmac.compare_digest(_hash_internal(method, salt, password)[0], hashval) + + +def safe_join(directory: str, *pathnames: str) -> t.Optional[str]: + """Safely join zero or more untrusted path components to a base + directory to avoid escaping the base directory. + + :param directory: The trusted base directory. + :param pathnames: The untrusted path components relative to the + base directory. + :return: A safe path, otherwise ``None``. + """ + if not directory: + # Ensure we end up with ./path if directory="" is given, + # otherwise the first untrusted part could become trusted. + directory = "." + + parts = [directory] + + for filename in pathnames: + if filename != "": + filename = posixpath.normpath(filename) + + if ( + any(sep in filename for sep in _os_alt_seps) + or os.path.isabs(filename) + or filename == ".." + or filename.startswith("../") + ): + return None + + parts.append(filename) + + return posixpath.join(*parts) diff --git a/src/werkzeug/serving.py b/src/werkzeug/serving.py new file mode 100644 index 0000000..c482469 --- /dev/null +++ b/src/werkzeug/serving.py @@ -0,0 +1,1098 @@ +"""A WSGI and HTTP server for use **during development only**. 
This +server is convenient to use, but is not designed to be particularly +stable, secure, or efficient. Use a dedicated WSGI server and HTTP +server when deploying to production. + +It provides features like interactive debugging and code reloading. Use +``run_simple`` to start the server. Put this in a ``run.py`` script: + +.. code-block:: python + + from myapp import create_app + from werkzeug import run_simple +""" +import errno +import io +import os +import socket +import socketserver +import sys +import typing as t +from datetime import datetime as dt +from datetime import timedelta +from datetime import timezone +from http.server import BaseHTTPRequestHandler +from http.server import HTTPServer + +from ._internal import _log +from ._internal import _wsgi_encoding_dance +from .exceptions import InternalServerError +from .urls import uri_to_iri +from .urls import url_parse +from .urls import url_unquote + +try: + import ssl +except ImportError: + + class _SslDummy: + def __getattr__(self, name: str) -> t.Any: + raise RuntimeError( # noqa: B904 + "SSL is unavailable because this Python runtime was not" + " compiled with SSL/TLS support." 
+ ) + + ssl = _SslDummy() # type: ignore + +_log_add_style = True + +if os.name == "nt": + try: + __import__("colorama") + except ImportError: + _log_add_style = False + +can_fork = hasattr(os, "fork") + +if can_fork: + ForkingMixIn = socketserver.ForkingMixIn +else: + + class ForkingMixIn: # type: ignore + pass + + +try: + af_unix = socket.AF_UNIX +except AttributeError: + af_unix = None # type: ignore + +LISTEN_QUEUE = 128 + +_TSSLContextArg = t.Optional[ + t.Union["ssl.SSLContext", t.Tuple[str, t.Optional[str]], "te.Literal['adhoc']"] +] + +if t.TYPE_CHECKING: + import typing_extensions as te # noqa: F401 + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + from cryptography.hazmat.primitives.asymmetric.rsa import ( + RSAPrivateKeyWithSerialization, + ) + from cryptography.x509 import Certificate + + +class DechunkedInput(io.RawIOBase): + """An input stream that handles Transfer-Encoding 'chunked'""" + + def __init__(self, rfile: t.IO[bytes]) -> None: + self._rfile = rfile + self._done = False + self._len = 0 + + def readable(self) -> bool: + return True + + def read_chunk_len(self) -> int: + try: + line = self._rfile.readline().decode("latin1") + _len = int(line.strip(), 16) + except ValueError as e: + raise OSError("Invalid chunk header") from e + if _len < 0: + raise OSError("Negative chunk length not allowed") + return _len + + def readinto(self, buf: bytearray) -> int: # type: ignore + read = 0 + while not self._done and read < len(buf): + if self._len == 0: + # This is the first chunk or we fully consumed the previous + # one. Read the next length of the next chunk + self._len = self.read_chunk_len() + + if self._len == 0: + # Found the final chunk of size 0. The stream is now exhausted, + # but there is still a final newline that should be consumed + self._done = True + + if self._len > 0: + # There is data (left) in this chunk, so append it to the + # buffer. 
If this operation fully consumes the chunk, this will + # reset self._len to 0. + n = min(len(buf), self._len) + + # If (read + chunk size) becomes more than len(buf), buf will + # grow beyond the original size and read more data than + # required. So only read as much data as can fit in buf. + if read + n > len(buf): + buf[read:] = self._rfile.read(len(buf) - read) + self._len -= len(buf) - read + read = len(buf) + else: + buf[read : read + n] = self._rfile.read(n) + self._len -= n + read += n + + if self._len == 0: + # Skip the terminating newline of a chunk that has been fully + # consumed. This also applies to the 0-sized final chunk + terminator = self._rfile.readline() + if terminator not in (b"\n", b"\r\n", b"\r"): + raise OSError("Missing chunk terminating newline") + + return read + + +class WSGIRequestHandler(BaseHTTPRequestHandler): + """A request handler that implements WSGI dispatching.""" + + server: "BaseWSGIServer" + + @property + def server_version(self) -> str: # type: ignore + from . import __version__ + + return f"Werkzeug/{__version__}" + + def make_environ(self) -> "WSGIEnvironment": + request_url = url_parse(self.path) + url_scheme = "http" if self.server.ssl_context is None else "https" + + if not self.client_address: + self.client_address = ("", 0) + elif isinstance(self.client_address, str): + self.client_address = (self.client_address, 0) + + # If there was no scheme but the path started with two slashes, + # the first segment may have been incorrectly parsed as the + # netloc, prepend it to the path again. 
+ if not request_url.scheme and request_url.netloc: + path_info = f"/{request_url.netloc}{request_url.path}" + else: + path_info = request_url.path + + path_info = url_unquote(path_info) + + environ: "WSGIEnvironment" = { + "wsgi.version": (1, 0), + "wsgi.url_scheme": url_scheme, + "wsgi.input": self.rfile, + "wsgi.errors": sys.stderr, + "wsgi.multithread": self.server.multithread, + "wsgi.multiprocess": self.server.multiprocess, + "wsgi.run_once": False, + "werkzeug.socket": self.connection, + "SERVER_SOFTWARE": self.server_version, + "REQUEST_METHOD": self.command, + "SCRIPT_NAME": "", + "PATH_INFO": _wsgi_encoding_dance(path_info), + "QUERY_STRING": _wsgi_encoding_dance(request_url.query), + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": _wsgi_encoding_dance(self.path), + # Non-standard, added by gunicorn + "RAW_URI": _wsgi_encoding_dance(self.path), + "REMOTE_ADDR": self.address_string(), + "REMOTE_PORT": self.port_integer(), + "SERVER_NAME": self.server.server_address[0], + "SERVER_PORT": str(self.server.server_address[1]), + "SERVER_PROTOCOL": self.request_version, + } + + for key, value in self.headers.items(): + key = key.upper().replace("-", "_") + value = value.replace("\r\n", "") + if key not in ("CONTENT_TYPE", "CONTENT_LENGTH"): + key = f"HTTP_{key}" + if key in environ: + value = f"{environ[key]},{value}" + environ[key] = value + + if environ.get("HTTP_TRANSFER_ENCODING", "").strip().lower() == "chunked": + environ["wsgi.input_terminated"] = True + environ["wsgi.input"] = DechunkedInput(environ["wsgi.input"]) + + # Per RFC 2616, if the URL is absolute, use that as the host. + # We're using "has a scheme" to indicate an absolute URL. + if request_url.scheme and request_url.netloc: + environ["HTTP_HOST"] = request_url.netloc + + try: + # binary_form=False gives nicer information, but wouldn't be compatible with + # what Nginx or Apache could return. 
+ peer_cert = self.connection.getpeercert( # type: ignore[attr-defined] + binary_form=True + ) + if peer_cert is not None: + # Nginx and Apache use PEM format. + environ["SSL_CLIENT_CERT"] = ssl.DER_cert_to_PEM_cert(peer_cert) + except ValueError: + # SSL handshake hasn't finished. + self.server.log("error", "Cannot fetch SSL peer certificate info") + except AttributeError: + # Not using TLS, the socket will not have getpeercert(). + pass + + return environ + + def run_wsgi(self) -> None: + if self.headers.get("Expect", "").lower().strip() == "100-continue": + self.wfile.write(b"HTTP/1.1 100 Continue\r\n\r\n") + + self.environ = environ = self.make_environ() + status_set: t.Optional[str] = None + headers_set: t.Optional[t.List[t.Tuple[str, str]]] = None + status_sent: t.Optional[str] = None + headers_sent: t.Optional[t.List[t.Tuple[str, str]]] = None + chunk_response: bool = False + + def write(data: bytes) -> None: + nonlocal status_sent, headers_sent, chunk_response + assert status_set is not None, "write() before start_response" + assert headers_set is not None, "write() before start_response" + if status_sent is None: + status_sent = status_set + headers_sent = headers_set + try: + code_str, msg = status_sent.split(None, 1) + except ValueError: + code_str, msg = status_sent, "" + code = int(code_str) + self.send_response(code, msg) + header_keys = set() + for key, value in headers_sent: + self.send_header(key, value) + header_keys.add(key.lower()) + + # Use chunked transfer encoding if there is no content + # length. Do not use for 1xx and 204 responses. 304 + # responses and HEAD requests are also excluded, which + # is the more conservative behavior and matches other + # parts of the code. 
+ # https://httpwg.org/specs/rfc7230.html#rfc.section.3.3.1 + if ( + not ( + "content-length" in header_keys + or environ["REQUEST_METHOD"] == "HEAD" + or (100 <= code < 200) + or code in {204, 304} + ) + and self.protocol_version >= "HTTP/1.1" + ): + chunk_response = True + self.send_header("Transfer-Encoding", "chunked") + + # Always close the connection. This disables HTTP/1.1 + # keep-alive connections. They aren't handled well by + # Python's http.server because it doesn't know how to + # drain the stream before the next request line. + self.send_header("Connection", "close") + self.end_headers() + + assert isinstance(data, bytes), "applications must write bytes" + + if data: + if chunk_response: + self.wfile.write(hex(len(data))[2:].encode()) + self.wfile.write(b"\r\n") + + self.wfile.write(data) + + if chunk_response: + self.wfile.write(b"\r\n") + + self.wfile.flush() + + def start_response(status, headers, exc_info=None): # type: ignore + nonlocal status_set, headers_set + if exc_info: + try: + if headers_sent: + raise exc_info[1].with_traceback(exc_info[2]) + finally: + exc_info = None + elif headers_set: + raise AssertionError("Headers already set") + status_set = status + headers_set = headers + return write + + def execute(app: "WSGIApplication") -> None: + application_iter = app(environ, start_response) + try: + for data in application_iter: + write(data) + if not headers_sent: + write(b"") + if chunk_response: + self.wfile.write(b"0\r\n\r\n") + finally: + if hasattr(application_iter, "close"): + application_iter.close() # type: ignore + + try: + execute(self.server.app) + except (ConnectionError, socket.timeout) as e: + self.connection_dropped(e, environ) + except Exception as e: + if self.server.passthrough_errors: + raise + + if status_sent is not None and chunk_response: + self.close_connection = True + + try: + # if we haven't yet sent the headers but they are set + # we roll back to be able to set them again. 
+ if status_sent is None: + status_set = None + headers_set = None + execute(InternalServerError()) + except Exception: + pass + + from .debug.tbtools import DebugTraceback + + msg = DebugTraceback(e).render_traceback_text() + self.server.log("error", f"Error on request:\n{msg}") + + def handle(self) -> None: + """Handles a request ignoring dropped connections.""" + try: + super().handle() + except (ConnectionError, socket.timeout) as e: + self.connection_dropped(e) + except Exception as e: + if self.server.ssl_context is not None and is_ssl_error(e): + self.log_error("SSL error occurred: %s", e) + else: + raise + + def connection_dropped( + self, error: BaseException, environ: t.Optional["WSGIEnvironment"] = None + ) -> None: + """Called if the connection was closed by the client. By default + nothing happens. + """ + + def __getattr__(self, name: str) -> t.Any: + # All HTTP methods are handled by run_wsgi. + if name.startswith("do_"): + return self.run_wsgi + + # All other attributes are forwarded to the base class. 
+ return getattr(super(), name) + + def address_string(self) -> str: + if getattr(self, "environ", None): + return self.environ["REMOTE_ADDR"] # type: ignore + + if not self.client_address: + return "" + + return self.client_address[0] + + def port_integer(self) -> int: + return self.client_address[1] + + def log_request( + self, code: t.Union[int, str] = "-", size: t.Union[int, str] = "-" + ) -> None: + try: + path = uri_to_iri(self.path) + msg = f"{self.command} {path} {self.request_version}" + except AttributeError: + # path isn't set if the requestline was bad + msg = self.requestline + + code = str(code) + + if code[0] == "1": # 1xx - Informational + msg = _ansi_style(msg, "bold") + elif code == "200": # 2xx - Success + pass + elif code == "304": # 304 - Resource Not Modified + msg = _ansi_style(msg, "cyan") + elif code[0] == "3": # 3xx - Redirection + msg = _ansi_style(msg, "green") + elif code == "404": # 404 - Resource Not Found + msg = _ansi_style(msg, "yellow") + elif code[0] == "4": # 4xx - Client Error + msg = _ansi_style(msg, "bold", "red") + else: # 5xx, or any other response + msg = _ansi_style(msg, "bold", "magenta") + + self.log("info", '"%s" %s %s', msg, code, size) + + def log_error(self, format: str, *args: t.Any) -> None: + self.log("error", format, *args) + + def log_message(self, format: str, *args: t.Any) -> None: + self.log("info", format, *args) + + def log(self, type: str, message: str, *args: t.Any) -> None: + _log( + type, + f"{self.address_string()} - - [{self.log_date_time_string()}] {message}\n", + *args, + ) + + +def _ansi_style(value: str, *styles: str) -> str: + if not _log_add_style: + return value + + codes = { + "bold": 1, + "red": 31, + "green": 32, + "yellow": 33, + "magenta": 35, + "cyan": 36, + } + + for style in styles: + value = f"\x1b[{codes[style]}m{value}" + + return f"{value}\x1b[0m" + + +def generate_adhoc_ssl_pair( + cn: t.Optional[str] = None, +) -> t.Tuple["Certificate", "RSAPrivateKeyWithSerialization"]: + try: + 
from cryptography import x509 + from cryptography.x509.oid import NameOID + from cryptography.hazmat.backends import default_backend + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.asymmetric import rsa + except ImportError: + raise TypeError( + "Using ad-hoc certificates requires the cryptography library." + ) from None + + backend = default_backend() + pkey = rsa.generate_private_key( + public_exponent=65537, key_size=2048, backend=backend + ) + + # pretty damn sure that this is not actually accepted by anyone + if cn is None: + cn = "*" + + subject = x509.Name( + [ + x509.NameAttribute(NameOID.ORGANIZATION_NAME, "Dummy Certificate"), + x509.NameAttribute(NameOID.COMMON_NAME, cn), + ] + ) + + backend = default_backend() + cert = ( + x509.CertificateBuilder() + .subject_name(subject) + .issuer_name(subject) + .public_key(pkey.public_key()) + .serial_number(x509.random_serial_number()) + .not_valid_before(dt.now(timezone.utc)) + .not_valid_after(dt.now(timezone.utc) + timedelta(days=365)) + .add_extension(x509.ExtendedKeyUsage([x509.OID_SERVER_AUTH]), critical=False) + .add_extension(x509.SubjectAlternativeName([x509.DNSName(cn)]), critical=False) + .sign(pkey, hashes.SHA256(), backend) + ) + return cert, pkey + + +def make_ssl_devcert( + base_path: str, host: t.Optional[str] = None, cn: t.Optional[str] = None +) -> t.Tuple[str, str]: + """Creates an SSL key for development. This should be used instead of + the ``'adhoc'`` key which generates a new cert on each server start. + It accepts a path for where it should store the key and cert and + either a host or CN. If a host is given it will use the CN + ``*.host/CN=host``. + + For more information see :func:`run_simple`. + + .. versionadded:: 0.9 + + :param base_path: the path to the certificate and key. The extension + ``.crt`` is added for the certificate, ``.key`` is + added for the key. + :param host: the name of the host. 
This can be used as an alternative + for the `cn`. + :param cn: the `CN` to use. + """ + + if host is not None: + cn = f"*.{host}/CN={host}" + cert, pkey = generate_adhoc_ssl_pair(cn=cn) + + from cryptography.hazmat.primitives import serialization + + cert_file = f"{base_path}.crt" + pkey_file = f"{base_path}.key" + + with open(cert_file, "wb") as f: + f.write(cert.public_bytes(serialization.Encoding.PEM)) + with open(pkey_file, "wb") as f: + f.write( + pkey.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption(), + ) + ) + + return cert_file, pkey_file + + +def generate_adhoc_ssl_context() -> "ssl.SSLContext": + """Generates an adhoc SSL context for the development server.""" + import tempfile + import atexit + + cert, pkey = generate_adhoc_ssl_pair() + + from cryptography.hazmat.primitives import serialization + + cert_handle, cert_file = tempfile.mkstemp() + pkey_handle, pkey_file = tempfile.mkstemp() + atexit.register(os.remove, pkey_file) + atexit.register(os.remove, cert_file) + + os.write(cert_handle, cert.public_bytes(serialization.Encoding.PEM)) + os.write( + pkey_handle, + pkey.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption(), + ), + ) + + os.close(cert_handle) + os.close(pkey_handle) + ctx = load_ssl_context(cert_file, pkey_file) + return ctx + + +def load_ssl_context( + cert_file: str, pkey_file: t.Optional[str] = None, protocol: t.Optional[int] = None +) -> "ssl.SSLContext": + """Loads SSL context from cert/private key files and optional protocol. + Many parameters are directly taken from the API of + :py:class:`ssl.SSLContext`. + + :param cert_file: Path of the certificate to use. + :param pkey_file: Path of the private key to use. If not given, the key + will be obtained from the certificate file. 
+ :param protocol: A ``PROTOCOL`` constant from the :mod:`ssl` module. + Defaults to :data:`ssl.PROTOCOL_TLS_SERVER`. + """ + if protocol is None: + protocol = ssl.PROTOCOL_TLS_SERVER + + ctx = ssl.SSLContext(protocol) + ctx.load_cert_chain(cert_file, pkey_file) + return ctx + + +def is_ssl_error(error: t.Optional[Exception] = None) -> bool: + """Checks if the given error (or the current one) is an SSL error.""" + if error is None: + error = t.cast(Exception, sys.exc_info()[1]) + return isinstance(error, ssl.SSLError) + + +def select_address_family(host: str, port: int) -> socket.AddressFamily: + """Return ``AF_INET``, ``AF_INET6``, or ``AF_UNIX`` depending on + the host and port.""" + if host.startswith("unix://"): + return socket.AF_UNIX + elif ":" in host and hasattr(socket, "AF_INET6"): + return socket.AF_INET6 + return socket.AF_INET + + +def get_sockaddr( + host: str, port: int, family: socket.AddressFamily +) -> t.Union[t.Tuple[str, int], str]: + """Return a fully qualified socket address that can be passed to + :func:`socket.bind`.""" + if family == af_unix: + return host.split("://", 1)[1] + try: + res = socket.getaddrinfo( + host, port, family, socket.SOCK_STREAM, socket.IPPROTO_TCP + ) + except socket.gaierror: + return host, port + return res[0][4] # type: ignore + + +def get_interface_ip(family: socket.AddressFamily) -> str: + """Get the IP address of an external interface. Used when binding to + 0.0.0.0 or :: to show a more useful URL. + + :meta private: + """ + # arbitrary private address + host = "fd31:f903:5ab5:1::1" if family == socket.AF_INET6 else "10.253.155.219" + + with socket.socket(family, socket.SOCK_DGRAM) as s: + try: + s.connect((host, 58162)) + except OSError: + return "::1" if family == socket.AF_INET6 else "127.0.0.1" + + return s.getsockname()[0] # type: ignore + + +class BaseWSGIServer(HTTPServer): + """A WSGI server that handles one request at a time. + + Use :func:`make_server` to create a server instance. 
+ """ + + multithread = False + multiprocess = False + request_queue_size = LISTEN_QUEUE + + def __init__( + self, + host: str, + port: int, + app: "WSGIApplication", + handler: t.Optional[t.Type[WSGIRequestHandler]] = None, + passthrough_errors: bool = False, + ssl_context: t.Optional[_TSSLContextArg] = None, + fd: t.Optional[int] = None, + ) -> None: + if handler is None: + handler = WSGIRequestHandler + + # If the handler doesn't directly set a protocol version and + # thread or process workers are used, then allow chunked + # responses and keep-alive connections by enabling HTTP/1.1. + if "protocol_version" not in vars(handler) and ( + self.multithread or self.multiprocess + ): + handler.protocol_version = "HTTP/1.1" + + self.host = host + self.port = port + self.app = app + self.passthrough_errors = passthrough_errors + + self.address_family = address_family = select_address_family(host, port) + server_address = get_sockaddr(host, int(port), address_family) + + # Remove a leftover Unix socket file from a previous run. Don't + # remove a file that was set up by run_simple. + if address_family == af_unix and fd is None: + server_address = t.cast(str, server_address) + + if os.path.exists(server_address): + os.unlink(server_address) + + # Bind and activate will be handled manually, it should only + # happen if we're not using a socket that was already set up. + super().__init__( + server_address, # type: ignore[arg-type] + handler, + bind_and_activate=False, + ) + + if fd is None: + # No existing socket descriptor, do bind_and_activate=True. + try: + self.server_bind() + self.server_activate() + except BaseException: + self.server_close() + raise + else: + # Use the passed in socket directly. + self.socket = socket.fromfd(fd, address_family, socket.SOCK_STREAM) + self.server_address = self.socket.getsockname() + + if address_family != af_unix: + # If port was 0, this will record the bound port. 
+ self.port = self.server_address[1] + + if ssl_context is not None: + if isinstance(ssl_context, tuple): + ssl_context = load_ssl_context(*ssl_context) + elif ssl_context == "adhoc": + ssl_context = generate_adhoc_ssl_context() + + self.socket = ssl_context.wrap_socket(self.socket, server_side=True) + self.ssl_context: t.Optional["ssl.SSLContext"] = ssl_context + else: + self.ssl_context = None + + def log(self, type: str, message: str, *args: t.Any) -> None: + _log(type, message, *args) + + def serve_forever(self, poll_interval: float = 0.5) -> None: + try: + super().serve_forever(poll_interval=poll_interval) + except KeyboardInterrupt: + pass + finally: + self.server_close() + + def handle_error( + self, request: t.Any, client_address: t.Union[t.Tuple[str, int], str] + ) -> None: + if self.passthrough_errors: + raise + + return super().handle_error(request, client_address) + + def log_startup(self) -> None: + """Show information about the address when starting the server.""" + dev_warning = ( + "WARNING: This is a development server. Do not use it in a production" + " deployment. Use a production WSGI server instead." 
+ ) + dev_warning = _ansi_style(dev_warning, "bold", "red") + messages = [dev_warning] + + if self.address_family == af_unix: + messages.append(f" * Running on {self.host}") + else: + scheme = "http" if self.ssl_context is None else "https" + display_hostname = self.host + + if self.host in {"0.0.0.0", "::"}: + messages.append(f" * Running on all addresses ({self.host})") + + if self.host == "0.0.0.0": + localhost = "127.0.0.1" + display_hostname = get_interface_ip(socket.AF_INET) + else: + localhost = "[::1]" + display_hostname = get_interface_ip(socket.AF_INET6) + + messages.append(f" * Running on {scheme}://{localhost}:{self.port}") + + if ":" in display_hostname: + display_hostname = f"[{display_hostname}]" + + messages.append(f" * Running on {scheme}://{display_hostname}:{self.port}") + + _log("info", "\n".join(messages)) + + +class ThreadedWSGIServer(socketserver.ThreadingMixIn, BaseWSGIServer): + """A WSGI server that handles concurrent requests in separate + threads. + + Use :func:`make_server` to create a server instance. + """ + + multithread = True + daemon_threads = True + + +class ForkingWSGIServer(ForkingMixIn, BaseWSGIServer): + """A WSGI server that handles concurrent requests in separate forked + processes. + + Use :func:`make_server` to create a server instance. 
+ """ + + multiprocess = True + + def __init__( + self, + host: str, + port: int, + app: "WSGIApplication", + processes: int = 40, + handler: t.Optional[t.Type[WSGIRequestHandler]] = None, + passthrough_errors: bool = False, + ssl_context: t.Optional[_TSSLContextArg] = None, + fd: t.Optional[int] = None, + ) -> None: + if not can_fork: + raise ValueError("Your platform does not support forking.") + + super().__init__(host, port, app, handler, passthrough_errors, ssl_context, fd) + self.max_children = processes + + +def make_server( + host: str, + port: int, + app: "WSGIApplication", + threaded: bool = False, + processes: int = 1, + request_handler: t.Optional[t.Type[WSGIRequestHandler]] = None, + passthrough_errors: bool = False, + ssl_context: t.Optional[_TSSLContextArg] = None, + fd: t.Optional[int] = None, +) -> BaseWSGIServer: + """Create an appropriate WSGI server instance based on the value of + ``threaded`` and ``processes``. + + This is called from :func:`run_simple`, but can be used separately + to have access to the server object, such as to run it in a separate + thread. + + See :func:`run_simple` for parameter docs. + """ + if threaded and processes > 1: + raise ValueError("Cannot have a multi-thread and multi-process server.") + + if threaded: + return ThreadedWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + + if processes > 1: + return ForkingWSGIServer( + host, + port, + app, + processes, + request_handler, + passthrough_errors, + ssl_context, + fd=fd, + ) + + return BaseWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + + +def is_running_from_reloader() -> bool: + """Check if the server is running as a subprocess within the + Werkzeug reloader. + + .. versionadded:: 0.10 + """ + return os.environ.get("WERKZEUG_RUN_MAIN") == "true" + + +def prepare_socket(hostname: str, port: int) -> socket.socket: + """Prepare a socket for use by the WSGI server and reloader. 
+ + The socket is marked inheritable so that it can be kept across + reloads instead of breaking connections. + + Catch errors during bind and show simpler error messages. For + "address already in use", show instructions for resolving the issue, + with special instructions for macOS. + + This is called from :func:`run_simple`, but can be used separately + to control server creation with :func:`make_server`. + """ + address_family = select_address_family(hostname, port) + server_address = get_sockaddr(hostname, port, address_family) + s = socket.socket(address_family, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.set_inheritable(True) + + # Remove the socket file if it already exists. + if address_family == af_unix: + server_address = t.cast(str, server_address) + + if os.path.exists(server_address): + os.unlink(server_address) + + # Catch connection issues and show them without the traceback. Show + # extra instructions for address already in use, and for macOS. + try: + s.bind(server_address) + except OSError as e: + print(e.strerror, file=sys.stderr) + + if e.errno == errno.EADDRINUSE: + print( + f"Port {port} is in use by another program. 
def run_simple(
    hostname: str,
    port: int,
    application: "WSGIApplication",
    use_reloader: bool = False,
    use_debugger: bool = False,
    use_evalex: bool = True,
    extra_files: t.Optional[t.Iterable[str]] = None,
    exclude_patterns: t.Optional[t.Iterable[str]] = None,
    reloader_interval: int = 1,
    reloader_type: str = "auto",
    threaded: bool = False,
    processes: int = 1,
    request_handler: t.Optional[t.Type[WSGIRequestHandler]] = None,
    static_files: t.Optional[t.Dict[str, t.Union[str, t.Tuple[str, str]]]] = None,
    passthrough_errors: bool = False,
    ssl_context: t.Optional[_TSSLContextArg] = None,
) -> None:
    """Serve a WSGI application with the built-in development server.

    Optional helpers (reloader, debugger, static files, TLS) are
    switched on through the keyword arguments.

    .. warning::

        Do not use the development server when deploying to production.
        It is intended for use only during local development. It is not
        designed to be particularly efficient, stable, or secure.

    :param hostname: The host to bind to, for example ``'localhost'``.
        Can be a domain, IPv4 or IPv6 address, or file path starting
        with ``unix://`` for a Unix socket.
    :param port: The port to bind to, for example ``8080``. Using ``0``
        tells the OS to pick a random free port.
    :param application: The WSGI application to run.
    :param use_reloader: Use a reloader process to restart the server
        process when files are changed.
    :param use_debugger: Use Werkzeug's debugger, which will show
        formatted tracebacks on unhandled exceptions.
    :param use_evalex: Make the debugger interactive. A Python terminal
        can be opened for any frame in the traceback. Some protection is
        provided by requiring a PIN, but this should never be enabled
        on a publicly visible server.
    :param extra_files: The reloader will watch these files for changes
        in addition to Python modules. For example, watch a
        configuration file.
    :param exclude_patterns: The reloader will ignore changes to any
        files matching these :mod:`fnmatch` patterns. For example,
        ignore cache files.
    :param reloader_interval: How often the reloader tries to check for
        changes.
    :param reloader_type: The reloader to use. The ``'stat'`` reloader
        is built in, but may require significant CPU to watch files. The
        ``'watchdog'`` reloader is much more efficient but requires
        installing the ``watchdog`` package first.
    :param threaded: Handle concurrent requests using threads. Cannot be
        used with ``processes``.
    :param processes: Handle concurrent requests using up to this number
        of processes. Cannot be used with ``threaded``.
    :param request_handler: Use a different
        :class:`~BaseHTTPServer.BaseHTTPRequestHandler` subclass to
        handle requests.
    :param static_files: A dict mapping URL prefixes to directories to
        serve static files from using
        :class:`~werkzeug.middleware.SharedDataMiddleware`.
    :param passthrough_errors: Don't catch unhandled exceptions at the
        server level, let the server crash instead. If ``use_debugger``
        is enabled, the debugger will still catch such errors.
    :param ssl_context: Configure TLS to serve over HTTPS. Can be an
        :class:`ssl.SSLContext` object, a ``(cert_file, key_file)``
        tuple to create a typical context, or the string ``'adhoc'`` to
        generate a temporary self-signed certificate.

    .. versionchanged:: 2.1
        Instructions are shown for dealing with an "address already in
        use" error.

    .. versionchanged:: 2.1
        Running on ``0.0.0.0`` or ``::`` shows the loopback IP in
        addition to a real IP.

    .. versionchanged:: 2.1
        The command-line interface was removed.

    .. versionchanged:: 2.0
        Running on ``0.0.0.0`` or ``::`` shows a real IP address that
        was bound as well as a warning not to run the development server
        in production.

    .. versionchanged:: 2.0
        The ``exclude_patterns`` parameter was added.

    .. versionchanged:: 0.15
        Bind to a Unix socket by passing a ``hostname`` that starts with
        ``unix://``.

    .. versionchanged:: 0.10
        Improved the reloader and added support for changing the backend
        through the ``reloader_type`` parameter.

    .. versionchanged:: 0.9
        A command-line interface was added.

    .. versionchanged:: 0.8
        ``ssl_context`` can be a tuple of paths to the certificate and
        private key files.

    .. versionchanged:: 0.6
        The ``ssl_context`` parameter was added.

    .. versionchanged:: 0.5
        The ``static_files`` and ``passthrough_errors`` parameters were
        added.
    """
    if not isinstance(port, int):
        raise TypeError("port must be an integer")

    # Wrap the application from the inside out: static files first, then
    # the debugger around everything.
    if static_files:
        from .middleware.shared_data import SharedDataMiddleware

        application = SharedDataMiddleware(application, static_files)

    if use_debugger:
        from .debug import DebuggedApplication

        application = DebuggedApplication(application, evalex=use_evalex)

    if is_running_from_reloader():
        # The parent process already bound the socket and published its
        # descriptor through the environment.
        listen_fd = int(os.environ["WERKZEUG_SERVER_FD"])
    else:
        listener = prepare_socket(hostname, port)
        listen_fd = listener.fileno()
        # Detach so the socket object can be dropped without closing the
        # descriptor (and without a ResourceWarning); the server builds
        # its own socket from the descriptor.
        listener.detach()
        os.environ["WERKZEUG_SERVER_FD"] = str(listen_fd)

    srv = make_server(
        hostname,
        port,
        application,
        threaded,
        processes,
        request_handler,
        passthrough_errors,
        ssl_context,
        fd=listen_fd,
    )

    # Only the first process announces itself, not every reloaded child.
    if not is_running_from_reloader():
        srv.log_startup()
        _log("info", _ansi_style("Press CTRL+C to quit", "yellow"))

    if not use_reloader:
        srv.serve_forever()
        return

    from ._reloader import run_with_reloader

    run_with_reloader(
        srv.serve_forever,
        extra_files=extra_files,
        exclude_patterns=exclude_patterns,
        interval=reloader_interval,
        reloader_type=reloader_type,
    )
def stream_encode_multipart(
    data: t.Mapping[str, t.Any],
    use_tempfile: bool = True,
    threshold: int = 1024 * 500,
    boundary: t.Optional[str] = None,
    charset: str = "utf-8",
) -> t.Tuple[t.IO[bytes], int, str]:
    """Encode a mapping of form values into a multipart body.

    Values may be strings, file-like objects (anything with ``read``),
    or :class:`FileStorage` objects. The encoded body is written to a
    stream that is rewound before being returned.

    :param data: Mapping of field names to values or lists of values.
    :param use_tempfile: Move the body to a temporary file once it
        grows beyond ``threshold`` bytes.
    :param threshold: Maximum number of bytes kept in memory when
        ``use_tempfile`` is enabled.
    :param boundary: Multipart boundary; generated when ``None``.
    :param charset: Encoding applied to string values.
    :return: ``(stream, length, boundary)``.
    """
    if boundary is None:
        boundary = f"---------------WerkzeugFormPart_{time()}{random()}"

    out: t.IO[bytes] = BytesIO()
    buffered_bytes = 0
    spilled = False
    emit: t.Callable[[bytes], int]

    if not use_tempfile:
        emit = out.write
    else:

        def emit(payload: bytes) -> int:
            nonlocal out, buffered_bytes, spilled

            # Once on disk there is nothing left to track.
            if spilled:
                return out.write(payload)

            size = len(payload)

            if buffered_bytes + size > threshold:
                # Copy what was buffered so far to a real file and
                # continue writing there.
                disk = t.cast(t.IO[bytes], TemporaryFile("wb+"))
                disk.write(out.getvalue())  # type: ignore
                disk.write(payload)
                out = disk
                spilled = True
            else:
                out.write(payload)

            buffered_bytes += size
            return size

    encoder = MultipartEncoder(boundary.encode())
    emit(encoder.send_event(Preamble(data=b"")))

    for key, value in _iter_data(data):
        read = getattr(value, "read", None)

        if read is None:
            # Plain form field: stringify, then send header and value.
            text = value if isinstance(value, str) else str(value)
            emit(encoder.send_event(Field(name=key, headers=Headers())))
            emit(encoder.send_event(Data(data=text.encode(charset), more_data=False)))
            continue

        filename = getattr(value, "filename", getattr(value, "name", None))
        content_type = getattr(value, "content_type", None)

        if content_type is None:
            guessed = mimetypes.guess_type(filename)[0] if filename else None
            content_type = guessed or "application/octet-stream"

        headers = Headers([("Content-Type", content_type)])

        if filename is None:
            part = Field(name=key, headers=headers)
        else:
            part = File(name=key, filename=filename, headers=headers)

        emit(encoder.send_event(part))

        # Stream the file content in 16 KiB chunks.
        chunk = read(16384)

        while chunk:
            emit(encoder.send_event(Data(data=chunk, more_data=True)))
            chunk = read(16384)

    emit(encoder.send_event(Epilogue(data=b"")))

    length = out.tell()
    out.seek(0)
    return out, length, boundary
class _TestCookieHeaders:
    """Adapter that exposes a list of header pairs to cookielib."""

    def __init__(self, headers: t.Union[Headers, t.List[t.Tuple[str, str]]]) -> None:
        self.headers = headers

    def getheaders(self, name: str) -> t.Iterable[str]:
        """Return every value whose header name matches ``name``,
        compared case-insensitively.
        """
        wanted = name.lower()
        return [value for key, value in self.headers if key.lower() == wanted]

    def get_all(
        self, name: str, default: t.Optional[t.Iterable[str]] = None
    ) -> t.Iterable[str]:
        """Like :meth:`getheaders`, but return ``default`` when no
        header matches.
        """
        found = self.getheaders(name)
        return found if found else default  # type: ignore
def _iter_data(data: t.Mapping[str, t.Any]) -> t.Iterator[t.Tuple[str, t.Any]]:
    """Yield every ``(key, value)`` pair of a mapping, flattening values
    that are lists. Similar to ``iter_multi_items``, except that only
    lists are expanded; tuples are passed through so they can describe
    files.
    """
    if isinstance(data, MultiDict):
        yield from data.items(multi=True)
        return

    for key, value in data.items():
        if not isinstance(value, list):
            yield key, value
            continue

        for item in value:
            yield key, item
+ - a `str`: The string is set as form data for the associated key. + - a file-like object: The object content is loaded in memory and then + handled like a regular `str` or a `bytes`. + + :param path: the path of the request. In the WSGI environment this will + end up as `PATH_INFO`. If the `query_string` is not defined + and there is a question mark in the `path` everything after + it is used as query string. + :param base_url: the base URL is a URL that is used to extract the WSGI + URL scheme, host (server name + server port) and the + script root (`SCRIPT_NAME`). + :param query_string: an optional string or dict with URL parameters. + :param method: the HTTP method to use, defaults to `GET`. + :param input_stream: an optional input stream. Do not specify this and + `data`. As soon as an input stream is set you can't + modify :attr:`args` and :attr:`files` unless you + set the :attr:`input_stream` to `None` again. + :param content_type: The content type for the request. As of 0.5 you + don't have to provide this when specifying files + and form data via `data`. + :param content_length: The content length for the request. You don't + have to specify this when providing data via + `data`. + :param errors_stream: an optional error stream that is used for + `wsgi.errors`. Defaults to :data:`stderr`. + :param multithread: controls `wsgi.multithread`. Defaults to `False`. + :param multiprocess: controls `wsgi.multiprocess`. Defaults to `False`. + :param run_once: controls `wsgi.run_once`. Defaults to `False`. + :param headers: an optional list or :class:`Headers` object of headers. + :param data: a string or dict of form data or a file-object. + See explanation above. + :param json: An object to be serialized and assigned to ``data``. + Defaults the content type to ``"application/json"``. + Serialized with the function assigned to :attr:`json_dumps`. + :param environ_base: an optional dict of environment defaults. 
+ :param environ_overrides: an optional dict of environment overrides. + :param charset: the charset used to encode string data. + :param auth: An authorization object to use for the + ``Authorization`` header value. A ``(username, password)`` tuple + is a shortcut for ``Basic`` authorization. + + .. versionchanged:: 2.1 + ``CONTENT_TYPE`` and ``CONTENT_LENGTH`` are not duplicated as + header keys in the environ. + + .. versionchanged:: 2.0 + ``REQUEST_URI`` and ``RAW_URI`` is the full raw URI including + the query string, not only the path. + + .. versionchanged:: 2.0 + The default :attr:`request_class` is ``Request`` instead of + ``BaseRequest``. + + .. versionadded:: 2.0 + Added the ``auth`` parameter. + + .. versionadded:: 0.15 + The ``json`` param and :meth:`json_dumps` method. + + .. versionadded:: 0.15 + The environ has keys ``REQUEST_URI`` and ``RAW_URI`` containing + the path before percent-decoding. This is not part of the WSGI + PEP, but many WSGI servers include it. + + .. versionchanged:: 0.6 + ``path`` and ``base_url`` can now be unicode strings that are + encoded with :func:`iri_to_uri`. + """ + + #: the server protocol to use. defaults to HTTP/1.1 + server_protocol = "HTTP/1.1" + + #: the wsgi version to use. defaults to (1, 0) + wsgi_version = (1, 0) + + #: The default request class used by :meth:`get_request`. + request_class = Request + + import json + + #: The serialization function used when ``json`` is passed. 
def __init__(
    self,
    path: str = "/",
    base_url: t.Optional[str] = None,
    query_string: t.Optional[t.Union[t.Mapping[str, str], str]] = None,
    method: str = "GET",
    input_stream: t.Optional[t.IO[bytes]] = None,
    content_type: t.Optional[str] = None,
    content_length: t.Optional[int] = None,
    errors_stream: t.Optional[t.IO[str]] = None,
    multithread: bool = False,
    multiprocess: bool = False,
    run_once: bool = False,
    headers: t.Optional[t.Union[Headers, t.Iterable[t.Tuple[str, str]]]] = None,
    data: t.Optional[
        t.Union[t.IO[bytes], str, bytes, t.Mapping[str, t.Any]]
    ] = None,
    environ_base: t.Optional[t.Mapping[str, t.Any]] = None,
    environ_overrides: t.Optional[t.Mapping[str, t.Any]] = None,
    charset: str = "utf-8",
    mimetype: t.Optional[str] = None,
    json: t.Optional[t.Mapping[str, t.Any]] = None,
    auth: t.Optional[t.Union[Authorization, t.Tuple[str, str]]] = None,
) -> None:
    """Store the request description on the builder.

    The parameters are described in the class docstring.

    :raises ValueError: if ``path`` contains a query string and
        ``query_string`` is also given.
    :raises TypeError: if both ``json`` and ``data`` are given, or if
        non-empty ``data`` is combined with ``input_stream``.
    """
    path_s = _make_encode_wrapper(path)
    # A query string may come from the path or from the argument, not both.
    if query_string is not None and path_s("?") in path:
        raise ValueError("Query string is defined in the path and as an argument")
    request_uri = url_parse(path)
    if query_string is None and path_s("?") in path:
        query_string = request_uri.query
    self.charset = charset
    self.path = iri_to_uri(request_uri.path)
    # The unmodified path argument is kept as the request URI.
    self.request_uri = path
    if base_url is not None:
        base_url = url_fix(iri_to_uri(base_url, charset), charset)
    # The base_url setter splits the value into url_scheme, host and
    # script_root (defaults are used for None).
    self.base_url = base_url  # type: ignore
    # A string is stored verbatim as the query string; anything else is
    # normalized to a MultiDict and stored as args.
    if isinstance(query_string, (bytes, str)):
        self.query_string = query_string
    else:
        if query_string is None:
            query_string = MultiDict()
        elif not isinstance(query_string, MultiDict):
            query_string = MultiDict(query_string)
        self.args = query_string
    self.method = method
    if headers is None:
        headers = Headers()
    elif not isinstance(headers, Headers):
        headers = Headers(headers)
    self.headers = headers
    # content_type is reflected into the headers, so only override an
    # existing Content-Type header when a value was passed explicitly.
    if content_type is not None:
        self.content_type = content_type
    if errors_stream is None:
        errors_stream = sys.stderr
    self.errors_stream = errors_stream
    self.multithread = multithread
    self.multiprocess = multiprocess
    self.run_once = run_once
    self.environ_base = environ_base
    self.environ_overrides = environ_overrides
    # The input_stream setter also resets the cached form and files.
    self.input_stream = input_stream
    # Passing None removes any Content-Length header.
    self.content_length = content_length
    self.closed = False

    if auth is not None:
        # A (username, password) tuple is shorthand for basic auth.
        if isinstance(auth, tuple):
            auth = Authorization(
                "basic", {"username": auth[0], "password": auth[1]}
            )

        self.headers.set("Authorization", auth.to_header())

    if json is not None:
        if data is not None:
            raise TypeError("can't provide both json and data")

        # From here on the serialized JSON is handled like string data.
        data = self.json_dumps(json)

        if self.content_type is None:
            self.content_type = "application/json"

    # Falsy data (None, empty string/bytes/mapping) is ignored entirely.
    if data:
        if input_stream is not None:
            raise TypeError("can't provide input stream and data")
        # File-like data is read fully into memory.
        if hasattr(data, "read"):
            data = data.read()  # type: ignore
        if isinstance(data, str):
            data = data.encode(self.charset)
        if isinstance(data, bytes):
            self.input_stream = BytesIO(data)
            # Keep an explicitly passed content length.
            if self.content_length is None:
                self.content_length = len(data)
        else:
            # Mapping data: tuples, dicts and file-like values become
            # files, everything else becomes form values.
            for key, value in _iter_data(data):  # type: ignore
                if isinstance(value, (tuple, dict)) or hasattr(value, "read"):
                    self._add_file_from_data(key, value)
                else:
                    self.form.setlistdefault(key).append(value)

    # Applied last, so an explicit mimetype replaces any content type
    # set above.
    if mimetype is not None:
        self.mimetype = mimetype
def _add_file_from_data(
    self,
    key: str,
    value: t.Union[
        t.IO[bytes], t.Tuple[t.IO[bytes], str], t.Tuple[t.IO[bytes], str, str]
    ],
) -> None:
    """Register one entry of the ``data`` dict as an uploaded file.

    A tuple is unpacked into the positional arguments of
    :meth:`~FileMultiDict.add_file`; any other value is passed as the
    single file argument.
    """
    extra_args = value if isinstance(value, tuple) else (value,)
    self.files.add_file(key, *extra_args)
def _get_form(self, name: str, storage: t.Type[_TAnyMultiDict]) -> _TAnyMultiDict:
    """Shared getter behind the :attr:`form` and :attr:`files`
    properties. The storage object is created on first access.

    :param name: Name of the internal cached attribute.
    :param storage: Storage class used for the data.
    :raises AttributeError: if an input stream is set.
    """
    if self.input_stream is not None:
        raise AttributeError("an input stream is defined")

    cached = getattr(self, name)

    if cached is not None:
        return cached  # type: ignore

    created = storage()
    setattr(self, name, created)
    return created  # type: ignore
@property
def server_port(self) -> int:
    """The server port as an integer (read-only, use :attr:`host` to
    set). Falls back to the scheme's default port when :attr:`host`
    carries no usable port.
    """
    _, colon, port_text = self.host.partition(":")

    if colon:
        try:
            return int(port_text)
        except ValueError:
            # Not a number: use the scheme default below.
            pass

    return 443 if self.url_scheme == "https" else 80
+ """ + input_stream = self.input_stream + content_length = self.content_length + + mimetype = self.mimetype + content_type = self.content_type + + if input_stream is not None: + start_pos = input_stream.tell() + input_stream.seek(0, 2) + end_pos = input_stream.tell() + input_stream.seek(start_pos) + content_length = end_pos - start_pos + elif mimetype == "multipart/form-data": + input_stream, content_length, boundary = stream_encode_multipart( + CombinedMultiDict([self.form, self.files]), charset=self.charset + ) + content_type = f'{mimetype}; boundary="{boundary}"' + elif mimetype == "application/x-www-form-urlencoded": + form_encoded = url_encode(self.form, charset=self.charset).encode("ascii") + content_length = len(form_encoded) + input_stream = BytesIO(form_encoded) + else: + input_stream = BytesIO() + + result: "WSGIEnvironment" = {} + if self.environ_base: + result.update(self.environ_base) + + def _path_encode(x: str) -> str: + return _wsgi_encoding_dance(url_unquote(x, self.charset), self.charset) + + raw_uri = _wsgi_encoding_dance(self.request_uri, self.charset) + result.update( + { + "REQUEST_METHOD": self.method, + "SCRIPT_NAME": _path_encode(self.script_root), + "PATH_INFO": _path_encode(self.path), + "QUERY_STRING": _wsgi_encoding_dance(self.query_string, self.charset), + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": raw_uri, + # Non-standard, added by gunicorn + "RAW_URI": raw_uri, + "SERVER_NAME": self.server_name, + "SERVER_PORT": str(self.server_port), + "HTTP_HOST": self.host, + "SERVER_PROTOCOL": self.server_protocol, + "wsgi.version": self.wsgi_version, + "wsgi.url_scheme": self.url_scheme, + "wsgi.input": input_stream, + "wsgi.errors": self.errors_stream, + "wsgi.multithread": self.multithread, + "wsgi.multiprocess": self.multiprocess, + "wsgi.run_once": self.run_once, + } + ) + + headers = self.headers.copy() + # Don't send these as headers, they're part of the environ. 
def get_request(self, cls: t.Optional[t.Type[Request]] = None) -> Request:
    """Build the environ and wrap it in a request object.

    :param cls: The request wrapper to use. Defaults to
        :attr:`request_class`.
    """
    request_type = self.request_class if cls is None else cls
    environ = self.get_environ()
    return request_type(environ)
def __init__(
    self,
    application: "WSGIApplication",
    response_wrapper: t.Optional[t.Type["Response"]] = None,
    use_cookies: bool = True,
    allow_subdomain_redirects: bool = False,
) -> None:
    """Create a test client for ``application``.

    :param application: The WSGI application to send requests to.
    :param response_wrapper: Response class for returned responses.
        ``None`` and ``Response`` select :class:`TestResponse`. A class
        that is already a :class:`TestResponse` subclass is used as is.
        Any other class is combined with :class:`TestResponse` into a
        new ``WrapperTestResponse`` class.
    :param use_cookies: Keep a cookie jar and send stored cookies with
        subsequent requests.
    :param allow_subdomain_redirects: Allow following redirects to
        subdomains of the requested server name.
    """
    self.application = application

    if response_wrapper in {None, Response}:
        response_wrapper = TestResponse
    elif not issubclass(response_wrapper, TestResponse):
        # ``response_wrapper`` is a class, so this must be a subclass
        # check. An ``isinstance`` check is always false for a class, and
        # mixing ``TestResponse`` with itself or with one of its
        # subclasses raises ``TypeError`` (duplicate base class or
        # inconsistent MRO).
        response_wrapper = type(
            "WrapperTestResponse",
            (TestResponse, response_wrapper),  # type: ignore
            {},
        )

    self.response_wrapper = t.cast(t.Type["TestResponse"], response_wrapper)

    if use_cookies:
        self.cookie_jar: t.Optional[_TestCookieJar] = _TestCookieJar()
    else:
        self.cookie_jar = None

    self.allow_subdomain_redirects = allow_subdomain_redirects
+ """ + assert self.cookie_jar is not None, "cookies disabled" + header = dump_cookie( + key, + value, + max_age, + expires, + path, + domain, + secure, + httponly, + charset, + samesite=samesite, + ) + environ = create_environ(path, base_url=f"http://{server_name}") + headers = [("Set-Cookie", header)] + self.cookie_jar.extract_wsgi(environ, headers) + + def delete_cookie( + self, + server_name: str, + key: str, + path: str = "/", + domain: t.Optional[str] = None, + secure: bool = False, + httponly: bool = False, + samesite: t.Optional[str] = None, + ) -> None: + """Deletes a cookie in the test client.""" + self.set_cookie( + server_name, + key, + expires=0, + max_age=0, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + samesite=samesite, + ) + + def run_wsgi_app( + self, environ: "WSGIEnvironment", buffered: bool = False + ) -> t.Tuple[t.Iterable[bytes], str, Headers]: + """Runs the wrapped WSGI app with the given environment. + + :meta private: + """ + if self.cookie_jar is not None: + self.cookie_jar.inject_wsgi(environ) + + rv = run_wsgi_app(self.application, environ, buffered=buffered) + + if self.cookie_jar is not None: + self.cookie_jar.extract_wsgi(environ, rv[2]) + + return rv + + def resolve_redirect( + self, response: "TestResponse", buffered: bool = False + ) -> "TestResponse": + """Perform a new request to the location given by the redirect + response to the previous request. + + :meta private: + """ + scheme, netloc, path, qs, anchor = url_parse(response.location) + builder = EnvironBuilder.from_environ( + response.request.environ, path=path, query_string=qs + ) + + to_name_parts = netloc.split(":", 1)[0].split(".") + from_name_parts = builder.server_name.split(".") + + if to_name_parts != [""]: + # The new location has a host, use it for the base URL. + builder.url_scheme = scheme + builder.host = netloc + else: + # A local redirect with autocorrect_location_header=False + # doesn't have a host, so use the request's host. 
+ to_name_parts = from_name_parts + + # Explain why a redirect to a different server name won't be followed. + if to_name_parts != from_name_parts: + if to_name_parts[-len(from_name_parts) :] == from_name_parts: + if not self.allow_subdomain_redirects: + raise RuntimeError("Following subdomain redirects is not enabled.") + else: + raise RuntimeError("Following external redirects is not supported.") + + path_parts = path.split("/") + root_parts = builder.script_root.split("/") + + if path_parts[: len(root_parts)] == root_parts: + # Strip the script root from the path. + builder.path = path[len(builder.script_root) :] + else: + # The new location is not under the script root, so use the + # whole path and clear the previous root. + builder.path = path + builder.script_root = "" + + # Only 307 and 308 preserve all of the original request. + if response.status_code not in {307, 308}: + # HEAD is preserved, everything else becomes GET. + if builder.method != "HEAD": + builder.method = "GET" + + # Clear the body and the headers that describe it. + + if builder.input_stream is not None: + builder.input_stream.close() + builder.input_stream = None + + builder.content_type = None + builder.content_length = None + builder.headers.pop("Transfer-Encoding", None) + + return self.open(builder, buffered=buffered) + + def open( + self, + *args: t.Any, + buffered: bool = False, + follow_redirects: bool = False, + **kwargs: t.Any, + ) -> "TestResponse": + """Generate an environ dict from the given arguments, make a + request to the application using it, and return the response. + + :param args: Passed to :class:`EnvironBuilder` to create the + environ for the request. If a single arg is passed, it can + be an existing :class:`EnvironBuilder` or an environ dict. + :param buffered: Convert the iterator returned by the app into + a list. If the iterator has a ``close()`` method, it is + called automatically. 
+ :param follow_redirects: Make additional requests to follow HTTP + redirects until a non-redirect status is returned. + :attr:`TestResponse.history` lists the intermediate + responses. + + .. versionchanged:: 2.1 + Removed the ``as_tuple`` parameter. + + .. versionchanged:: 2.0 + ``as_tuple`` is deprecated and will be removed in Werkzeug + 2.1. Use :attr:`TestResponse.request` and + ``request.environ`` instead. + + .. versionchanged:: 2.0 + The request input stream is closed when calling + ``response.close()``. Input streams for redirects are + automatically closed. + + .. versionchanged:: 0.5 + If a dict is provided as file in the dict for the ``data`` + parameter the content type has to be called ``content_type`` + instead of ``mimetype``. This change was made for + consistency with :class:`werkzeug.FileWrapper`. + + .. versionchanged:: 0.5 + Added the ``follow_redirects`` parameter. + """ + request: t.Optional["Request"] = None + + if not kwargs and len(args) == 1: + arg = args[0] + + if isinstance(arg, EnvironBuilder): + request = arg.get_request() + elif isinstance(arg, dict): + request = EnvironBuilder.from_environ(arg).get_request() + elif isinstance(arg, Request): + request = arg + + if request is None: + builder = EnvironBuilder(*args, **kwargs) + + try: + request = builder.get_request() + finally: + builder.close() + + response = self.run_wsgi_app(request.environ, buffered=buffered) + response = self.response_wrapper(*response, request=request) + + redirects = set() + history: t.List["TestResponse"] = [] + + if not follow_redirects: + return response + + while response.status_code in { + 301, + 302, + 303, + 305, + 307, + 308, + }: + # Exhaust intermediate response bodies to ensure middleware + # that returns an iterator runs any cleanup code. 
+ if not buffered: + response.make_sequence() + response.close() + + new_redirect_entry = (response.location, response.status_code) + + if new_redirect_entry in redirects: + raise ClientRedirectError( + f"Loop detected: A {response.status_code} redirect" + f" to {response.location} was already made." + ) + + redirects.add(new_redirect_entry) + response.history = tuple(history) + history.append(response) + response = self.resolve_redirect(response, buffered=buffered) + else: + # This is the final request after redirects. + response.history = tuple(history) + # Close the input stream when closing the response, in case + # the input is an open temporary file. + response.call_on_close(request.input_stream.close) + return response + + def get(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``GET``.""" + kw["method"] = "GET" + return self.open(*args, **kw) + + def post(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``POST``.""" + kw["method"] = "POST" + return self.open(*args, **kw) + + def put(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``PUT``.""" + kw["method"] = "PUT" + return self.open(*args, **kw) + + def delete(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``DELETE``.""" + kw["method"] = "DELETE" + return self.open(*args, **kw) + + def patch(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``PATCH``.""" + kw["method"] = "PATCH" + return self.open(*args, **kw) + + def options(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``OPTIONS``.""" + kw["method"] = "OPTIONS" + return self.open(*args, **kw) + + def head(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``HEAD``.""" + kw["method"] = "HEAD" + return 
self.open(*args, **kw) + + def trace(self, *args: t.Any, **kw: t.Any) -> "TestResponse": + """Call :meth:`open` with ``method`` set to ``TRACE``.""" + kw["method"] = "TRACE" + return self.open(*args, **kw) + + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.application!r}>" + + +def create_environ(*args: t.Any, **kwargs: t.Any) -> "WSGIEnvironment": + """Create a new WSGI environ dict based on the values passed. The first + parameter should be the path of the request which defaults to '/'. The + second one can either be an absolute path (in that case the host is + localhost:80) or a full path to the request with scheme, netloc port and + the path to the script. + + This accepts the same arguments as the :class:`EnvironBuilder` + constructor. + + .. versionchanged:: 0.5 + This function is now a thin wrapper over :class:`EnvironBuilder` which + was added in 0.5. The `headers`, `environ_base`, `environ_overrides` + and `charset` parameters were added. + """ + builder = EnvironBuilder(*args, **kwargs) + + try: + return builder.get_environ() + finally: + builder.close() + + +def run_wsgi_app( + app: "WSGIApplication", environ: "WSGIEnvironment", buffered: bool = False +) -> t.Tuple[t.Iterable[bytes], str, Headers]: + """Return a tuple in the form (app_iter, status, headers) of the + application output. This works best if you pass it an application that + returns an iterator all the time. + + Sometimes applications may use the `write()` callable returned + by the `start_response` function. This tries to resolve such edge + cases automatically. But if you don't get the expected output you + should set `buffered` to `True` which enforces buffering. + + If passed an invalid WSGI application the behavior of this function is + undefined. Never pass non-conforming WSGI applications to this function. + + :param app: the application to execute. + :param buffered: set to `True` to enforce buffering. 
+ :return: tuple in the form ``(app_iter, status, headers)`` + """ + # Copy environ to ensure any mutations by the app (ProxyFix, for + # example) don't affect subsequent requests (such as redirects). + environ = _get_environ(environ).copy() + status: str + response: t.Optional[t.Tuple[str, t.List[t.Tuple[str, str]]]] = None + buffer: t.List[bytes] = [] + + def start_response(status, headers, exc_info=None): # type: ignore + nonlocal response + + if exc_info: + try: + raise exc_info[1].with_traceback(exc_info[2]) + finally: + exc_info = None + + response = (status, headers) + return buffer.append + + app_rv = app(environ, start_response) + close_func = getattr(app_rv, "close", None) + app_iter: t.Iterable[bytes] = iter(app_rv) + + # when buffering we emit the close call early and convert the + # application iterator into a regular list + if buffered: + try: + app_iter = list(app_iter) + finally: + if close_func is not None: + close_func() + + # otherwise we iterate the application iter until we have a response, chain + # the already received data with the already collected data and wrap it in + # a new `ClosingIterator` if we need to restore a `close` callable from the + # original return value. + else: + for item in app_iter: + buffer.append(item) + + if response is not None: + break + + if buffer: + app_iter = chain(buffer, app_iter) + + if close_func is not None and app_iter is not app_rv: + app_iter = ClosingIterator(app_iter, close_func) + + status, headers = response # type: ignore + return app_iter, status, Headers(headers) + + +class TestResponse(Response): + """:class:`~werkzeug.wrappers.Response` subclass that provides extra + information about requests made with the test :class:`Client`. + + Test client requests will always return an instance of this class. + If a custom response class is passed to the client, it is + subclassed along with this to support test information. 
+ + If the test request included large files, or if the application is + serving a file, call :meth:`close` to close any open files and + prevent Python showing a ``ResourceWarning``. + + .. versionchanged:: 2.2 + Set the ``default_mimetype`` to None to prevent a mimetype being + assumed if missing. + + .. versionchanged:: 2.1 + Removed deprecated behavior for treating the response instance + as a tuple. + + .. versionadded:: 2.0 + Test client methods always return instances of this class. + """ + + default_mimetype = None + # Don't assume a mimetype, instead use whatever the response provides + + request: Request + """A request object with the environ used to make the request that + resulted in this response. + """ + + history: t.Tuple["TestResponse", ...] + """A list of intermediate responses. Populated when the test request + is made with ``follow_redirects`` enabled. + """ + + # Tell Pytest to ignore this, it's not a test class. + __test__ = False + + def __init__( + self, + response: t.Iterable[bytes], + status: str, + headers: Headers, + request: Request, + history: t.Tuple["TestResponse"] = (), # type: ignore + **kwargs: t.Any, + ) -> None: + super().__init__(response, status, headers, **kwargs) + self.request = request + self.history = history + self._compat_tuple = response, status, headers + + @cached_property + def text(self) -> str: + """The response data as text. A shortcut for + ``response.get_data(as_text=True)``. + + .. versionadded:: 2.1 + """ + return self.get_data(as_text=True) diff --git a/src/werkzeug/testapp.py b/src/werkzeug/testapp.py new file mode 100644 index 0000000..0d7ffbb --- /dev/null +++ b/src/werkzeug/testapp.py @@ -0,0 +1,241 @@ +"""A small application that can be used to test a WSGI server and check +it for WSGI compliance. +""" +import base64 +import os +import sys +import typing as t +from textwrap import wrap + +from markupsafe import escape + +from . 
import __version__ as _werkzeug_version +from .wrappers.request import Request +from .wrappers.response import Response + +if t.TYPE_CHECKING: + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIEnvironment + + +logo = Response( + base64.b64decode( + """ +R0lGODlhoACgAOMIAAEDACwpAEpCAGdgAJaKAM28AOnVAP3rAP///////// +//////////////////////yH5BAEKAAgALAAAAACgAKAAAAT+EMlJq704680R+F0ojmRpnuj0rWnrv +nB8rbRs33gu0bzu/0AObxgsGn3D5HHJbCUFyqZ0ukkSDlAidctNFg7gbI9LZlrBaHGtzAae0eloe25 +7w9EDOX2fst/xenyCIn5/gFqDiVVDV4aGeYiKkhSFjnCQY5OTlZaXgZp8nJ2ekaB0SQOjqphrpnOiq +ncEn65UsLGytLVmQ6m4sQazpbtLqL/HwpnER8bHyLrLOc3Oz8PRONPU1crXN9na263dMt/g4SzjMeX +m5yDpLqgG7OzJ4u8lT/P69ej3JPn69kHzN2OIAHkB9RUYSFCFQYQJFTIkCDBiwoXWGnowaLEjRm7+G +p9A7Hhx4rUkAUaSLJlxHMqVMD/aSycSZkyTplCqtGnRAM5NQ1Ly5OmzZc6gO4d6DGAUKA+hSocWYAo +SlM6oUWX2O/o0KdaVU5vuSQLAa0ADwQgMEMB2AIECZhVSnTno6spgbtXmHcBUrQACcc2FrTrWS8wAf +78cMFBgwIBgbN+qvTt3ayikRBk7BoyGAGABAdYyfdzRQGV3l4coxrqQ84GpUBmrdR3xNIDUPAKDBSA +ADIGDhhqTZIWaDcrVX8EsbNzbkvCOxG8bN5w8ly9H8jyTJHC6DFndQydbguh2e/ctZJFXRxMAqqPVA +tQH5E64SPr1f0zz7sQYjAHg0In+JQ11+N2B0XXBeeYZgBZFx4tqBToiTCPv0YBgQv8JqA6BEf6RhXx +w1ENhRBnWV8ctEX4Ul2zc3aVGcQNC2KElyTDYyYUWvShdjDyMOGMuFjqnII45aogPhz/CodUHFwaDx +lTgsaOjNyhGWJQd+lFoAGk8ObghI0kawg+EV5blH3dr+digkYuAGSaQZFHFz2P/cTaLmhF52QeSb45 +Jwxd+uSVGHlqOZpOeJpCFZ5J+rkAkFjQ0N1tah7JJSZUFNsrkeJUJMIBi8jyaEKIhKPomnC91Uo+NB +yyaJ5umnnpInIFh4t6ZSpGaAVmizqjpByDegYl8tPE0phCYrhcMWSv+uAqHfgH88ak5UXZmlKLVJhd +dj78s1Fxnzo6yUCrV6rrDOkluG+QzCAUTbCwf9SrmMLzK6p+OPHx7DF+bsfMRq7Ec61Av9i6GLw23r +idnZ+/OO0a99pbIrJkproCQMA17OPG6suq3cca5ruDfXCCDoS7BEdvmJn5otdqscn+uogRHHXs8cbh +EIfYaDY1AkrC0cqwcZpnM6ludx72x0p7Fo/hZAcpJDjax0UdHavMKAbiKltMWCF3xxh9k25N/Viud8 +ba78iCvUkt+V6BpwMlErmcgc502x+u1nSxJSJP9Mi52awD1V4yB/QHONsnU3L+A/zR4VL/indx/y64 +gqcj+qgTeweM86f0Qy1QVbvmWH1D9h+alqg254QD8HJXHvjQaGOqEqC22M54PcftZVKVSQG9jhkv7C +JyTyDoAJfPdu8v7DRZAxsP/ky9MJ3OL36DJfCFPASC3/aXlfLOOON9vGZZHydGf8LnxYJuuVIbl83y 
+Az5n/RPz07E+9+zw2A2ahz4HxHo9Kt79HTMx1Q7ma7zAzHgHqYH0SoZWyTuOLMiHwSfZDAQTn0ajk9 +YQqodnUYjByQZhZak9Wu4gYQsMyEpIOAOQKze8CmEF45KuAHTvIDOfHJNipwoHMuGHBnJElUoDmAyX +c2Qm/R8Ah/iILCCJOEokGowdhDYc/yoL+vpRGwyVSCWFYZNljkhEirGXsalWcAgOdeAdoXcktF2udb +qbUhjWyMQxYO01o6KYKOr6iK3fE4MaS+DsvBsGOBaMb0Y6IxADaJhFICaOLmiWTlDAnY1KzDG4ambL +cWBA8mUzjJsN2KjSaSXGqMCVXYpYkj33mcIApyhQf6YqgeNAmNvuC0t4CsDbSshZJkCS1eNisKqlyG +cF8G2JeiDX6tO6Mv0SmjCa3MFb0bJaGPMU0X7c8XcpvMaOQmCajwSeY9G0WqbBmKv34DsMIEztU6Y2 +KiDlFdt6jnCSqx7Dmt6XnqSKaFFHNO5+FmODxMCWBEaco77lNDGXBM0ECYB/+s7nKFdwSF5hgXumQe +EZ7amRg39RHy3zIjyRCykQh8Zo2iviRKyTDn/zx6EefptJj2Cw+Ep2FSc01U5ry4KLPYsTyWnVGnvb +UpyGlhjBUljyjHhWpf8OFaXwhp9O4T1gU9UeyPPa8A2l0p1kNqPXEVRm1AOs1oAGZU596t6SOR2mcB +Oco1srWtkaVrMUzIErrKri85keKqRQYX9VX0/eAUK1hrSu6HMEX3Qh2sCh0q0D2CtnUqS4hj62sE/z +aDs2Sg7MBS6xnQeooc2R2tC9YrKpEi9pLXfYXp20tDCpSP8rKlrD4axprb9u1Df5hSbz9QU0cRpfgn +kiIzwKucd0wsEHlLpe5yHXuc6FrNelOl7pY2+11kTWx7VpRu97dXA3DO1vbkhcb4zyvERYajQgAADs +=""" + ), + mimetype="image/png", +) + + +TEMPLATE = """\ + + +WSGI Information + +
    + +

    WSGI Information

    +

    + This page displays all available information about the WSGI server and + the underlying Python interpreter. +

    Python Interpreter

    + + + + + + +
    Python Version + %(python_version)s +
    Platform + %(platform)s [%(os)s] +
    API Version + %(api_version)s +
    Byteorder + %(byteorder)s +
    Werkzeug Version + %(werkzeug_version)s +
    +

    WSGI Environment

    + %(wsgi_env)s
    +

    Installed Eggs

    +

    + The following python packages were installed on the system as + Python eggs: +

      %(python_eggs)s
    +

    System Path

    +

    + The following paths are the current contents of the load path. The + following entries are looked up for Python packages. Note that not + all items in this path are folders. Gray and underlined items are + entries pointing to invalid resources or used by custom import hooks + such as the zip importer. +

    + Items with a bright background were expanded for display from a relative + path. If you encounter such paths in the output you might want to check + your setup as relative paths are usually problematic in multithreaded + environments. +

      %(sys_path)s
    +
    +""" + + +def iter_sys_path() -> t.Iterator[t.Tuple[str, bool, bool]]: + if os.name == "posix": + + def strip(x: str) -> str: + prefix = os.path.expanduser("~") + if x.startswith(prefix): + x = f"~{x[len(prefix) :]}" + return x + + else: + + def strip(x: str) -> str: + return x + + cwd = os.path.abspath(os.getcwd()) + for item in sys.path: + path = os.path.join(cwd, item or os.path.curdir) + yield strip(os.path.normpath(path)), not os.path.isdir(path), path != item + + +def render_testapp(req: Request) -> bytes: + try: + import pkg_resources + except ImportError: + eggs: t.Iterable[t.Any] = () + else: + eggs = sorted( + pkg_resources.working_set, + key=lambda x: x.project_name.lower(), # type: ignore + ) + python_eggs = [] + for egg in eggs: + try: + version = egg.version + except (ValueError, AttributeError): + version = "unknown" + python_eggs.append( + f"
  • {escape(egg.project_name)} [{escape(version)}]" + ) + + wsgi_env = [] + sorted_environ = sorted(req.environ.items(), key=lambda x: repr(x[0]).lower()) + for key, value in sorted_environ: + value = "".join(wrap(str(escape(repr(value))))) + wsgi_env.append(f"{escape(key)}{value}") + + sys_path = [] + for item, virtual, expanded in iter_sys_path(): + class_ = [] + if virtual: + class_.append("virtual") + if expanded: + class_.append("exp") + class_ = f' class="{" ".join(class_)}"' if class_ else "" + sys_path.append(f"{escape(item)}") + + return ( + TEMPLATE + % { + "python_version": "
    ".join(escape(sys.version).splitlines()), + "platform": escape(sys.platform), + "os": escape(os.name), + "api_version": sys.api_version, + "byteorder": sys.byteorder, + "werkzeug_version": _werkzeug_version, + "python_eggs": "\n".join(python_eggs), + "wsgi_env": "\n".join(wsgi_env), + "sys_path": "\n".join(sys_path), + } + ).encode("utf-8") + + +def test_app( + environ: "WSGIEnvironment", start_response: "StartResponse" +) -> t.Iterable[bytes]: + """Simple test application that dumps the environment. You can use + it to check if Werkzeug is working properly: + + .. sourcecode:: pycon + + >>> from werkzeug.serving import run_simple + >>> from werkzeug.testapp import test_app + >>> run_simple('localhost', 3000, test_app) + * Running on http://localhost:3000/ + + The application displays important information from the WSGI environment, + the Python interpreter and the installed libraries. + """ + req = Request(environ, populate_request=False) + if req.args.get("resource") == "logo": + response = logo + else: + response = Response(render_testapp(req), mimetype="text/html") + return response(environ, start_response) + + +if __name__ == "__main__": + from .serving import run_simple + + run_simple("localhost", 5000, test_app, use_reloader=True) diff --git a/src/werkzeug/urls.py b/src/werkzeug/urls.py new file mode 100644 index 0000000..67c08b0 --- /dev/null +++ b/src/werkzeug/urls.py @@ -0,0 +1,1067 @@ +"""Functions for working with URLs. + +Contains implementations of functions from :mod:`urllib.parse` that +handle bytes and strings. +""" +import codecs +import os +import re +import typing as t + +from ._internal import _check_str_tuple +from ._internal import _decode_idna +from ._internal import _encode_idna +from ._internal import _make_encode_wrapper +from ._internal import _to_str + +if t.TYPE_CHECKING: + from . 
import datastructures as ds + +# A regular expression for what a valid schema looks like +_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$") + +# Characters that are safe in any part of an URL. +_always_safe = frozenset( + bytearray( + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789" + b"-._~" + b"$!'()*+,;" # RFC3986 sub-delims set, not including query string delimiters &= + ) +) + +_hexdigits = "0123456789ABCDEFabcdef" +_hextobyte = { + f"{a}{b}".encode("ascii"): int(f"{a}{b}", 16) + for a in _hexdigits + for b in _hexdigits +} +_bytetohex = [f"%{char:02X}".encode("ascii") for char in range(256)] + + +class _URLTuple(t.NamedTuple): + scheme: str + netloc: str + path: str + query: str + fragment: str + + +class BaseURL(_URLTuple): + """Superclass of :py:class:`URL` and :py:class:`BytesURL`.""" + + __slots__ = () + _at: str + _colon: str + _lbracket: str + _rbracket: str + + def __str__(self) -> str: + return self.to_url() + + def replace(self, **kwargs: t.Any) -> "BaseURL": + """Return an URL with the same values, except for those parameters + given new values by whichever keyword arguments are specified.""" + return self._replace(**kwargs) + + @property + def host(self) -> t.Optional[str]: + """The host part of the URL if available, otherwise `None`. The + host is either the hostname or the IP address mentioned in the + URL. It will not contain the port. + """ + return self._split_host()[0] + + @property + def ascii_host(self) -> t.Optional[str]: + """Works exactly like :attr:`host` but will return a result that + is restricted to ASCII. If it finds a netloc that is not ASCII + it will attempt to idna decode it. This is useful for socket + operations when the URL might include internationalized characters. 
+ """ + rv = self.host + if rv is not None and isinstance(rv, str): + try: + rv = _encode_idna(rv) # type: ignore + except UnicodeError: + rv = rv.encode("ascii", "ignore") # type: ignore + return _to_str(rv, "ascii", "ignore") + + @property + def port(self) -> t.Optional[int]: + """The port in the URL as an integer if it was present, `None` + otherwise. This does not fill in default ports. + """ + try: + rv = int(_to_str(self._split_host()[1])) + if 0 <= rv <= 65535: + return rv + except (ValueError, TypeError): + pass + return None + + @property + def auth(self) -> t.Optional[str]: + """The authentication part in the URL if available, `None` + otherwise. + """ + return self._split_netloc()[0] + + @property + def username(self) -> t.Optional[str]: + """The username if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a string. + """ + rv = self._split_auth()[0] + if rv is not None: + return _url_unquote_legacy(rv) + return None + + @property + def raw_username(self) -> t.Optional[str]: + """The username if it was part of the URL, `None` otherwise. + Unlike :attr:`username` this one is not being decoded. + """ + return self._split_auth()[0] + + @property + def password(self) -> t.Optional[str]: + """The password if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a string. + """ + rv = self._split_auth()[1] + if rv is not None: + return _url_unquote_legacy(rv) + return None + + @property + def raw_password(self) -> t.Optional[str]: + """The password if it was part of the URL, `None` otherwise. + Unlike :attr:`password` this one is not being decoded. + """ + return self._split_auth()[1] + + def decode_query(self, *args: t.Any, **kwargs: t.Any) -> "ds.MultiDict[str, str]": + """Decodes the query part of the URL. Ths is a shortcut for + calling :func:`url_decode` on the query argument. The arguments and + keyword arguments are forwarded to :func:`url_decode` unchanged. 
+ """ + return url_decode(self.query, *args, **kwargs) + + def join(self, *args: t.Any, **kwargs: t.Any) -> "BaseURL": + """Joins this URL with another one. This is just a convenience + function for calling into :meth:`url_join` and then parsing the + return value again. + """ + return url_parse(url_join(self, *args, **kwargs)) + + def to_url(self) -> str: + """Returns a URL string or bytes depending on the type of the + information stored. This is just a convenience function + for calling :meth:`url_unparse` for this URL. + """ + return url_unparse(self) + + def encode_netloc(self) -> str: + """Encodes the netloc part to an ASCII safe URL as bytes.""" + rv = self.ascii_host or "" + if ":" in rv: + rv = f"[{rv}]" + port = self.port + if port is not None: + rv = f"{rv}:{port}" + auth = ":".join( + filter( + None, + [ + url_quote(self.raw_username or "", "utf-8", "strict", "/:%"), + url_quote(self.raw_password or "", "utf-8", "strict", "/:%"), + ], + ) + ) + if auth: + rv = f"{auth}@{rv}" + return rv + + def decode_netloc(self) -> str: + """Decodes the netloc part into a string.""" + rv = _decode_idna(self.host or "") + + if ":" in rv: + rv = f"[{rv}]" + port = self.port + if port is not None: + rv = f"{rv}:{port}" + auth = ":".join( + filter( + None, + [ + _url_unquote_legacy(self.raw_username or "", "/:%@"), + _url_unquote_legacy(self.raw_password or "", "/:%@"), + ], + ) + ) + if auth: + rv = f"{auth}@{rv}" + return rv + + def to_uri_tuple(self) -> "BaseURL": + """Returns a :class:`BytesURL` tuple that holds a URI. This will + encode all the information in the URL properly to ASCII using the + rules a web browser would follow. + + It's usually more interesting to directly call :meth:`iri_to_uri` which + will return a string. + """ + return url_parse(iri_to_uri(self)) + + def to_iri_tuple(self) -> "BaseURL": + """Returns a :class:`URL` tuple that holds a IRI. 
This will try + to decode as much information as possible in the URL without + losing information similar to how a web browser does it for the + URL bar. + + It's usually more interesting to directly call :meth:`uri_to_iri` which + will return a string. + """ + return url_parse(uri_to_iri(self)) + + def get_file_location( + self, pathformat: t.Optional[str] = None + ) -> t.Tuple[t.Optional[str], t.Optional[str]]: + """Returns a tuple with the location of the file in the form + ``(server, location)``. If the netloc is empty in the URL or + points to localhost, it's represented as ``None``. + + The `pathformat` by default is autodetection but needs to be set + when working with URLs of a specific system. The supported values + are ``'windows'`` when working with Windows or DOS paths and + ``'posix'`` when working with posix paths. + + If the URL does not point to a local file, the server and location + are both represented as ``None``. + + :param pathformat: The expected format of the path component. + Currently ``'windows'`` and ``'posix'`` are + supported. Defaults to ``None`` which is + autodetect. + """ + if self.scheme != "file": + return None, None + + path = url_unquote(self.path) + host = self.netloc or None + + if pathformat is None: + if os.name == "nt": + pathformat = "windows" + else: + pathformat = "posix" + + if pathformat == "windows": + if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:": + path = f"{path[1:2]}:{path[3:]}" + windows_share = path[:3] in ("\\" * 3, "/" * 3) + import ntpath + + path = ntpath.normpath(path) + # Windows shared drives are represented as ``\\host\\directory``. + # That results in a URL like ``file://///host/directory``, and a + # path like ``///host/directory``. We need to special-case this + # because the path contains the hostname. 
+ if windows_share and host is None: + parts = path.lstrip("\\").split("\\", 1) + if len(parts) == 2: + host, path = parts + else: + host = parts[0] + path = "" + elif pathformat == "posix": + import posixpath + + path = posixpath.normpath(path) + else: + raise TypeError(f"Invalid path format {pathformat!r}") + + if host in ("127.0.0.1", "::1", "localhost"): + host = None + + return host, path + + def _split_netloc(self) -> t.Tuple[t.Optional[str], str]: + if self._at in self.netloc: + auth, _, netloc = self.netloc.partition(self._at) + return auth, netloc + return None, self.netloc + + def _split_auth(self) -> t.Tuple[t.Optional[str], t.Optional[str]]: + auth = self._split_netloc()[0] + if not auth: + return None, None + if self._colon not in auth: + return auth, None + + username, _, password = auth.partition(self._colon) + return username, password + + def _split_host(self) -> t.Tuple[t.Optional[str], t.Optional[str]]: + rv = self._split_netloc()[1] + if not rv: + return None, None + + if not rv.startswith(self._lbracket): + if self._colon in rv: + host, _, port = rv.partition(self._colon) + return host, port + return rv, None + + idx = rv.find(self._rbracket) + if idx < 0: + return rv, None + + host = rv[1:idx] + rest = rv[idx + 1 :] + if rest.startswith(self._colon): + return host, rest[1:] + return host, None + + +class URL(BaseURL): + """Represents a parsed URL. This behaves like a regular tuple but + also has some extra attributes that give further insight into the + URL. + """ + + __slots__ = () + _at = "@" + _colon = ":" + _lbracket = "[" + _rbracket = "]" + + def encode(self, charset: str = "utf-8", errors: str = "replace") -> "BytesURL": + """Encodes the URL to a tuple made out of bytes. The charset is + only being used for the path, query and fragment. 
+ """ + return BytesURL( + self.scheme.encode("ascii"), # type: ignore + self.encode_netloc(), + self.path.encode(charset, errors), # type: ignore + self.query.encode(charset, errors), # type: ignore + self.fragment.encode(charset, errors), # type: ignore + ) + + +class BytesURL(BaseURL): + """Represents a parsed URL in bytes.""" + + __slots__ = () + _at = b"@" # type: ignore + _colon = b":" # type: ignore + _lbracket = b"[" # type: ignore + _rbracket = b"]" # type: ignore + + def __str__(self) -> str: + return self.to_url().decode("utf-8", "replace") # type: ignore + + def encode_netloc(self) -> bytes: # type: ignore + """Returns the netloc unchanged as bytes.""" + return self.netloc # type: ignore + + def decode(self, charset: str = "utf-8", errors: str = "replace") -> "URL": + """Decodes the URL to a tuple made out of strings. The charset is + only being used for the path, query and fragment. + """ + return URL( + self.scheme.decode("ascii"), # type: ignore + self.decode_netloc(), + self.path.decode(charset, errors), # type: ignore + self.query.decode(charset, errors), # type: ignore + self.fragment.decode(charset, errors), # type: ignore + ) + + +_unquote_maps: t.Dict[t.FrozenSet[int], t.Dict[bytes, int]] = {frozenset(): _hextobyte} + + +def _unquote_to_bytes( + string: t.Union[str, bytes], unsafe: t.Union[str, bytes] = "" +) -> bytes: + if isinstance(string, str): + string = string.encode("utf-8") + + if isinstance(unsafe, str): + unsafe = unsafe.encode("utf-8") + + unsafe = frozenset(bytearray(unsafe)) + groups = iter(string.split(b"%")) + result = bytearray(next(groups, b"")) + + try: + hex_to_byte = _unquote_maps[unsafe] + except KeyError: + hex_to_byte = _unquote_maps[unsafe] = { + h: b for h, b in _hextobyte.items() if b not in unsafe + } + + for group in groups: + code = group[:2] + + if code in hex_to_byte: + result.append(hex_to_byte[code]) + result.extend(group[2:]) + else: + result.append(37) # % + result.extend(group) + + return bytes(result) + + 
+def _url_encode_impl( + obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]], + charset: str, + sort: bool, + key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]], +) -> t.Iterator[str]: + from .datastructures import iter_multi_items + + iterable: t.Iterable[t.Tuple[str, str]] = iter_multi_items(obj) + + if sort: + iterable = sorted(iterable, key=key) + + for key_str, value_str in iterable: + if value_str is None: + continue + + if not isinstance(key_str, bytes): + key_bytes = str(key_str).encode(charset) + else: + key_bytes = key_str + + if not isinstance(value_str, bytes): + value_bytes = str(value_str).encode(charset) + else: + value_bytes = value_str + + yield f"{_fast_url_quote_plus(key_bytes)}={_fast_url_quote_plus(value_bytes)}" + + +def _url_unquote_legacy(value: str, unsafe: str = "") -> str: + try: + return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe) + except UnicodeError: + return url_unquote(value, charset="latin1", unsafe=unsafe) + + +def url_parse( + url: str, scheme: t.Optional[str] = None, allow_fragments: bool = True +) -> BaseURL: + """Parses a URL from a string into a :class:`URL` tuple. If the URL + is lacking a scheme it can be provided as second argument. Otherwise, + it is ignored. Optionally fragments can be stripped from the URL + by setting `allow_fragments` to `False`. + + The inverse of this function is :func:`url_unparse`. + + :param url: the URL to parse. + :param scheme: the default schema to use if the URL is schemaless. + :param allow_fragments: if set to `False` a fragment will be removed + from the URL. 
+ """ + s = _make_encode_wrapper(url) + is_text_based = isinstance(url, str) + + if scheme is None: + scheme = s("") + netloc = query = fragment = s("") + i = url.find(s(":")) + if i > 0 and _scheme_re.match(_to_str(url[:i], errors="replace")): + # make sure "iri" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i + 1 :] + if not rest or any(c not in s("0123456789") for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == s("//"): + delim = len(url) + for c in s("/?#"): + wdelim = url.find(c, 2) + if wdelim >= 0: + delim = min(delim, wdelim) + netloc, url = url[2:delim], url[delim:] + if (s("[") in netloc and s("]") not in netloc) or ( + s("]") in netloc and s("[") not in netloc + ): + raise ValueError("Invalid IPv6 URL") + + if allow_fragments and s("#") in url: + url, fragment = url.split(s("#"), 1) + if s("?") in url: + url, query = url.split(s("?"), 1) + + result_type = URL if is_text_based else BytesURL + return result_type(scheme, netloc, url, query, fragment) + + +def _make_fast_url_quote( + charset: str = "utf-8", + errors: str = "strict", + safe: t.Union[str, bytes] = "/:", + unsafe: t.Union[str, bytes] = "", +) -> t.Callable[[bytes], str]: + """Precompile the translation table for a URL encoding function. + + Unlike :func:`url_quote`, the generated function only takes the + string to quote. + + :param charset: The charset to encode the result with. + :param errors: How to handle encoding errors. + :param safe: An optional sequence of safe characters to never encode. + :param unsafe: An optional sequence of unsafe characters to always encode. 
+ """ + if isinstance(safe, str): + safe = safe.encode(charset, errors) + + if isinstance(unsafe, str): + unsafe = unsafe.encode(charset, errors) + + safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe)) + table = [chr(c) if c in safe else f"%{c:02X}" for c in range(256)] + + def quote(string: bytes) -> str: + return "".join([table[c] for c in string]) + + return quote + + +_fast_url_quote = _make_fast_url_quote() +_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+") + + +def _fast_url_quote_plus(string: bytes) -> str: + return _fast_quote_plus(string).replace(" ", "+") + + +def url_quote( + string: t.Union[str, bytes], + charset: str = "utf-8", + errors: str = "strict", + safe: t.Union[str, bytes] = "/:", + unsafe: t.Union[str, bytes] = "", +) -> str: + """URL encode a single string with a given encoding. + + :param s: the string to quote. + :param charset: the charset to be used. + :param safe: an optional sequence of safe characters. + :param unsafe: an optional sequence of unsafe characters. + + .. versionadded:: 0.9.2 + The `unsafe` parameter was added. + """ + if not isinstance(string, (str, bytes, bytearray)): + string = str(string) + if isinstance(string, str): + string = string.encode(charset, errors) + if isinstance(safe, str): + safe = safe.encode(charset, errors) + if isinstance(unsafe, str): + unsafe = unsafe.encode(charset, errors) + safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe)) + rv = bytearray() + for char in bytearray(string): + if char in safe: + rv.append(char) + else: + rv.extend(_bytetohex[char]) + return bytes(rv).decode(charset) + + +def url_quote_plus( + string: str, charset: str = "utf-8", errors: str = "strict", safe: str = "" +) -> str: + """URL encode a single string with the given encoding and convert + whitespace to "+". + + :param s: The string to quote. + :param charset: The charset to be used. + :param safe: An optional sequence of safe characters. 
+ """ + return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+") + + +def url_unparse(components: t.Tuple[str, str, str, str, str]) -> str: + """The reverse operation to :meth:`url_parse`. This accepts arbitrary + as well as :class:`URL` tuples and returns a URL as a string. + + :param components: the parsed URL as tuple which should be converted + into a URL string. + """ + _check_str_tuple(components) + scheme, netloc, path, query, fragment = components + s = _make_encode_wrapper(scheme) + url = s("") + + # We generally treat file:///x and file:/x the same which is also + # what browsers seem to do. This also allows us to ignore a schema + # register for netloc utilization or having to differentiate between + # empty and missing netloc. + if netloc or (scheme and path.startswith(s("/"))): + if path and path[:1] != s("/"): + path = s("/") + path + url = s("//") + (netloc or s("")) + path + elif path: + url += path + if scheme: + url = scheme + s(":") + url + if query: + url = url + s("?") + query + if fragment: + url = url + s("#") + fragment + return url + + +def url_unquote( + s: t.Union[str, bytes], + charset: str = "utf-8", + errors: str = "replace", + unsafe: str = "", +) -> str: + """URL decode a single string with a given encoding. If the charset + is set to `None` no decoding is performed and raw bytes are + returned. + + :param s: the string to unquote. + :param charset: the charset of the query string. If set to `None` + no decoding will take place. + :param errors: the error handling for the charset decoding. + """ + rv = _unquote_to_bytes(s, unsafe) + if charset is None: + return rv + return rv.decode(charset, errors) + + +def url_unquote_plus( + s: t.Union[str, bytes], charset: str = "utf-8", errors: str = "replace" +) -> str: + """URL decode a single string with the given `charset` and decode "+" to + whitespace. + + Per default encoding errors are ignored. 
If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. + + :param s: The string to unquote. + :param charset: the charset of the query string. If set to `None` + no decoding will take place. + :param errors: The error handling for the `charset` decoding. + """ + if isinstance(s, str): + s = s.replace("+", " ") + else: + s = s.replace(b"+", b" ") + return url_unquote(s, charset, errors) + + +def url_fix(s: str, charset: str = "utf-8") -> str: + r"""Sometimes you get an URL by a user that just isn't a real URL because + it contains unsafe characters like ' ' and so on. This function can fix + some of the problems in a similar way browsers handle data entered by the + user: + + >>> url_fix('http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') + 'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)' + + :param s: the string with the URL to fix. + :param charset: The target charset for the URL if the url was given + as a string. + """ + # First step is to switch to text processing and to convert + # backslashes (which are invalid in URLs anyways) to slashes. This is + # consistent with what Chrome does. 
+ s = _to_str(s, charset, "replace").replace("\\", "/") + + # For the specific case that we look like a malformed windows URL + # we want to fix this up manually: + if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"): + s = f"file:///{s[7:]}" + + url = url_parse(s) + path = url_quote(url.path, charset, safe="/%+$!*'(),") + qs = url_quote_plus(url.query, charset, safe=":&%=+$!*'(),") + anchor = url_quote_plus(url.fragment, charset, safe=":&%=+$!*'(),") + return url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor)) + + +# not-unreserved characters remain quoted when unquoting to IRI +_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe]) + + +def _codec_error_url_quote(e: UnicodeError) -> t.Tuple[str, int]: + """Used in :func:`uri_to_iri` after unquoting to re-quote any + invalid bytes. + """ + # the docs state that UnicodeError does have these attributes, + # but mypy isn't picking them up + out = _fast_url_quote(e.object[e.start : e.end]) # type: ignore + return out, e.end # type: ignore + + +codecs.register_error("werkzeug.url_quote", _codec_error_url_quote) + + +def uri_to_iri( + uri: t.Union[str, t.Tuple[str, str, str, str, str]], + charset: str = "utf-8", + errors: str = "werkzeug.url_quote", +) -> str: + """Convert a URI to an IRI. All valid UTF-8 characters are unquoted, + leaving all reserved and invalid characters quoted. If the URL has + a domain, it is decoded from Punycode. + + >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF") + 'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF' + + :param uri: The URI to convert. + :param charset: The encoding to encode unquoted bytes with. + :param errors: Error handler to use during ``bytes.encode``. By + default, invalid bytes are left quoted. + + .. versionchanged:: 0.15 + All reserved and invalid characters remain quoted. Previously, + only some reserved characters were preserved, and invalid bytes + were replaced instead of left quoted. + + .. 
versionadded:: 0.6 + """ + if isinstance(uri, tuple): + uri = url_unparse(uri) + + uri = url_parse(_to_str(uri, charset)) + path = url_unquote(uri.path, charset, errors, _to_iri_unsafe) + query = url_unquote(uri.query, charset, errors, _to_iri_unsafe) + fragment = url_unquote(uri.fragment, charset, errors, _to_iri_unsafe) + return url_unparse((uri.scheme, uri.decode_netloc(), path, query, fragment)) + + +# reserved characters remain unquoted when quoting to URI +_to_uri_safe = ":/?#[]@!$&'()*+,;=%" + + +def iri_to_uri( + iri: t.Union[str, t.Tuple[str, str, str, str, str]], + charset: str = "utf-8", + errors: str = "strict", + safe_conversion: bool = False, +) -> str: + """Convert an IRI to a URI. All non-ASCII and unsafe characters are + quoted. If the URL has a domain, it is encoded to Punycode. + + >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF') + 'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF' + + :param iri: The IRI to convert. + :param charset: The encoding of the IRI. + :param errors: Error handler to use during ``bytes.encode``. + :param safe_conversion: Return the URL unchanged if it only contains + ASCII characters and no whitespace. See the explanation below. + + There is a general problem with IRI conversion with some protocols + that are in violation of the URI specification. Consider the + following two IRIs:: + + magnet:?xt=uri:whatever + itms-services://?action=download-manifest + + After parsing, we don't know if the scheme requires the ``//``, + which is dropped if empty, but conveys different meanings in the + final URL if it's present or not. In this case, you can use + ``safe_conversion``, which will return the URL unchanged if it only + contains ASCII characters and no whitespace. This can result in a + URI with unquoted characters if it was not already quoted correctly, + but preserves the URL's semantics. Werkzeug uses this for the + ``Location`` header for redirects. + + .. 
versionchanged:: 0.15 + All reserved characters remain unquoted. Previously, only some + reserved characters were left unquoted. + + .. versionchanged:: 0.9.6 + The ``safe_conversion`` parameter was added. + + .. versionadded:: 0.6 + """ + if isinstance(iri, tuple): + iri = url_unparse(iri) + + if safe_conversion: + # If we're not sure if it's safe to convert the URL, and it only + # contains ASCII characters, return it unconverted. + try: + native_iri = _to_str(iri) + ascii_iri = native_iri.encode("ascii") + + # Only return if it doesn't have whitespace. (Why?) + if len(ascii_iri.split()) == 1: + return native_iri + except UnicodeError: + pass + + iri = url_parse(_to_str(iri, charset, errors)) + path = url_quote(iri.path, charset, errors, _to_uri_safe) + query = url_quote(iri.query, charset, errors, _to_uri_safe) + fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe) + return url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment)) + + +def url_decode( + s: t.AnyStr, + charset: str = "utf-8", + include_empty: bool = True, + errors: str = "replace", + separator: str = "&", + cls: t.Optional[t.Type["ds.MultiDict"]] = None, +) -> "ds.MultiDict[str, str]": + """Parse a query string and return it as a :class:`MultiDict`. + + :param s: The query string to parse. + :param charset: Decode bytes to string with this charset. If not + given, bytes are returned as-is. + :param include_empty: Include keys with empty values in the dict. + :param errors: Error handling behavior when decoding bytes. + :param separator: Separator character between pairs. + :param cls: Container to hold result instead of :class:`MultiDict`. + + .. versionchanged:: 2.0 + The ``decode_keys`` parameter is deprecated and will be removed + in Werkzeug 2.1. + + .. versionchanged:: 0.5 + In previous versions ";" and "&" could be used for url decoding. + Now only "&" is supported. If you want to use ";", a different + ``separator`` can be provided. + + .. 
versionchanged:: 0.5 + The ``cls`` parameter was added. + """ + if cls is None: + from .datastructures import MultiDict # noqa: F811 + + cls = MultiDict + if isinstance(s, str) and not isinstance(separator, str): + separator = separator.decode(charset or "ascii") + elif isinstance(s, bytes) and not isinstance(separator, bytes): + separator = separator.encode(charset or "ascii") # type: ignore + return cls( + _url_decode_impl( + s.split(separator), charset, include_empty, errors # type: ignore + ) + ) + + +def url_decode_stream( + stream: t.IO[bytes], + charset: str = "utf-8", + include_empty: bool = True, + errors: str = "replace", + separator: bytes = b"&", + cls: t.Optional[t.Type["ds.MultiDict"]] = None, + limit: t.Optional[int] = None, +) -> "ds.MultiDict[str, str]": + """Works like :func:`url_decode` but decodes a stream. The behavior + of stream and limit follows functions like + :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is + directly fed to the `cls` so you can consume the data while it's + parsed. + + :param stream: a stream with the encoded querystring + :param charset: the charset of the query string. If set to `None` + no decoding will take place. + :param include_empty: Set to `False` if you don't want empty values to + appear in the dict. + :param errors: the decoding error behavior. + :param separator: the pair separator to be used, defaults to ``&`` + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param limit: the content length of the URL data. Not necessary if + a limited stream is provided. + + .. versionchanged:: 2.0 + The ``decode_keys`` and ``return_iterator`` parameters are + deprecated and will be removed in Werkzeug 2.1. + + .. 
versionadded:: 0.8 + """ + from .wsgi import make_chunk_iter + + pair_iter = make_chunk_iter(stream, separator, limit) + decoder = _url_decode_impl(pair_iter, charset, include_empty, errors) + + if cls is None: + from .datastructures import MultiDict # noqa: F811 + + cls = MultiDict + + return cls(decoder) + + +def _url_decode_impl( + pair_iter: t.Iterable[t.AnyStr], charset: str, include_empty: bool, errors: str +) -> t.Iterator[t.Tuple[str, str]]: + for pair in pair_iter: + if not pair: + continue + s = _make_encode_wrapper(pair) + equal = s("=") + if equal in pair: + key, value = pair.split(equal, 1) + else: + if not include_empty: + continue + key = pair + value = s("") + yield ( + url_unquote_plus(key, charset, errors), + url_unquote_plus(value, charset, errors), + ) + + +def url_encode( + obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]], + charset: str = "utf-8", + sort: bool = False, + key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None, + separator: str = "&", +) -> str: + """URL encode a dict/`MultiDict`. If a value is `None` it will not appear + in the result string. Per default only values are encoded into the target + charset strings. + + :param obj: the object to encode into a query string. + :param charset: the charset of the query string. + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + + .. versionchanged:: 2.0 + The ``encode_keys`` parameter is deprecated and will be removed + in Werkzeug 2.1. + + .. versionchanged:: 0.5 + Added the ``sort``, ``key``, and ``separator`` parameters. 
+ """ + separator = _to_str(separator, "ascii") + return separator.join(_url_encode_impl(obj, charset, sort, key)) + + +def url_encode_stream( + obj: t.Union[t.Mapping[str, str], t.Iterable[t.Tuple[str, str]]], + stream: t.Optional[t.IO[str]] = None, + charset: str = "utf-8", + sort: bool = False, + key: t.Optional[t.Callable[[t.Tuple[str, str]], t.Any]] = None, + separator: str = "&", +) -> None: + """Like :meth:`url_encode` but writes the results to a stream + object. If the stream is `None` a generator over all encoded + pairs is returned. + + :param obj: the object to encode into a query string. + :param stream: a stream to write the encoded object into or `None` if + an iterator over the encoded pairs should be returned. In + that case the separator argument is ignored. + :param charset: the charset of the query string. + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + + .. versionchanged:: 2.0 + The ``encode_keys`` parameter is deprecated and will be removed + in Werkzeug 2.1. + + .. versionadded:: 0.8 + """ + separator = _to_str(separator, "ascii") + gen = _url_encode_impl(obj, charset, sort, key) + if stream is None: + return gen # type: ignore + for idx, chunk in enumerate(gen): + if idx: + stream.write(separator) + stream.write(chunk) + return None + + +def url_join( + base: t.Union[str, t.Tuple[str, str, str, str, str]], + url: t.Union[str, t.Tuple[str, str, str, str, str]], + allow_fragments: bool = True, +) -> str: + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter. + + :param base: the base URL for the join operation. + :param url: the URL to join. + :param allow_fragments: indicates whether fragments should be allowed. 
+ """ + if isinstance(base, tuple): + base = url_unparse(base) + if isinstance(url, tuple): + url = url_unparse(url) + + _check_str_tuple((base, url)) + s = _make_encode_wrapper(base) + + if not base: + return url + if not url: + return base + + bscheme, bnetloc, bpath, bquery, bfragment = url_parse( + base, allow_fragments=allow_fragments + ) + scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments) + if scheme != bscheme: + return url + if netloc: + return url_unparse((scheme, netloc, path, query, fragment)) + netloc = bnetloc + + if path[:1] == s("/"): + segments = path.split(s("/")) + elif not path: + segments = bpath.split(s("/")) + if not query: + query = bquery + else: + segments = bpath.split(s("/"))[:-1] + path.split(s("/")) + + # If the rightmost part is "./" we want to keep the slash but + # remove the dot. + if segments[-1] == s("."): + segments[-1] = s("") + + # Resolve ".." and "." + segments = [segment for segment in segments if segment != s(".")] + while True: + i = 1 + n = len(segments) - 1 + while i < n: + if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")): + del segments[i - 1 : i + 1] + break + i += 1 + else: + break + + # Remove trailing ".." if the URL is absolute + unwanted_marker = [s(""), s("..")] + while segments[:2] == unwanted_marker: + del segments[1] + + path = s("/").join(segments) + return url_unparse((scheme, netloc, path, query, fragment)) diff --git a/src/werkzeug/user_agent.py b/src/werkzeug/user_agent.py new file mode 100644 index 0000000..66ffcbe --- /dev/null +++ b/src/werkzeug/user_agent.py @@ -0,0 +1,47 @@ +import typing as t + + +class UserAgent: + """Represents a parsed user agent header value. + + The default implementation does no parsing, only the :attr:`string` + attribute is set. A subclass may parse the string to set the + common attributes or expose other information. Set + :attr:`werkzeug.wrappers.Request.user_agent_class` to use a + subclass. 
+ + :param string: The header value to parse. + + .. versionadded:: 2.0 + This replaces the previous ``useragents`` module, but does not + provide a built-in parser. + """ + + platform: t.Optional[str] = None + """The OS name, if it could be parsed from the string.""" + + browser: t.Optional[str] = None + """The browser name, if it could be parsed from the string.""" + + version: t.Optional[str] = None + """The browser version, if it could be parsed from the string.""" + + language: t.Optional[str] = None + """The browser language, if it could be parsed from the string.""" + + def __init__(self, string: str) -> None: + self.string: str = string + """The original header value.""" + + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.browser}/{self.version}>" + + def __str__(self) -> str: + return self.string + + def __bool__(self) -> bool: + return bool(self.browser) + + def to_header(self) -> str: + """Convert to a header value.""" + return self.string diff --git a/src/werkzeug/utils.py b/src/werkzeug/utils.py new file mode 100644 index 0000000..672e6e5 --- /dev/null +++ b/src/werkzeug/utils.py @@ -0,0 +1,705 @@ +import io +import mimetypes +import os +import pkgutil +import re +import sys +import typing as t +import unicodedata +from datetime import datetime +from time import time +from zlib import adler32 + +from markupsafe import escape + +from ._internal import _DictAccessorProperty +from ._internal import _missing +from ._internal import _TAccessorValue +from .datastructures import Headers +from .exceptions import NotFound +from .exceptions import RequestedRangeNotSatisfiable +from .security import safe_join +from .urls import url_quote +from .wsgi import wrap_file + +if t.TYPE_CHECKING: + from _typeshed.wsgi import WSGIEnvironment + from .wrappers.request import Request + from .wrappers.response import Response + +_T = t.TypeVar("_T") + +_entity_re = re.compile(r"&([^;]+);") +_filename_ascii_strip_re = re.compile(r"[^A-Za-z0-9_.-]") 
+_windows_device_files = ( + "CON", + "AUX", + "COM1", + "COM2", + "COM3", + "COM4", + "LPT1", + "LPT2", + "LPT3", + "PRN", + "NUL", +) + + +class cached_property(property, t.Generic[_T]): + """A :func:`property` that is only evaluated once. Subsequent access + returns the cached value. Setting the property sets the cached + value. Deleting the property clears the cached value, accessing it + again will evaluate it again. + + .. code-block:: python + + class Example: + @cached_property + def value(self): + # calculate something important here + return 42 + + e = Example() + e.value # evaluates + e.value # uses cache + e.value = 16 # sets cache + del e.value # clears cache + + If the class defines ``__slots__``, it must add ``_cache_{name}`` as + a slot. Alternatively, it can add ``__dict__``, but that's usually + not desirable. + + .. versionchanged:: 2.1 + Works with ``__slots__``. + + .. versionchanged:: 2.0 + ``del obj.name`` clears the cached value. + """ + + def __init__( + self, + fget: t.Callable[[t.Any], _T], + name: t.Optional[str] = None, + doc: t.Optional[str] = None, + ) -> None: + super().__init__(fget, doc=doc) + self.__name__ = name or fget.__name__ + self.slot_name = f"_cache_{self.__name__}" + self.__module__ = fget.__module__ + + def __set__(self, obj: object, value: _T) -> None: + if hasattr(obj, "__dict__"): + obj.__dict__[self.__name__] = value + else: + setattr(obj, self.slot_name, value) + + def __get__(self, obj: object, type: type = None) -> _T: # type: ignore + if obj is None: + return self # type: ignore + + obj_dict = getattr(obj, "__dict__", None) + + if obj_dict is not None: + value: _T = obj_dict.get(self.__name__, _missing) + else: + value = getattr(obj, self.slot_name, _missing) # type: ignore[arg-type] + + if value is _missing: + value = self.fget(obj) # type: ignore + + if obj_dict is not None: + obj.__dict__[self.__name__] = value + else: + setattr(obj, self.slot_name, value) + + return value + + def __delete__(self, obj: object) 
-> None: + if hasattr(obj, "__dict__"): + del obj.__dict__[self.__name__] + else: + setattr(obj, self.slot_name, _missing) + + +class environ_property(_DictAccessorProperty[_TAccessorValue]): + """Maps request attributes to environment variables. This works not only + for the Werkzeug request object, but also any other class with an + environ attribute: + + >>> class Test(object): + ... environ = {'key': 'value'} + ... test = environ_property('key') + >>> var = Test() + >>> var.test + 'value' + + If you pass it a second value it's used as default if the key does not + exist, the third one can be a converter that takes a value and converts + it. If it raises :exc:`ValueError` or :exc:`TypeError` the default value + is used. If no default value is provided `None` is used. + + Per default the property is read only. You have to explicitly enable it + by passing ``read_only=False`` to the constructor. + """ + + read_only = True + + def lookup(self, obj: "Request") -> "WSGIEnvironment": + return obj.environ + + +class header_property(_DictAccessorProperty[_TAccessorValue]): + """Like `environ_property` but for headers.""" + + def lookup(self, obj: t.Union["Request", "Response"]) -> Headers: + return obj.headers + + +# https://cgit.freedesktop.org/xdg/shared-mime-info/tree/freedesktop.org.xml.in +# https://www.iana.org/assignments/media-types/media-types.xhtml +# Types listed in the XDG mime info that have a charset in the IANA registration. +_charset_mimetypes = { + "application/ecmascript", + "application/javascript", + "application/sql", + "application/xml", + "application/xml-dtd", + "application/xml-external-parsed-entity", +} + + +def get_content_type(mimetype: str, charset: str) -> str: + """Returns the full content type string with charset for a mimetype. + + If the mimetype represents text, the charset parameter will be + appended, otherwise the mimetype is returned unchanged. + + :param mimetype: The mimetype to be used as content type. 
+ :param charset: The charset to be appended for text mimetypes. + :return: The content type. + + .. versionchanged:: 0.15 + Any type that ends with ``+xml`` gets a charset, not just those + that start with ``application/``. Known text types such as + ``application/javascript`` are also given charsets. + """ + if ( + mimetype.startswith("text/") + or mimetype in _charset_mimetypes + or mimetype.endswith("+xml") + ): + mimetype += f"; charset={charset}" + + return mimetype + + +def secure_filename(filename: str) -> str: + r"""Pass it a filename and it will return a secure version of it. This + filename can then safely be stored on a regular file system and passed + to :func:`os.path.join`. The filename returned is an ASCII only string + for maximum portability. + + On windows systems the function also makes sure that the file is not + named after one of the special device files. + + >>> secure_filename("My cool movie.mov") + 'My_cool_movie.mov' + >>> secure_filename("../../../etc/passwd") + 'etc_passwd' + >>> secure_filename('i contain cool \xfcml\xe4uts.txt') + 'i_contain_cool_umlauts.txt' + + The function might return an empty filename. It's your responsibility + to ensure that the filename is unique and that you abort or + generate a random filename if the function returned an empty one. + + .. versionadded:: 0.5 + + :param filename: the filename to secure + """ + filename = unicodedata.normalize("NFKD", filename) + filename = filename.encode("ascii", "ignore").decode("ascii") + + for sep in os.path.sep, os.path.altsep: + if sep: + filename = filename.replace(sep, " ") + filename = str(_filename_ascii_strip_re.sub("", "_".join(filename.split()))).strip( + "._" + ) + + # on nt a couple of special files are present in each folder. We + # have to ensure that the target file is not such a filename. 
In + # this case we prepend an underline + if ( + os.name == "nt" + and filename + and filename.split(".")[0].upper() in _windows_device_files + ): + filename = f"_{filename}" + + return filename + + +def redirect( + location: str, code: int = 302, Response: t.Optional[t.Type["Response"]] = None +) -> "Response": + """Returns a response object (a WSGI application) that, if called, + redirects the client to the target location. Supported codes are + 301, 302, 303, 305, 307, and 308. 300 is not supported because + it's not a real redirect and 304 because it's the answer for a + request with a request with defined If-Modified-Since headers. + + .. versionadded:: 0.6 + The location can now be a unicode string that is encoded using + the :func:`iri_to_uri` function. + + .. versionadded:: 0.10 + The class used for the Response object can now be passed in. + + :param location: the location the response should redirect to. + :param code: the redirect status code. defaults to 302. + :param class Response: a Response class to use when instantiating a + response. The default is :class:`werkzeug.wrappers.Response` if + unspecified. + """ + if Response is None: + from .wrappers import Response # type: ignore + + display_location = escape(location) + if isinstance(location, str): + # Safe conversion is necessary here as we might redirect + # to a broken URI scheme (for instance itms-services). + from .urls import iri_to_uri + + location = iri_to_uri(location, safe_conversion=True) + + response = Response( # type: ignore + "\n" + "\n" + "Redirecting...\n" + "

    Redirecting...

    \n" + "

    You should be redirected automatically to the target URL: " + f'{display_location}. If' + " not, click the link.\n", + code, + mimetype="text/html", + ) + response.headers["Location"] = location + return response + + +def append_slash_redirect(environ: "WSGIEnvironment", code: int = 308) -> "Response": + """Redirect to the current URL with a slash appended. + + If the current URL is ``/user/42``, the redirect URL will be + ``42/``. When joined to the current URL during response + processing or by the browser, this will produce ``/user/42/``. + + The behavior is undefined if the path ends with a slash already. If + called unconditionally on a URL, it may produce a redirect loop. + + :param environ: Use the path and query from this WSGI environment + to produce the redirect URL. + :param code: the status code for the redirect. + + .. versionchanged:: 2.1 + Produce a relative URL that only modifies the last segment. + Relevant when the current path has multiple segments. + + .. versionchanged:: 2.1 + The default status code is 308 instead of 301. This preserves + the request method and body. 
+ """ + tail = environ["PATH_INFO"].rpartition("/")[2] + + if not tail: + new_path = "./" + else: + new_path = f"{tail}/" + + query_string = environ.get("QUERY_STRING") + + if query_string: + new_path = f"{new_path}?{query_string}" + + return redirect(new_path, code) + + +def send_file( + path_or_file: t.Union[os.PathLike, str, t.IO[bytes]], + environ: "WSGIEnvironment", + mimetype: t.Optional[str] = None, + as_attachment: bool = False, + download_name: t.Optional[str] = None, + conditional: bool = True, + etag: t.Union[bool, str] = True, + last_modified: t.Optional[t.Union[datetime, int, float]] = None, + max_age: t.Optional[ + t.Union[int, t.Callable[[t.Optional[str]], t.Optional[int]]] + ] = None, + use_x_sendfile: bool = False, + response_class: t.Optional[t.Type["Response"]] = None, + _root_path: t.Optional[t.Union[os.PathLike, str]] = None, +) -> "Response": + """Send the contents of a file to the client. + + The first argument can be a file path or a file-like object. Paths + are preferred in most cases because Werkzeug can manage the file and + get extra information from the path. Passing a file-like object + requires that the file is opened in binary mode, and is mostly + useful when building a file in memory with :class:`io.BytesIO`. + + Never pass file paths provided by a user. The path is assumed to be + trusted, so a user could craft a path to access a file you didn't + intend. + + If the WSGI server sets a ``file_wrapper`` in ``environ``, it is + used, otherwise Werkzeug's built-in wrapper is used. Alternatively, + if the HTTP server supports ``X-Sendfile``, ``use_x_sendfile=True`` + will tell the server to send the given path, which is much more + efficient than reading it in Python. + + :param path_or_file: The path to the file to send, relative to the + current working directory if a relative path is given. + Alternatively, a file-like object opened in binary mode. Make + sure the file pointer is seeked to the start of the data. 
+ :param environ: The WSGI environ for the current request. + :param mimetype: The MIME type to send for the file. If not + provided, it will try to detect it from the file name. + :param as_attachment: Indicate to a browser that it should offer to + save the file instead of displaying it. + :param download_name: The default name browsers will use when saving + the file. Defaults to the passed file name. + :param conditional: Enable conditional and range responses based on + request headers. Requires passing a file path and ``environ``. + :param etag: Calculate an ETag for the file, which requires passing + a file path. Can also be a string to use instead. + :param last_modified: The last modified time to send for the file, + in seconds. If not provided, it will try to detect it from the + file path. + :param max_age: How long the client should cache the file, in + seconds. If set, ``Cache-Control`` will be ``public``, otherwise + it will be ``no-cache`` to prefer conditional caching. + :param use_x_sendfile: Set the ``X-Sendfile`` header to let the + server to efficiently send the file. Requires support from the + HTTP server. Requires passing a file path. + :param response_class: Build the response using this class. Defaults + to :class:`~werkzeug.wrappers.Response`. + :param _root_path: Do not use. For internal use only. Use + :func:`send_from_directory` to safely send files under a path. + + .. versionchanged:: 2.0.2 + ``send_file`` only sets a detected ``Content-Encoding`` if + ``as_attachment`` is disabled. + + .. versionadded:: 2.0 + Adapted from Flask's implementation. + + .. versionchanged:: 2.0 + ``download_name`` replaces Flask's ``attachment_filename`` + parameter. If ``as_attachment=False``, it is passed with + ``Content-Disposition: inline`` instead. + + .. versionchanged:: 2.0 + ``max_age`` replaces Flask's ``cache_timeout`` parameter. + ``conditional`` is enabled and ``max_age`` is not set by + default. + + .. 
versionchanged:: 2.0 + ``etag`` replaces Flask's ``add_etags`` parameter. It can be a + string to use instead of generating one. + + .. versionchanged:: 2.0 + If an encoding is returned when guessing ``mimetype`` from + ``download_name``, set the ``Content-Encoding`` header. + """ + if response_class is None: + from .wrappers import Response + + response_class = Response + + path: t.Optional[str] = None + file: t.Optional[t.IO[bytes]] = None + size: t.Optional[int] = None + mtime: t.Optional[float] = None + headers = Headers() + + if isinstance(path_or_file, (os.PathLike, str)) or hasattr( + path_or_file, "__fspath__" + ): + path_or_file = t.cast(t.Union[os.PathLike, str], path_or_file) + + # Flask will pass app.root_path, allowing its send_file wrapper + # to not have to deal with paths. + if _root_path is not None: + path = os.path.join(_root_path, path_or_file) + else: + path = os.path.abspath(path_or_file) + + stat = os.stat(path) + size = stat.st_size + mtime = stat.st_mtime + else: + file = path_or_file + + if download_name is None and path is not None: + download_name = os.path.basename(path) + + if mimetype is None: + if download_name is None: + raise TypeError( + "Unable to detect the MIME type because a file name is" + " not available. Either set 'download_name', pass a" + " path instead of a file, or set 'mimetype'." + ) + + mimetype, encoding = mimetypes.guess_type(download_name) + + if mimetype is None: + mimetype = "application/octet-stream" + + # Don't send encoding for attachments, it causes browsers to + # save decompress tar.gz files. 
+ if encoding is not None and not as_attachment: + headers.set("Content-Encoding", encoding) + + if download_name is not None: + try: + download_name.encode("ascii") + except UnicodeEncodeError: + simple = unicodedata.normalize("NFKD", download_name) + simple = simple.encode("ascii", "ignore").decode("ascii") + quoted = url_quote(download_name, safe="") + names = {"filename": simple, "filename*": f"UTF-8''{quoted}"} + else: + names = {"filename": download_name} + + value = "attachment" if as_attachment else "inline" + headers.set("Content-Disposition", value, **names) + elif as_attachment: + raise TypeError( + "No name provided for attachment. Either set" + " 'download_name' or pass a path instead of a file." + ) + + if use_x_sendfile and path is not None: + headers["X-Sendfile"] = path + data = None + else: + if file is None: + file = open(path, "rb") # type: ignore + elif isinstance(file, io.BytesIO): + size = file.getbuffer().nbytes + elif isinstance(file, io.TextIOBase): + raise ValueError("Files must be opened in binary mode or use BytesIO.") + + data = wrap_file(environ, file) + + rv = response_class( + data, mimetype=mimetype, headers=headers, direct_passthrough=True + ) + + if size is not None: + rv.content_length = size + + if last_modified is not None: + rv.last_modified = last_modified # type: ignore + elif mtime is not None: + rv.last_modified = mtime # type: ignore + + rv.cache_control.no_cache = True + + # Flask will pass app.get_send_file_max_age, allowing its send_file + # wrapper to not have to deal with paths. 
+ if callable(max_age): + max_age = max_age(path) + + if max_age is not None: + if max_age > 0: + rv.cache_control.no_cache = None + rv.cache_control.public = True + + rv.cache_control.max_age = max_age + rv.expires = int(time() + max_age) # type: ignore + + if isinstance(etag, str): + rv.set_etag(etag) + elif etag and path is not None: + check = adler32(path.encode("utf-8")) & 0xFFFFFFFF + rv.set_etag(f"{mtime}-{size}-{check}") + + if conditional: + try: + rv = rv.make_conditional(environ, accept_ranges=True, complete_length=size) + except RequestedRangeNotSatisfiable: + if file is not None: + file.close() + + raise + + # Some x-sendfile implementations incorrectly ignore the 304 + # status code and send the file anyway. + if rv.status_code == 304: + rv.headers.pop("x-sendfile", None) + + return rv + + +def send_from_directory( + directory: t.Union[os.PathLike, str], + path: t.Union[os.PathLike, str], + environ: "WSGIEnvironment", + **kwargs: t.Any, +) -> "Response": + """Send a file from within a directory using :func:`send_file`. + + This is a secure way to serve files from a folder, such as static + files or uploads. Uses :func:`~werkzeug.security.safe_join` to + ensure the path coming from the client is not maliciously crafted to + point outside the specified directory. + + If the final path does not point to an existing regular file, + returns a 404 :exc:`~werkzeug.exceptions.NotFound` error. + + :param directory: The directory that ``path`` must be located under. + :param path: The path to the file to send, relative to + ``directory``. + :param environ: The WSGI environ for the current request. + :param kwargs: Arguments to pass to :func:`send_file`. + + .. versionadded:: 2.0 + Adapted from Flask's implementation. + """ + path = safe_join(os.fspath(directory), os.fspath(path)) + + if path is None: + raise NotFound() + + # Flask will pass app.root_path, allowing its send_from_directory + # wrapper to not have to deal with paths. 
+ if "_root_path" in kwargs: + path = os.path.join(kwargs["_root_path"], path) + + try: + if not os.path.isfile(path): + raise NotFound() + except ValueError: + # path contains null byte on Python < 3.8 + raise NotFound() from None + + return send_file(path, environ, **kwargs) + + +def import_string(import_name: str, silent: bool = False) -> t.Any: + """Imports an object based on a string. This is useful if you want to + use import paths as endpoints or something similar. An import path can + be specified either in dotted notation (``xml.sax.saxutils.escape``) + or with a colon as object delimiter (``xml.sax.saxutils:escape``). + + If `silent` is True the return value will be `None` if the import fails. + + :param import_name: the dotted name for the object to import. + :param silent: if set to `True` import errors are ignored and + `None` is returned instead. + :return: imported object + """ + import_name = import_name.replace(":", ".") + try: + try: + __import__(import_name) + except ImportError: + if "." not in import_name: + raise + else: + return sys.modules[import_name] + + module_name, obj_name = import_name.rsplit(".", 1) + module = __import__(module_name, globals(), locals(), [obj_name]) + try: + return getattr(module, obj_name) + except AttributeError as e: + raise ImportError(e) from None + + except ImportError as e: + if not silent: + raise ImportStringError(import_name, e).with_traceback( + sys.exc_info()[2] + ) from None + + return None + + +def find_modules( + import_path: str, include_packages: bool = False, recursive: bool = False +) -> t.Iterator[str]: + """Finds all the modules below a package. This can be useful to + automatically import all views / controllers so that their metaclasses / + function decorators have a chance to register themselves on the + application. + + Packages are not returned unless `include_packages` is `True`. 
This can + also recursively list modules but in that case it will import all the + packages to get the correct load path of that module. + + :param import_path: the dotted name for the package to find child modules. + :param include_packages: set to `True` if packages should be returned, too. + :param recursive: set to `True` if recursion should happen. + :return: generator + """ + module = import_string(import_path) + path = getattr(module, "__path__", None) + if path is None: + raise ValueError(f"{import_path!r} is not a package") + basename = f"{module.__name__}." + for _importer, modname, ispkg in pkgutil.iter_modules(path): + modname = basename + modname + if ispkg: + if include_packages: + yield modname + if recursive: + yield from find_modules(modname, include_packages, True) + else: + yield modname + + +class ImportStringError(ImportError): + """Provides information about a failed :func:`import_string` attempt.""" + + #: String in dotted notation that failed to be imported. + import_name: str + #: Wrapped exception. + exception: BaseException + + def __init__(self, import_name: str, exception: BaseException) -> None: + self.import_name = import_name + self.exception = exception + msg = import_name + name = "" + tracked = [] + for part in import_name.replace(":", ".").split("."): + name = f"{name}.{part}" if name else part + imported = import_string(name, silent=True) + if imported: + tracked.append((name, getattr(imported, "__file__", None))) + else: + track = [f"- {n!r} found in {i!r}." for n, i in tracked] + track.append(f"- {name!r} not found.") + track_str = "\n".join(track) + msg = ( + f"import_string() failed for {import_name!r}. 
Possible reasons" + f" are:\n\n" + "- missing __init__.py in a package;\n" + "- package or module path not included in sys.path;\n" + "- duplicated package or module name taking precedence in" + " sys.path;\n" + "- missing module, class, function or variable;\n\n" + f"Debugged import:\n\n{track_str}\n\n" + f"Original exception:\n\n{type(exception).__name__}: {exception}" + ) + break + + super().__init__(msg) + + def __repr__(self) -> str: + return f"<{type(self).__name__}({self.import_name!r}, {self.exception!r})>" diff --git a/src/werkzeug/wrappers/__init__.py b/src/werkzeug/wrappers/__init__.py new file mode 100644 index 0000000..b8c45d7 --- /dev/null +++ b/src/werkzeug/wrappers/__init__.py @@ -0,0 +1,3 @@ +from .request import Request as Request +from .response import Response as Response +from .response import ResponseStream diff --git a/src/werkzeug/wrappers/request.py b/src/werkzeug/wrappers/request.py new file mode 100644 index 0000000..57b739c --- /dev/null +++ b/src/werkzeug/wrappers/request.py @@ -0,0 +1,614 @@ +import functools +import json +import typing +import typing as t +from io import BytesIO + +from .._internal import _wsgi_decoding_dance +from ..datastructures import CombinedMultiDict +from ..datastructures import EnvironHeaders +from ..datastructures import FileStorage +from ..datastructures import ImmutableMultiDict +from ..datastructures import iter_multi_items +from ..datastructures import MultiDict +from ..formparser import default_stream_factory +from ..formparser import FormDataParser +from ..sansio.request import Request as _SansIORequest +from ..utils import cached_property +from ..utils import environ_property +from ..wsgi import _get_server +from ..wsgi import get_input_stream +from werkzeug.exceptions import BadRequest + +if t.TYPE_CHECKING: + import typing_extensions as te + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +class Request(_SansIORequest): + """Represents an incoming WSGI HTTP 
request, with headers and body + taken from the WSGI environment. Has properties and methods for + using the functionality defined by various HTTP specs. The data in + requests object is read-only. + + Text data is assumed to use UTF-8 encoding, which should be true for + the vast majority of modern clients. Using an encoding set by the + client is unsafe in Python due to extra encodings it provides, such + as ``zip``. To change the assumed encoding, subclass and replace + :attr:`charset`. + + :param environ: The WSGI environ is generated by the WSGI server and + contains information about the server configuration and client + request. + :param populate_request: Add this request object to the WSGI environ + as ``environ['werkzeug.request']``. Can be useful when + debugging. + :param shallow: Makes reading from :attr:`stream` (and any method + that would read from it) raise a :exc:`RuntimeError`. Useful to + prevent consuming the form data in middleware, which would make + it unavailable to the final application. + + .. versionchanged:: 2.1 + Remove the ``disable_data_descriptor`` attribute. + + .. versionchanged:: 2.0 + Combine ``BaseRequest`` and mixins into a single ``Request`` + class. Using the old classes is deprecated and will be removed + in Werkzeug 2.1. + + .. versionchanged:: 0.5 + Read-only mode is enforced with immutable classes for all data. + """ + + #: the maximum content length. This is forwarded to the form data + #: parsing function (:func:`parse_form_data`). When set and the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: parsing fails because more than the specified value is transmitted + #: a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised. + #: + #: Have a look at :doc:`/request_data` for more details. + #: + #: .. versionadded:: 0.5 + max_content_length: t.Optional[int] = None + + #: the maximum form field size. This is forwarded to the form data + #: parsing function (:func:`parse_form_data`). 
When set and the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: data in memory for post data is longer than the specified value a + #: :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised. + #: + #: Have a look at :doc:`/request_data` for more details. + #: + #: .. versionadded:: 0.5 + max_form_memory_size: t.Optional[int] = None + + #: The form data parser that should be used. Can be replaced to customize + #: the form date parsing. + form_data_parser_class: t.Type[FormDataParser] = FormDataParser + + #: The WSGI environment containing HTTP headers and information from + #: the WSGI server. + environ: "WSGIEnvironment" + + #: Set when creating the request object. If ``True``, reading from + #: the request body will cause a ``RuntimeException``. Useful to + #: prevent modifying the stream from middleware. + shallow: bool + + def __init__( + self, + environ: "WSGIEnvironment", + populate_request: bool = True, + shallow: bool = False, + ) -> None: + super().__init__( + method=environ.get("REQUEST_METHOD", "GET"), + scheme=environ.get("wsgi.url_scheme", "http"), + server=_get_server(environ), + root_path=_wsgi_decoding_dance( + environ.get("SCRIPT_NAME") or "", self.charset, self.encoding_errors + ), + path=_wsgi_decoding_dance( + environ.get("PATH_INFO") or "", self.charset, self.encoding_errors + ), + query_string=environ.get("QUERY_STRING", "").encode("latin1"), + headers=EnvironHeaders(environ), + remote_addr=environ.get("REMOTE_ADDR"), + ) + self.environ = environ + self.shallow = shallow + + if populate_request and not shallow: + self.environ["werkzeug.request"] = self + + @classmethod + def from_values(cls, *args: t.Any, **kwargs: t.Any) -> "Request": + """Create a new request object based on the values provided. If + environ is given missing values are filled from there. This method is + useful for small scripts when you need to simulate a request from an URL. 
+ Do not use this method for unittesting, there is a full featured client + object (:class:`Client`) that allows to create multipart requests, + support for cookies etc. + + This accepts the same options as the + :class:`~werkzeug.test.EnvironBuilder`. + + .. versionchanged:: 0.5 + This method now accepts the same arguments as + :class:`~werkzeug.test.EnvironBuilder`. Because of this the + `environ` parameter is now called `environ_overrides`. + + :return: request object + """ + from ..test import EnvironBuilder + + charset = kwargs.pop("charset", cls.charset) + kwargs["charset"] = charset + builder = EnvironBuilder(*args, **kwargs) + try: + return builder.get_request(cls) + finally: + builder.close() + + @classmethod + def application( + cls, f: t.Callable[["Request"], "WSGIApplication"] + ) -> "WSGIApplication": + """Decorate a function as responder that accepts the request as + the last argument. This works like the :func:`responder` + decorator but the function is passed the request object as the + last argument and the request object will be closed + automatically:: + + @Request.application + def my_wsgi_app(request): + return Response('Hello World!') + + As of Werkzeug 0.14 HTTP exceptions are automatically caught and + converted to responses instead of failing. + + :param f: the WSGI callable to decorate + :return: a new WSGI callable + """ + #: return a callable that wraps the -2nd argument with the request + #: and calls the function with all the arguments up to that one and + #: the request. The return value is then called with the latest + #: two arguments. This makes it possible to use this decorator for + #: both standalone WSGI functions as well as bound methods and + #: partially applied functions. 
+ from ..exceptions import HTTPException + + @functools.wraps(f) + def application(*args): # type: ignore + request = cls(args[-2]) + with request: + try: + resp = f(*args[:-2] + (request,)) + except HTTPException as e: + resp = e.get_response(args[-2]) + return resp(*args[-2:]) + + return t.cast("WSGIApplication", application) + + def _get_file_stream( + self, + total_content_length: t.Optional[int], + content_type: t.Optional[str], + filename: t.Optional[str] = None, + content_length: t.Optional[int] = None, + ) -> t.IO[bytes]: + """Called to get a stream for the file upload. + + This must provide a file-like class with `read()`, `readline()` + and `seek()` methods that is both writeable and readable. + + The default implementation returns a temporary file if the total + content length is higher than 500KB. Because many browsers do not + provide a content length for the files only the total content + length matters. + + :param total_content_length: the total content length of all the + data in the request combined. This value + is guaranteed to be there. + :param content_type: the mimetype of the uploaded file. + :param filename: the filename of the uploaded file. May be `None`. + :param content_length: the length of this file. This value is usually + not provided because webbrowsers do not provide + this value. + """ + return default_stream_factory( + total_content_length=total_content_length, + filename=filename, + content_type=content_type, + content_length=content_length, + ) + + @property + def want_form_data_parsed(self) -> bool: + """``True`` if the request method carries content. By default + this is true if a ``Content-Type`` is sent. + + .. versionadded:: 0.8 + """ + return bool(self.environ.get("CONTENT_TYPE")) + + def make_form_data_parser(self) -> FormDataParser: + """Creates the form data parser. Instantiates the + :attr:`form_data_parser_class` with some parameters. + + .. 
versionadded:: 0.8 + """ + return self.form_data_parser_class( + self._get_file_stream, + self.charset, + self.encoding_errors, + self.max_form_memory_size, + self.max_content_length, + self.parameter_storage_class, + ) + + def _load_form_data(self) -> None: + """Method used internally to retrieve submitted data. After calling + this sets `form` and `files` on the request object to multi dicts + filled with the incoming form data. As a matter of fact the input + stream will be empty afterwards. You can also call this method to + force the parsing of the form data. + + .. versionadded:: 0.8 + """ + # abort early if we have already consumed the stream + if "form" in self.__dict__: + return + + if self.want_form_data_parsed: + parser = self.make_form_data_parser() + data = parser.parse( + self._get_stream_for_parsing(), + self.mimetype, + self.content_length, + self.mimetype_params, + ) + else: + data = ( + self.stream, + self.parameter_storage_class(), + self.parameter_storage_class(), + ) + + # inject the values into the instance dict so that we bypass + # our cached_property non-data descriptor. + d = self.__dict__ + d["stream"], d["form"], d["files"] = data + + def _get_stream_for_parsing(self) -> t.IO[bytes]: + """This is the same as accessing :attr:`stream` with the difference + that if it finds cached data from calling :meth:`get_data` first it + will create a new stream out of the cached data. + + .. versionadded:: 0.9.3 + """ + cached_data = getattr(self, "_cached_data", None) + if cached_data is not None: + return BytesIO(cached_data) + return self.stream + + def close(self) -> None: + """Closes associated resources of this request object. This + closes all file handles explicitly. You can also use the request + object in a with statement which will automatically close it. + + .. 
versionadded:: 0.9 + """ + files = self.__dict__.get("files") + for _key, value in iter_multi_items(files or ()): + value.close() + + def __enter__(self) -> "Request": + return self + + def __exit__(self, exc_type, exc_value, tb) -> None: # type: ignore + self.close() + + @cached_property + def stream(self) -> t.IO[bytes]: + """ + If the incoming form data was not encoded with a known mimetype + the data is stored unmodified in this stream for consumption. Most + of the time it is a better idea to use :attr:`data` which will give + you that data as a string. The stream only returns the data once. + + Unlike :attr:`input_stream` this stream is properly guarded that you + can't accidentally read past the length of the input. Werkzeug will + internally always refer to this stream to read data which makes it + possible to wrap this object with a stream that does filtering. + + .. versionchanged:: 0.9 + This stream is now always available but might be consumed by the + form parser later on. Previously the stream was only set if no + parsing happened. + """ + if self.shallow: + raise RuntimeError( + "This request was created with 'shallow=True', reading" + " from the input stream is disabled." + ) + + return get_input_stream(self.environ) + + input_stream = environ_property[t.IO[bytes]]( + "wsgi.input", + doc="""The WSGI input stream. + + In general it's a bad idea to use this one because you can + easily read past the boundary. Use the :attr:`stream` + instead.""", + ) + + @cached_property + def data(self) -> bytes: + """ + Contains the incoming request data as string in case it came with + a mimetype Werkzeug does not handle. + """ + return self.get_data(parse_form_data=True) + + @typing.overload + def get_data( # type: ignore + self, + cache: bool = True, + as_text: "te.Literal[False]" = False, + parse_form_data: bool = False, + ) -> bytes: + ... 
+ + @typing.overload + def get_data( + self, + cache: bool = True, + as_text: "te.Literal[True]" = ..., + parse_form_data: bool = False, + ) -> str: + ... + + def get_data( + self, cache: bool = True, as_text: bool = False, parse_form_data: bool = False + ) -> t.Union[bytes, str]: + """This reads the buffered incoming data from the client into one + bytes object. By default this is cached but that behavior can be + changed by setting `cache` to `False`. + + Usually it's a bad idea to call this method without checking the + content length first as a client could send dozens of megabytes or more + to cause memory problems on the server. + + Note that if the form data was already parsed this method will not + return anything as form data parsing does not cache the data like + this method does. To implicitly invoke form data parsing function + set `parse_form_data` to `True`. When this is done the return value + of this method will be an empty string if the form parser handles + the data. This generally is not necessary as if the whole data is + cached (which is the default) the form parser will used the cached + data to parse the form data. Please be generally aware of checking + the content length first in any case before calling this method + to avoid exhausting server memory. + + If `as_text` is set to `True` the return value will be a decoded + string. + + .. versionadded:: 0.9 + """ + rv = getattr(self, "_cached_data", None) + if rv is None: + if parse_form_data: + self._load_form_data() + rv = self.stream.read() + if cache: + self._cached_data = rv + if as_text: + rv = rv.decode(self.charset, self.encoding_errors) + return rv + + @cached_property + def form(self) -> "ImmutableMultiDict[str, str]": + """The form parameters. By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. 
This might + be necessary if the order of the form data is important. + + Please keep in mind that file uploads will not end up here, but instead + in the :attr:`files` attribute. + + .. versionchanged:: 0.9 + + Previous to Werkzeug 0.9 this would only contain form data for POST + and PUT requests. + """ + self._load_form_data() + return self.form + + @cached_property + def values(self) -> "CombinedMultiDict[str, str]": + """A :class:`werkzeug.datastructures.CombinedMultiDict` that + combines :attr:`args` and :attr:`form`. + + For GET requests, only ``args`` are present, not ``form``. + + .. versionchanged:: 2.0 + For GET requests, only ``args`` are present, not ``form``. + """ + sources = [self.args] + + if self.method != "GET": + # GET requests can have a body, and some caching proxies + # might not treat that differently than a normal GET + # request, allowing form data to "invisibly" affect the + # cache without indication in the query string / URL. + sources.append(self.form) + + args = [] + + for d in sources: + if not isinstance(d, MultiDict): + d = MultiDict(d) + + args.append(d) + + return CombinedMultiDict(args) + + @cached_property + def files(self) -> "ImmutableMultiDict[str, FileStorage]": + """:class:`~werkzeug.datastructures.MultiDict` object containing + all uploaded files. Each key in :attr:`files` is the name from the + ````. Each value in :attr:`files` is a + Werkzeug :class:`~werkzeug.datastructures.FileStorage` object. + + It basically behaves like a standard file object you know from Python, + with the difference that it also has a + :meth:`~werkzeug.datastructures.FileStorage.save` function that can + store the file on the filesystem. + + Note that :attr:`files` will only contain data if the request method was + POST, PUT or PATCH and the ``

    `` that posted to the request had + ``enctype="multipart/form-data"``. It will be empty otherwise. + + See the :class:`~werkzeug.datastructures.MultiDict` / + :class:`~werkzeug.datastructures.FileStorage` documentation for + more details about the used data structure. + """ + self._load_form_data() + return self.files + + @property + def script_root(self) -> str: + """Alias for :attr:`self.root_path`. ``environ["SCRIPT_ROOT"]`` + without a trailing slash. + """ + return self.root_path + + @cached_property + def url_root(self) -> str: + """Alias for :attr:`root_url`. The URL with scheme, host, and + root path. For example, ``https://example.com/app/``. + """ + return self.root_url + + remote_user = environ_property[str]( + "REMOTE_USER", + doc="""If the server supports user authentication, and the + script is protected, this attribute contains the username the + user has authenticated as.""", + ) + is_multithread = environ_property[bool]( + "wsgi.multithread", + doc="""boolean that is `True` if the application is served by a + multithreaded WSGI server.""", + ) + is_multiprocess = environ_property[bool]( + "wsgi.multiprocess", + doc="""boolean that is `True` if the application is served by a + WSGI server that spawns multiple processes.""", + ) + is_run_once = environ_property[bool]( + "wsgi.run_once", + doc="""boolean that is `True` if the application will be + executed only once in a process lifetime. This is the case for + CGI for example, but it's not guaranteed that the execution only + happens one time.""", + ) + + # JSON + + #: A module or other object that has ``dumps`` and ``loads`` + #: functions that match the API of the built-in :mod:`json` module. + json_module = json + + @property + def json(self) -> t.Optional[t.Any]: + """The parsed JSON data if :attr:`mimetype` indicates JSON + (:mimetype:`application/json`, see :attr:`is_json`). + + Calls :meth:`get_json` with default arguments. 
+ + If the request content type is not ``application/json``, this + will raise a 400 Bad Request error. + + .. versionchanged:: 2.1 + Raise a 400 error if the content type is incorrect. + """ + return self.get_json() + + # Cached values for ``(silent=False, silent=True)``. Initialized + # with sentinel values. + _cached_json: t.Tuple[t.Any, t.Any] = (Ellipsis, Ellipsis) + + def get_json( + self, force: bool = False, silent: bool = False, cache: bool = True + ) -> t.Optional[t.Any]: + """Parse :attr:`data` as JSON. + + If the mimetype does not indicate JSON + (:mimetype:`application/json`, see :attr:`is_json`), or parsing + fails, :meth:`on_json_loading_failed` is called and + its return value is used as the return value. By default this + raises a 400 Bad Request error. + + :param force: Ignore the mimetype and always try to parse JSON. + :param silent: Silence mimetype and parsing errors, and + return ``None`` instead. + :param cache: Store the parsed JSON to return for subsequent + calls. + + .. versionchanged:: 2.1 + Raise a 400 error if the content type is incorrect. + """ + if cache and self._cached_json[silent] is not Ellipsis: + return self._cached_json[silent] + + if not (force or self.is_json): + if not silent: + return self.on_json_loading_failed(None) + else: + return None + + data = self.get_data(cache=cache) + + try: + rv = self.json_module.loads(data) + except ValueError as e: + if silent: + rv = None + + if cache: + normal_rv, _ = self._cached_json + self._cached_json = (normal_rv, rv) + else: + rv = self.on_json_loading_failed(e) + + if cache: + _, silent_rv = self._cached_json + self._cached_json = (rv, silent_rv) + else: + if cache: + self._cached_json = (rv, rv) + + return rv + + def on_json_loading_failed(self, e: t.Optional[ValueError]) -> t.Any: + """Called if :meth:`get_json` fails and isn't silenced. + + If this method returns a value, it is used as the return value + for :meth:`get_json`. 
The default implementation raises + :exc:`~werkzeug.exceptions.BadRequest`. + + :param e: If parsing failed, this is the exception. It will be + ``None`` if the content type wasn't ``application/json``. + """ + if e is not None: + raise BadRequest(f"Failed to decode JSON object: {e}") + + raise BadRequest( + "Did not attempt to load JSON data because the request" + " Content-Type was not 'application/json'." + ) diff --git a/src/werkzeug/wrappers/response.py b/src/werkzeug/wrappers/response.py new file mode 100644 index 0000000..7e888cb --- /dev/null +++ b/src/werkzeug/wrappers/response.py @@ -0,0 +1,877 @@ +import json +import typing +import typing as t +import warnings +from http import HTTPStatus + +from .._internal import _to_bytes +from ..datastructures import Headers +from ..http import remove_entity_headers +from ..sansio.response import Response as _SansIOResponse +from ..urls import iri_to_uri +from ..urls import url_join +from ..utils import cached_property +from ..wsgi import ClosingIterator +from ..wsgi import get_current_url +from werkzeug._internal import _get_environ +from werkzeug.http import generate_etag +from werkzeug.http import http_date +from werkzeug.http import is_resource_modified +from werkzeug.http import parse_etags +from werkzeug.http import parse_range_header +from werkzeug.wsgi import _RangeWrapper + +if t.TYPE_CHECKING: + import typing_extensions as te + from _typeshed.wsgi import StartResponse + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + from .request import Request + + +def _warn_if_string(iterable: t.Iterable) -> None: + """Helper for the response objects to check if the iterable returned + to the WSGI server is not a string. + """ + if isinstance(iterable, str): + warnings.warn( + "Response iterable was set to a string. This will appear to" + " work but means that the server will send the data to the" + " client one character at a time. 
This is almost never" + " intended behavior, use 'response.data' to assign strings" + " to the response object.", + stacklevel=2, + ) + + +def _iter_encoded( + iterable: t.Iterable[t.Union[str, bytes]], charset: str +) -> t.Iterator[bytes]: + for item in iterable: + if isinstance(item, str): + yield item.encode(charset) + else: + yield item + + +def _clean_accept_ranges(accept_ranges: t.Union[bool, str]) -> str: + if accept_ranges is True: + return "bytes" + elif accept_ranges is False: + return "none" + elif isinstance(accept_ranges, str): + return accept_ranges + raise ValueError("Invalid accept_ranges value") + + +class Response(_SansIOResponse): + """Represents an outgoing WSGI HTTP response with body, status, and + headers. Has properties and methods for using the functionality + defined by various HTTP specs. + + The response body is flexible to support different use cases. The + simple form is passing bytes, or a string which will be encoded as + UTF-8. Passing an iterable of bytes or strings makes this a + streaming response. A generator is particularly useful for building + a CSV file in memory or using SSE (Server Sent Events). A file-like + object is also iterable, although the + :func:`~werkzeug.utils.send_file` helper should be used in that + case. + + The response object is itself a WSGI application callable. When + called (:meth:`__call__`) with ``environ`` and ``start_response``, + it will pass its status and headers to ``start_response`` then + return its body as an iterable. + + .. code-block:: python + + from werkzeug.wrappers.response import Response + + def index(): + return Response("Hello, World!") + + def application(environ, start_response): + path = environ.get("PATH_INFO") or "/" + + if path == "/": + response = index() + else: + response = Response("Not Found", status=404) + + return response(environ, start_response) + + :param response: The data for the body of the response. 
A string or + bytes, or tuple or list of strings or bytes, for a fixed-length + response, or any other iterable of strings or bytes for a + streaming response. Defaults to an empty body. + :param status: The status code for the response. Either an int, in + which case the default status message is added, or a string in + the form ``{code} {message}``, like ``404 Not Found``. Defaults + to 200. + :param headers: A :class:`~werkzeug.datastructures.Headers` object, + or a list of ``(key, value)`` tuples that will be converted to a + ``Headers`` object. + :param mimetype: The mime type (content type without charset or + other parameters) of the response. If the value starts with + ``text/`` (or matches some other special cases), the charset + will be added to create the ``content_type``. + :param content_type: The full content type of the response. + Overrides building the value from ``mimetype``. + :param direct_passthrough: Pass the response body directly through + as the WSGI iterable. This can be used when the body is a binary + file or other iterator of bytes, to skip some unnecessary + checks. Use :func:`~werkzeug.utils.send_file` instead of setting + this manually. + + .. versionchanged:: 2.0 + Combine ``BaseResponse`` and mixins into a single ``Response`` + class. Using the old classes is deprecated and will be removed + in Werkzeug 2.1. + + .. versionchanged:: 0.5 + The ``direct_passthrough`` parameter was added. + """ + + #: if set to `False` accessing properties on the response object will + #: not try to consume the response iterator and convert it into a list. + #: + #: .. versionadded:: 0.6.2 + #: + #: That attribute was previously called `implicit_seqence_conversion`. + #: (Notice the typo). If you did use this feature, you have to adapt + #: your code to the name change. + implicit_sequence_conversion = True + + #: If a redirect ``Location`` header is a relative URL, make it an + #: absolute URL, including scheme and domain. + #: + #: .. 
versionchanged:: 2.1 + #: This is disabled by default, so responses will send relative + #: redirects. + #: + #: .. versionadded:: 0.8 + autocorrect_location_header = False + + #: Should this response object automatically set the content-length + #: header if possible? This is true by default. + #: + #: .. versionadded:: 0.8 + automatically_set_content_length = True + + #: The response body to send as the WSGI iterable. A list of strings + #: or bytes represents a fixed-length response, any other iterable + #: is a streaming response. Strings are encoded to bytes as UTF-8. + #: + #: Do not set to a plain string or bytes, that will cause sending + #: the response to be very inefficient as it will iterate one byte + #: at a time. + response: t.Union[t.Iterable[str], t.Iterable[bytes]] + + def __init__( + self, + response: t.Optional[ + t.Union[t.Iterable[bytes], bytes, t.Iterable[str], str] + ] = None, + status: t.Optional[t.Union[int, str, HTTPStatus]] = None, + headers: t.Optional[ + t.Union[ + t.Mapping[str, t.Union[str, int, t.Iterable[t.Union[str, int]]]], + t.Iterable[t.Tuple[str, t.Union[str, int]]], + ] + ] = None, + mimetype: t.Optional[str] = None, + content_type: t.Optional[str] = None, + direct_passthrough: bool = False, + ) -> None: + super().__init__( + status=status, + headers=headers, + mimetype=mimetype, + content_type=content_type, + ) + + #: Pass the response body directly through as the WSGI iterable. + #: This can be used when the body is a binary file or other + #: iterator of bytes, to skip some unnecessary checks. Use + #: :func:`~werkzeug.utils.send_file` instead of setting this + #: manually. + self.direct_passthrough = direct_passthrough + self._on_close: t.List[t.Callable[[], t.Any]] = [] + + # we set the response after the headers so that if a class changes + # the charset attribute, the data is set in the correct charset. 
+ if response is None: + self.response = [] + elif isinstance(response, (str, bytes, bytearray)): + self.set_data(response) + else: + self.response = response + + def call_on_close(self, func: t.Callable[[], t.Any]) -> t.Callable[[], t.Any]: + """Adds a function to the internal list of functions that should + be called as part of closing down the response. Since 0.7 this + function also returns the function that was passed so that this + can be used as a decorator. + + .. versionadded:: 0.6 + """ + self._on_close.append(func) + return func + + def __repr__(self) -> str: + if self.is_sequence: + body_info = f"{sum(map(len, self.iter_encoded()))} bytes" + else: + body_info = "streamed" if self.is_streamed else "likely-streamed" + return f"<{type(self).__name__} {body_info} [{self.status}]>" + + @classmethod + def force_type( + cls, response: "Response", environ: t.Optional["WSGIEnvironment"] = None + ) -> "Response": + """Enforce that the WSGI response is a response object of the current + type. Werkzeug will use the :class:`Response` internally in many + situations like the exceptions. If you call :meth:`get_response` on an + exception you will get back a regular :class:`Response` object, even + if you are using a custom subclass. + + This method can enforce a given response type, and it will also + convert arbitrary WSGI callables into response objects if an environ + is provided:: + + # convert a Werkzeug response object into an instance of the + # MyResponseClass subclass. + response = MyResponseClass.force_type(response) + + # convert any WSGI application into a response object + response = MyResponseClass.force_type(response, environ) + + This is especially useful if you want to post-process responses in + the main dispatcher and use functionality provided by your subclass. + + Keep in mind that this will modify response objects in place if + possible! + + :param response: a response object or wsgi application. + :param environ: a WSGI environment object. 
+ :return: a response object. + """ + if not isinstance(response, Response): + if environ is None: + raise TypeError( + "cannot convert WSGI application into response" + " objects without an environ" + ) + + from ..test import run_wsgi_app + + response = Response(*run_wsgi_app(response, environ)) + + response.__class__ = cls + return response + + @classmethod + def from_app( + cls, app: "WSGIApplication", environ: "WSGIEnvironment", buffered: bool = False + ) -> "Response": + """Create a new response object from an application output. This + works best if you pass it an application that returns a generator all + the time. Sometimes applications may use the `write()` callable + returned by the `start_response` function. This tries to resolve such + edge cases automatically. But if you don't get the expected output + you should set `buffered` to `True` which enforces buffering. + + :param app: the WSGI application to execute. + :param environ: the WSGI environment to execute against. + :param buffered: set to `True` to enforce buffering. + :return: a response object. + """ + from ..test import run_wsgi_app + + return cls(*run_wsgi_app(app, environ, buffered)) + + @typing.overload + def get_data(self, as_text: "te.Literal[False]" = False) -> bytes: + ... + + @typing.overload + def get_data(self, as_text: "te.Literal[True]") -> str: + ... + + def get_data(self, as_text: bool = False) -> t.Union[bytes, str]: + """The string representation of the response body. Whenever you call + this property the response iterable is encoded and flattened. This + can lead to unwanted behavior if you stream big data. + + This behavior can be disabled by setting + :attr:`implicit_sequence_conversion` to `False`. + + If `as_text` is set to `True` the return value will be a decoded + string. + + .. 
versionadded:: 0.9 + """ + self._ensure_sequence() + rv = b"".join(self.iter_encoded()) + + if as_text: + return rv.decode(self.charset) + + return rv + + def set_data(self, value: t.Union[bytes, str]) -> None: + """Sets a new string as response. The value must be a string or + bytes. If a string is set it's encoded to the charset of the + response (utf-8 by default). + + .. versionadded:: 0.9 + """ + # if a string is set, it's encoded directly so that we + # can set the content length + if isinstance(value, str): + value = value.encode(self.charset) + else: + value = bytes(value) + self.response = [value] + if self.automatically_set_content_length: + self.headers["Content-Length"] = str(len(value)) + + data = property( + get_data, + set_data, + doc="A descriptor that calls :meth:`get_data` and :meth:`set_data`.", + ) + + def calculate_content_length(self) -> t.Optional[int]: + """Returns the content length if available or `None` otherwise.""" + try: + self._ensure_sequence() + except RuntimeError: + return None + return sum(len(x) for x in self.iter_encoded()) + + def _ensure_sequence(self, mutable: bool = False) -> None: + """This method can be called by methods that need a sequence. If + `mutable` is true, it will also ensure that the response sequence + is a standard Python list. + + .. versionadded:: 0.6 + """ + if self.is_sequence: + # if we need a mutable object, we ensure it's a list. + if mutable and not isinstance(self.response, list): + self.response = list(self.response) # type: ignore + return + if self.direct_passthrough: + raise RuntimeError( + "Attempted implicit sequence conversion but the" + " response object is in direct passthrough mode." + ) + if not self.implicit_sequence_conversion: + raise RuntimeError( + "The response object required the iterable to be a" + " sequence, but the implicit conversion was disabled." + " Call make_sequence() yourself." 
+ ) + self.make_sequence() + + def make_sequence(self) -> None: + """Converts the response iterator in a list. By default this happens + automatically if required. If `implicit_sequence_conversion` is + disabled, this method is not automatically called and some properties + might raise exceptions. This also encodes all the items. + + .. versionadded:: 0.6 + """ + if not self.is_sequence: + # if we consume an iterable we have to ensure that the close + # method of the iterable is called if available when we tear + # down the response + close = getattr(self.response, "close", None) + self.response = list(self.iter_encoded()) + if close is not None: + self.call_on_close(close) + + def iter_encoded(self) -> t.Iterator[bytes]: + """Iter the response encoded with the encoding of the response. + If the response object is invoked as WSGI application the return + value of this method is used as application iterator unless + :attr:`direct_passthrough` was activated. + """ + if __debug__: + _warn_if_string(self.response) + # Encode in a separate function so that self.response is fetched + # early. This allows us to wrap the response with the return + # value from get_app_iter or iter_encoded. + return _iter_encoded(self.response, self.charset) + + @property + def is_streamed(self) -> bool: + """If the response is streamed (the response is not an iterable with + a length information) this property is `True`. In this case streamed + means that there is no information about the number of iterations. + This is usually `True` if a generator is passed to the response object. + + This is useful for checking before applying some sort of post + filtering that should not take place for streamed responses. + """ + try: + len(self.response) # type: ignore + except (TypeError, AttributeError): + return True + return False + + @property + def is_sequence(self) -> bool: + """If the iterator is buffered, this property will be `True`. 
A + response object will consider an iterator to be buffered if the + response attribute is a list or tuple. + + .. versionadded:: 0.6 + """ + return isinstance(self.response, (tuple, list)) + + def close(self) -> None: + """Close the wrapped response if possible. You can also use the object + in a with statement which will automatically close it. + + .. versionadded:: 0.9 + Can now be used in a with statement. + """ + if hasattr(self.response, "close"): + self.response.close() # type: ignore + for func in self._on_close: + func() + + def __enter__(self) -> "Response": + return self + + def __exit__(self, exc_type, exc_value, tb): # type: ignore + self.close() + + def freeze(self) -> None: + """Make the response object ready to be pickled. Does the + following: + + * Buffer the response into a list, ignoring + :attr:`implicity_sequence_conversion` and + :attr:`direct_passthrough`. + * Set the ``Content-Length`` header. + * Generate an ``ETag`` header if one is not already set. + + .. versionchanged:: 2.1 + Removed the ``no_etag`` parameter. + + .. versionchanged:: 2.0 + An ``ETag`` header is added, the ``no_etag`` parameter is + deprecated and will be removed in Werkzeug 2.1. + + .. versionchanged:: 0.6 + The ``Content-Length`` header is set. + """ + # Always freeze the encoded response body, ignore + # implicit_sequence_conversion and direct_passthrough. + self.response = list(self.iter_encoded()) + self.headers["Content-Length"] = str(sum(map(len, self.response))) + self.add_etag() + + def get_wsgi_headers(self, environ: "WSGIEnvironment") -> Headers: + """This is automatically called right before the response is started + and returns headers modified for the given environment. It returns a + copy of the headers from the response with some modifications applied + if necessary. + + For example the location header (if present) is joined with the root + URL of the environment. Also the content length is automatically set + to zero here for certain status codes. 
+ + .. versionchanged:: 0.6 + Previously that function was called `fix_headers` and modified + the response object in place. Also since 0.6, IRIs in location + and content-location headers are handled properly. + + Also starting with 0.6, Werkzeug will attempt to set the content + length if it is able to figure it out on its own. This is the + case if all the strings in the response iterable are already + encoded and the iterable is buffered. + + :param environ: the WSGI environment of the request. + :return: returns a new :class:`~werkzeug.datastructures.Headers` + object. + """ + headers = Headers(self.headers) + location: t.Optional[str] = None + content_location: t.Optional[str] = None + content_length: t.Optional[t.Union[str, int]] = None + status = self.status_code + + # iterate over the headers to find all values in one go. Because + # get_wsgi_headers is used each response that gives us a tiny + # speedup. + for key, value in headers: + ikey = key.lower() + if ikey == "location": + location = value + elif ikey == "content-location": + content_location = value + elif ikey == "content-length": + content_length = value + + # make sure the location header is an absolute URL + if location is not None: + old_location = location + if isinstance(location, str): + # Safe conversion is necessary here as we might redirect + # to a broken URI scheme (for instance itms-services). 
+ location = iri_to_uri(location, safe_conversion=True) + + if self.autocorrect_location_header: + current_url = get_current_url(environ, strip_querystring=True) + if isinstance(current_url, str): + current_url = iri_to_uri(current_url) + location = url_join(current_url, location) + if location != old_location: + headers["Location"] = location + + # make sure the content location is a URL + if content_location is not None and isinstance(content_location, str): + headers["Content-Location"] = iri_to_uri(content_location) + + if 100 <= status < 200 or status == 204: + # Per section 3.3.2 of RFC 7230, "a server MUST NOT send a + # Content-Length header field in any response with a status + # code of 1xx (Informational) or 204 (No Content)." + headers.remove("Content-Length") + elif status == 304: + remove_entity_headers(headers) + + # if we can determine the content length automatically, we + # should try to do that. But only if this does not involve + # flattening the iterator or encoding of strings in the + # response. We however should not do that if we have a 304 + # response. + if ( + self.automatically_set_content_length + and self.is_sequence + and content_length is None + and status not in (204, 304) + and not (100 <= status < 200) + ): + try: + content_length = sum(len(_to_bytes(x, "ascii")) for x in self.response) + except UnicodeError: + # Something other than bytes, can't safely figure out + # the length of the response. + pass + else: + headers["Content-Length"] = str(content_length) + + return headers + + def get_app_iter(self, environ: "WSGIEnvironment") -> t.Iterable[bytes]: + """Returns the application iterator for the given environ. Depending + on the request method and the current status code the return value + might be an empty response rather than the one from the response. + + If the request method is `HEAD` or the status code is in a range + where the HTTP specification requires an empty response, an empty + iterable is returned. + + .. 
versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: a response iterable. + """ + status = self.status_code + if ( + environ["REQUEST_METHOD"] == "HEAD" + or 100 <= status < 200 + or status in (204, 304) + ): + iterable: t.Iterable[bytes] = () + elif self.direct_passthrough: + if __debug__: + _warn_if_string(self.response) + return self.response # type: ignore + else: + iterable = self.iter_encoded() + return ClosingIterator(iterable, self.close) + + def get_wsgi_response( + self, environ: "WSGIEnvironment" + ) -> t.Tuple[t.Iterable[bytes], str, t.List[t.Tuple[str, str]]]: + """Returns the final WSGI response as tuple. The first item in + the tuple is the application iterator, the second the status and + the third the list of headers. The response returned is created + specially for the given environment. For example if the request + method in the WSGI environment is ``'HEAD'`` the response will + be empty and only the headers and status code will be present. + + .. versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: an ``(app_iter, status, headers)`` tuple. + """ + headers = self.get_wsgi_headers(environ) + app_iter = self.get_app_iter(environ) + return app_iter, self.status, headers.to_wsgi_list() + + def __call__( + self, environ: "WSGIEnvironment", start_response: "StartResponse" + ) -> t.Iterable[bytes]: + """Process this response as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + :return: an application iterator + """ + app_iter, status, headers = self.get_wsgi_response(environ) + start_response(status, headers) + return app_iter + + # JSON + + #: A module or other object that has ``dumps`` and ``loads`` + #: functions that match the API of the built-in :mod:`json` module. 
+ json_module = json + + @property + def json(self) -> t.Optional[t.Any]: + """The parsed JSON data if :attr:`mimetype` indicates JSON + (:mimetype:`application/json`, see :attr:`is_json`). + + Calls :meth:`get_json` with default arguments. + """ + return self.get_json() + + def get_json(self, force: bool = False, silent: bool = False) -> t.Optional[t.Any]: + """Parse :attr:`data` as JSON. Useful during testing. + + If the mimetype does not indicate JSON + (:mimetype:`application/json`, see :attr:`is_json`), this + returns ``None``. + + Unlike :meth:`Request.get_json`, the result is not cached. + + :param force: Ignore the mimetype and always try to parse JSON. + :param silent: Silence parsing errors and return ``None`` + instead. + """ + if not (force or self.is_json): + return None + + data = self.get_data() + + try: + return self.json_module.loads(data) + except ValueError: + if not silent: + raise + + return None + + # Stream + + @cached_property + def stream(self) -> "ResponseStream": + """The response iterable as write-only stream.""" + return ResponseStream(self) + + def _wrap_range_response(self, start: int, length: int) -> None: + """Wrap existing Response in case of Range Request context.""" + if self.status_code == 206: + self.response = _RangeWrapper(self.response, start, length) # type: ignore + + def _is_range_request_processable(self, environ: "WSGIEnvironment") -> bool: + """Return ``True`` if `Range` header is present and if underlying + resource is considered unchanged when compared with `If-Range` header. 
+ """ + return ( + "HTTP_IF_RANGE" not in environ + or not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ignore_if_range=False, + ) + ) and "HTTP_RANGE" in environ + + def _process_range_request( + self, + environ: "WSGIEnvironment", + complete_length: t.Optional[int] = None, + accept_ranges: t.Optional[t.Union[bool, str]] = None, + ) -> bool: + """Handle Range Request related headers (RFC7233). If `Accept-Ranges` + header is valid, and Range Request is processable, we set the headers + as described by the RFC, and wrap the underlying response in a + RangeWrapper. + + Returns ``True`` if Range Request can be fulfilled, ``False`` otherwise. + + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. + + .. versionchanged:: 2.0 + Returns ``False`` if the length is 0. + """ + from ..exceptions import RequestedRangeNotSatisfiable + + if ( + accept_ranges is None + or complete_length is None + or complete_length == 0 + or not self._is_range_request_processable(environ) + ): + return False + + parsed_range = parse_range_header(environ.get("HTTP_RANGE")) + + if parsed_range is None: + raise RequestedRangeNotSatisfiable(complete_length) + + range_tuple = parsed_range.range_for_length(complete_length) + content_range_header = parsed_range.to_content_range_header(complete_length) + + if range_tuple is None or content_range_header is None: + raise RequestedRangeNotSatisfiable(complete_length) + + content_length = range_tuple[1] - range_tuple[0] + self.headers["Content-Length"] = content_length + self.headers["Accept-Ranges"] = accept_ranges + self.content_range = content_range_header # type: ignore + self.status_code = 206 + self._wrap_range_response(range_tuple[0], content_length) + return True + + def make_conditional( + self, + request_or_environ: t.Union["WSGIEnvironment", "Request"], + accept_ranges: t.Union[bool, str] = False, + 
complete_length: t.Optional[int] = None, + ) -> "Response": + """Make the response conditional to the request. This method works + best if an etag was defined for the response already. The `add_etag` + method can be used to do that. If called without etag just the date + header is set. + + This does nothing if the request method in the request or environ is + anything but GET or HEAD. + + For optimal performance when handling range requests, it's recommended + that your response data object implements `seekable`, `seek` and `tell` + methods as described by :py:class:`io.IOBase`. Objects returned by + :meth:`~werkzeug.wsgi.wrap_file` automatically implement those methods. + + It does not remove the body of the response because that's something + the :meth:`__call__` function does for us automatically. + + Returns self so that you can do ``return resp.make_conditional(req)`` + but modifies the object in-place. + + :param request_or_environ: a request object or WSGI environment to be + used to make the response conditional + against. + :param accept_ranges: This parameter dictates the value of + `Accept-Ranges` header. If ``False`` (default), + the header is not set. If ``True``, it will be set + to ``"bytes"``. If ``None``, it will be set to + ``"none"``. If it's a string, it will use this + value. + :param complete_length: Will be used only in valid Range Requests. + It will set `Content-Range` complete length + value and compute `Content-Length` real value. + This parameter is mandatory for successful + Range Requests completion. + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. + + .. versionchanged:: 2.0 + Range processing is skipped if length is 0 instead of + raising a 416 Range Not Satisfiable error. + """ + environ = _get_environ(request_or_environ) + if environ["REQUEST_METHOD"] in ("GET", "HEAD"): + # if the date is not in the headers, add it now. 
We however + # will not override an already existing header. Unfortunately + # this header will be overridden by many WSGI servers including + # wsgiref. + if "date" not in self.headers: + self.headers["Date"] = http_date() + accept_ranges = _clean_accept_ranges(accept_ranges) + is206 = self._process_range_request(environ, complete_length, accept_ranges) + if not is206 and not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ): + if parse_etags(environ.get("HTTP_IF_MATCH")): + self.status_code = 412 + else: + self.status_code = 304 + if ( + self.automatically_set_content_length + and "content-length" not in self.headers + ): + length = self.calculate_content_length() + if length is not None: + self.headers["Content-Length"] = length + return self + + def add_etag(self, overwrite: bool = False, weak: bool = False) -> None: + """Add an etag for the current response if there is none yet. + + .. versionchanged:: 2.0 + SHA-1 is used to generate the value. MD5 may not be + available in some environments. + """ + if overwrite or "etag" not in self.headers: + self.set_etag(generate_etag(self.get_data()), weak) + + +class ResponseStream: + """A file descriptor like object used by :meth:`Response.stream` to + represent the body of the stream. It directly pushes into the + response iterable of the response object. 
+ """ + + mode = "wb+" + + def __init__(self, response: Response): + self.response = response + self.closed = False + + def write(self, value: bytes) -> int: + if self.closed: + raise ValueError("I/O operation on closed file") + self.response._ensure_sequence(mutable=True) + self.response.response.append(value) # type: ignore + self.response.headers.pop("Content-Length", None) + return len(value) + + def writelines(self, seq: t.Iterable[bytes]) -> None: + for item in seq: + self.write(item) + + def close(self) -> None: + self.closed = True + + def flush(self) -> None: + if self.closed: + raise ValueError("I/O operation on closed file") + + def isatty(self) -> bool: + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def tell(self) -> int: + self.response._ensure_sequence() + return sum(map(len, self.response.response)) + + @property + def encoding(self) -> str: + return self.response.charset diff --git a/src/werkzeug/wsgi.py b/src/werkzeug/wsgi.py new file mode 100644 index 0000000..24ece0b --- /dev/null +++ b/src/werkzeug/wsgi.py @@ -0,0 +1,1020 @@ +import io +import re +import typing as t +import warnings +from functools import partial +from functools import update_wrapper +from itertools import chain + +from ._internal import _make_encode_wrapper +from ._internal import _to_bytes +from ._internal import _to_str +from .sansio import utils as _sansio_utils +from .sansio.utils import host_is_trusted # noqa: F401 # Imported as part of API +from .urls import _URLTuple +from .urls import uri_to_iri +from .urls import url_join +from .urls import url_parse +from .urls import url_quote + +if t.TYPE_CHECKING: + from _typeshed.wsgi import WSGIApplication + from _typeshed.wsgi import WSGIEnvironment + + +def responder(f: t.Callable[..., "WSGIApplication"]) -> "WSGIApplication": + """Marks a function as responder. Decorate a function with it and it + will automatically call the return value as WSGI application. 
+ + Example:: + + @responder + def application(environ, start_response): + return Response('Hello World!') + """ + return update_wrapper(lambda *a: f(*a)(*a[-2:]), f) + + +def get_current_url( + environ: "WSGIEnvironment", + root_only: bool = False, + strip_querystring: bool = False, + host_only: bool = False, + trusted_hosts: t.Optional[t.Iterable[str]] = None, +) -> str: + """Recreate the URL for a request from the parts in a WSGI + environment. + + The URL is an IRI, not a URI, so it may contain Unicode characters. + Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII. + + :param environ: The WSGI environment to get the URL parts from. + :param root_only: Only build the root path, don't include the + remaining path or query string. + :param strip_querystring: Don't include the query string. + :param host_only: Only build the scheme and host. + :param trusted_hosts: A list of trusted host names to validate the + host against. + """ + parts = { + "scheme": environ["wsgi.url_scheme"], + "host": get_host(environ, trusted_hosts), + } + + if not host_only: + parts["root_path"] = environ.get("SCRIPT_NAME", "") + + if not root_only: + parts["path"] = environ.get("PATH_INFO", "") + + if not strip_querystring: + parts["query_string"] = environ.get("QUERY_STRING", "").encode("latin1") + + return _sansio_utils.get_current_url(**parts) + + +def _get_server( + environ: "WSGIEnvironment", +) -> t.Optional[t.Tuple[str, t.Optional[int]]]: + name = environ.get("SERVER_NAME") + + if name is None: + return None + + try: + port: t.Optional[int] = int(environ.get("SERVER_PORT", None)) + except (TypeError, ValueError): + # unix socket + port = None + + return name, port + + +def get_host( + environ: "WSGIEnvironment", trusted_hosts: t.Optional[t.Iterable[str]] = None +) -> str: + """Return the host for the given WSGI environment. + + The ``Host`` header is preferred, then ``SERVER_NAME`` if it's not + set. 
The returned host will only contain the port if it is different + than the standard port for the protocol. + + Optionally, verify that the host is trusted using + :func:`host_is_trusted` and raise a + :exc:`~werkzeug.exceptions.SecurityError` if it is not. + + :param environ: A WSGI environment dict. + :param trusted_hosts: A list of trusted host names. + + :return: Host, with port if necessary. + :raise ~werkzeug.exceptions.SecurityError: If the host is not + trusted. + """ + return _sansio_utils.get_host( + environ["wsgi.url_scheme"], + environ.get("HTTP_HOST"), + _get_server(environ), + trusted_hosts, + ) + + +def get_content_length(environ: "WSGIEnvironment") -> t.Optional[int]: + """Returns the content length from the WSGI environment as + integer. If it's not available or chunked transfer encoding is used, + ``None`` is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the content length from. + """ + return _sansio_utils.get_content_length( + http_content_length=environ.get("CONTENT_LENGTH"), + http_transfer_encoding=environ.get("HTTP_TRANSFER_ENCODING", ""), + ) + + +def get_input_stream( + environ: "WSGIEnvironment", safe_fallback: bool = True +) -> t.IO[bytes]: + """Returns the input stream from the WSGI environment and wraps it + in the most sensible way possible. The stream returned is not the + raw WSGI stream in most cases but one that is safe to read from + without taking into account the content length. + + If content length is not set, the stream will be empty for safety reasons. + If the WSGI server supports chunked or infinite streams, it should set + the ``wsgi.input_terminated`` value in the WSGI environ to indicate that. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the stream from. + :param safe_fallback: use an empty stream as a safe fallback when the + content length is not set. Disabling this allows infinite streams, + which can be a denial-of-service risk. 
+ """ + stream = t.cast(t.IO[bytes], environ["wsgi.input"]) + content_length = get_content_length(environ) + + # A wsgi extension that tells us if the input is terminated. In + # that case we return the stream unchanged as we know we can safely + # read it until the end. + if environ.get("wsgi.input_terminated"): + return stream + + # If the request doesn't specify a content length, returning the stream is + # potentially dangerous because it could be infinite, malicious or not. If + # safe_fallback is true, return an empty stream instead for safety. + if content_length is None: + return io.BytesIO() if safe_fallback else stream + + # Otherwise limit the stream to the content length + return t.cast(t.IO[bytes], LimitedStream(stream, content_length)) + + +def get_query_string(environ: "WSGIEnvironment") -> str: + """Returns the ``QUERY_STRING`` from the WSGI environment. This also + takes care of the WSGI decoding dance. The string returned will be + restricted to ASCII characters. + + :param environ: WSGI environment to get the query string from. + + .. deprecated:: 2.2 + Will be removed in Werkzeug 2.3. + + .. versionadded:: 0.9 + """ + warnings.warn( + "'get_query_string' is deprecated and will be removed in Werkzeug 2.3.", + DeprecationWarning, + stacklevel=2, + ) + qs = environ.get("QUERY_STRING", "").encode("latin1") + # QUERY_STRING really should be ascii safe but some browsers + # will send us some unicode stuff (I am looking at you IE). + # In that case we want to urllib quote it badly. + return url_quote(qs, safe=":&%=+$!*'(),") + + +def get_path_info( + environ: "WSGIEnvironment", charset: str = "utf-8", errors: str = "replace" +) -> str: + """Return the ``PATH_INFO`` from the WSGI environment and decode it + unless ``charset`` is ``None``. + + :param environ: WSGI environment to get the path from. + :param charset: The charset for the path info, or ``None`` if no + decoding should be performed. + :param errors: The decoding error handling. + + .. 
versionadded:: 0.9 + """ + path = environ.get("PATH_INFO", "").encode("latin1") + return _to_str(path, charset, errors, allow_none_charset=True) # type: ignore + + +def get_script_name( + environ: "WSGIEnvironment", charset: str = "utf-8", errors: str = "replace" +) -> str: + """Return the ``SCRIPT_NAME`` from the WSGI environment and decode + it unless `charset` is set to ``None``. + + :param environ: WSGI environment to get the path from. + :param charset: The charset for the path, or ``None`` if no decoding + should be performed. + :param errors: The decoding error handling. + + .. deprecated:: 2.2 + Will be removed in Werkzeug 2.3. + + .. versionadded:: 0.9 + """ + warnings.warn( + "'get_script_name' is deprecated and will be removed in Werkzeug 2.3.", + DeprecationWarning, + stacklevel=2, + ) + path = environ.get("SCRIPT_NAME", "").encode("latin1") + return _to_str(path, charset, errors, allow_none_charset=True) # type: ignore + + +def pop_path_info( + environ: "WSGIEnvironment", charset: str = "utf-8", errors: str = "replace" +) -> t.Optional[str]: + """Removes and returns the next segment of `PATH_INFO`, pushing it onto + `SCRIPT_NAME`. Returns `None` if there is nothing left on `PATH_INFO`. + + If the `charset` is set to `None` bytes are returned. + + If there are empty segments (``'/foo//bar``) these are ignored but + properly pushed to the `SCRIPT_NAME`: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> pop_path_info(env) + 'a' + >>> env['SCRIPT_NAME'] + '/foo/a' + >>> pop_path_info(env) + 'b' + >>> env['SCRIPT_NAME'] + '/foo/a/b' + + .. deprecated:: 2.2 + Will be removed in Werkzeug 2.3. + + .. versionadded:: 0.5 + + .. versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is modified. + :param charset: The ``encoding`` parameter passed to + :func:`bytes.decode`. + :param errors: The ``errors`` paramater passed to + :func:`bytes.decode`. 
+ """ + warnings.warn( + "'pop_path_info' is deprecated and will be removed in Werkzeug 2.3.", + DeprecationWarning, + stacklevel=2, + ) + + path = environ.get("PATH_INFO") + if not path: + return None + + script_name = environ.get("SCRIPT_NAME", "") + + # shift multiple leading slashes over + old_path = path + path = path.lstrip("/") + if path != old_path: + script_name += "/" * (len(old_path) - len(path)) + + if "/" not in path: + environ["PATH_INFO"] = "" + environ["SCRIPT_NAME"] = script_name + path + rv = path.encode("latin1") + else: + segment, path = path.split("/", 1) + environ["PATH_INFO"] = f"/{path}" + environ["SCRIPT_NAME"] = script_name + segment + rv = segment.encode("latin1") + + return _to_str(rv, charset, errors, allow_none_charset=True) # type: ignore + + +def peek_path_info( + environ: "WSGIEnvironment", charset: str = "utf-8", errors: str = "replace" +) -> t.Optional[str]: + """Returns the next segment on the `PATH_INFO` or `None` if there + is none. Works like :func:`pop_path_info` without modifying the + environment: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> peek_path_info(env) + 'a' + >>> peek_path_info(env) + 'a' + + If the `charset` is set to `None` bytes are returned. + + .. deprecated:: 2.2 + Will be removed in Werkzeug 2.3. + + .. versionadded:: 0.5 + + .. versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is checked. 
+ """ + warnings.warn( + "'peek_path_info' is deprecated and will be removed in Werkzeug 2.3.", + DeprecationWarning, + stacklevel=2, + ) + + segments = environ.get("PATH_INFO", "").lstrip("/").split("/", 1) + if segments: + return _to_str( # type: ignore + segments[0].encode("latin1"), charset, errors, allow_none_charset=True + ) + return None + + +def extract_path_info( + environ_or_baseurl: t.Union[str, "WSGIEnvironment"], + path_or_url: t.Union[str, _URLTuple], + charset: str = "utf-8", + errors: str = "werkzeug.url_quote", + collapse_http_schemes: bool = True, +) -> t.Optional[str]: + """Extracts the path info from the given URL (or WSGI environment) and + path. The path info returned is a string. The URLs might also be IRIs. + + If the path info could not be determined, `None` is returned. + + Some examples: + + >>> extract_path_info('http://example.com/app', '/app/hello') + '/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello') + '/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello', + ... collapse_http_schemes=False) is None + True + + Instead of providing a base URL you can also pass a WSGI environment. + + :param environ_or_baseurl: a WSGI environment dict, a base URL or + base IRI. This is the root of the + application. + :param path_or_url: an absolute path from the server root, a + relative path (in which case it's the path info) + or a full URL. + :param charset: the charset for byte data in URLs + :param errors: the error handling on decode + :param collapse_http_schemes: if set to `False` the algorithm does + not assume that http and https on the + same server point to the same + resource. + + .. deprecated:: 2.2 + Will be removed in Werkzeug 2.3. + + .. versionchanged:: 0.15 + The ``errors`` parameter defaults to leaving invalid bytes + quoted instead of replacing them. + + .. 
versionadded:: 0.6 + + """ + warnings.warn( + "'extract_path_info' is deprecated and will be removed in Werkzeug 2.3.", + DeprecationWarning, + stacklevel=2, + ) + + def _normalize_netloc(scheme: str, netloc: str) -> str: + parts = netloc.split("@", 1)[-1].split(":", 1) + port: t.Optional[str] + + if len(parts) == 2: + netloc, port = parts + if (scheme == "http" and port == "80") or ( + scheme == "https" and port == "443" + ): + port = None + else: + netloc = parts[0] + port = None + + if port is not None: + netloc += f":{port}" + + return netloc + + # make sure whatever we are working on is a IRI and parse it + path = uri_to_iri(path_or_url, charset, errors) + if isinstance(environ_or_baseurl, dict): + environ_or_baseurl = get_current_url(environ_or_baseurl, root_only=True) + base_iri = uri_to_iri(environ_or_baseurl, charset, errors) + base_scheme, base_netloc, base_path = url_parse(base_iri)[:3] + cur_scheme, cur_netloc, cur_path = url_parse(url_join(base_iri, path))[:3] + + # normalize the network location + base_netloc = _normalize_netloc(base_scheme, base_netloc) + cur_netloc = _normalize_netloc(cur_scheme, cur_netloc) + + # is that IRI even on a known HTTP scheme? + if collapse_http_schemes: + for scheme in base_scheme, cur_scheme: + if scheme not in ("http", "https"): + return None + else: + if not (base_scheme in ("http", "https") and base_scheme == cur_scheme): + return None + + # are the netlocs compatible? + if base_netloc != cur_netloc: + return None + + # are we below the application path? + base_path = base_path.rstrip("/") + if not cur_path.startswith(base_path): + return None + + return f"/{cur_path[len(base_path) :].lstrip('/')}" + + +class ClosingIterator: + """The WSGI specification requires that all middlewares and gateways + respect the `close` callback of the iterable returned by the application. 
+ Because it is useful to add another close action to a returned iterable + and adding a custom iterable is a boring task this class can be used for + that:: + + return ClosingIterator(app(environ, start_response), [cleanup_session, + cleanup_locals]) + + If there is just one close function it can be passed instead of the list. + + A closing iterator is not needed if the application uses response objects + and finishes the processing if the response is started:: + + try: + return response(environ, start_response) + finally: + cleanup_session() + cleanup_locals() + """ + + def __init__( + self, + iterable: t.Iterable[bytes], + callbacks: t.Optional[ + t.Union[t.Callable[[], None], t.Iterable[t.Callable[[], None]]] + ] = None, + ) -> None: + iterator = iter(iterable) + self._next = t.cast(t.Callable[[], bytes], partial(next, iterator)) + if callbacks is None: + callbacks = [] + elif callable(callbacks): + callbacks = [callbacks] + else: + callbacks = list(callbacks) + iterable_close = getattr(iterable, "close", None) + if iterable_close: + callbacks.insert(0, iterable_close) + self._callbacks = callbacks + + def __iter__(self) -> "ClosingIterator": + return self + + def __next__(self) -> bytes: + return self._next() + + def close(self) -> None: + for callback in self._callbacks: + callback() + + +def wrap_file( + environ: "WSGIEnvironment", file: t.IO[bytes], buffer_size: int = 8192 +) -> t.Iterable[bytes]: + """Wraps a file. This uses the WSGI server's file wrapper if available + or otherwise the generic :class:`FileWrapper`. + + .. versionadded:: 0.5 + + If the file wrapper from the WSGI server is used it's important to not + iterate over it from inside the application but to pass it through + unchanged. If you want to pass out a file wrapper inside a response + object you have to set :attr:`Response.direct_passthrough` to `True`. + + More information about file wrappers are available in :pep:`333`. 
+ + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. + """ + return environ.get("wsgi.file_wrapper", FileWrapper)( # type: ignore + file, buffer_size + ) + + +class FileWrapper: + """This class can be used to convert a :class:`file`-like object into + an iterable. It yields `buffer_size` blocks until the file is fully + read. + + You should not use this class directly but rather use the + :func:`wrap_file` function that uses the WSGI server's file wrapper + support if it's available. + + .. versionadded:: 0.5 + + If you're using this object together with a :class:`Response` you have + to use the `direct_passthrough` mode. + + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. + """ + + def __init__(self, file: t.IO[bytes], buffer_size: int = 8192) -> None: + self.file = file + self.buffer_size = buffer_size + + def close(self) -> None: + if hasattr(self.file, "close"): + self.file.close() + + def seekable(self) -> bool: + if hasattr(self.file, "seekable"): + return self.file.seekable() + if hasattr(self.file, "seek"): + return True + return False + + def seek(self, *args: t.Any) -> None: + if hasattr(self.file, "seek"): + self.file.seek(*args) + + def tell(self) -> t.Optional[int]: + if hasattr(self.file, "tell"): + return self.file.tell() + return None + + def __iter__(self) -> "FileWrapper": + return self + + def __next__(self) -> bytes: + data = self.file.read(self.buffer_size) + if data: + return data + raise StopIteration() + + +class _RangeWrapper: + # private for now, but should we make it public in the future ? + + """This class can be used to convert an iterable object into + an iterable that will only yield a piece of the underlying content. + It yields blocks until the underlying stream range is fully read. 
+ The yielded blocks will have a size that can't exceed the original + iterator defined block size, but that can be smaller. + + If you're using this object together with a :class:`Response` you have + to use the `direct_passthrough` mode. + + :param iterable: an iterable object with a :meth:`__next__` method. + :param start_byte: byte from which read will start. + :param byte_range: how many bytes to read. + """ + + def __init__( + self, + iterable: t.Union[t.Iterable[bytes], t.IO[bytes]], + start_byte: int = 0, + byte_range: t.Optional[int] = None, + ): + self.iterable = iter(iterable) + self.byte_range = byte_range + self.start_byte = start_byte + self.end_byte = None + + if byte_range is not None: + self.end_byte = start_byte + byte_range + + self.read_length = 0 + self.seekable = ( + hasattr(iterable, "seekable") and iterable.seekable() # type: ignore + ) + self.end_reached = False + + def __iter__(self) -> "_RangeWrapper": + return self + + def _next_chunk(self) -> bytes: + try: + chunk = next(self.iterable) + self.read_length += len(chunk) + return chunk + except StopIteration: + self.end_reached = True + raise + + def _first_iteration(self) -> t.Tuple[t.Optional[bytes], int]: + chunk = None + if self.seekable: + self.iterable.seek(self.start_byte) # type: ignore + self.read_length = self.iterable.tell() # type: ignore + contextual_read_length = self.read_length + else: + while self.read_length <= self.start_byte: + chunk = self._next_chunk() + if chunk is not None: + chunk = chunk[self.start_byte - self.read_length :] + contextual_read_length = self.start_byte + return chunk, contextual_read_length + + def _next(self) -> bytes: + if self.end_reached: + raise StopIteration() + chunk = None + contextual_read_length = self.read_length + if self.read_length == 0: + chunk, contextual_read_length = self._first_iteration() + if chunk is None: + chunk = self._next_chunk() + if self.end_byte is not None and self.read_length >= self.end_byte: + self.end_reached = 
def make_line_iter(
    stream: t.Union[t.Iterable[bytes], t.IO[bytes]],
    limit: t.Optional[int] = None,
    buffer_size: int = 10 * 1024,
    cap_at_buffer: bool = False,
) -> t.Iterator[bytes]:
    """Safely iterates line-based over an input stream.  If the input stream
    is not a :class:`LimitedStream` the `limit` parameter is mandatory.

    This uses the stream's :meth:`~file.read` method internally as opposed
    to the :meth:`~file.readline` method that is unsafe and can only be used
    in violation of the WSGI specification.  The same problem applies to the
    `__iter__` function of the input stream which calls :meth:`~file.readline`
    without arguments.

    If you need line-by-line processing it's strongly recommended to iterate
    over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    .. versionadded:: 0.11.10
       added support for the `cap_at_buffer` parameter.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream.  (Usually
                  content length.  Not necessary if the `stream`
                  is a :class:`LimitedStream`.)
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set chunks are split if they are longer
                          than the buffer size.  Internally this is implemented
                          that the buffer size might be exhausted by a factor
                          of two however.
    :return: an iterator over the lines, each including its line ending.
    """
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    # Peek at the first chunk: an empty stream yields nothing, and the
    # chunk's type decides whether the markers below are str or bytes.
    first_item = next(_iter, "")
    if not first_item:
        return

    s = _make_encode_wrapper(first_item)
    empty = t.cast(bytes, s(""))
    cr = t.cast(bytes, s("\r"))
    lf = t.cast(bytes, s("\n"))
    crlf = t.cast(bytes, s("\r\n"))

    # Put the peeked chunk back in front of the remaining ones.
    _iter = t.cast(t.Iterator[bytes], chain((first_item,), _iter))

    def _iter_basic_lines() -> t.Iterator[bytes]:
        """Yield lines; a ``\\r\\n`` split across two chunks comes out as
        two items (``...\\r`` and ``\\n``) and is merged by the caller.
        """
        _join = empty.join
        buffer: t.List[bytes] = []
        while True:
            new_data = next(_iter, "")
            if not new_data:
                break
            new_buf: t.List[bytes] = []
            buf_size = 0
            for item in t.cast(
                t.Iterator[bytes], chain(buffer, new_data.splitlines(True))
            ):
                new_buf.append(item)
                buf_size += len(item)
                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
                    # Keep the size counter in step with ``new_buf`` so the
                    # cap check below only fires when there is really
                    # something to split (same bookkeeping as
                    # ``make_chunk_iter``).
                    buf_size = 0
                elif cap_at_buffer and buf_size >= buffer_size:
                    rv = _join(new_buf)
                    while len(rv) >= buffer_size:
                        yield rv[:buffer_size]
                        rv = rv[buffer_size:]
                    new_buf = [rv]
                    buf_size = len(rv)
            # Whatever is left is an unterminated line; carry it over so
            # it is joined with the start of the next chunk.
            buffer = new_buf
        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty
    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty
        if previous:
            yield previous
        previous = item
    if previous:
        yield previous
class LimitedStream(io.IOBase):
    """Wraps a stream so that it doesn't read more than n bytes.  If the
    stream is exhausted and the caller tries to get more bytes from it
    :func:`on_exhausted` is called which by default returns an empty
    string.  The return value of that function is forwarded
    to the reader function.  So if it returns an empty string
    :meth:`read` will return an empty string as well.

    The limit however must never be higher than what the stream can
    output.  Otherwise :meth:`readlines` will try to read past the
    limit.

    .. admonition:: Note on WSGI compliance

       calls to :meth:`readline` and :meth:`readlines` are not
       WSGI compliant because it passes a size argument to the
       readline methods.  Unfortunately the WSGI PEP is not safely
       implementable without a size argument to :meth:`readline`
       because there is no EOF marker in the stream.  As a result
       of that the use of :meth:`readline` is discouraged.

       For the same reason iterating over the :class:`LimitedStream`
       is not portable.  It internally calls :meth:`readline`.

       We strongly suggest using :meth:`read` only or using the
       :func:`make_line_iter` which safely iterates line-based
       over a WSGI input stream.

    :param stream: the stream to wrap.
    :param limit: the limit for the stream, must not be longer than
                  what the stream can provide if the stream does not
                  end with `EOF` (like `wsgi.input`)
    """

    def __init__(self, stream: t.IO[bytes], limit: int) -> None:
        # Only the bound reader methods of the wrapped stream are kept.
        self._read = stream.read
        self._readline = stream.readline
        # Number of bytes handed out so far; never exceeds ``limit``.
        self._pos = 0
        self.limit = limit

    def __iter__(self) -> "LimitedStream":
        return self

    @property
    def is_exhausted(self) -> bool:
        """If the stream is exhausted this attribute is `True`."""
        return self._pos >= self.limit

    def on_exhausted(self) -> bytes:
        """This is called when the stream tries to read past the limit.
        The return value of this function is returned from the reading
        function.
        """
        # Read null bytes from the stream so that we get the
        # correct end of stream marker.
        return self._read(0)

    def on_disconnect(self) -> bytes:
        """What should happen if a disconnect is detected?  The return
        value of this function is returned from read functions in case
        the client went away.  By default a
        :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised.
        """
        from .exceptions import ClientDisconnected

        raise ClientDisconnected()

    def exhaust(self, chunk_size: int = 1024 * 64) -> None:
        """Exhaust the stream.  This consumes all the data left until the
        limit is reached.

        :param chunk_size: the size for a chunk.  It will read the chunk
                           until the stream is exhausted and throw away
                           the results.
        """
        to_read = self.limit - self._pos
        chunk = chunk_size
        while to_read > 0:
            chunk = min(to_read, chunk)
            self.read(chunk)
            to_read -= chunk

    def read(self, size: t.Optional[int] = None) -> bytes:
        """Read `size` bytes or if size is not provided everything is read.

        :param size: the number of bytes read.  ``None`` or any negative
                     value reads everything up to the limit.
        :return: the bytes read, or the result of :meth:`on_exhausted` /
                 :meth:`on_disconnect` when the limit was already reached
                 or the wrapped stream delivered less than requested.
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        # Any negative size means "everything", as with file objects.
        # Passing it through unchanged would make the wrapped stream
        # read past the limit.
        if size is None or size < 0:
            size = self.limit
        to_read = min(self.limit - self._pos, size)
        try:
            read = self._read(to_read)
        except (OSError, ValueError):
            return self.on_disconnect()
        # A short read means the client went away before sending
        # everything it announced.
        if to_read and len(read) != to_read:
            return self.on_disconnect()
        self._pos += len(read)
        return read

    def readline(self, size: t.Optional[int] = None) -> bytes:
        """Reads one line from the stream.

        :param size: maximum number of bytes to return.  ``None`` or a
                     negative value means "up to the limit".
        """
        if self._pos >= self.limit:
            return self.on_exhausted()
        remaining = self.limit - self._pos
        # A negative size must not reach the wrapped ``readline``: it
        # would be treated as "no bound" and could cross the limit.
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)
        try:
            line = self._readline(size)
        except (ValueError, OSError):
            return self.on_disconnect()
        if size and not line:
            return self.on_disconnect()
        self._pos += len(line)
        return line

    def readlines(self, size: t.Optional[int] = None) -> t.List[bytes]:
        """Reads a file into a list of strings.  It calls :meth:`readline`
        until the file is read to the end.  It does support the optional
        `size` argument if the underlying stream supports it for
        `readline`.

        :param size: upper bound on the total number of bytes consumed by
                     this call; ``None`` reads up to the limit.
        """
        result: t.List[bytes] = []
        if size is not None:
            end = min(self.limit, self._pos + size)
        else:
            end = self.limit
        while self._pos < end:
            # Hand each ``readline`` only what is left of the budget so
            # the total consumed never exceeds ``size``.
            budget = None if size is None else end - self._pos
            line = self.readline(budget)
            if not line:
                # Only reachable when an overridden ``on_disconnect``
                # returns empty data; stop instead of spinning forever.
                break
            result.append(line)
        return result

    def tell(self) -> int:
        """Returns the position of the stream.

        .. versionadded:: 0.9
        """
        return self._pos

    def __next__(self) -> bytes:
        line = self.readline()
        if not line:
            raise StopIteration()
        return line

    def readable(self) -> bool:
        return True
def wait_for_log(self, start):
    """Block until a line beginning with ``start`` shows up in ``self.log``.

    ``self.log`` is iterated repeatedly; every pass picks up whatever has
    been appended since the previous one.

    :param start: prefix the awaited log line must begin with.
    """
    import time

    # Text of a line that was read without its trailing newline yet.  A
    # line can be observed in two pieces when the writer is caught
    # mid-line; gluing the pieces together keeps the prefix check from
    # missing it.
    pending = ""

    while True:
        for piece in self.log:
            pending += piece

            if pending.startswith(start):
                return

            if pending.endswith("\n"):
                # Complete, non-matching line: forget it.
                pending = ""

        # Nothing new yet; yield the CPU briefly instead of spinning.
        time.sleep(0.01)
+ xp_name = f"dev_server-{request.node.name}" + _, log_path = xprocess.ensure(xp_name, Starter, restart=True) + client.tail_log(log_path) + + @request.addfinalizer + def close(): + xprocess.getinfo(xp_name).terminate() + client.log.close() + + return client + + return start_dev_server + + +@pytest.fixture() +def standard_app(dev_server): + """Equivalent to ``dev_server("standard")``.""" + return dev_server() diff --git a/tests/live_apps/data_app.py b/tests/live_apps/data_app.py new file mode 100644 index 0000000..a7158c7 --- /dev/null +++ b/tests/live_apps/data_app.py @@ -0,0 +1,19 @@ +import json + +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +@Request.application +def app(request): + return Response( + json.dumps( + { + "environ": request.environ, + "form": request.form, + "files": {k: v.read().decode("utf8") for k, v in request.files.items()}, + }, + default=lambda x: str(x), + ), + content_type="application/json", + ) diff --git a/tests/live_apps/reloader_app.py b/tests/live_apps/reloader_app.py new file mode 100644 index 0000000..4e98ca6 --- /dev/null +++ b/tests/live_apps/reloader_app.py @@ -0,0 +1,23 @@ +import os +import sys + +from werkzeug import _reloader +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + +# Tox puts the tmp dir in the venv sys.prefix, patch the reloader so +# it doesn't skip real_app. 
+if "TOX_ENV_DIR" in os.environ: + _reloader._stat_ignore_scan = tuple( + set(_reloader._stat_ignore_scan) - {sys.prefix, sys.exec_prefix} + ) + + +@Request.application +def app(request): + import real_app # type: ignore + + return Response.from_app(real_app.app, request.environ) + + +kwargs = {"use_reloader": True, "reloader_interval": 0.1} diff --git a/tests/live_apps/run.py b/tests/live_apps/run.py new file mode 100644 index 0000000..aacdcb6 --- /dev/null +++ b/tests/live_apps/run.py @@ -0,0 +1,32 @@ +import json +import sys +from importlib import import_module + +from werkzeug.serving import generate_adhoc_ssl_context +from werkzeug.serving import run_simple +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + +name = sys.argv[1] +mod = import_module(f"{name}_app") + + +@Request.application +def app(request): + if request.path == "/ensure": + return Response() + + return Response.from_app(mod.app, request.environ) + + +kwargs = getattr(mod, "kwargs", {}) +kwargs.update(hostname="127.0.0.1", port=5000, application=app) +kwargs.update(json.loads(sys.argv[2])) +ssl_context = kwargs.get("ssl_context") + +if ssl_context == "custom": + kwargs["ssl_context"] = generate_adhoc_ssl_context() +elif isinstance(ssl_context, list): + kwargs["ssl_context"] = tuple(ssl_context) + +run_simple(**kwargs) diff --git a/tests/live_apps/standard_app.py b/tests/live_apps/standard_app.py new file mode 100644 index 0000000..ef7798d --- /dev/null +++ b/tests/live_apps/standard_app.py @@ -0,0 +1,15 @@ +import json + +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +@Request.application +def app(request): + if request.path == "/crash": + raise Exception("crash requested") + + return Response( + json.dumps(request.environ, default=lambda x: str(x)), + content_type="application/json", + ) diff --git a/tests/live_apps/streaming_app.py b/tests/live_apps/streaming_app.py new file mode 100644 index 0000000..c8aad46 --- /dev/null +++ 
def test_dispatcher():
    """Requests under a mounted prefix reach the mounted app with that
    prefix as ``SCRIPT_NAME``; everything else falls through to the
    default app.
    """

    def null_application(environ, start_response):
        start_response("404 NOT FOUND", [("Content-Type", "text/plain")])
        yield b"NOT FOUND"

    def dummy_application(environ, start_response):
        # Echo the script name so the test can see which mount matched.
        start_response("200 OK", [("Content-Type", "text/plain")])
        yield _to_bytes(environ["SCRIPT_NAME"])

    mounts = {"/test1": dummy_application, "/test2/very": dummy_application}
    app = DispatcherMiddleware(null_application, mounts)

    # (expected SCRIPT_NAME, requested path), in request order.
    cases = [
        ("/test1", "/test1"),
        ("/test1", "/test1/asfd"),
        ("/test1", "/test1/very"),
        ("/test2/very", "/test2/very"),
        ("/test2/very", "/test2/very/long/path/after/script/name"),
    ]

    for script_name, path in cases:
        body, status, _headers = run_wsgi_app(app, create_environ(path))
        assert status == "200 OK"
        assert b"".join(body).strip() == _to_bytes(script_name)

    body, status, _headers = run_wsgi_app(app, create_environ("/missing"))
    assert status == "404 NOT FOUND"
    assert b"".join(body).strip() == b"NOT FOUND"
import Client +from werkzeug.wrappers import Response + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_http_proxy(standard_app): + app = ProxyMiddleware( + Response("ROOT"), + { + "/foo": { + "target": standard_app.url, + "host": "faked.invalid", + "headers": {"X-Special": "foo"}, + }, + "/bar": { + "target": standard_app.url, + "host": None, + "remove_prefix": True, + "headers": {"X-Special": "bar"}, + }, + "/autohost": {"target": standard_app.url}, + }, + ) + + client = Client(app) + + r = client.get("/") + assert r.data == b"ROOT" + + r = client.get("/foo/bar") + assert r.json["HTTP_X_SPECIAL"] == "foo" + assert r.json["HTTP_HOST"] == "faked.invalid" + assert r.json["PATH_INFO"] == "/foo/bar" + + r = client.get("/bar/baz?a=a&b=b") + assert r.json["HTTP_X_SPECIAL"] == "bar" + assert r.json["HTTP_HOST"] == "localhost" + assert r.json["PATH_INFO"] == "/baz" + assert r.json["QUERY_STRING"] == "a=a&b=b" + + r = client.get("/autohost/aha") + assert "HTTP_X_SPECIAL" not in r.json + assert r.json["HTTP_HOST"] == "127.0.0.1" + assert r.json["PATH_INFO"] == "/autohost/aha" + + # test if characters allowed in URL are not encoded by proxy + r = client.get("/autohost/$") + assert r.json["REQUEST_URI"] == "/autohost/$" diff --git a/tests/middleware/test_lint.py b/tests/middleware/test_lint.py new file mode 100644 index 0000000..ca2b92e --- /dev/null +++ b/tests/middleware/test_lint.py @@ -0,0 +1,86 @@ +import pytest + +from werkzeug.middleware.lint import HTTPWarning +from werkzeug.middleware.lint import LintMiddleware +from werkzeug.middleware.lint import WSGIWarning +from werkzeug.test import create_environ +from werkzeug.test import run_wsgi_app + + +def dummy_application(environ, start_response): + start_response("200 OK", [("Content-Type", "text/plain")]) + return [b"Foo"] + + +def test_lint_middleware(): + """Test lint middleware runs for a dummy applications without warnings""" + app = 
LintMiddleware(dummy_application) + + environ = create_environ("/test") + app_iter, status, headers = run_wsgi_app(app, environ, buffered=True) + assert status == "200 OK" + + +@pytest.mark.parametrize( + "key, value, message", + [ + ("wsgi.version", (0, 7), "Environ is not a WSGI 1.0 environ."), + ("SCRIPT_NAME", "test", "'SCRIPT_NAME' does not start with a slash:"), + ("PATH_INFO", "test", "'PATH_INFO' does not start with a slash:"), + ], +) +def test_lint_middleware_check_environ(key, value, message): + app = LintMiddleware(dummy_application) + + environ = create_environ("/test") + environ[key] = value + with pytest.warns(WSGIWarning, match=message): + app_iter, status, headers = run_wsgi_app(app, environ, buffered=True) + assert status == "200 OK" + + +def test_lint_middleware_invalid_status(): + def my_dummy_application(environ, start_response): + start_response("20 OK", [("Content-Type", "text/plain")]) + return [b"Foo"] + + app = LintMiddleware(my_dummy_application) + + environ = create_environ("/test") + with pytest.warns(WSGIWarning) as record: + run_wsgi_app(app, environ, buffered=True) + + # Returning status 20 should raise three different warnings + assert len(record) == 3 + + +@pytest.mark.parametrize( + "headers, message", + [ + (tuple([("Content-Type", "text/plain")]), "Header list is not a list."), + (["fo"], "Header items must be 2-item tuples."), + ([("status", "foo")], "The status header is not supported."), + ], +) +def test_lint_middleware_http_headers(headers, message): + def my_dummy_application(environ, start_response): + start_response("200 OK", headers) + return [b"Foo"] + + app = LintMiddleware(my_dummy_application) + + environ = create_environ("/test") + with pytest.warns(WSGIWarning, match=message): + run_wsgi_app(app, environ, buffered=True) + + +def test_lint_middleware_invalid_location(): + def my_dummy_application(environ, start_response): + start_response("200 OK", [("location", "foo")]) + return [b"Foo"] + + app = 
LintMiddleware(my_dummy_application) + + environ = create_environ("/test") + with pytest.warns(HTTPWarning, match="Absolute URLs required for location header."): + run_wsgi_app(app, environ, buffered=True) diff --git a/tests/middleware/test_proxy_fix.py b/tests/middleware/test_proxy_fix.py new file mode 100644 index 0000000..6dc3181 --- /dev/null +++ b/tests/middleware/test_proxy_fix.py @@ -0,0 +1,194 @@ +import pytest + +from werkzeug.middleware.proxy_fix import ProxyFix +from werkzeug.routing import Map +from werkzeug.routing import Rule +from werkzeug.test import Client +from werkzeug.test import create_environ +from werkzeug.utils import redirect +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +@pytest.mark.parametrize( + ("kwargs", "base", "url_root"), + ( + pytest.param( + {}, + { + "REMOTE_ADDR": "192.168.0.2", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.1", + "HTTP_X_FORWARDED_PROTO": "https", + }, + "https://spam/", + id="for", + ), + pytest.param( + {"x_proto": 1}, + {"HTTP_HOST": "spam", "HTTP_X_FORWARDED_PROTO": "https"}, + "https://spam/", + id="proto", + ), + pytest.param( + {"x_host": 1}, + {"HTTP_HOST": "spam", "HTTP_X_FORWARDED_HOST": "eggs"}, + "http://eggs/", + id="host", + ), + pytest.param( + {"x_port": 1}, + {"HTTP_HOST": "spam", "HTTP_X_FORWARDED_PORT": "8080"}, + "http://spam:8080/", + id="port, host without port", + ), + pytest.param( + {"x_port": 1}, + {"HTTP_HOST": "spam:9000", "HTTP_X_FORWARDED_PORT": "8080"}, + "http://spam:8080/", + id="port, host with port", + ), + pytest.param( + {"x_port": 1}, + { + "SERVER_NAME": "spam", + "SERVER_PORT": "9000", + "HTTP_X_FORWARDED_PORT": "8080", + }, + "http://spam:8080/", + id="port, name", + ), + pytest.param( + {"x_prefix": 1}, + {"HTTP_HOST": "spam", "HTTP_X_FORWARDED_PREFIX": "/eggs"}, + "http://spam/eggs/", + id="prefix", + ), + pytest.param( + {"x_for": 1, "x_proto": 1, "x_host": 1, "x_port": 1, "x_prefix": 1}, + { + "REMOTE_ADDR": 
"192.168.0.2", + "HTTP_HOST": "spam:9000", + "HTTP_X_FORWARDED_FOR": "192.168.0.1", + "HTTP_X_FORWARDED_PROTO": "https", + "HTTP_X_FORWARDED_HOST": "eggs", + "HTTP_X_FORWARDED_PORT": "443", + "HTTP_X_FORWARDED_PREFIX": "/ham", + }, + "https://eggs/ham/", + id="all", + ), + pytest.param( + {"x_for": 2}, + { + "REMOTE_ADDR": "192.168.0.3", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.1, 192.168.0.2", + }, + "http://spam/", + id="multiple for", + ), + pytest.param( + {"x_for": 0}, + { + "REMOTE_ADDR": "192.168.0.1", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.2", + }, + "http://spam/", + id="ignore 0", + ), + pytest.param( + {"x_for": 3}, + { + "REMOTE_ADDR": "192.168.0.1", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.3, 192.168.0.2", + }, + "http://spam/", + id="ignore len < trusted", + ), + pytest.param( + {}, + { + "REMOTE_ADDR": "192.168.0.2", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.3, 192.168.0.1", + }, + "http://spam/", + id="ignore untrusted", + ), + pytest.param( + {"x_for": 2}, + { + "REMOTE_ADDR": "192.168.0.1", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": ", 192.168.0.3", + }, + "http://spam/", + id="ignore empty", + ), + pytest.param( + {"x_for": 2, "x_prefix": 1}, + { + "REMOTE_ADDR": "192.168.0.2", + "HTTP_HOST": "spam", + "HTTP_X_FORWARDED_FOR": "192.168.0.1, 192.168.0.3", + "HTTP_X_FORWARDED_PREFIX": "/ham, /eggs", + }, + "http://spam/eggs/", + id="prefix < for", + ), + pytest.param( + {"x_host": 1}, + {"HTTP_HOST": "spam", "HTTP_X_FORWARDED_HOST": "[2001:db8::a]"}, + "http://[2001:db8::a]/", + id="ipv6 host", + ), + pytest.param( + {"x_port": 1}, + {"HTTP_HOST": "[2001:db8::a]", "HTTP_X_FORWARDED_PORT": "8080"}, + "http://[2001:db8::a]:8080/", + id="ipv6 port, host without port", + ), + pytest.param( + {"x_port": 1}, + {"HTTP_HOST": "[2001:db8::a]:9000", "HTTP_X_FORWARDED_PORT": "8080"}, + "http://[2001:db8::a]:8080/", + id="ipv6 - port, host with port", + ), + ), +) +def 
test_proxy_fix(monkeypatch, kwargs, base, url_root): + monkeypatch.setattr(Response, "autocorrect_location_header", True) + + @Request.application + def app(request): + # for header + assert request.remote_addr == "192.168.0.1" + # proto, host, port, prefix headers + assert request.url_root == url_root + + urls = url_map.bind_to_environ(request.environ) + parrot_url = urls.build("parrot") + # build includes prefix + assert urls.build("parrot") == "/".join((request.script_root, "parrot")) + # match doesn't include prefix + assert urls.match("/parrot")[0] == "parrot" + + # With autocorrect_location_header enabled, location header will + # start with url_root + return redirect(parrot_url) + + url_map = Map([Rule("/parrot", endpoint="parrot")]) + app = ProxyFix(app, **kwargs) + + base.setdefault("REMOTE_ADDR", "192.168.0.1") + environ = create_environ(environ_overrides=base) + + # host is always added, remove it if the test doesn't set it + if "HTTP_HOST" not in base: + del environ["HTTP_HOST"] + + response = Client(app).open(Request(environ)) + assert response.location == f"{url_root}parrot" diff --git a/tests/middleware/test_shared_data.py b/tests/middleware/test_shared_data.py new file mode 100644 index 0000000..26e6464 --- /dev/null +++ b/tests/middleware/test_shared_data.py @@ -0,0 +1,62 @@ +import os +from contextlib import closing + +from werkzeug.middleware.shared_data import SharedDataMiddleware +from werkzeug.test import create_environ +from werkzeug.test import run_wsgi_app + + +def test_get_file_loader(): + app = SharedDataMiddleware(None, {}) + assert callable(app.get_file_loader("foo")) + + +def test_shared_data_middleware(tmpdir): + def null_application(environ, start_response): + start_response("404 NOT FOUND", [("Content-Type", "text/plain")]) + yield b"NOT FOUND" + + test_dir = str(tmpdir) + + with open(os.path.join(test_dir, "äöü"), "w") as test_file: + test_file.write("FOUND") + + for t in [list, dict]: + app = SharedDataMiddleware( + 
null_application, + t( + [ + ("/", os.path.join(os.path.dirname(__file__), "..", "res")), + ("/sources", os.path.join(os.path.dirname(__file__), "..", "res")), + ("/pkg", ("werkzeug.debug", "shared")), + ("/foo", test_dir), + ] + ), + ) + + for p in "/test.txt", "/sources/test.txt", "/foo/äöü": + app_iter, status, headers = run_wsgi_app(app, create_environ(p)) + assert status == "200 OK" + + if p.endswith(".txt"): + content_type = next(v for k, v in headers if k == "Content-Type") + assert content_type == "text/plain; charset=utf-8" + + with closing(app_iter) as app_iter: + data = b"".join(app_iter).strip() + + assert data == b"FOUND" + + app_iter, status, headers = run_wsgi_app( + app, create_environ("/pkg/debugger.js") + ) + + with closing(app_iter) as app_iter: + contents = b"".join(app_iter) + + assert b"docReady(() =>" in contents + + for path in ("/missing", "/pkg", "/pkg/", "/pkg/missing.txt"): + app_iter, status, headers = run_wsgi_app(app, create_environ(path)) + assert status == "404 NOT FOUND" + assert b"".join(app_iter).strip() == b"NOT FOUND" diff --git a/tests/multipart/firefox3-2png1txt/file1.png b/tests/multipart/firefox3-2png1txt/file1.png new file mode 100644 index 0000000..9b3422c Binary files /dev/null and b/tests/multipart/firefox3-2png1txt/file1.png differ diff --git a/tests/multipart/firefox3-2png1txt/file2.png b/tests/multipart/firefox3-2png1txt/file2.png new file mode 100644 index 0000000..fb2efb8 Binary files /dev/null and b/tests/multipart/firefox3-2png1txt/file2.png differ diff --git a/tests/multipart/firefox3-2png1txt/request.http b/tests/multipart/firefox3-2png1txt/request.http new file mode 100644 index 0000000..721e04e Binary files /dev/null and b/tests/multipart/firefox3-2png1txt/request.http differ diff --git a/tests/multipart/firefox3-2png1txt/text.txt b/tests/multipart/firefox3-2png1txt/text.txt new file mode 100644 index 0000000..4491a1e --- /dev/null +++ b/tests/multipart/firefox3-2png1txt/text.txt @@ -0,0 +1 @@ +example text 
diff --git a/tests/multipart/firefox3-2pnglongtext/file1.png b/tests/multipart/firefox3-2pnglongtext/file1.png new file mode 100644 index 0000000..89c8129 Binary files /dev/null and b/tests/multipart/firefox3-2pnglongtext/file1.png differ diff --git a/tests/multipart/firefox3-2pnglongtext/file2.png b/tests/multipart/firefox3-2pnglongtext/file2.png new file mode 100644 index 0000000..6332fef Binary files /dev/null and b/tests/multipart/firefox3-2pnglongtext/file2.png differ diff --git a/tests/multipart/firefox3-2pnglongtext/request.http b/tests/multipart/firefox3-2pnglongtext/request.http new file mode 100644 index 0000000..489290b Binary files /dev/null and b/tests/multipart/firefox3-2pnglongtext/request.http differ diff --git a/tests/multipart/firefox3-2pnglongtext/text.txt b/tests/multipart/firefox3-2pnglongtext/text.txt new file mode 100644 index 0000000..833ab62 --- /dev/null +++ b/tests/multipart/firefox3-2pnglongtext/text.txt @@ -0,0 +1,3 @@ +--long text +--with boundary +--lookalikes-- diff --git a/tests/multipart/ie6-2png1txt/file1.png b/tests/multipart/ie6-2png1txt/file1.png new file mode 100644 index 0000000..9b3422c Binary files /dev/null and b/tests/multipart/ie6-2png1txt/file1.png differ diff --git a/tests/multipart/ie6-2png1txt/file2.png b/tests/multipart/ie6-2png1txt/file2.png new file mode 100644 index 0000000..fb2efb8 Binary files /dev/null and b/tests/multipart/ie6-2png1txt/file2.png differ diff --git a/tests/multipart/ie6-2png1txt/request.http b/tests/multipart/ie6-2png1txt/request.http new file mode 100644 index 0000000..389cbfb Binary files /dev/null and b/tests/multipart/ie6-2png1txt/request.http differ diff --git a/tests/multipart/ie6-2png1txt/text.txt b/tests/multipart/ie6-2png1txt/text.txt new file mode 100644 index 0000000..a32e65e --- /dev/null +++ b/tests/multipart/ie6-2png1txt/text.txt @@ -0,0 +1 @@ +ie6 sucks :-/ diff --git a/tests/multipart/ie7_full_path_request.http b/tests/multipart/ie7_full_path_request.http new file mode 100644 
index 0000000..d11f1a7 Binary files /dev/null and b/tests/multipart/ie7_full_path_request.http differ diff --git a/tests/multipart/opera8-2png1txt/file1.png b/tests/multipart/opera8-2png1txt/file1.png new file mode 100644 index 0000000..7542db1 Binary files /dev/null and b/tests/multipart/opera8-2png1txt/file1.png differ diff --git a/tests/multipart/opera8-2png1txt/file2.png b/tests/multipart/opera8-2png1txt/file2.png new file mode 100644 index 0000000..658c711 Binary files /dev/null and b/tests/multipart/opera8-2png1txt/file2.png differ diff --git a/tests/multipart/opera8-2png1txt/request.http b/tests/multipart/opera8-2png1txt/request.http new file mode 100644 index 0000000..8f32591 Binary files /dev/null and b/tests/multipart/opera8-2png1txt/request.http differ diff --git a/tests/multipart/opera8-2png1txt/text.txt b/tests/multipart/opera8-2png1txt/text.txt new file mode 100644 index 0000000..ea10aa5 --- /dev/null +++ b/tests/multipart/opera8-2png1txt/text.txt @@ -0,0 +1 @@ +blafasel öäü diff --git a/tests/multipart/webkit3-2png1txt/file1.png b/tests/multipart/webkit3-2png1txt/file1.png new file mode 100644 index 0000000..afca073 Binary files /dev/null and b/tests/multipart/webkit3-2png1txt/file1.png differ diff --git a/tests/multipart/webkit3-2png1txt/file2.png b/tests/multipart/webkit3-2png1txt/file2.png new file mode 100644 index 0000000..2a7da6e Binary files /dev/null and b/tests/multipart/webkit3-2png1txt/file2.png differ diff --git a/tests/multipart/webkit3-2png1txt/request.http b/tests/multipart/webkit3-2png1txt/request.http new file mode 100644 index 0000000..b4ce0ee Binary files /dev/null and b/tests/multipart/webkit3-2png1txt/request.http differ diff --git a/tests/multipart/webkit3-2png1txt/text.txt b/tests/multipart/webkit3-2png1txt/text.txt new file mode 100644 index 0000000..1753790 --- /dev/null +++ b/tests/multipart/webkit3-2png1txt/text.txt @@ -0,0 +1 @@ +this is another text with ümläüts diff --git a/tests/res/index.html b/tests/res/index.html new 
file mode 100644 index 0000000..2ecc7e8 --- /dev/null +++ b/tests/res/index.html @@ -0,0 +1,10 @@ + + + + + Title + + + + + diff --git a/tests/res/test.txt b/tests/res/test.txt new file mode 100644 index 0000000..a8efdcc Binary files /dev/null and b/tests/res/test.txt differ diff --git a/tests/sansio/__init__.py b/tests/sansio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sansio/test_multipart.py b/tests/sansio/test_multipart.py new file mode 100644 index 0000000..f9c48b4 --- /dev/null +++ b/tests/sansio/test_multipart.py @@ -0,0 +1,80 @@ +from werkzeug.datastructures import Headers +from werkzeug.sansio.multipart import Data +from werkzeug.sansio.multipart import Epilogue +from werkzeug.sansio.multipart import Field +from werkzeug.sansio.multipart import File +from werkzeug.sansio.multipart import MultipartDecoder +from werkzeug.sansio.multipart import MultipartEncoder +from werkzeug.sansio.multipart import NeedData +from werkzeug.sansio.multipart import Preamble + + +def test_decoder_simple() -> None: + boundary = b"---------------------------9704338192090380615194531385$" + decoder = MultipartDecoder(boundary) + data = """ +-----------------------------9704338192090380615194531385$ +Content-Disposition: form-data; name="fname" + +ß∑œß∂ƒå∂ +-----------------------------9704338192090380615194531385$ +Content-Disposition: form-data; name="lname"; filename="bob" + +asdasd +-----------------------------9704338192090380615194531385$-- + """.replace( + "\n", "\r\n" + ).encode( + "utf-8" + ) + decoder.receive_data(data) + decoder.receive_data(None) + events = [decoder.next_event()] + while not isinstance(events[-1], Epilogue) and len(events) < 6: + events.append(decoder.next_event()) + assert events == [ + Preamble(data=b""), + Field( + name="fname", + headers=Headers([("Content-Disposition", 'form-data; name="fname"')]), + ), + Data(data="ß∑œß∂ƒå∂".encode(), more_data=False), + File( + name="lname", + filename="bob", + headers=Headers( + 
[("Content-Disposition", 'form-data; name="lname"; filename="bob"')] + ), + ), + Data(data=b"asdasd", more_data=False), + Epilogue(data=b" "), + ] + encoder = MultipartEncoder(boundary) + result = b"" + for event in events: + result += encoder.send_event(event) + assert data == result + + +def test_chunked_boundaries() -> None: + boundary = b"--boundary" + decoder = MultipartDecoder(boundary) + decoder.receive_data(b"--") + assert isinstance(decoder.next_event(), NeedData) + decoder.receive_data(b"--boundary\r\n") + assert isinstance(decoder.next_event(), Preamble) + decoder.receive_data(b"Content-Disposition: form-data;") + assert isinstance(decoder.next_event(), NeedData) + decoder.receive_data(b'name="fname"\r\n\r\n') + assert isinstance(decoder.next_event(), Field) + decoder.receive_data(b"longer than the boundary") + assert isinstance(decoder.next_event(), Data) + decoder.receive_data(b"also longer, but includes a linebreak\r\n--") + assert isinstance(decoder.next_event(), Data) + assert isinstance(decoder.next_event(), NeedData) + decoder.receive_data(b"--boundary--\r\n") + event = decoder.next_event() + assert isinstance(event, Data) + assert not event.more_data + decoder.receive_data(None) + assert isinstance(decoder.next_event(), Epilogue) diff --git a/tests/sansio/test_request.py b/tests/sansio/test_request.py new file mode 100644 index 0000000..310b244 --- /dev/null +++ b/tests/sansio/test_request.py @@ -0,0 +1,27 @@ +import typing as t + +import pytest + +from werkzeug.datastructures import Headers +from werkzeug.sansio.request import Request + + +@pytest.mark.parametrize( + "headers, expected", + [ + (Headers({"Transfer-Encoding": "chunked", "Content-Length": "6"}), None), + (Headers({"Transfer-Encoding": "something", "Content-Length": "6"}), 6), + (Headers({"Content-Length": "6"}), 6), + (Headers(), None), + ], +) +def test_content_length(headers: Headers, expected: t.Optional[int]) -> None: + req = Request("POST", "http", None, "", "", b"", headers, 
None) + assert req.content_length == expected + + +def test_cookies() -> None: + headers = Headers([("Cookie", "a=b"), ("Content-Type", "text"), ("Cookie", "a=c")]) + req = Request("GET", "http", None, "", "", b"", headers, None) + assert req.cookies.get("a") == "b" + assert req.cookies.getlist("a") == ["b", "c"] diff --git a/tests/sansio/test_utils.py b/tests/sansio/test_utils.py new file mode 100644 index 0000000..8c8faa6 --- /dev/null +++ b/tests/sansio/test_utils.py @@ -0,0 +1,32 @@ +import typing as t + +import pytest + +from werkzeug.sansio.utils import get_host + + +@pytest.mark.parametrize( + ("scheme", "host_header", "server", "expected"), + [ + ("http", "spam", None, "spam"), + ("http", "spam:80", None, "spam"), + ("https", "spam", None, "spam"), + ("https", "spam:443", None, "spam"), + ("http", "spam:8080", None, "spam:8080"), + ("ws", "spam", None, "spam"), + ("ws", "spam:80", None, "spam"), + ("wss", "spam", None, "spam"), + ("wss", "spam:443", None, "spam"), + ("http", None, ("spam", 80), "spam"), + ("http", None, ("spam", 8080), "spam:8080"), + ("http", None, ("unix/socket", None), "unix/socket"), + ("http", "spam", ("eggs", 80), "spam"), + ], +) +def test_get_host( + scheme: str, + host_header: t.Optional[str], + server: t.Optional[t.Tuple[str, t.Optional[int]]], + expected: str, +) -> None: + assert get_host(scheme, host_header, server) == expected diff --git a/tests/test_datastructures.py b/tests/test_datastructures.py new file mode 100644 index 0000000..7f63b64 --- /dev/null +++ b/tests/test_datastructures.py @@ -0,0 +1,1220 @@ +import io +import pickle +import tempfile +import typing as t +from contextlib import contextmanager +from copy import copy +from copy import deepcopy + +import pytest + +from werkzeug import datastructures as ds +from werkzeug import http +from werkzeug.exceptions import BadRequestKeyError + + +class TestNativeItermethods: + def test_basic(self): + class StupidDict: + def keys(self, multi=1): + return iter(["a", "b", 
"c"] * multi) + + def values(self, multi=1): + return iter([1, 2, 3] * multi) + + def items(self, multi=1): + return iter( + zip(iter(self.keys(multi=multi)), iter(self.values(multi=multi))) + ) + + d = StupidDict() + expected_keys = ["a", "b", "c"] + expected_values = [1, 2, 3] + expected_items = list(zip(expected_keys, expected_values)) + + assert list(d.keys()) == expected_keys + assert list(d.values()) == expected_values + assert list(d.items()) == expected_items + + assert list(d.keys(2)) == expected_keys * 2 + assert list(d.values(2)) == expected_values * 2 + assert list(d.items(2)) == expected_items * 2 + + +class _MutableMultiDictTests: + storage_class: t.Type["ds.MultiDict"] + + def test_pickle(self): + cls = self.storage_class + + def create_instance(module=None): + if module is None: + d = cls() + else: + old = cls.__module__ + cls.__module__ = module + d = cls() + cls.__module__ = old + d.setlist(b"foo", [1, 2, 3, 4]) + d.setlist(b"bar", b"foo bar baz".split()) + return d + + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + d = create_instance() + s = pickle.dumps(d, protocol) + ud = pickle.loads(s) + assert type(ud) == type(d) + assert ud == d + alternative = pickle.dumps(create_instance("werkzeug"), protocol) + assert pickle.loads(alternative) == d + ud[b"newkey"] = b"bla" + assert ud != d + + def test_multidict_dict_interop(self): + # https://github.com/pallets/werkzeug/pull/2043 + md = self.storage_class([("a", 1), ("a", 2)]) + assert dict(md)["a"] != [1, 2] + assert dict(md)["a"] == 1 + assert dict(md) == {**md} == {"a": 1} + + def test_basic_interface(self): + md = self.storage_class() + assert isinstance(md, dict) + + mapping = [ + ("a", 1), + ("b", 2), + ("a", 2), + ("d", 3), + ("a", 1), + ("a", 3), + ("d", 4), + ("c", 3), + ] + md = self.storage_class(mapping) + + # simple getitem gives the first value + assert md["a"] == 1 + assert md["c"] == 3 + with pytest.raises(KeyError): + md["e"] + assert md.get("a") == 1 + + # list getitem + assert 
md.getlist("a") == [1, 2, 1, 3] + assert md.getlist("d") == [3, 4] + # do not raise if key not found + assert md.getlist("x") == [] + + # simple setitem overwrites all values + md["a"] = 42 + assert md.getlist("a") == [42] + + # list setitem + md.setlist("a", [1, 2, 3]) + assert md["a"] == 1 + assert md.getlist("a") == [1, 2, 3] + + # verify that it does not change original lists + l1 = [1, 2, 3] + md.setlist("a", l1) + del l1[:] + assert md["a"] == 1 + + # setdefault, setlistdefault + assert md.setdefault("u", 23) == 23 + assert md.getlist("u") == [23] + del md["u"] + + md.setlist("u", [-1, -2]) + + # delitem + del md["u"] + with pytest.raises(KeyError): + md["u"] + del md["d"] + assert md.getlist("d") == [] + + # keys, values, items, lists + assert list(sorted(md.keys())) == ["a", "b", "c"] + assert list(sorted(md.keys())) == ["a", "b", "c"] + + assert list(sorted(md.values())) == [1, 2, 3] + assert list(sorted(md.values())) == [1, 2, 3] + + assert list(sorted(md.items())) == [("a", 1), ("b", 2), ("c", 3)] + assert list(sorted(md.items(multi=True))) == [ + ("a", 1), + ("a", 2), + ("a", 3), + ("b", 2), + ("c", 3), + ] + assert list(sorted(md.items())) == [("a", 1), ("b", 2), ("c", 3)] + assert list(sorted(md.items(multi=True))) == [ + ("a", 1), + ("a", 2), + ("a", 3), + ("b", 2), + ("c", 3), + ] + + assert list(sorted(md.lists())) == [("a", [1, 2, 3]), ("b", [2]), ("c", [3])] + assert list(sorted(md.lists())) == [("a", [1, 2, 3]), ("b", [2]), ("c", [3])] + + # copy method + c = md.copy() + assert c["a"] == 1 + assert c.getlist("a") == [1, 2, 3] + + # copy method 2 + c = copy(md) + assert c["a"] == 1 + assert c.getlist("a") == [1, 2, 3] + + # deepcopy method + c = md.deepcopy() + assert c["a"] == 1 + assert c.getlist("a") == [1, 2, 3] + + # deepcopy method 2 + c = deepcopy(md) + assert c["a"] == 1 + assert c.getlist("a") == [1, 2, 3] + + # update with a multidict + od = self.storage_class([("a", 4), ("a", 5), ("y", 0)]) + md.update(od) + assert md.getlist("a") == 
[1, 2, 3, 4, 5] + assert md.getlist("y") == [0] + + # update with a regular dict + md = c + od = {"a": 4, "y": 0} + md.update(od) + assert md.getlist("a") == [1, 2, 3, 4] + assert md.getlist("y") == [0] + + # pop, poplist, popitem, popitemlist + assert md.pop("y") == 0 + assert "y" not in md + assert md.poplist("a") == [1, 2, 3, 4] + assert "a" not in md + assert md.poplist("missing") == [] + + # remaining: b=2, c=3 + popped = md.popitem() + assert popped in [("b", 2), ("c", 3)] + popped = md.popitemlist() + assert popped in [("b", [2]), ("c", [3])] + + # type conversion + md = self.storage_class({"a": "4", "b": ["2", "3"]}) + assert md.get("a", type=int) == 4 + assert md.getlist("b", type=int) == [2, 3] + + # repr + md = self.storage_class([("a", 1), ("a", 2), ("b", 3)]) + assert "('a', 1)" in repr(md) + assert "('a', 2)" in repr(md) + assert "('b', 3)" in repr(md) + + # add and getlist + md.add("c", "42") + md.add("c", "23") + assert md.getlist("c") == ["42", "23"] + md.add("c", "blah") + assert md.getlist("c", type=int) == [42, 23] + + # setdefault + md = self.storage_class() + md.setdefault("x", []).append(42) + md.setdefault("x", []).append(23) + assert md["x"] == [42, 23] + + # to dict + md = self.storage_class() + md["foo"] = 42 + md.add("bar", 1) + md.add("bar", 2) + assert md.to_dict() == {"foo": 42, "bar": 1} + assert md.to_dict(flat=False) == {"foo": [42], "bar": [1, 2]} + + # popitem from empty dict + with pytest.raises(KeyError): + self.storage_class().popitem() + + with pytest.raises(KeyError): + self.storage_class().popitemlist() + + # key errors are of a special type + with pytest.raises(BadRequestKeyError): + self.storage_class()[42] + + # setlist works + md = self.storage_class() + md["foo"] = 42 + md.setlist("foo", [1, 2]) + assert md.getlist("foo") == [1, 2] + + +class _ImmutableDictTests: + storage_class: t.Type[dict] + + def test_follows_dict_interface(self): + cls = self.storage_class + + data = {"foo": 1, "bar": 2, "baz": 3} + d = cls(data) 
+ + assert d["foo"] == 1 + assert d["bar"] == 2 + assert d["baz"] == 3 + assert sorted(d.keys()) == ["bar", "baz", "foo"] + assert "foo" in d + assert "foox" not in d + assert len(d) == 3 + + def test_copies_are_mutable(self): + cls = self.storage_class + immutable = cls({"a": 1}) + with pytest.raises(TypeError): + immutable.pop("a") + + mutable = immutable.copy() + mutable.pop("a") + assert "a" in immutable + assert mutable is not immutable + assert copy(immutable) is immutable + + def test_dict_is_hashable(self): + cls = self.storage_class + immutable = cls({"a": 1, "b": 2}) + immutable2 = cls({"a": 2, "b": 2}) + x = {immutable} + assert immutable in x + assert immutable2 not in x + x.discard(immutable) + assert immutable not in x + assert immutable2 not in x + x.add(immutable2) + assert immutable not in x + assert immutable2 in x + x.add(immutable) + assert immutable in x + assert immutable2 in x + + +class TestImmutableTypeConversionDict(_ImmutableDictTests): + storage_class = ds.ImmutableTypeConversionDict + + +class TestImmutableMultiDict(_ImmutableDictTests): + storage_class = ds.ImmutableMultiDict + + def test_multidict_is_hashable(self): + cls = self.storage_class + immutable = cls({"a": [1, 2], "b": 2}) + immutable2 = cls({"a": [1], "b": 2}) + x = {immutable} + assert immutable in x + assert immutable2 not in x + x.discard(immutable) + assert immutable not in x + assert immutable2 not in x + x.add(immutable2) + assert immutable not in x + assert immutable2 in x + x.add(immutable) + assert immutable in x + assert immutable2 in x + + +class TestImmutableDict(_ImmutableDictTests): + storage_class = ds.ImmutableDict + + +class TestImmutableOrderedMultiDict(_ImmutableDictTests): + storage_class = ds.ImmutableOrderedMultiDict + + def test_ordered_multidict_is_hashable(self): + a = self.storage_class([("a", 1), ("b", 1), ("a", 2)]) + b = self.storage_class([("a", 1), ("a", 2), ("b", 1)]) + assert hash(a) != hash(b) + + +class 
TestMultiDict(_MutableMultiDictTests): + storage_class = ds.MultiDict + + def test_multidict_pop(self): + def make_d(): + return self.storage_class({"foo": [1, 2, 3, 4]}) + + d = make_d() + assert d.pop("foo") == 1 + assert not d + d = make_d() + assert d.pop("foo", 32) == 1 + assert not d + d = make_d() + assert d.pop("foos", 32) == 32 + assert d + + with pytest.raises(KeyError): + d.pop("foos") + + def test_multidict_pop_raise_badrequestkeyerror_for_empty_list_value(self): + mapping = [("a", "b"), ("a", "c")] + md = self.storage_class(mapping) + + md.setlistdefault("empty", []) + + with pytest.raises(KeyError): + md.pop("empty") + + def test_multidict_popitem_raise_badrequestkeyerror_for_empty_list_value(self): + mapping = [] + md = self.storage_class(mapping) + + md.setlistdefault("empty", []) + + with pytest.raises(BadRequestKeyError): + md.popitem() + + def test_setlistdefault(self): + md = self.storage_class() + assert md.setlistdefault("u", [-1, -2]) == [-1, -2] + assert md.getlist("u") == [-1, -2] + assert md["u"] == -1 + + def test_iter_interfaces(self): + mapping = [ + ("a", 1), + ("b", 2), + ("a", 2), + ("d", 3), + ("a", 1), + ("a", 3), + ("d", 4), + ("c", 3), + ] + md = self.storage_class(mapping) + assert list(zip(md.keys(), md.listvalues())) == list(md.lists()) + assert list(zip(md, md.listvalues())) == list(md.lists()) + assert list(zip(md.keys(), md.listvalues())) == list(md.lists()) + + def test_getitem_raise_badrequestkeyerror_for_empty_list_value(self): + mapping = [("a", "b"), ("a", "c")] + md = self.storage_class(mapping) + + md.setlistdefault("empty", []) + + with pytest.raises(KeyError): + md["empty"] + + +class TestOrderedMultiDict(_MutableMultiDictTests): + storage_class = ds.OrderedMultiDict + + def test_ordered_interface(self): + cls = self.storage_class + + d = cls() + assert not d + d.add("foo", "bar") + assert len(d) == 1 + d.add("foo", "baz") + assert len(d) == 1 + assert list(d.items()) == [("foo", "bar")] + assert list(d) == ["foo"] 
+ assert list(d.items(multi=True)) == [("foo", "bar"), ("foo", "baz")] + del d["foo"] + assert not d + assert len(d) == 0 + assert list(d) == [] + + d.update([("foo", 1), ("foo", 2), ("bar", 42)]) + d.add("foo", 3) + assert d.getlist("foo") == [1, 2, 3] + assert d.getlist("bar") == [42] + assert list(d.items()) == [("foo", 1), ("bar", 42)] + + expected = ["foo", "bar"] + + assert list(d.keys()) == expected + assert list(d) == expected + assert list(d.keys()) == expected + + assert list(d.items(multi=True)) == [ + ("foo", 1), + ("foo", 2), + ("bar", 42), + ("foo", 3), + ] + assert len(d) == 2 + + assert d.pop("foo") == 1 + assert d.pop("blafasel", None) is None + assert d.pop("blafasel", 42) == 42 + assert len(d) == 1 + assert d.poplist("bar") == [42] + assert not d + + assert d.get("missingkey") is None + + d.add("foo", 42) + d.add("foo", 23) + d.add("bar", 2) + d.add("foo", 42) + assert d == ds.MultiDict(d) + id = self.storage_class(d) + assert d == id + d.add("foo", 2) + assert d != id + + d.update({"blah": [1, 2, 3]}) + assert d["blah"] == 1 + assert d.getlist("blah") == [1, 2, 3] + + # setlist works + d = self.storage_class() + d["foo"] = 42 + d.setlist("foo", [1, 2]) + assert d.getlist("foo") == [1, 2] + with pytest.raises(BadRequestKeyError): + d.pop("missing") + + with pytest.raises(BadRequestKeyError): + d["missing"] + + # popping + d = self.storage_class() + d.add("foo", 23) + d.add("foo", 42) + d.add("foo", 1) + assert d.popitem() == ("foo", 23) + with pytest.raises(BadRequestKeyError): + d.popitem() + assert not d + + d.add("foo", 23) + d.add("foo", 42) + d.add("foo", 1) + assert d.popitemlist() == ("foo", [23, 42, 1]) + + with pytest.raises(BadRequestKeyError): + d.popitemlist() + + # Unhashable + d = self.storage_class() + d.add("foo", 23) + pytest.raises(TypeError, hash, d) + + def test_iterables(self): + a = ds.MultiDict((("key_a", "value_a"),)) + b = ds.MultiDict((("key_b", "value_b"),)) + ab = ds.CombinedMultiDict((a, b)) + + assert 
sorted(ab.lists()) == [("key_a", ["value_a"]), ("key_b", ["value_b"])] + assert sorted(ab.listvalues()) == [["value_a"], ["value_b"]] + assert sorted(ab.keys()) == ["key_a", "key_b"] + + assert sorted(ab.lists()) == [("key_a", ["value_a"]), ("key_b", ["value_b"])] + assert sorted(ab.listvalues()) == [["value_a"], ["value_b"]] + assert sorted(ab.keys()) == ["key_a", "key_b"] + + def test_get_description(self): + data = ds.OrderedMultiDict() + + with pytest.raises(BadRequestKeyError) as exc_info: + data["baz"] + + assert "baz" not in exc_info.value.get_description() + exc_info.value.show_exception = True + assert "baz" in exc_info.value.get_description() + + with pytest.raises(BadRequestKeyError) as exc_info: + data.pop("baz") + + exc_info.value.show_exception = True + assert "baz" in exc_info.value.get_description() + exc_info.value.args = () + assert "baz" not in exc_info.value.get_description() + + +class TestTypeConversionDict: + storage_class = ds.TypeConversionDict + + def test_value_conversion(self): + d = self.storage_class(foo="1") + assert d.get("foo", type=int) == 1 + + def test_return_default_when_conversion_is_not_possible(self): + d = self.storage_class(foo="bar") + assert d.get("foo", default=-1, type=int) == -1 + + def test_propagate_exceptions_in_conversion(self): + d = self.storage_class(foo="bar") + switch = {"a": 1} + with pytest.raises(KeyError): + d.get("foo", type=lambda x: switch[x]) + + +class TestCombinedMultiDict: + storage_class = ds.CombinedMultiDict + + def test_basic_interface(self): + d1 = ds.MultiDict([("foo", "1")]) + d2 = ds.MultiDict([("bar", "2"), ("bar", "3")]) + d = self.storage_class([d1, d2]) + + # lookup + assert d["foo"] == "1" + assert d["bar"] == "2" + assert d.getlist("bar") == ["2", "3"] + + assert sorted(d.items()) == [("bar", "2"), ("foo", "1")] + assert sorted(d.items(multi=True)) == [("bar", "2"), ("bar", "3"), ("foo", "1")] + assert "missingkey" not in d + assert "foo" in d + + # type lookup + assert d.get("foo", 
type=int) == 1 + assert d.getlist("bar", type=int) == [2, 3] + + # get key errors for missing stuff + with pytest.raises(KeyError): + d["missing"] + + # make sure that they are immutable + with pytest.raises(TypeError): + d["foo"] = "blub" + + # copies are mutable + d = d.copy() + d["foo"] = "blub" + + # make sure lists merges + md1 = ds.MultiDict((("foo", "bar"), ("foo", "baz"))) + md2 = ds.MultiDict((("foo", "blafasel"),)) + x = self.storage_class((md1, md2)) + assert list(x.lists()) == [("foo", ["bar", "baz", "blafasel"])] + + # make sure dicts are created properly + assert x.to_dict() == {"foo": "bar"} + assert x.to_dict(flat=False) == {"foo": ["bar", "baz", "blafasel"]} + + def test_length(self): + d1 = ds.MultiDict([("foo", "1")]) + d2 = ds.MultiDict([("bar", "2")]) + assert len(d1) == len(d2) == 1 + d = self.storage_class([d1, d2]) + assert len(d) == 2 + d1.clear() + assert len(d1) == 0 + assert len(d) == 1 + + +class TestHeaders: + storage_class = ds.Headers + + def test_basic_interface(self): + headers = self.storage_class() + headers.add("Content-Type", "text/plain") + headers.add("X-Foo", "bar") + assert "x-Foo" in headers + assert "Content-type" in headers + + with pytest.raises(ValueError): + headers.add("X-Example", "foo\r\n bar") + + headers["Content-Type"] = "foo/bar" + assert headers["Content-Type"] == "foo/bar" + assert len(headers.getlist("Content-Type")) == 1 + + # list conversion + assert headers.to_wsgi_list() == [("Content-Type", "foo/bar"), ("X-Foo", "bar")] + assert str(headers) == "Content-Type: foo/bar\r\nX-Foo: bar\r\n\r\n" + assert str(self.storage_class()) == "\r\n" + + # extended add + headers.add("Content-Disposition", "attachment", filename="foo") + assert headers["Content-Disposition"] == "attachment; filename=foo" + + headers.add("x", "y", z='"') + assert headers["x"] == r'y; z="\""' + + # string conversion + headers.add("a", 1) + assert headers["a"] == "1" + + def test_defaults_and_conversion(self): + # defaults + headers = 
self.storage_class( + [ + ("Content-Type", "text/plain"), + ("X-Foo", "bar"), + ("X-Bar", "1"), + ("X-Bar", "2"), + ] + ) + assert headers.getlist("x-bar") == ["1", "2"] + assert headers.get("x-Bar") == "1" + assert headers.get("Content-Type") == "text/plain" + + assert headers.setdefault("X-Foo", "nope") == "bar" + assert headers.setdefault("X-Bar", "nope") == "1" + assert headers.setdefault("X-Baz", "quux") == "quux" + assert headers.setdefault("X-Baz", "nope") == "quux" + headers.pop("X-Baz") + + # newlines are not allowed in values + with pytest.raises(ValueError): + self.storage_class([("X-Example", "foo\r\n bar")]) + + # type conversion + assert headers.get("x-bar", type=int) == 1 + assert headers.getlist("x-bar", type=int) == [1, 2] + + # list like operations + assert headers[0] == ("Content-Type", "text/plain") + assert headers[:1] == self.storage_class([("Content-Type", "text/plain")]) + del headers[:2] + del headers[-1] + assert headers == self.storage_class([("X-Bar", "1")]) + + def test_copying(self): + a = self.storage_class([("foo", "bar")]) + b = a.copy() + a.add("foo", "baz") + assert a.getlist("foo") == ["bar", "baz"] + assert b.getlist("foo") == ["bar"] + + def test_popping(self): + headers = self.storage_class([("a", 1)]) + # headers object expect string values. 
If a non string value + # is passed, it tries converting it to a string + assert headers.pop("a") == "1" + assert headers.pop("b", "2") == "2" + + with pytest.raises(KeyError): + headers.pop("c") + + def test_set_arguments(self): + a = self.storage_class() + a.set("Content-Disposition", "useless") + a.set("Content-Disposition", "attachment", filename="foo") + assert a["Content-Disposition"] == "attachment; filename=foo" + + def test_reject_newlines(self): + h = self.storage_class() + + for variation in "foo\nbar", "foo\r\nbar", "foo\rbar": + with pytest.raises(ValueError): + h["foo"] = variation + with pytest.raises(ValueError): + h.add("foo", variation) + with pytest.raises(ValueError): + h.add("foo", "test", option=variation) + with pytest.raises(ValueError): + h.set("foo", variation) + with pytest.raises(ValueError): + h.set("foo", "test", option=variation) + + def test_slicing(self): + # there's nothing wrong with these being native strings + # Headers doesn't care about the data types + h = self.storage_class() + h.set("X-Foo-Poo", "bleh") + h.set("Content-Type", "application/whocares") + h.set("X-Forwarded-For", "192.168.0.123") + h[:] = [(k, v) for k, v in h if k.startswith("X-")] + assert list(h) == [("X-Foo-Poo", "bleh"), ("X-Forwarded-For", "192.168.0.123")] + + def test_bytes_operations(self): + h = self.storage_class() + h.set("X-Foo-Poo", "bleh") + h.set("X-Whoops", b"\xff") + h.set(b"X-Bytes", b"something") + + assert h.get("x-foo-poo", as_bytes=True) == b"bleh" + assert h.get("x-whoops", as_bytes=True) == b"\xff" + assert h.get("x-bytes") == "something" + + def test_extend(self): + h = self.storage_class([("a", "0"), ("b", "1"), ("c", "2")]) + h.extend(ds.Headers([("a", "3"), ("a", "4")])) + assert h.getlist("a") == ["0", "3", "4"] + h.extend(b=["5", "6"]) + assert h.getlist("b") == ["1", "5", "6"] + h.extend({"c": "7", "d": ["8", "9"]}, c="10") + assert h.getlist("c") == ["2", "7", "10"] + assert h.getlist("d") == ["8", "9"] + + with 
pytest.raises(TypeError): + h.extend({"x": "x"}, {"x": "x"}) + + def test_update(self): + h = self.storage_class([("a", "0"), ("b", "1"), ("c", "2")]) + h.update(ds.Headers([("a", "3"), ("a", "4")])) + assert h.getlist("a") == ["3", "4"] + h.update(b=["5", "6"]) + assert h.getlist("b") == ["5", "6"] + h.update({"c": "7", "d": ["8", "9"]}) + assert h.getlist("c") == ["7"] + assert h.getlist("d") == ["8", "9"] + h.update({"c": "10"}, c="11") + assert h.getlist("c") == ["11"] + + with pytest.raises(TypeError): + h.extend({"x": "x"}, {"x": "x"}) + + def test_setlist(self): + h = self.storage_class([("a", "0"), ("b", "1"), ("c", "2")]) + h.setlist("b", ["3", "4"]) + assert h[1] == ("b", "3") + assert h[-1] == ("b", "4") + h.setlist("b", []) + assert "b" not in h + h.setlist("d", ["5"]) + assert h["d"] == "5" + + def test_setlistdefault(self): + h = self.storage_class([("a", "0"), ("b", "1"), ("c", "2")]) + assert h.setlistdefault("a", ["3"]) == ["0"] + assert h.setlistdefault("d", ["4", "5"]) == ["4", "5"] + + def test_to_wsgi_list(self): + h = self.storage_class() + h.set("Key", "Value") + for key, value in h.to_wsgi_list(): + assert key == "Key" + assert value == "Value" + + def test_to_wsgi_list_bytes(self): + h = self.storage_class() + h.set(b"Key", b"Value") + for key, value in h.to_wsgi_list(): + assert key == "Key" + assert value == "Value" + + def test_equality(self): + # test equality, given keys are case insensitive + h1 = self.storage_class() + h1.add("X-Foo", "foo") + h1.add("X-Bar", "bah") + h1.add("X-Bar", "humbug") + + h2 = self.storage_class() + h2.add("x-foo", "foo") + h2.add("x-bar", "bah") + h2.add("x-bar", "humbug") + + assert h1 == h2 + + +class TestEnvironHeaders: + storage_class = ds.EnvironHeaders + + def test_basic_interface(self): + # this happens in multiple WSGI servers because they + # use a vary naive way to convert the headers; + broken_env = { + "HTTP_CONTENT_TYPE": "text/html", + "CONTENT_TYPE": "text/html", + "HTTP_CONTENT_LENGTH": "0", 
+ "CONTENT_LENGTH": "0", + "HTTP_ACCEPT": "*", + "wsgi.version": (1, 0), + } + headers = self.storage_class(broken_env) + assert headers + assert len(headers) == 3 + assert sorted(headers) == [ + ("Accept", "*"), + ("Content-Length", "0"), + ("Content-Type", "text/html"), + ] + assert not self.storage_class({"wsgi.version": (1, 0)}) + assert len(self.storage_class({"wsgi.version": (1, 0)})) == 0 + assert 42 not in headers + + def test_skip_empty_special_vars(self): + env = {"HTTP_X_FOO": "42", "CONTENT_TYPE": "", "CONTENT_LENGTH": ""} + headers = self.storage_class(env) + assert dict(headers) == {"X-Foo": "42"} + + env = {"HTTP_X_FOO": "42", "CONTENT_TYPE": "", "CONTENT_LENGTH": "0"} + headers = self.storage_class(env) + assert dict(headers) == {"X-Foo": "42", "Content-Length": "0"} + + def test_return_type_is_str(self): + headers = self.storage_class({"HTTP_FOO": "\xe2\x9c\x93"}) + assert headers["Foo"] == "\xe2\x9c\x93" + assert next(iter(headers)) == ("Foo", "\xe2\x9c\x93") + + def test_bytes_operations(self): + foo_val = "\xff" + h = self.storage_class({"HTTP_X_FOO": foo_val}) + + assert h.get("x-foo", as_bytes=True) == b"\xff" + assert h.get("x-foo") == "\xff" + + +class TestHeaderSet: + storage_class = ds.HeaderSet + + def test_basic_interface(self): + hs = self.storage_class() + hs.add("foo") + hs.add("bar") + assert "Bar" in hs + assert hs.find("foo") == 0 + assert hs.find("BAR") == 1 + assert hs.find("baz") < 0 + hs.discard("missing") + hs.discard("foo") + assert hs.find("foo") < 0 + assert hs.find("bar") == 0 + + with pytest.raises(IndexError): + hs.index("missing") + + assert hs.index("bar") == 0 + assert hs + hs.clear() + assert not hs + + +class TestImmutableList: + storage_class = ds.ImmutableList + + def test_list_hashable(self): + data = (1, 2, 3, 4) + store = self.storage_class(data) + assert hash(data) == hash(store) + assert data != store + + +def make_call_asserter(func=None): + """Utility to assert a certain number of function calls. 
+ + :param func: Additional callback for each function call. + + .. code-block:: python + assert_calls, func = make_call_asserter() + with assert_calls(2): + func() + func() + """ + calls = [0] + + @contextmanager + def asserter(count, msg=None): + calls[0] = 0 + yield + assert calls[0] == count + + def wrapped(*args, **kwargs): + calls[0] += 1 + if func is not None: + return func(*args, **kwargs) + + return asserter, wrapped + + +class TestCallbackDict: + storage_class = ds.CallbackDict + + def test_callback_dict_reads(self): + assert_calls, func = make_call_asserter() + initial = {"a": "foo", "b": "bar"} + dct = self.storage_class(initial=initial, on_update=func) + with assert_calls(0, "callback triggered by read-only method"): + # read-only methods + dct["a"] + dct.get("a") + pytest.raises(KeyError, lambda: dct["x"]) + assert "a" in dct + list(iter(dct)) + dct.copy() + with assert_calls(0, "callback triggered without modification"): + # methods that may write but don't + dct.pop("z", None) + dct.setdefault("a") + + def test_callback_dict_writes(self): + assert_calls, func = make_call_asserter() + initial = {"a": "foo", "b": "bar"} + dct = self.storage_class(initial=initial, on_update=func) + with assert_calls(8, "callback not triggered by write method"): + # always-write methods + dct["z"] = 123 + dct["z"] = 123 # must trigger again + del dct["z"] + dct.pop("b", None) + dct.setdefault("x") + dct.popitem() + dct.update([]) + dct.clear() + with assert_calls(0, "callback triggered by failed del"): + pytest.raises(KeyError, lambda: dct.__delitem__("x")) + with assert_calls(0, "callback triggered by failed pop"): + pytest.raises(KeyError, lambda: dct.pop("x")) + + +class TestCacheControl: + def test_repr(self): + cc = ds.RequestCacheControl([("max-age", "0"), ("private", "True")]) + assert repr(cc) == "" + + def test_set_none(self): + cc = ds.ResponseCacheControl([("max-age", "0")]) + assert cc.no_cache is None + cc.no_cache = None + assert cc.no_cache is None + 
cc.no_cache = False + assert cc.no_cache is False + + +class TestContentSecurityPolicy: + def test_construct(self): + csp = ds.ContentSecurityPolicy([("font-src", "'self'"), ("media-src", "*")]) + assert csp.font_src == "'self'" + assert csp.media_src == "*" + policies = [policy.strip() for policy in csp.to_header().split(";")] + assert "font-src 'self'" in policies + assert "media-src *" in policies + + def test_properties(self): + csp = ds.ContentSecurityPolicy() + csp.default_src = "* 'self' quart.com" + csp.img_src = "'none'" + policies = [policy.strip() for policy in csp.to_header().split(";")] + assert "default-src * 'self' quart.com" in policies + assert "img-src 'none'" in policies + + +class TestAccept: + storage_class = ds.Accept + + def test_accept_basic(self): + accept = self.storage_class( + [("tinker", 0), ("tailor", 0.333), ("soldier", 0.667), ("sailor", 1)] + ) + # check __getitem__ on indices + assert accept[3] == ("tinker", 0) + assert accept[2] == ("tailor", 0.333) + assert accept[1] == ("soldier", 0.667) + assert accept[0], ("sailor", 1) + # check __getitem__ on string + assert accept["tinker"] == 0 + assert accept["tailor"] == 0.333 + assert accept["soldier"] == 0.667 + assert accept["sailor"] == 1 + assert accept["spy"] == 0 + # check quality method + assert accept.quality("tinker") == 0 + assert accept.quality("tailor") == 0.333 + assert accept.quality("soldier") == 0.667 + assert accept.quality("sailor") == 1 + assert accept.quality("spy") == 0 + # check __contains__ + assert "sailor" in accept + assert "spy" not in accept + # check index method + assert accept.index("tinker") == 3 + assert accept.index("tailor") == 2 + assert accept.index("soldier") == 1 + assert accept.index("sailor") == 0 + with pytest.raises(ValueError): + accept.index("spy") + # check find method + assert accept.find("tinker") == 3 + assert accept.find("tailor") == 2 + assert accept.find("soldier") == 1 + assert accept.find("sailor") == 0 + assert accept.find("spy") == 
-1 + # check to_header method + assert accept.to_header() == "sailor,soldier;q=0.667,tailor;q=0.333,tinker;q=0" + # check best_match method + assert ( + accept.best_match(["tinker", "tailor", "soldier", "sailor"], default=None) + == "sailor" + ) + assert ( + accept.best_match(["tinker", "tailor", "soldier"], default=None) + == "soldier" + ) + assert accept.best_match(["tinker", "tailor"], default=None) == "tailor" + assert accept.best_match(["tinker"], default=None) is None + assert accept.best_match(["tinker"], default="x") == "x" + + def test_accept_wildcard(self): + accept = self.storage_class([("*", 0), ("asterisk", 1)]) + assert "*" in accept + assert accept.best_match(["asterisk", "star"], default=None) == "asterisk" + assert accept.best_match(["star"], default=None) is None + + def test_accept_keep_order(self): + accept = self.storage_class([("*", 1)]) + assert accept.best_match(["alice", "bob"]) == "alice" + assert accept.best_match(["bob", "alice"]) == "bob" + accept = self.storage_class([("alice", 1), ("bob", 1)]) + assert accept.best_match(["alice", "bob"]) == "alice" + assert accept.best_match(["bob", "alice"]) == "bob" + + def test_accept_wildcard_specificity(self): + accept = self.storage_class([("asterisk", 0), ("star", 0.5), ("*", 1)]) + assert accept.best_match(["star", "asterisk"], default=None) == "star" + assert accept.best_match(["asterisk", "star"], default=None) == "star" + assert accept.best_match(["asterisk", "times"], default=None) == "times" + assert accept.best_match(["asterisk"], default=None) is None + + def test_accept_equal_quality(self): + accept = self.storage_class([("a", 1), ("b", 1)]) + assert accept.best == "a" + + +class TestMIMEAccept: + @pytest.mark.parametrize( + ("values", "matches", "default", "expect"), + [ + ([("text/*", 1)], ["text/html"], None, "text/html"), + ([("text/*", 1)], ["image/png"], "text/plain", "text/plain"), + ([("text/*", 1)], ["image/png"], None, None), + ( + [("*/*", 1), ("text/html", 1)], + 
["image/png", "text/html"], + None, + "text/html", + ), + ( + [("*/*", 1), ("text/html", 1)], + ["image/png", "text/plain"], + None, + "image/png", + ), + ( + [("*/*", 1), ("text/html", 1), ("image/*", 1)], + ["image/png", "text/html"], + None, + "text/html", + ), + ( + [("*/*", 1), ("text/html", 1), ("image/*", 1)], + ["text/plain", "image/png"], + None, + "image/png", + ), + ( + [("text/html", 1), ("text/html; level=1", 1)], + ["text/html;level=1"], + None, + "text/html;level=1", + ), + ], + ) + def test_mime_accept(self, values, matches, default, expect): + accept = ds.MIMEAccept(values) + match = accept.best_match(matches, default=default) + assert match == expect + + +class TestLanguageAccept: + @pytest.mark.parametrize( + ("values", "matches", "default", "expect"), + ( + ([("en-us", 1)], ["en"], None, "en"), + ([("en", 1)], ["en_US"], None, "en_US"), + ([("en-GB", 1)], ["en-US"], None, None), + ([("de_AT", 1), ("de", 0.9)], ["en"], None, None), + ([("de_AT", 1), ("de", 0.9), ("en-US", 0.8)], ["de", "en"], None, "de"), + ([("de_AT", 0.9), ("en-US", 1)], ["en"], None, "en"), + ([("en-us", 1)], ["en-us"], None, "en-us"), + ([("en-us", 1)], ["en-us", "en"], None, "en-us"), + ([("en-GB", 1)], ["en-US", "en"], "en-US", "en"), + ([("de_AT", 1)], ["en-US", "en"], "en-US", "en-US"), + ([("aus-EN", 1)], ["aus"], None, "aus"), + ([("aus", 1)], ["aus-EN"], None, "aus-EN"), + ), + ) + def test_best_match_fallback(self, values, matches, default, expect): + accept = ds.LanguageAccept(values) + best = accept.best_match(matches, default=default) + assert best == expect + + +class TestFileStorage: + storage_class = ds.FileStorage + + def test_mimetype_always_lowercase(self): + file_storage = self.storage_class(content_type="APPLICATION/JSON") + assert file_storage.mimetype == "application/json" + + @pytest.mark.parametrize("data", [io.StringIO("one\ntwo"), io.BytesIO(b"one\ntwo")]) + def test_bytes_proper_sentinel(self, data): + # iterate over new lines and don't enter an 
infinite loop + storage = self.storage_class(data) + idx = -1 + + for idx, _line in enumerate(storage): + assert idx < 2 + + assert idx == 1 + + @pytest.mark.parametrize("stream", (tempfile.SpooledTemporaryFile, io.BytesIO)) + def test_proxy_can_access_stream_attrs(self, stream): + """``SpooledTemporaryFile`` doesn't implement some of + ``IOBase``. Ensure that ``FileStorage`` can still access the + attributes from the backing file object. + + https://github.com/pallets/werkzeug/issues/1344 + https://github.com/python/cpython/pull/3249 + """ + file_storage = self.storage_class(stream=stream()) + + for name in ("fileno", "writable", "readable", "seekable"): + assert hasattr(file_storage, name) + + file_storage.close() + + def test_save_to_pathlib_dst(self, tmp_path): + src = tmp_path / "src.txt" + src.write_text("test") + dst = tmp_path / "dst.txt" + + with src.open("rb") as f: + storage = self.storage_class(f) + storage.save(dst) + + assert dst.read_text() == "test" + + def test_save_to_bytes_io(self): + storage = self.storage_class(io.BytesIO(b"one\ntwo")) + dst = io.BytesIO() + storage.save(dst) + assert dst.getvalue() == b"one\ntwo" + + def test_save_to_file(self, tmp_path): + path = tmp_path / "file.data" + storage = self.storage_class(io.BytesIO(b"one\ntwo")) + with path.open("wb") as dst: + storage.save(dst) + with path.open("rb") as src: + assert src.read() == b"one\ntwo" + + +@pytest.mark.parametrize("ranges", ([(0, 1), (-5, None)], [(5, None)])) +def test_range_to_header(ranges): + header = ds.Range("byes", ranges).to_header() + r = http.parse_range_header(header) + assert r.ranges == ranges + + +@pytest.mark.parametrize( + "ranges", ([(0, 0)], [(None, 1)], [(1, 0)], [(0, 1), (-5, 10)]) +) +def test_range_validates_ranges(ranges): + with pytest.raises(ValueError): + ds.Range("bytes", ranges) diff --git a/tests/test_debug.py b/tests/test_debug.py new file mode 100644 index 0000000..cf171d1 --- /dev/null +++ b/tests/test_debug.py @@ -0,0 +1,294 @@ +import re 
+import sys + +import pytest + +from werkzeug.debug import console +from werkzeug.debug import DebuggedApplication +from werkzeug.debug import DebugTraceback +from werkzeug.debug import get_machine_id +from werkzeug.debug.console import HTMLStringO +from werkzeug.debug.repr import debug_repr +from werkzeug.debug.repr import DebugReprGenerator +from werkzeug.debug.repr import dump +from werkzeug.debug.repr import helper +from werkzeug.test import Client +from werkzeug.wrappers import Request + + +class TestDebugRepr: + def test_basic_repr(self): + assert debug_repr([]) == "[]" + assert debug_repr([1, 2]) == ( + '[1, 2]' + ) + assert debug_repr([1, "test"]) == ( + '[1,' + ' 'test']' + ) + assert debug_repr([None]) == '[None]' + + def test_string_repr(self): + assert debug_repr("") == '''' + assert debug_repr("foo") == ''foo'' + assert debug_repr("s" * 80) == ( + f''{"s" * 69}' + f'{"s" * 11}'' + ) + assert debug_repr("<" * 80) == ( + f''{"<" * 69}' + f'{"<" * 11}'' + ) + + def test_string_subclass_repr(self): + class Test(str): + pass + + assert debug_repr(Test("foo")) == ( + 'test_debug.' 
+ 'Test('foo')' + ) + + def test_sequence_repr(self): + assert debug_repr(list(range(20))) == ( + '[0, 1, ' + '2, 3, ' + '4, 5, ' + '6, 7, ' + '8, ' + '9, 10, ' + '11, 12, ' + '13, 14, ' + '15, 16, ' + '17, 18, ' + '19]' + ) + + def test_mapping_repr(self): + assert debug_repr({}) == "{}" + assert debug_repr({"foo": 42}) == ( + '{'foo'' + ': 42' + "}" + ) + assert debug_repr(dict(zip(range(10), [None] * 10))) == ( + '{0' + ': None' + ", " + '1' + ': None' + ", " + '2' + ': None' + ", " + '3' + ': None' + ", " + '' + '4' + ': None' + ", " + '5' + ': None' + ", " + '6' + ': None' + ", " + '7' + ': None' + ", " + '8' + ': None' + ", " + '9' + ': None' + "}" + ) + assert debug_repr((1, "zwei", "drei")) == ( + '(1, '' + 'zwei', 'drei')' + ) + + def test_custom_repr(self): + class Foo: + def __repr__(self): + return "" + + assert debug_repr(Foo()) == '<Foo 42>' + + def test_list_subclass_repr(self): + class MyList(list): + pass + + assert debug_repr(MyList([1, 2])) == ( + 'test_debug.MyList([' + '1, 2])' + ) + + def test_regex_repr(self): + assert ( + debug_repr(re.compile(r"foo\d")) + == "re.compile(r'foo\\d')" + ) + # No ur'' in Py3 + # https://bugs.python.org/issue15096 + assert debug_repr(re.compile("foo\\d")) == ( + "re.compile(r'foo\\d')" + ) + + def test_set_repr(self): + assert ( + debug_repr(frozenset("x")) + == 'frozenset(['x'])' + ) + assert debug_repr(set("x")) == ( + 'set(['x'])' + ) + + def test_recursive_repr(self): + a = [1] + a.append(a) + assert debug_repr(a) == '[1, [...]]' + + def test_broken_repr(self): + class Foo: + def __repr__(self): + raise Exception("broken!") + + assert debug_repr(Foo()) == ( + '<broken repr (Exception: ' + "broken!)>" + ) + + +class Foo: + x = 42 + y = 23 + + def __init__(self): + self.z = 15 + + +class TestDebugHelpers: + def test_object_dumping(self): + drg = DebugReprGenerator() + out = drg.dump_object(Foo()) + assert re.search("Details for test_debug.Foo object at", out) + assert re.search('x.*42', out, flags=re.DOTALL) + 
assert re.search('y.*23', out, flags=re.DOTALL) + assert re.search('z.*15', out, flags=re.DOTALL) + + out = drg.dump_object({"x": 42, "y": 23}) + assert re.search("Contents of", out) + assert re.search('x.*42', out, flags=re.DOTALL) + assert re.search('y.*23', out, flags=re.DOTALL) + + out = drg.dump_object({"x": 42, "y": 23, 23: 11}) + assert not re.search("Contents of", out) + + out = drg.dump_locals({"x": 42, "y": 23}) + assert re.search("Local variables in frame", out) + assert re.search('x.*42', out, flags=re.DOTALL) + assert re.search('y.*23', out, flags=re.DOTALL) + + def test_debug_dump(self): + old = sys.stdout + sys.stdout = HTMLStringO() + try: + dump([1, 2, 3]) + x = sys.stdout.reset() + dump() + y = sys.stdout.reset() + finally: + sys.stdout = old + + assert "Details for list object at" in x + assert '1' in x + assert "Local variables in frame" in y + assert "x" in y + assert "old" in y + + def test_debug_help(self): + old = sys.stdout + sys.stdout = HTMLStringO() + try: + helper([1, 2, 3]) + x = sys.stdout.reset() + finally: + sys.stdout = old + + assert "Help on list object" in x + assert "__delitem__" in x + + def test_exc_divider_found_on_chained_exception(self): + @Request.application + def app(request): + def do_something(): + raise ValueError("inner") + + try: + do_something() + except ValueError: + raise KeyError("outer") # noqa: B904 + + debugged = DebuggedApplication(app) + client = Client(debugged) + response = client.get("/") + data = response.get_data(as_text=True) + assert 'raise ValueError("inner")' in data + assert '
    ' in data + assert 'raise KeyError("outer")' in data + + +def test_get_machine_id(): + rv = get_machine_id() + assert isinstance(rv, bytes) + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.parametrize("crash", (True, False)) +@pytest.mark.dev_server +def test_basic(dev_server, crash): + c = dev_server(use_debugger=True) + r = c.request("/crash" if crash else "") + assert r.status == (500 if crash else 200) + + if crash: + assert b"The debugger caught an exception in your WSGI application" in r.data + else: + assert r.json["PATH_INFO"] == "/" + + +def test_console_closure_variables(monkeypatch): + # restore the original display hook + monkeypatch.setattr(sys, "displayhook", console._displayhook) + c = console.Console() + c.eval("y = 5") + c.eval("x = lambda: y") + ret = c.eval("x()") + assert ret == ">>> x()\n5\n" + + +@pytest.mark.timeout(2) +def test_chained_exception_cycle(): + try: + try: + raise ValueError() + except ValueError: + raise TypeError() # noqa: B904 + except TypeError as e: + # create a cycle and make it available outside the except block + e.__context__.__context__ = error = e + + # if cycles aren't broken, this will time out + tb = DebugTraceback(error) + assert len(tb.all_tracebacks) == 2 + + +def test_exception_without_traceback(): + try: + raise Exception("msg1") + except Exception as e: + # filter_hidden_frames should skip this since it has no traceback + e.__context__ = Exception("msg2") + DebugTraceback(e) diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 0000000..d8fed96 --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,172 @@ +from datetime import datetime + +import pytest +from markupsafe import escape +from markupsafe import Markup + +from werkzeug import exceptions +from werkzeug.datastructures import Headers +from werkzeug.datastructures import WWWAuthenticate +from werkzeug.exceptions import HTTPException +from werkzeug.wrappers import 
Response + + +def test_proxy_exception(): + orig_resp = Response("Hello World") + with pytest.raises(exceptions.HTTPException) as excinfo: + exceptions.abort(orig_resp) + resp = excinfo.value.get_response({}) + assert resp is orig_resp + assert resp.get_data() == b"Hello World" + + +@pytest.mark.parametrize( + "test", + [ + (exceptions.BadRequest, 400), + (exceptions.Unauthorized, 401, 'Basic "test realm"'), + (exceptions.Forbidden, 403), + (exceptions.NotFound, 404), + (exceptions.MethodNotAllowed, 405, ["GET", "HEAD"]), + (exceptions.NotAcceptable, 406), + (exceptions.RequestTimeout, 408), + (exceptions.Gone, 410), + (exceptions.LengthRequired, 411), + (exceptions.PreconditionFailed, 412), + (exceptions.RequestEntityTooLarge, 413), + (exceptions.RequestURITooLarge, 414), + (exceptions.UnsupportedMediaType, 415), + (exceptions.UnprocessableEntity, 422), + (exceptions.Locked, 423), + (exceptions.InternalServerError, 500), + (exceptions.NotImplemented, 501), + (exceptions.BadGateway, 502), + (exceptions.ServiceUnavailable, 503), + ], +) +def test_aborter_general(test): + exc_type = test[0] + args = test[1:] + + with pytest.raises(exc_type) as exc_info: + exceptions.abort(*args) + assert type(exc_info.value) is exc_type + + +def test_abort_description_markup(): + with pytest.raises(HTTPException) as exc_info: + exceptions.abort(400, Markup("<")) + + assert "<" in str(exc_info.value) + + +def test_aborter_custom(): + myabort = exceptions.Aborter({1: exceptions.NotFound}) + pytest.raises(LookupError, myabort, 404) + pytest.raises(exceptions.NotFound, myabort, 1) + + myabort = exceptions.Aborter(extra={1: exceptions.NotFound}) + pytest.raises(exceptions.NotFound, myabort, 404) + pytest.raises(exceptions.NotFound, myabort, 1) + + +def test_exception_repr(): + exc = exceptions.NotFound() + assert str(exc) == ( + "404 Not Found: The requested URL was not found on the server." + " If you entered the URL manually please check your spelling" + " and try again." 
+ ) + assert repr(exc) == "" + + exc = exceptions.NotFound("Not There") + assert str(exc) == "404 Not Found: Not There" + assert repr(exc) == "" + + exc = exceptions.HTTPException("An error message") + assert str(exc) == "??? Unknown Error: An error message" + assert repr(exc) == "" + + +def test_method_not_allowed_methods(): + exc = exceptions.MethodNotAllowed(["GET", "HEAD", "POST"]) + h = dict(exc.get_headers({})) + assert h["Allow"] == "GET, HEAD, POST" + assert "The method is not allowed" in exc.get_description() + + +def test_unauthorized_www_authenticate(): + basic = WWWAuthenticate() + basic.set_basic("test") + digest = WWWAuthenticate() + digest.set_digest("test", "test") + + exc = exceptions.Unauthorized(www_authenticate=basic) + h = Headers(exc.get_headers({})) + assert h["WWW-Authenticate"] == str(basic) + + exc = exceptions.Unauthorized(www_authenticate=[digest, basic]) + h = Headers(exc.get_headers({})) + assert h.get_all("WWW-Authenticate") == [str(digest), str(basic)] + + exc = exceptions.Unauthorized() + h = Headers(exc.get_headers({})) + assert "WWW-Authenticate" not in h + + +def test_response_header_content_type_should_contain_charset(): + exc = exceptions.HTTPException("An error message") + h = exc.get_response({}) + assert h.headers["Content-Type"] == "text/html; charset=utf-8" + + +@pytest.mark.parametrize( + ("cls", "value", "expect"), + [ + (exceptions.TooManyRequests, 20, "20"), + ( + exceptions.ServiceUnavailable, + datetime(2020, 1, 4, 18, 52, 16), + "Sat, 04 Jan 2020 18:52:16 GMT", + ), + ], +) +def test_retry_after_mixin(cls, value, expect): + e = cls(retry_after=value) + h = dict(e.get_headers({})) + assert h["Retry-After"] == expect + + +@pytest.mark.parametrize( + "cls", + sorted( + (e for e in HTTPException.__subclasses__() if e.code and e.code >= 400), + key=lambda e: e.code, # type: ignore + ), +) +def test_passing_response(cls): + class TestResponse(Response): + pass + + exc = cls(response=TestResponse()) + rp = 
exc.get_response({}) + assert isinstance(rp, TestResponse) + + +def test_description_none(): + HTTPException().get_response() + + +@pytest.mark.parametrize( + "cls", + sorted( + (e for e in HTTPException.__subclasses__() if e.code), + key=lambda e: e.code, # type: ignore + ), +) +def test_response_body(cls): + exc = cls() + response_body = exc.get_body() + assert response_body.startswith("\n\n") + assert f"{exc.code} {escape(exc.name)}" in response_body + assert exc.get_description() in response_body diff --git a/tests/test_formparser.py b/tests/test_formparser.py new file mode 100644 index 0000000..49010b4 --- /dev/null +++ b/tests/test_formparser.py @@ -0,0 +1,448 @@ +import csv +import io +from os.path import dirname +from os.path import join + +import pytest + +from werkzeug import formparser +from werkzeug.datastructures import MultiDict +from werkzeug.exceptions import RequestEntityTooLarge +from werkzeug.formparser import FormDataParser +from werkzeug.formparser import parse_form_data +from werkzeug.test import Client +from werkzeug.test import create_environ +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +@Request.application +def form_data_consumer(request): + result_object = request.args["object"] + if result_object == "text": + return Response(repr(request.form["text"])) + f = request.files[result_object] + return Response( + b"\n".join( + ( + repr(f.filename).encode("ascii"), + repr(f.name).encode("ascii"), + repr(f.content_type).encode("ascii"), + f.stream.read(), + ) + ) + ) + + +def get_contents(filename): + with open(filename, "rb") as f: + return f.read() + + +class TestFormParser: + def test_limiting(self): + data = b"foo=Hello+World&bar=baz" + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="application/x-www-form-urlencoded", + method="POST", + ) + req.max_content_length = 400 + assert req.form["foo"] == "Hello World" + + req = Request.from_values( + 
input_stream=io.BytesIO(data), + content_length=len(data), + content_type="application/x-www-form-urlencoded", + method="POST", + ) + req.max_form_memory_size = 7 + pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="application/x-www-form-urlencoded", + method="POST", + ) + req.max_form_memory_size = 400 + assert req.form["foo"] == "Hello World" + + data = ( + b"--foo\r\nContent-Disposition: form-field; name=foo\r\n\r\n" + b"Hello World\r\n" + b"--foo\r\nContent-Disposition: form-field; name=bar\r\n\r\n" + b"bar=baz\r\n--foo--" + ) + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_content_length = 4 + pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + + # when the request entity is too large, the input stream should be + # drained so that firefox (and others) do not report connection reset + # when run through gunicorn + # a sufficiently large stream is necessary for block-based reads + input_stream = io.BytesIO(b"foo=" + b"x" * 128 * 1024) + req = Request.from_values( + input_stream=input_stream, + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_content_length = 4 + pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + # ensure that the stream is exhausted + assert input_stream.read() == b"" + + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_content_length = 400 + assert req.form["foo"] == "Hello World" + + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_form_memory_size = 7 + 
pytest.raises(RequestEntityTooLarge, lambda: req.form["foo"]) + + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + req.max_form_memory_size = 400 + assert req.form["foo"] == "Hello World" + + def test_missing_multipart_boundary(self): + data = ( + b"--foo\r\nContent-Disposition: form-field; name=foo\r\n\r\n" + b"Hello World\r\n" + b"--foo\r\nContent-Disposition: form-field; name=bar\r\n\r\n" + b"bar=baz\r\n--foo--" + ) + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data", + method="POST", + ) + assert req.form == {} + + def test_parse_form_data_put_without_content(self): + # A PUT without a Content-Type header returns empty data + + # Both rfc1945 and rfc2616 (1.0 and 1.1) say "Any HTTP/[1.0/1.1] message + # containing an entity-body SHOULD include a Content-Type header field + # defining the media type of that body." In the case where either + # headers are omitted, parse_form_data should still work. 
+ env = create_environ("/foo", "http://example.org/", method="PUT") + + stream, form, files = formparser.parse_form_data(env) + assert stream.read() == b"" + assert len(form) == 0 + assert len(files) == 0 + + def test_parse_form_data_get_without_content(self): + env = create_environ("/foo", "http://example.org/", method="GET") + + stream, form, files = formparser.parse_form_data(env) + assert stream.read() == b"" + assert len(form) == 0 + assert len(files) == 0 + + @pytest.mark.parametrize( + ("no_spooled", "size"), ((False, 100), (False, 3000), (True, 100), (True, 3000)) + ) + def test_default_stream_factory(self, no_spooled, size, monkeypatch): + if no_spooled: + monkeypatch.setattr("werkzeug.formparser.SpooledTemporaryFile", None) + + data = b"a,b,c\n" * size + with Request.from_values( + data={"foo": (io.BytesIO(data), "test.txt")}, method="POST" + ) as req: + reader = csv.reader(io.TextIOWrapper(req.files["foo"])) + # This fails if file_storage doesn't implement IOBase. + # https://github.com/pallets/werkzeug/issues/1344 + # https://github.com/python/cpython/pull/3249 + assert sum(1 for _ in reader) == size + + def test_parse_bad_content_type(self): + parser = FormDataParser() + assert parser.parse("", "bad-mime-type", 0) == ( + "", + MultiDict([]), + MultiDict([]), + ) + + def test_parse_from_environ(self): + parser = FormDataParser() + stream, _, _ = parser.parse_from_environ({"wsgi.input": ""}) + assert stream is not None + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +class TestMultiPart: + def test_basic(self): + resources = join(dirname(__file__), "multipart") + client = Client(form_data_consumer) + + repository = [ + ( + "firefox3-2png1txt", + "---------------------------186454651713519341951581030105", + [ + ("anchor.png", "file1", "image/png", "file1.png"), + ("application_edit.png", "file2", "image/png", "file2.png"), + ], + "example text", + ), + ( + "firefox3-2pnglongtext", + 
"---------------------------14904044739787191031754711748", + [ + ("accept.png", "file1", "image/png", "file1.png"), + ("add.png", "file2", "image/png", "file2.png"), + ], + "--long text\r\n--with boundary\r\n--lookalikes--", + ), + ( + "opera8-2png1txt", + "----------zEO9jQKmLc2Cq88c23Dx19", + [ + ("arrow_branch.png", "file1", "image/png", "file1.png"), + ("award_star_bronze_1.png", "file2", "image/png", "file2.png"), + ], + "blafasel öäü", + ), + ( + "webkit3-2png1txt", + "----WebKitFormBoundaryjdSFhcARk8fyGNy6", + [ + ("gtk-apply.png", "file1", "image/png", "file1.png"), + ("gtk-no.png", "file2", "image/png", "file2.png"), + ], + "this is another text with ümläüts", + ), + ( + "ie6-2png1txt", + "---------------------------7d91b03a20128", + [ + ("file1.png", "file1", "image/x-png", "file1.png"), + ("file2.png", "file2", "image/x-png", "file2.png"), + ], + "ie6 sucks :-/", + ), + ] + + for name, boundary, files, text in repository: + folder = join(resources, name) + data = get_contents(join(folder, "request.http")) + for filename, field, content_type, fsname in files: + with client.post( + f"/?object={field}", + data=data, + content_type=f'multipart/form-data; boundary="{boundary}"', + content_length=len(data), + ) as response: + lines = response.get_data().split(b"\n", 3) + assert lines[0] == repr(filename).encode("ascii") + assert lines[1] == repr(field).encode("ascii") + assert lines[2] == repr(content_type).encode("ascii") + assert lines[3] == get_contents(join(folder, fsname)) + + with client.post( + "/?object=text", + data=data, + content_type=f'multipart/form-data; boundary="{boundary}"', + content_length=len(data), + ) as response: + assert response.get_data() == repr(text).encode("utf-8") + + @pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") + def test_ie7_unc_path(self): + client = Client(form_data_consumer) + data_file = join(dirname(__file__), "multipart", "ie7_full_path_request.http") + data = get_contents(data_file) + 
boundary = "---------------------------7da36d1b4a0164" + with client.post( + "/?object=cb_file_upload_multiple", + data=data, + content_type=f'multipart/form-data; boundary="{boundary}"', + content_length=len(data), + ) as response: + lines = response.get_data().split(b"\n", 3) + assert lines[0] == b"'Sellersburg Town Council Meeting 02-22-2010doc.doc'" + + def test_end_of_file(self): + # This test looks innocent but it was actually timing out in + # the Werkzeug 0.5 release version (#394) + data = ( + b"--foo\r\n" + b'Content-Disposition: form-data; name="test"; filename="test.txt"\r\n' + b"Content-Type: text/plain\r\n\r\n" + b"file contents and no end" + ) + with Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) as data: + assert not data.files + assert not data.form + + def test_file_no_content_type(self): + data = ( + b"--foo\r\n" + b'Content-Disposition: form-data; name="test"; filename="test.txt"\r\n\r\n' + b"file contents\r\n--foo--" + ) + with Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) as data: + assert data.files["test"].filename == "test.txt" + assert data.files["test"].read() == b"file contents" + + def test_extra_newline(self): + # this test looks innocent but it was actually timing out in + # the Werkzeug 0.5 release version (#394) + data = ( + b"\r\n\r\n--foo\r\n" + b'Content-Disposition: form-data; name="foo"\r\n\r\n' + b"a string\r\n" + b"--foo--" + ) + data = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + assert not data.files + assert data.form["foo"] == "a string" + + def test_headers(self): + data = ( + b"--foo\r\n" + b'Content-Disposition: form-data; name="foo"; filename="foo.txt"\r\n' + b"X-Custom-Header: blah\r\n" + 
b"Content-Type: text/plain; charset=utf-8\r\n\r\n" + b"file contents, just the contents\r\n" + b"--foo--" + ) + with Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) as req: + foo = req.files["foo"] + assert foo.mimetype == "text/plain" + assert foo.mimetype_params == {"charset": "utf-8"} + assert foo.headers["content-type"] == foo.content_type + assert foo.content_type == "text/plain; charset=utf-8" + assert foo.headers["x-custom-header"] == "blah" + + @pytest.mark.parametrize("ending", [b"\n", b"\r", b"\r\n"]) + def test_nonstandard_line_endings(self, ending: bytes): + data = ending.join( + ( + b"--foo", + b"Content-Disposition: form-data; name=foo", + b"", + b"this is just bar", + b"--foo", + b"Content-Disposition: form-data; name=bar", + b"", + b"blafasel", + b"--foo--", + ) + ) + req = Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + assert req.form["foo"] == "this is just bar" + assert req.form["bar"] == "blafasel" + + def test_failures(self): + def parse_multipart(stream, boundary, content_length): + parser = formparser.MultiPartParser(content_length) + return parser.parse(stream, boundary, content_length) + + data = b"--foo\r\n\r\nHello World\r\n--foo--" + pytest.raises(ValueError, parse_multipart, io.BytesIO(data), b"foo", len(data)) + + data = ( + b"--foo\r\nContent-Disposition: form-field; name=foo\r\n\r\nHello World\r\n" + ) + pytest.raises(ValueError, parse_multipart, io.BytesIO(data), b"foo", len(data)) + + def test_empty_multipart(self): + environ = {} + data = b"--boundary--" + environ["REQUEST_METHOD"] = "POST" + environ["CONTENT_TYPE"] = "multipart/form-data; boundary=boundary" + environ["CONTENT_LENGTH"] = str(len(data)) + environ["wsgi.input"] = io.BytesIO(data) + stream, form, files = parse_form_data(environ, silent=False) + rv = 
stream.read() + assert rv == b"" + assert form == MultiDict() + assert files == MultiDict() + + +class TestMultiPartParser: + def test_constructor_not_pass_stream_factory_and_cls(self): + parser = formparser.MultiPartParser() + + assert parser.stream_factory is formparser.default_stream_factory + assert parser.cls is MultiDict + + def test_constructor_pass_stream_factory_and_cls(self): + def stream_factory(): + pass + + parser = formparser.MultiPartParser(stream_factory=stream_factory, cls=dict) + + assert parser.stream_factory is stream_factory + assert parser.cls is dict + + def test_file_rfc2231_filename_continuations(self): + data = ( + b"--foo\r\n" + b"Content-Type: text/plain; charset=utf-8\r\n" + b"Content-Disposition: form-data; name=rfc2231;\r\n" + b" filename*0*=ascii''a%20b%20;\r\n" + b" filename*1*=c%20d%20;\r\n" + b' filename*2="e f.txt"\r\n\r\n' + b"file contents\r\n--foo--" + ) + with Request.from_values( + input_stream=io.BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) as request: + assert request.files["rfc2231"].filename == "a b c d e f.txt" + assert request.files["rfc2231"].read() == b"file contents" diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 0000000..3760dc1 --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,717 @@ +import base64 +from datetime import date +from datetime import datetime +from datetime import timedelta +from datetime import timezone + +import pytest + +from werkzeug import datastructures +from werkzeug import http +from werkzeug._internal import _wsgi_encoding_dance +from werkzeug.test import create_environ + + +class TestHTTPUtility: + def test_accept(self): + a = http.parse_accept_header("en-us,ru;q=0.5") + assert list(a.values()) == ["en-us", "ru"] + assert a.best == "en-us" + assert a.find("ru") == 1 + pytest.raises(ValueError, a.index, "de") + assert a.to_header() == "en-us,ru;q=0.5" + + def test_mime_accept(self): + a = 
http.parse_accept_header( + "text/xml,application/xml," + "application/xhtml+xml," + "application/foo;quiet=no; bar=baz;q=0.6," + "text/html;q=0.9,text/plain;q=0.8," + "image/png,*/*;q=0.5", + datastructures.MIMEAccept, + ) + pytest.raises(ValueError, lambda: a["missing"]) + assert a["image/png"] == 1 + assert a["text/plain"] == 0.8 + assert a["foo/bar"] == 0.5 + assert a["application/foo;quiet=no; bar=baz"] == 0.6 + assert a[a.find("foo/bar")] == ("*/*", 0.5) + + def test_accept_matches(self): + a = http.parse_accept_header( + "text/xml,application/xml,application/xhtml+xml," + "text/html;q=0.9,text/plain;q=0.8," + "image/png", + datastructures.MIMEAccept, + ) + assert ( + a.best_match(["text/html", "application/xhtml+xml"]) + == "application/xhtml+xml" + ) + assert a.best_match(["text/html"]) == "text/html" + assert a.best_match(["foo/bar"]) is None + assert a.best_match(["foo/bar", "bar/foo"], default="foo/bar") == "foo/bar" + assert a.best_match(["application/xml", "text/xml"]) == "application/xml" + + def test_accept_mime_specificity(self): + a = http.parse_accept_header( + "text/*, text/html, text/html;level=1, */*", datastructures.MIMEAccept + ) + assert a.best_match(["text/html; version=1", "text/html"]) == "text/html" + assert a.best_match(["text/html", "text/html; level=1"]) == "text/html; level=1" + + def test_charset_accept(self): + a = http.parse_accept_header( + "ISO-8859-1,utf-8;q=0.7,*;q=0.7", datastructures.CharsetAccept + ) + assert a["iso-8859-1"] == a["iso8859-1"] + assert a["iso-8859-1"] == 1 + assert a["UTF8"] == 0.7 + assert a["ebcdic"] == 0.7 + + def test_language_accept(self): + a = http.parse_accept_header( + "de-AT,de;q=0.8,en;q=0.5", datastructures.LanguageAccept + ) + assert a.best == "de-AT" + assert "de_AT" in a + assert "en" in a + assert a["de-at"] == 1 + assert a["en"] == 0.5 + + def test_set_header(self): + hs = http.parse_set_header('foo, Bar, "Blah baz", Hehe') + assert "blah baz" in hs + assert "foobar" not in hs + assert "foo" 
in hs + assert list(hs) == ["foo", "Bar", "Blah baz", "Hehe"] + hs.add("Foo") + assert hs.to_header() == 'foo, Bar, "Blah baz", Hehe' + + def test_list_header(self): + hl = http.parse_list_header("foo baz, blah") + assert hl == ["foo baz", "blah"] + + def test_dict_header(self): + d = http.parse_dict_header('foo="bar baz", blah=42') + assert d == {"foo": "bar baz", "blah": "42"} + + def test_cache_control_header(self): + cc = http.parse_cache_control_header("max-age=0, no-cache") + assert cc.max_age == 0 + assert cc.no_cache + cc = http.parse_cache_control_header( + 'private, community="UCI"', None, datastructures.ResponseCacheControl + ) + assert cc.private + assert cc["community"] == "UCI" + + c = datastructures.ResponseCacheControl() + assert c.no_cache is None + assert c.private is None + c.no_cache = True + assert c.no_cache == "*" + c.private = True + assert c.private == "*" + del c.private + assert c.private is None + # max_age is an int, other types are converted + c.max_age = 3.1 + assert c.max_age == 3 + del c.max_age + c.s_maxage = 3.1 + assert c.s_maxage == 3 + del c.s_maxage + assert c.to_header() == "no-cache" + + def test_csp_header(self): + csp = http.parse_csp_header( + "default-src 'self'; script-src 'unsafe-inline' *; img-src" + ) + assert csp.default_src == "'self'" + assert csp.script_src == "'unsafe-inline' *" + assert csp.img_src is None + + def test_authorization_header(self): + a = http.parse_authorization_header("Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==") + assert a.type == "basic" + assert a.username == "Aladdin" + assert a.password == "open sesame" + + a = http.parse_authorization_header( + "Basic 0YDRg9GB0YHQutC40IE60JHRg9C60LLRiw==" + ) + assert a.type == "basic" + assert a.username == "русскиЁ" + assert a.password == "Буквы" + + a = http.parse_authorization_header("Basic 5pmu6YCa6K+dOuS4reaWhw==") + assert a.type == "basic" + assert a.username == "普通话" + assert a.password == "中文" + + a = http.parse_authorization_header( + '''Digest 
username="Mufasa", + realm="testrealm@host.invalid", + nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", + uri="/dir/index.html", + qop=auth, + nc=00000001, + cnonce="0a4f113b", + response="6629fae49393a05397450978507c4ef1", + opaque="5ccc069c403ebaf9f0171e9517f40e41"''' + ) + assert a.type == "digest" + assert a.username == "Mufasa" + assert a.realm == "testrealm@host.invalid" + assert a.nonce == "dcd98b7102dd2f0e8b11d0f600bfb0c093" + assert a.uri == "/dir/index.html" + assert a.qop == "auth" + assert a.nc == "00000001" + assert a.cnonce == "0a4f113b" + assert a.response == "6629fae49393a05397450978507c4ef1" + assert a.opaque == "5ccc069c403ebaf9f0171e9517f40e41" + + a = http.parse_authorization_header( + '''Digest username="Mufasa", + realm="testrealm@host.invalid", + nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", + uri="/dir/index.html", + response="e257afa1414a3340d93d30955171dd0e", + opaque="5ccc069c403ebaf9f0171e9517f40e41"''' + ) + assert a.type == "digest" + assert a.username == "Mufasa" + assert a.realm == "testrealm@host.invalid" + assert a.nonce == "dcd98b7102dd2f0e8b11d0f600bfb0c093" + assert a.uri == "/dir/index.html" + assert a.response == "e257afa1414a3340d93d30955171dd0e" + assert a.opaque == "5ccc069c403ebaf9f0171e9517f40e41" + + assert http.parse_authorization_header("") is None + assert http.parse_authorization_header(None) is None + assert http.parse_authorization_header("foo") is None + + def test_bad_authorization_header_encoding(self): + """If the base64 encoded bytes can't be decoded as UTF-8""" + content = base64.b64encode(b"\xffser:pass").decode() + assert http.parse_authorization_header(f"Basic {content}") is None + + def test_www_authenticate_header(self): + wa = http.parse_www_authenticate_header('Basic realm="WallyWorld"') + assert wa.type == "basic" + assert wa.realm == "WallyWorld" + wa.realm = "Foo Bar" + assert wa.to_header() == 'Basic realm="Foo Bar"' + + wa = http.parse_www_authenticate_header( + '''Digest + realm="testrealm@host.com", + 
qop="auth,auth-int", + nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", + opaque="5ccc069c403ebaf9f0171e9517f40e41"''' + ) + assert wa.type == "digest" + assert wa.realm == "testrealm@host.com" + assert "auth" in wa.qop + assert "auth-int" in wa.qop + assert wa.nonce == "dcd98b7102dd2f0e8b11d0f600bfb0c093" + assert wa.opaque == "5ccc069c403ebaf9f0171e9517f40e41" + + wa = http.parse_www_authenticate_header("broken") + assert wa.type == "broken" + + assert not http.parse_www_authenticate_header("").type + assert not http.parse_www_authenticate_header("") + + def test_etags(self): + assert http.quote_etag("foo") == '"foo"' + assert http.quote_etag("foo", True) == 'W/"foo"' + assert http.unquote_etag('"foo"') == ("foo", False) + assert http.unquote_etag('W/"foo"') == ("foo", True) + es = http.parse_etags('"foo", "bar", W/"baz", blar') + assert sorted(es) == ["bar", "blar", "foo"] + assert "foo" in es + assert "baz" not in es + assert es.contains_weak("baz") + assert "blar" in es + assert es.contains_raw('W/"baz"') + assert es.contains_raw('"foo"') + assert sorted(es.to_header().split(", ")) == [ + '"bar"', + '"blar"', + '"foo"', + 'W/"baz"', + ] + + def test_etags_nonzero(self): + etags = http.parse_etags('W/"foo"') + assert bool(etags) + assert etags.contains_raw('W/"foo"') + + def test_remove_entity_headers(self): + now = http.http_date() + headers1 = [ + ("Date", now), + ("Content-Type", "text/html"), + ("Content-Length", "0"), + ] + headers2 = datastructures.Headers(headers1) + + http.remove_entity_headers(headers1) + assert headers1 == [("Date", now)] + + http.remove_entity_headers(headers2) + assert headers2 == datastructures.Headers([("Date", now)]) + + def test_remove_hop_by_hop_headers(self): + headers1 = [("Connection", "closed"), ("Foo", "bar"), ("Keep-Alive", "wtf")] + headers2 = datastructures.Headers(headers1) + + http.remove_hop_by_hop_headers(headers1) + assert headers1 == [("Foo", "bar")] + + http.remove_hop_by_hop_headers(headers2) + assert headers2 == 
datastructures.Headers([("Foo", "bar")]) + + def test_parse_options_header(self): + assert http.parse_options_header(None) == ("", {}) + assert http.parse_options_header("") == ("", {}) + assert http.parse_options_header(r'something; foo="other\"thing"') == ( + "something", + {"foo": 'other"thing'}, + ) + assert http.parse_options_header(r'something; foo="other\"thing"; meh=42') == ( + "something", + {"foo": 'other"thing', "meh": "42"}, + ) + assert http.parse_options_header( + r'something; foo="other\"thing"; meh=42; bleh' + ) == ("something", {"foo": 'other"thing', "meh": "42", "bleh": None}) + assert http.parse_options_header( + 'something; foo="other;thing"; meh=42; bleh' + ) == ("something", {"foo": "other;thing", "meh": "42", "bleh": None}) + assert http.parse_options_header('something; foo="otherthing"; meh=; bleh') == ( + "something", + {"foo": "otherthing", "meh": None, "bleh": None}, + ) + # Issue #404 + assert http.parse_options_header( + 'multipart/form-data; name="foo bar"; filename="bar foo"' + ) == ("multipart/form-data", {"name": "foo bar", "filename": "bar foo"}) + # Examples from RFC + assert http.parse_options_header("audio/*; q=0.2, audio/basic") == ( + "audio/*", + {"q": "0.2"}, + ) + + assert http.parse_options_header( + "text/plain; q=0.5, text/html\n text/x-dvi; q=0.8, text/x-c" + ) == ("text/plain", {"q": "0.5"}) + # Issue #932 + assert http.parse_options_header( + "form-data; name=\"a_file\"; filename*=UTF-8''" + '"%c2%a3%20and%20%e2%82%ac%20rates"' + ) == ("form-data", {"name": "a_file", "filename": "\xa3 and \u20ac rates"}) + assert http.parse_options_header( + "form-data; name*=UTF-8''\"%C5%AAn%C4%ADc%C5%8Dde%CC%BD\"; " + 'filename="some_file.txt"' + ) == ( + "form-data", + {"name": "\u016an\u012dc\u014dde\u033d", "filename": "some_file.txt"}, + ) + + def test_parse_options_header_value_with_quotes(self): + assert http.parse_options_header( + 'form-data; name="file"; filename="t\'es\'t.txt"' + ) == ("form-data", {"name": "file", 
"filename": "t'es't.txt"}) + assert http.parse_options_header( + "form-data; name=\"file\"; filename*=UTF-8''\"'🐍'.txt\"" + ) == ("form-data", {"name": "file", "filename": "'🐍'.txt"}) + + def test_parse_options_header_broken_values(self): + # Issue #995 + assert http.parse_options_header(" ") == ("", {}) + assert http.parse_options_header(" , ") == ("", {}) + assert http.parse_options_header(" ; ") == ("", {}) + assert http.parse_options_header(" ,; ") == ("", {}) + assert http.parse_options_header(" , a ") == ("", {}) + assert http.parse_options_header(" ; a ") == ("", {}) + + def test_parse_options_header_case_insensitive(self): + _, options = http.parse_options_header(r'something; fileName="File.ext"') + assert options["filename"] == "File.ext" + + def test_dump_options_header(self): + assert http.dump_options_header("foo", {"bar": 42}) == "foo; bar=42" + assert http.dump_options_header("foo", {"bar": 42, "fizz": None}) in ( + "foo; bar=42; fizz", + "foo; fizz; bar=42", + ) + + def test_dump_header(self): + assert http.dump_header([1, 2, 3]) == "1, 2, 3" + assert http.dump_header([1, 2, 3], allow_token=False) == '"1", "2", "3"' + assert http.dump_header({"foo": "bar"}, allow_token=False) == 'foo="bar"' + assert http.dump_header({"foo": "bar"}) == "foo=bar" + + def test_is_resource_modified(self): + env = create_environ() + + # any method is allowed + env["REQUEST_METHOD"] = "POST" + assert http.is_resource_modified(env, etag="testing") + env["REQUEST_METHOD"] = "GET" + + # etagify from data + pytest.raises(TypeError, http.is_resource_modified, env, data="42", etag="23") + env["HTTP_IF_NONE_MATCH"] = http.generate_etag(b"awesome") + assert not http.is_resource_modified(env, data=b"awesome") + + env["HTTP_IF_MODIFIED_SINCE"] = http.http_date(datetime(2008, 1, 1, 12, 30)) + assert not http.is_resource_modified( + env, last_modified=datetime(2008, 1, 1, 12, 00) + ) + assert http.is_resource_modified( + env, last_modified=datetime(2008, 1, 1, 13, 00) + ) + + def 
test_is_resource_modified_for_range_requests(self): + env = create_environ() + + env["HTTP_IF_MODIFIED_SINCE"] = http.http_date(datetime(2008, 1, 1, 12, 30)) + env["HTTP_IF_RANGE"] = http.generate_etag(b"awesome_if_range") + # Range header not present, so If-Range should be ignored + assert not http.is_resource_modified( + env, + data=b"not_the_same", + ignore_if_range=False, + last_modified=datetime(2008, 1, 1, 12, 30), + ) + + env["HTTP_RANGE"] = "" + assert not http.is_resource_modified( + env, data=b"awesome_if_range", ignore_if_range=False + ) + assert http.is_resource_modified( + env, data=b"not_the_same", ignore_if_range=False + ) + + env["HTTP_IF_RANGE"] = http.http_date(datetime(2008, 1, 1, 13, 30)) + assert http.is_resource_modified( + env, last_modified=datetime(2008, 1, 1, 14, 00), ignore_if_range=False + ) + assert not http.is_resource_modified( + env, last_modified=datetime(2008, 1, 1, 13, 30), ignore_if_range=False + ) + assert http.is_resource_modified( + env, last_modified=datetime(2008, 1, 1, 13, 30), ignore_if_range=True + ) + + def test_parse_cookie(self): + cookies = http.parse_cookie( + "dismiss-top=6; CP=null*; PHPSESSID=0a539d42abc001cdc762809248d4beed;" + 'a=42; b="\\";"; ; fo234{=bar;blub=Blah; "__Secure-c"=d' + ) + assert cookies.to_dict() == { + "CP": "null*", + "PHPSESSID": "0a539d42abc001cdc762809248d4beed", + "a": "42", + "dismiss-top": "6", + "b": '";', + "fo234{": "bar", + "blub": "Blah", + '"__Secure-c"': "d", + } + + def test_dump_cookie(self): + rv = http.dump_cookie( + "foo", "bar baz blub", 360, httponly=True, sync_expires=False + ) + assert set(rv.split("; ")) == { + "HttpOnly", + "Max-Age=360", + "Path=/", + 'foo="bar baz blub"', + } + assert http.dump_cookie("key", "xxx/") == "key=xxx/; Path=/" + assert http.dump_cookie("key", "xxx=") == "key=xxx=; Path=/" + + def test_bad_cookies(self): + cookies = http.parse_cookie( + "first=IamTheFirst ; a=1; oops ; a=2 ;second = andMeTwo;" + ) + expect = { + "first": ["IamTheFirst"], + 
"a": ["1", "2"], + "oops": [""], + "second": ["andMeTwo"], + } + assert cookies.to_dict(flat=False) == expect + assert cookies["a"] == "1" + assert cookies.getlist("a") == ["1", "2"] + + def test_empty_keys_are_ignored(self): + cookies = http.parse_cookie("spam=ham; duck=mallard; ; ") + expect = {"spam": "ham", "duck": "mallard"} + assert cookies.to_dict() == expect + + def test_cookie_quoting(self): + val = http.dump_cookie("foo", "?foo") + assert val == 'foo="?foo"; Path=/' + assert http.parse_cookie(val).to_dict() == {"foo": "?foo", "Path": "/"} + assert http.parse_cookie(r'foo="foo\054bar"').to_dict(), {"foo": "foo,bar"} + + def test_parse_set_cookie_directive(self): + val = 'foo="?foo"; version="0.1";' + assert http.parse_cookie(val).to_dict() == {"foo": "?foo", "version": "0.1"} + + def test_cookie_domain_resolving(self): + val = http.dump_cookie("foo", "bar", domain="\N{SNOWMAN}.com") + assert val == "foo=bar; Domain=xn--n3h.com; Path=/" + + def test_cookie_unicode_dumping(self): + val = http.dump_cookie("foo", "\N{SNOWMAN}") + h = datastructures.Headers() + h.add("Set-Cookie", val) + assert h["Set-Cookie"] == 'foo="\\342\\230\\203"; Path=/' + + cookies = http.parse_cookie(h["Set-Cookie"]) + assert cookies["foo"] == "\N{SNOWMAN}" + + def test_cookie_unicode_keys(self): + # Yes, this is technically against the spec but happens + val = http.dump_cookie("fö", "fö") + assert val == _wsgi_encoding_dance('fö="f\\303\\266"; Path=/', "utf-8") + cookies = http.parse_cookie(val) + assert cookies["fö"] == "fö" + + def test_cookie_unicode_parsing(self): + # This is submitted by Firefox if you set a Unicode cookie. 
+ cookies = http.parse_cookie("fö=fö") + assert cookies["fö"] == "fö" + + def test_cookie_domain_encoding(self): + val = http.dump_cookie("foo", "bar", domain="\N{SNOWMAN}.com") + assert val == "foo=bar; Domain=xn--n3h.com; Path=/" + + val = http.dump_cookie("foo", "bar", domain=".\N{SNOWMAN}.com") + assert val == "foo=bar; Domain=.xn--n3h.com; Path=/" + + val = http.dump_cookie("foo", "bar", domain=".foo.com") + assert val == "foo=bar; Domain=.foo.com; Path=/" + + def test_cookie_maxsize(self, recwarn): + val = http.dump_cookie("foo", "bar" * 1360 + "b") + assert len(recwarn) == 0 + assert len(val) == 4093 + + http.dump_cookie("foo", "bar" * 1360 + "ba") + assert len(recwarn) == 1 + w = recwarn.pop() + assert "cookie is too large" in str(w.message) + + http.dump_cookie("foo", b"w" * 502, max_size=512) + assert len(recwarn) == 1 + w = recwarn.pop() + assert "the limit is 512 bytes" in str(w.message) + + @pytest.mark.parametrize( + ("samesite", "expected"), + ( + ("strict", "foo=bar; Path=/; SameSite=Strict"), + ("lax", "foo=bar; Path=/; SameSite=Lax"), + ("none", "foo=bar; Path=/; SameSite=None"), + (None, "foo=bar; Path=/"), + ), + ) + def test_cookie_samesite_attribute(self, samesite, expected): + value = http.dump_cookie("foo", "bar", samesite=samesite) + assert value == expected + + def test_cookie_samesite_invalid(self): + with pytest.raises(ValueError): + http.dump_cookie("foo", "bar", samesite="invalid") + + +class TestRange: + def test_if_range_parsing(self): + rv = http.parse_if_range_header('"Test"') + assert rv.etag == "Test" + assert rv.date is None + assert rv.to_header() == '"Test"' + + # weak information is dropped + rv = http.parse_if_range_header('W/"Test"') + assert rv.etag == "Test" + assert rv.date is None + assert rv.to_header() == '"Test"' + + # broken etags are supported too + rv = http.parse_if_range_header("bullshit") + assert rv.etag == "bullshit" + assert rv.date is None + assert rv.to_header() == '"bullshit"' + + rv = 
http.parse_if_range_header("Thu, 01 Jan 1970 00:00:00 GMT") + assert rv.etag is None + assert rv.date == datetime(1970, 1, 1, tzinfo=timezone.utc) + assert rv.to_header() == "Thu, 01 Jan 1970 00:00:00 GMT" + + for x in "", None: + rv = http.parse_if_range_header(x) + assert rv.etag is None + assert rv.date is None + assert rv.to_header() == "" + + def test_range_parsing(self): + rv = http.parse_range_header("bytes=52") + assert rv is None + + rv = http.parse_range_header("bytes=52-") + assert rv.units == "bytes" + assert rv.ranges == [(52, None)] + assert rv.to_header() == "bytes=52-" + + rv = http.parse_range_header("bytes=52-99") + assert rv.units == "bytes" + assert rv.ranges == [(52, 100)] + assert rv.to_header() == "bytes=52-99" + + rv = http.parse_range_header("bytes=52-99,-1000") + assert rv.units == "bytes" + assert rv.ranges == [(52, 100), (-1000, None)] + assert rv.to_header() == "bytes=52-99,-1000" + + rv = http.parse_range_header("bytes = 1 - 100") + assert rv.units == "bytes" + assert rv.ranges == [(1, 101)] + assert rv.to_header() == "bytes=1-100" + + rv = http.parse_range_header("AWesomes=0-999") + assert rv.units == "awesomes" + assert rv.ranges == [(0, 1000)] + assert rv.to_header() == "awesomes=0-999" + + rv = http.parse_range_header("bytes=-") + assert rv is None + + rv = http.parse_range_header("bytes=bad") + assert rv is None + + rv = http.parse_range_header("bytes=bad-1") + assert rv is None + + rv = http.parse_range_header("bytes=-bad") + assert rv is None + + rv = http.parse_range_header("bytes=52-99, bad") + assert rv is None + + def test_content_range_parsing(self): + rv = http.parse_content_range_header("bytes 0-98/*") + assert rv.units == "bytes" + assert rv.start == 0 + assert rv.stop == 99 + assert rv.length is None + assert rv.to_header() == "bytes 0-98/*" + + rv = http.parse_content_range_header("bytes 0-98/*asdfsa") + assert rv is None + + rv = http.parse_content_range_header("bytes 0-99/100") + assert rv.to_header() == "bytes 
0-99/100" + rv.start = None + rv.stop = None + assert rv.units == "bytes" + assert rv.to_header() == "bytes */100" + + rv = http.parse_content_range_header("bytes */100") + assert rv.start is None + assert rv.stop is None + assert rv.length == 100 + assert rv.units == "bytes" + + +class TestRegression: + def test_best_match_works(self): + # was a bug in 0.6 + rv = http.parse_accept_header( + "foo=,application/xml,application/xhtml+xml," + "text/html;q=0.9,text/plain;q=0.8," + "image/png,*/*;q=0.5", + datastructures.MIMEAccept, + ).best_match(["foo/bar"]) + assert rv == "foo/bar" + + +@pytest.mark.parametrize( + "value", + [ + "Basic V2Vya3pldWc6V2VrcnpldWc=", + 'Digest username=Mufasa, realm="testrealm@host.invalid",' + ' nonce=dcd98b7102dd2f0e8b11d0f600bfb0c093, uri="/dir/index.html", qop=auth,' + " nc=00000001, cnonce=0a4f113b, response=6629fae49393a05397450978507c4ef1," + " opaque=5ccc069c403ebaf9f0171e9517f40e41", + ], +) +def test_authorization_to_header(value: str) -> None: + parsed = http.parse_authorization_header(value) + assert parsed is not None + assert parsed.to_header() == value + + +@pytest.mark.parametrize( + ("value", "expect"), + [ + ( + "Sun, 06 Nov 1994 08:49:37 GMT ", + datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), + ), + ( + "Sunday, 06-Nov-94 08:49:37 GMT", + datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), + ), + ( + " Sun Nov 6 08:49:37 1994", + datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), + ), + ("foo", None), + ( + " Sun 02 Feb 1343 08:49:37 GMT", + datetime(1343, 2, 2, 8, 49, 37, tzinfo=timezone.utc), + ), + ( + "Thu, 01 Jan 1970 00:00:00 GMT", + datetime(1970, 1, 1, tzinfo=timezone.utc), + ), + ("Thu, 33 Jan 1970 00:00:00 GMT", None), + ], +) +def test_parse_date(value, expect): + assert http.parse_date(value) == expect + + +@pytest.mark.parametrize( + ("value", "expect"), + [ + ( + datetime(1994, 11, 6, 8, 49, 37, tzinfo=timezone.utc), + "Sun, 06 Nov 1994 08:49:37 GMT", + ), + ( + datetime(1994, 11, 6, 8, 49, 
37, tzinfo=timezone(timedelta(hours=-8))), + "Sun, 06 Nov 1994 16:49:37 GMT", + ), + (datetime(1994, 11, 6, 8, 49, 37), "Sun, 06 Nov 1994 08:49:37 GMT"), + (0, "Thu, 01 Jan 1970 00:00:00 GMT"), + (datetime(1970, 1, 1), "Thu, 01 Jan 1970 00:00:00 GMT"), + (datetime(1, 1, 1), "Mon, 01 Jan 0001 00:00:00 GMT"), + (datetime(999, 1, 1), "Tue, 01 Jan 0999 00:00:00 GMT"), + (datetime(1000, 1, 1), "Wed, 01 Jan 1000 00:00:00 GMT"), + (datetime(2020, 1, 1), "Wed, 01 Jan 2020 00:00:00 GMT"), + (date(2020, 1, 1), "Wed, 01 Jan 2020 00:00:00 GMT"), + ], +) +def test_http_date(value, expect): + assert http.http_date(value) == expect diff --git a/tests/test_internal.py b/tests/test_internal.py new file mode 100644 index 0000000..6e673fd --- /dev/null +++ b/tests/test_internal.py @@ -0,0 +1,56 @@ +from warnings import filterwarnings +from warnings import resetwarnings + +import pytest + +from werkzeug import _internal as internal +from werkzeug.test import create_environ +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +def test_easteregg(): + req = Request.from_values("/?macgybarchakku") + resp = Response.force_type(internal._easteregg(None), req) + assert b"About Werkzeug" in resp.get_data() + assert b"the Swiss Army knife of Python web development" in resp.get_data() + + +def test_wrapper_internals(): + req = Request.from_values(data={"foo": "bar"}, method="POST") + req._load_form_data() + assert req.form.to_dict() == {"foo": "bar"} + + # second call does not break + req._load_form_data() + assert req.form.to_dict() == {"foo": "bar"} + + # check reprs + assert repr(req) == "" + resp = Response() + assert repr(resp) == "" + resp.set_data("Hello World!") + assert repr(resp) == "" + resp.response = iter(["Test"]) + assert repr(resp) == "" + + # string data does not set content length + response = Response(["Hällo Wörld"]) + headers = response.get_wsgi_headers(create_environ()) + assert "Content-Length" not in headers + + response = Response(["Hällo 
Wörld".encode()]) + headers = response.get_wsgi_headers(create_environ()) + assert "Content-Length" in headers + + # check for internal warnings + filterwarnings("error", category=Warning) + response = Response() + environ = create_environ() + response.response = "What the...?" + pytest.raises(Warning, lambda: list(response.iter_encoded())) + pytest.raises(Warning, lambda: list(response.get_app_iter(environ))) + response.direct_passthrough = True + pytest.raises(Warning, lambda: list(response.iter_encoded())) + pytest.raises(Warning, lambda: list(response.get_app_iter(environ))) + resetwarnings() diff --git a/tests/test_local.py b/tests/test_local.py new file mode 100644 index 0000000..2af69d2 --- /dev/null +++ b/tests/test_local.py @@ -0,0 +1,615 @@ +import asyncio +import copy +import math +import operator +import time +from contextvars import ContextVar +from threading import Thread + +import pytest + +from werkzeug import local + +# Since the tests are creating local instances, use global context vars +# to avoid accumulating anonymous context vars that can't be collected. 
+_cv_ns = ContextVar("werkzeug.tests.ns") +_cv_stack = ContextVar("werkzeug.tests.stack") +_cv_val = ContextVar("werkzeug.tests.val") + + +@pytest.fixture(autouse=True) +def reset_context_vars(): + ns_token = _cv_ns.set({}) + stack_token = _cv_stack.set([]) + yield + _cv_ns.reset(ns_token) + _cv_stack.reset(stack_token) + + +def test_basic_local(): + ns = local.Local(_cv_ns) + ns.foo = 0 + values = [] + + def value_setter(idx): + time.sleep(0.01 * idx) + ns.foo = idx + time.sleep(0.02) + values.append(ns.foo) + + threads = [Thread(target=value_setter, args=(x,)) for x in [1, 2, 3]] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + assert sorted(values) == [1, 2, 3] + + def delfoo(): + del ns.foo + + delfoo() + pytest.raises(AttributeError, lambda: ns.foo) + pytest.raises(AttributeError, delfoo) + + local.release_local(ns) + + +def test_basic_local_asyncio(): + ns = local.Local(_cv_ns) + ns.foo = 0 + values = [] + + async def value_setter(idx): + await asyncio.sleep(0.01 * idx) + ns.foo = idx + await asyncio.sleep(0.02) + values.append(ns.foo) + + async def main(): + futures = [asyncio.ensure_future(value_setter(i)) for i in [1, 2, 3]] + await asyncio.gather(*futures) + + asyncio.run(main()) + assert sorted(values) == [1, 2, 3] + + def delfoo(): + del ns.foo + + delfoo() + pytest.raises(AttributeError, lambda: ns.foo) + pytest.raises(AttributeError, delfoo) + + local.release_local(ns) + + +def test_local_release(): + ns = local.Local(_cv_ns) + ns.foo = 42 + local.release_local(ns) + assert not hasattr(ns, "foo") + + ls = local.LocalStack(_cv_stack) + ls.push(42) + local.release_local(ls) + assert ls.top is None + + +def test_local_stack(): + ls = local.LocalStack(_cv_stack) + assert ls.top is None + ls.push(42) + assert ls.top == 42 + ls.push(23) + assert ls.top == 23 + ls.pop() + assert ls.top == 42 + ls.pop() + assert ls.top is None + assert ls.pop() is None + assert ls.pop() is None + + proxy = ls() + ls.push([1, 2]) + assert 
proxy == [1, 2] + ls.push((1, 2)) + assert proxy == (1, 2) + ls.pop() + ls.pop() + assert repr(proxy) == "" + + +def test_local_stack_asyncio(): + ls = local.LocalStack(_cv_stack) + ls.push(1) + + async def task(): + ls.push(1) + assert len(ls._storage.get()) == 2 + + async def main(): + futures = [asyncio.ensure_future(task()) for _ in range(3)] + await asyncio.gather(*futures) + + asyncio.run(main()) + + +def test_proxy_local(): + ns = local.Local(_cv_ns) + ns.foo = [] + p = local.LocalProxy(ns, "foo") + p.append(42) + p.append(23) + p[1:] = [1, 2, 3] + assert p == [42, 1, 2, 3] + assert p == ns.foo + ns.foo += [1] + assert list(p) == [42, 1, 2, 3, 1] + p_from_local = ns("foo") + p_from_local.append(2) + assert p == p_from_local + assert p._get_current_object() is ns.foo + + +def test_proxy_callable(): + value = 42 + p = local.LocalProxy(lambda: value) + assert p == 42 + value = [23] + p.append(42) + assert p == [23, 42] + assert value == [23, 42] + assert p._get_current_object() is value + + +def test_proxy_wrapped(): + class SomeClassWithWrapped: + __wrapped__ = "wrapped" + + proxy = local.LocalProxy(_cv_val) + assert proxy.__wrapped__ is _cv_val + _cv_val.set(42) + + with pytest.raises(AttributeError): + proxy.__wrapped__ + + ns = local.Local(_cv_ns) + ns.foo = SomeClassWithWrapped() + ns.bar = 42 + + assert ns("foo").__wrapped__ == "wrapped" + + with pytest.raises(AttributeError): + ns("bar").__wrapped__ + + +def test_proxy_doc(): + def example(): + """example doc""" + + assert local.LocalProxy(lambda: example).__doc__ == "example doc" + # The __doc__ descriptor shouldn't block the LocalProxy's class doc. 
+ assert local.LocalProxy.__doc__.startswith("A proxy") + + +def test_proxy_fallback(): + local_stack = local.LocalStack(_cv_stack) + local_proxy = local_stack() + + assert repr(local_proxy) == "" + assert isinstance(local_proxy, local.LocalProxy) + assert local_proxy.__class__ is local.LocalProxy + assert "LocalProxy" in local_proxy.__doc__ + + local_stack.push(42) + + assert repr(local_proxy) == "42" + assert isinstance(local_proxy, int) + assert local_proxy.__class__ is int + assert "int(" in local_proxy.__doc__ + + +def test_proxy_unbound(): + ns = local.Local(_cv_ns) + p = ns("value") + assert repr(p) == "" + assert not p + assert dir(p) == [] + + +def _make_proxy(value): + ns = local.Local(_cv_ns) + ns.value = value + p = ns("value") + return ns, p + + +def test_proxy_type(): + _, p = _make_proxy([]) + assert isinstance(p, list) + assert p.__class__ is list + assert issubclass(type(p), local.LocalProxy) + assert type(p) is local.LocalProxy + + +def test_proxy_string_representations(): + class Example: + def __repr__(self): + return "a" + + def __bytes__(self): + return b"b" + + def __index__(self): + return 23 + + _, p = _make_proxy(Example()) + assert str(p) == "a" + assert repr(p) == "a" + assert bytes(p) == b"b" + # __index__ + assert bin(p) == "0b10111" + assert oct(p) == "0o27" + assert hex(p) == "0x17" + + +def test_proxy_hash(): + ns, p = _make_proxy("abc") + assert hash(ns.value) == hash(p) + + +@pytest.mark.parametrize( + "op", + [ + operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.mod, + divmod, + pow, + operator.lshift, + operator.rshift, + operator.and_, + operator.or_, + operator.xor, + ], +) +def test_proxy_binop_int(op): + _, p = _make_proxy(2) + assert op(p, 3) == op(2, 3) + # r-op + assert op(3, p) == op(3, 2) + + +@pytest.mark.parametrize("op", [operator.neg, operator.pos, abs, operator.invert]) +def 
test_proxy_uop_int(op): + _, p = _make_proxy(-2) + assert op(p) == op(-2) + + +def test_proxy_numeric(): + class Example: + def __complex__(self): + return 1 + 2j + + def __int__(self): + return 1 + + def __float__(self): + return 2.1 + + def __round__(self, n=None): + if n is not None: + return 3.3 + + return 3 + + def __trunc__(self): + return 4 + + def __floor__(self): + return 5 + + def __ceil__(self): + return 6 + + def __index__(self): + return 2 + + _, p = _make_proxy(Example()) + assert complex(p) == 1 + 2j + assert int(p) == 1 + assert float(p) == 2.1 + assert round(p) == 3 + assert round(p, 2) == 3.3 + assert math.trunc(p) == 4 + assert math.floor(p) == 5 + assert math.ceil(p) == 6 + assert [1, 2, 3][p] == 3 # __index__ + + +@pytest.mark.parametrize( + "op", + [ + operator.iadd, + operator.isub, + operator.imul, + operator.imatmul, + operator.itruediv, + operator.ifloordiv, + operator.imod, + operator.ipow, + operator.ilshift, + operator.irshift, + operator.iand, + operator.ior, + operator.ixor, + ], +) +def test_proxy_iop(op): + class Example: + value = 1 + + def fake_op(self, other): + self.value = other + return self + + __iadd__ = fake_op + __isub__ = fake_op + __imul__ = fake_op + __imatmul__ = fake_op + __itruediv__ = fake_op + __ifloordiv__ = fake_op + __imod__ = fake_op + __ipow__ = fake_op + __ilshift__ = fake_op + __irshift__ = fake_op + __iand__ = fake_op + __ior__ = fake_op + __ixor__ = fake_op + + ns, p = _make_proxy(Example()) + p_out = op(p, 2) + assert type(p_out) is local.LocalProxy + assert p.value == 2 + assert ns.value.value == 2 + + +def test_proxy_matmul(): + class Example: + def __matmul__(self, other): + return 2 * other + + def __rmatmul__(self, other): + return 2 * other + + _, p = _make_proxy(Example()) + assert p @ 3 == 6 + assert 4 @ p == 8 + + +def test_proxy_str(): + _, p = _make_proxy("{act} %s") + assert p + " world" == "{act} %s world" + assert "say " + p == "say {act} %s" + assert p * 2 == "{act} %s{act} %s" + assert 2 * 
p == p * 2 + assert p % ("world",) == "{act} world" + assert p.format(act="test") == "test %s" + + +def test_proxy_list(): + _, p = _make_proxy([1, 2, 3]) + assert len(p) == 3 + assert p[0] == 1 + assert 3 in p + assert 4 not in p + assert tuple(p) == (1, 2, 3) + assert list(reversed(p)) == [3, 2, 1] + p[0] = 4 + assert p == [4, 2, 3] + del p[-1] + assert p == [4, 2] + p += [5] + assert p[-1] == 5 + p *= 2 + assert len(p) == 6 + p[:] = [] + assert not p + p.append(1) + assert p + assert p + [2] == [1, 2] + assert [2] + p == [2, 1] + + +def test_proxy_copy(): + class Foo: + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + ns, p = _make_proxy(Foo()) + assert copy.copy(p) is ns.value + assert copy.deepcopy(p) is ns.value + + a = [] + _, p = _make_proxy([a]) + assert copy.copy(p) == [a] + assert copy.copy(p)[0] is a + assert copy.deepcopy(p) == [a] + assert copy.deepcopy(p)[0] is not a + + +def test_proxy_iterator(): + a = [1, 2, 3] + _, p = _make_proxy(iter(a)) + assert next(p) == 1 + + +def test_proxy_length_hint(): + class Example: + def __length_hint__(self): + return 2 + + _, p = _make_proxy(Example()) + assert operator.length_hint(p) == 2 + + +def test_proxy_context_manager(): + class Example: + value = 2 + + def __enter__(self): + self.value += 1 + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.value -= 1 + + _, p = _make_proxy(Example()) + assert p.value == 2 + + with p: + assert p.value == 3 + + assert p.value == 2 + + +def test_proxy_class(): + class Meta(type): + def __instancecheck__(cls, instance): + return True + + def __subclasscheck__(cls, subclass): + return True + + class Parent: + pass + + class Example(Parent, metaclass=Meta): + pass + + class Child(Example): + pass + + _, p = _make_proxy(Example) + assert type(p()) is Example + assert isinstance(1, p) + assert issubclass(int, p) + assert p.__mro__ == (Example, Parent, object) + assert p.__bases__ == (Parent,) + assert p.__subclasses__() == 
[Child] + + +def test_proxy_attributes(): + class Example: + def __init__(self): + object.__setattr__(self, "values", {}) + + def __getattribute__(self, name): + if name == "ham": + return "eggs" + + return super().__getattribute__(name) + + def __getattr__(self, name): + return self.values.get(name) + + def __setattr__(self, name, value): + self.values[name] = value + + def __delattr__(self, name): + del self.values[name] + + def __dir__(self): + return sorted(self.values.keys()) + + _, p = _make_proxy(Example()) + assert p.nothing is None + assert p.__dict__ == {"values": {}} + assert dir(p) == [] + + p.x = 1 + assert p.x == 1 + assert dir(p) == ["x"] + + del p.x + assert dir(p) == [] + + assert p.ham == "eggs" + p.ham = "spam" + assert p.ham == "eggs" + assert p.values["ham"] == "spam" + + +def test_proxy_await(): + async def get(): + return 1 + + _, p = _make_proxy(get()) + + async def main(): + return await p + + out = asyncio.run(main()) + assert out == 1 + + +def test_proxy_aiter(): + class Example: + value = 3 + + def __aiter__(self): + return self + + async def __anext__(self): + if self.value: + self.value -= 1 + return self.value + + raise StopAsyncIteration + + _, p = _make_proxy(Example()) + + async def main(): + out = [] + + async for v in p: + out.append(v) + + return out + + out = asyncio.run(main()) + assert out == [2, 1, 0] + + +def test_proxy_async_context_manager(): + class Example: + value = 2 + + async def __aenter__(self): + self.value += 1 + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + self.value -= 1 + + _, p = _make_proxy(Example()) + + async def main(): + async with p: + assert p.value == 3 + + assert p.value == 2 + return True + + assert asyncio.run(main()) diff --git a/tests/test_routing.py b/tests/test_routing.py new file mode 100644 index 0000000..15d25a7 --- /dev/null +++ b/tests/test_routing.py @@ -0,0 +1,1490 @@ +import gc +import typing as t +import uuid + +import pytest + +from werkzeug import routing 
as r +from werkzeug.datastructures import ImmutableDict +from werkzeug.datastructures import MultiDict +from werkzeug.exceptions import MethodNotAllowed +from werkzeug.exceptions import NotFound +from werkzeug.test import create_environ +from werkzeug.wrappers import Response + + +def test_basic_routing(): + map = r.Map( + [ + r.Rule("/", endpoint="index"), + r.Rule("/foo", endpoint="foo"), + r.Rule("/bar/", endpoint="bar"), + r.Rule("/ws", endpoint="ws", websocket=True), + r.Rule("/", endpoint="indexws", websocket=True), + ] + ) + adapter = map.bind("example.org", "/") + assert adapter.match("/") == ("index", {}) + assert adapter.match("/foo") == ("foo", {}) + assert adapter.match("/bar/") == ("bar", {}) + pytest.raises(r.RequestRedirect, lambda: adapter.match("/bar")) + pytest.raises(NotFound, lambda: adapter.match("/blub")) + + adapter = map.bind("example.org", "/", url_scheme="ws") + assert adapter.match("/") == ("indexws", {}) + + adapter = map.bind("example.org", "/test") + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/bar") + assert excinfo.value.new_url == "http://example.org/test/bar/" + + adapter = map.bind("example.org", "/") + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/bar") + assert excinfo.value.new_url == "http://example.org/bar/" + + adapter = map.bind("example.org", "/") + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/bar", query_args={"aha": "muhaha"}) + assert excinfo.value.new_url == "http://example.org/bar/?aha=muhaha" + + adapter = map.bind("example.org", "/") + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/bar", query_args="aha=muhaha") + assert excinfo.value.new_url == "http://example.org/bar/?aha=muhaha" + + adapter = map.bind_to_environ(create_environ("/bar?foo=bar", "http://example.org/")) + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match() + assert excinfo.value.new_url == "http://example.org/bar/?foo=bar" + + adapter = 
map.bind("example.org", "/ws", url_scheme="wss") + assert adapter.match("/ws", websocket=True) == ("ws", {}) + with pytest.raises(r.WebsocketMismatch): + adapter.match("/ws", websocket=False) + with pytest.raises(r.WebsocketMismatch): + adapter.match("/foo", websocket=True) + + adapter = map.bind_to_environ( + create_environ( + "/ws?foo=bar", + "http://example.org/", + headers=[("Connection", "Upgrade"), ("upgrade", "WebSocket")], + ) + ) + assert adapter.match("/ws") == ("ws", {}) + with pytest.raises(r.WebsocketMismatch): + adapter.match("/ws", websocket=False) + + adapter = map.bind_to_environ( + create_environ( + "/ws?foo=bar", + "http://example.org/", + headers=[("Connection", "keep-alive, Upgrade"), ("upgrade", "websocket")], + ) + ) + assert adapter.match("/ws") == ("ws", {}) + with pytest.raises(r.WebsocketMismatch): + adapter.match("/ws", websocket=False) + + +def test_merge_slashes_match(): + url_map = r.Map( + [ + r.Rule("/no/tail", endpoint="no_tail"), + r.Rule("/yes/tail/", endpoint="yes_tail"), + r.Rule("/with/", endpoint="with_path"), + r.Rule("/no//merge", endpoint="no_merge", merge_slashes=False), + ] + ) + adapter = url_map.bind("localhost", "/") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/no//tail") + + assert excinfo.value.new_url.endswith("/no/tail") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/yes//tail") + + assert excinfo.value.new_url.endswith("/yes/tail/") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/yes/tail//") + + assert excinfo.value.new_url.endswith("/yes/tail/") + + assert adapter.match("/no/tail")[0] == "no_tail" + assert adapter.match("/yes/tail/")[0] == "yes_tail" + + _, rv = adapter.match("/with/http://example.com/") + assert rv["path"] == "http://example.com/" + _, rv = adapter.match("/with/x//y") + assert rv["path"] == "x//y" + + assert adapter.match("/no//merge")[0] == "no_merge" + + +@pytest.mark.parametrize( + ("path", "expected"), + 
[("/merge/%//path", "/merge/%25/path"), ("/merge//st/path", "/merge/st/path")], +) +def test_merge_slash_encoding(path, expected): + """This test is to make sure URLs are not double-encoded + when a redirect is thrown with `merge_slash = True`""" + url_map = r.Map( + [ + r.Rule("/merge//path"), + ] + ) + adapter = url_map.bind("localhost", "/") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match(path) + + assert excinfo.value.new_url.endswith(expected) + + +def test_merge_slashes_build(): + url_map = r.Map( + [ + r.Rule("/yes//merge", endpoint="yes_merge"), + r.Rule("/no//merge", endpoint="no_merge", merge_slashes=False), + ] + ) + adapter = url_map.bind("localhost", "/") + assert adapter.build("yes_merge") == "/yes/merge" + assert adapter.build("no_merge") == "/no//merge" + + +def test_strict_slashes_redirect(): + map = r.Map( + [ + r.Rule("/bar/", endpoint="get", methods=["GET"]), + r.Rule("/bar", endpoint="post", methods=["POST"]), + r.Rule("/foo/", endpoint="foo", methods=["POST"]), + ] + ) + adapter = map.bind("example.org", "/") + + # Check if the actual routes works + assert adapter.match("/bar/", method="GET") == ("get", {}) + assert adapter.match("/bar", method="POST") == ("post", {}) + + # Check if exceptions are correct + pytest.raises(r.RequestRedirect, adapter.match, "/bar", method="GET") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") + with pytest.raises(r.RequestRedirect) as error_info: + adapter.match("/foo", method="POST") + assert error_info.value.code == 308 + + # Check differently defined order + map = r.Map( + [ + r.Rule("/bar", endpoint="post", methods=["POST"]), + r.Rule("/bar/", endpoint="get", methods=["GET"]), + ] + ) + adapter = map.bind("example.org", "/") + + # Check if the actual routes works + assert adapter.match("/bar/", method="GET") == ("get", {}) + assert adapter.match("/bar", method="POST") == ("post", {}) + + # Check if exceptions are correct + pytest.raises(r.RequestRedirect, 
adapter.match, "/bar", method="GET") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") + + # Check what happens when only slash route is defined + map = r.Map([r.Rule("/bar/", endpoint="get", methods=["GET"])]) + adapter = map.bind("example.org", "/") + + # Check if the actual routes works + assert adapter.match("/bar/", method="GET") == ("get", {}) + + # Check if exceptions are correct + pytest.raises(r.RequestRedirect, adapter.match, "/bar", method="GET") + pytest.raises(MethodNotAllowed, adapter.match, "/bar/", method="POST") + + +def test_strict_slashes_leaves_dont_consume(): + # See issue #1074 + map = r.Map( + [ + r.Rule("/path1", endpoint="leaf"), + r.Rule("/path1/", endpoint="branch"), + r.Rule("/path2", endpoint="leaf", strict_slashes=False), + r.Rule("/path2/", endpoint="branch"), + r.Rule("/path3", endpoint="leaf"), + r.Rule("/path3/", endpoint="branch", strict_slashes=False), + r.Rule("/path4", endpoint="leaf", strict_slashes=False), + r.Rule("/path4/", endpoint="branch", strict_slashes=False), + r.Rule("/path5", endpoint="leaf"), + ], + strict_slashes=False, + ) + + adapter = map.bind("example.org", "/") + + assert adapter.match("/path1", method="GET") == ("leaf", {}) + assert adapter.match("/path1/", method="GET") == ("branch", {}) + assert adapter.match("/path2", method="GET") == ("leaf", {}) + assert adapter.match("/path2/", method="GET") == ("branch", {}) + assert adapter.match("/path3", method="GET") == ("leaf", {}) + assert adapter.match("/path3/", method="GET") == ("branch", {}) + assert adapter.match("/path4", method="GET") == ("leaf", {}) + assert adapter.match("/path4/", method="GET") == ("branch", {}) + assert adapter.match("/path5/", method="GET") == ("leaf", {}) + + +def test_environ_defaults(): + environ = create_environ("/foo") + assert environ["PATH_INFO"] == "/foo" + m = r.Map([r.Rule("/foo", endpoint="foo"), r.Rule("/bar", endpoint="bar")]) + a = m.bind_to_environ(environ) + assert a.match("/foo") == ("foo", {}) 
+ assert a.match() == ("foo", {}) + assert a.match("/bar") == ("bar", {}) + pytest.raises(NotFound, a.match, "/bars") + + +def test_environ_nonascii_pathinfo(): + environ = create_environ("/лошадь") + m = r.Map([r.Rule("/", endpoint="index"), r.Rule("/лошадь", endpoint="horse")]) + a = m.bind_to_environ(environ) + assert a.match("/") == ("index", {}) + assert a.match("/лошадь") == ("horse", {}) + pytest.raises(NotFound, a.match, "/барсук") + + +def test_basic_building(): + map = r.Map( + [ + r.Rule("/", endpoint="index"), + r.Rule("/foo", endpoint="foo"), + r.Rule("/bar/", endpoint="bar"), + r.Rule("/bar/", endpoint="bari"), + r.Rule("/bar/", endpoint="barf"), + r.Rule("/bar/", endpoint="barp"), + r.Rule("/hehe", endpoint="blah", subdomain="blah"), + r.Rule("/ws", endpoint="ws", websocket=True), + ] + ) + adapter = map.bind("example.org", "/", subdomain="blah") + + assert adapter.build("index", {}) == "http://example.org/" + assert adapter.build("foo", {}) == "http://example.org/foo" + assert adapter.build("bar", {"baz": "blub"}) == "http://example.org/bar/blub" + assert adapter.build("bari", {"bazi": 50}) == "http://example.org/bar/50" + assert adapter.build("barf", {"bazf": 0.815}) == "http://example.org/bar/0.815" + assert adapter.build("barp", {"bazp": "la/di"}) == "http://example.org/bar/la/di" + assert adapter.build("blah", {}) == "/hehe" + pytest.raises(r.BuildError, lambda: adapter.build("urks")) + + adapter = map.bind("example.org", "/test", subdomain="blah") + assert adapter.build("index", {}) == "http://example.org/test/" + assert adapter.build("foo", {}) == "http://example.org/test/foo" + assert adapter.build("bar", {"baz": "blub"}) == "http://example.org/test/bar/blub" + assert adapter.build("bari", {"bazi": 50}) == "http://example.org/test/bar/50" + assert adapter.build("barf", {"bazf": 0.815}) == "http://example.org/test/bar/0.815" + assert ( + adapter.build("barp", {"bazp": "la/di"}) == "http://example.org/test/bar/la/di" + ) + assert 
adapter.build("blah", {}) == "/test/hehe" + + adapter = map.bind("example.org") + assert adapter.build("foo", {}) == "/foo" + assert adapter.build("foo", {}, force_external=True) == "http://example.org/foo" + adapter = map.bind("example.org", url_scheme="") + assert adapter.build("foo", {}) == "/foo" + assert adapter.build("foo", {}, force_external=True) == "//example.org/foo" + assert ( + adapter.build("foo", {}, url_scheme="https", force_external=True) + == "https://example.org/foo" + ) + + adapter = map.bind("example.org", url_scheme="ws") + assert adapter.build("ws", {}) == "ws://example.org/ws" + assert adapter.build("foo", {}, force_external=True) == "http://example.org/foo" + assert adapter.build("foo", {}) == "/foo" + assert adapter.build("ws", {}, url_scheme="https") == "wss://example.org/ws" + + +def test_long_build(): + long_args = {f"v{x}": x for x in range(10000)} + map = r.Map( + [ + r.Rule( + "".join(f"/<{k}>" for k in long_args.keys()), + endpoint="bleep", + build_only=True, + ) + ] + ) + adapter = map.bind("localhost", "/") + url = f"{adapter.build('bleep', long_args)}/" + for v in long_args.values(): + assert f"/{v}" in url + + +def test_defaults(): + map = r.Map( + [ + r.Rule("/foo/", defaults={"page": 1}, endpoint="foo"), + r.Rule("/foo/", endpoint="foo"), + ] + ) + adapter = map.bind("example.org", "/") + + assert adapter.match("/foo/") == ("foo", {"page": 1}) + pytest.raises(r.RequestRedirect, lambda: adapter.match("/foo/1")) + assert adapter.match("/foo/2") == ("foo", {"page": 2}) + assert adapter.build("foo", {}) == "/foo/" + assert adapter.build("foo", {"page": 1}) == "/foo/" + assert adapter.build("foo", {"page": 2}) == "/foo/2" + + +def test_negative(): + map = r.Map( + [ + r.Rule("/foos/", endpoint="foos"), + r.Rule("/bars/", endpoint="bars"), + r.Rule("/foo/", endpoint="foo"), + r.Rule("/bar/", endpoint="bar"), + ] + ) + adapter = map.bind("example.org", "/") + + assert adapter.match("/foos/-2") == ("foos", {"page": -2}) + assert 
adapter.match("/foos/-50") == ("foos", {"page": -50}) + assert adapter.match("/bars/-2.0") == ("bars", {"page": -2.0}) + assert adapter.match("/bars/-0.185") == ("bars", {"page": -0.185}) + + # Make sure signed values are rejected in unsigned mode + pytest.raises(NotFound, lambda: adapter.match("/foo/-2")) + pytest.raises(NotFound, lambda: adapter.match("/foo/-50")) + pytest.raises(NotFound, lambda: adapter.match("/bar/-0.185")) + pytest.raises(NotFound, lambda: adapter.match("/bar/-2.0")) + + +def test_greedy(): + map = r.Map( + [ + r.Rule("/foo", endpoint="foo"), + r.Rule("/", endpoint="bar"), + r.Rule("//", endpoint="bar"), + ] + ) + adapter = map.bind("example.org", "/") + + assert adapter.match("/foo") == ("foo", {}) + assert adapter.match("/blub") == ("bar", {"bar": "blub"}) + assert adapter.match("/he/he") == ("bar", {"bar": "he", "blub": "he"}) + + assert adapter.build("foo", {}) == "/foo" + assert adapter.build("bar", {"bar": "blub"}) == "/blub" + assert adapter.build("bar", {"bar": "blub", "blub": "bar"}) == "/blub/bar" + + +def test_path(): + map = r.Map( + [ + r.Rule("/", defaults={"name": "FrontPage"}, endpoint="page"), + r.Rule("/Special", endpoint="special"), + r.Rule("/", endpoint="year"), + r.Rule("/:foo", endpoint="foopage"), + r.Rule("/:", endpoint="twopage"), + r.Rule("/", endpoint="page"), + r.Rule("//edit", endpoint="editpage"), + r.Rule("//silly/", endpoint="sillypage"), + r.Rule("//silly//edit", endpoint="editsillypage"), + r.Rule("/Talk:", endpoint="talk"), + r.Rule("/User:", endpoint="user"), + r.Rule("/User:/", endpoint="userpage"), + r.Rule( + "/User:/comment/-", + endpoint="usercomment", + ), + r.Rule("/Files/", endpoint="files"), + r.Rule("///", endpoint="admin"), + ] + ) + adapter = map.bind("example.org", "/") + + assert adapter.match("/") == ("page", {"name": "FrontPage"}) + pytest.raises(r.RequestRedirect, lambda: adapter.match("/FrontPage")) + assert adapter.match("/Special") == ("special", {}) + assert adapter.match("/2007") == 
("year", {"year": 2007}) + assert adapter.match("/Some:foo") == ("foopage", {"name": "Some"}) + assert adapter.match("/Some:bar") == ("twopage", {"name": "Some", "name2": "bar"}) + assert adapter.match("/Some/Page") == ("page", {"name": "Some/Page"}) + assert adapter.match("/Some/Page/edit") == ("editpage", {"name": "Some/Page"}) + assert adapter.match("/Foo/silly/bar") == ( + "sillypage", + {"name": "Foo", "name2": "bar"}, + ) + assert adapter.match("/Foo/silly/bar/edit") == ( + "editsillypage", + {"name": "Foo", "name2": "bar"}, + ) + assert adapter.match("/Talk:Foo/Bar") == ("talk", {"name": "Foo/Bar"}) + assert adapter.match("/User:thomas") == ("user", {"username": "thomas"}) + assert adapter.match("/User:thomas/projects/werkzeug") == ( + "userpage", + {"username": "thomas", "name": "projects/werkzeug"}, + ) + assert adapter.match("/User:thomas/comment/123-456") == ( + "usercomment", + {"username": "thomas", "id": 123, "replyId": 456}, + ) + assert adapter.match("/Files/downloads/werkzeug/0.2.zip") == ( + "files", + {"file": "downloads/werkzeug/0.2.zip"}, + ) + assert adapter.match("/Jerry/eats/cheese") == ( + "admin", + {"admin": "Jerry", "manage": "eats", "things": "cheese"}, + ) + + +def test_dispatch(): + env = create_environ("/") + map = r.Map([r.Rule("/", endpoint="root"), r.Rule("/foo/", endpoint="foo")]) + adapter = map.bind_to_environ(env) + + raise_this = None + + def view_func(endpoint, values): + if raise_this is not None: + raise raise_this + return Response(repr((endpoint, values))) + + def dispatch(path, quiet=False): + return Response.force_type( + adapter.dispatch(view_func, path, catch_http_exceptions=quiet), env + ) + + assert dispatch("/").data == b"('root', {})" + assert dispatch("/foo").status_code == 308 + raise_this = NotFound() + pytest.raises(NotFound, lambda: dispatch("/bar")) + assert dispatch("/bar", True).status_code == 404 + + +def test_http_host_before_server_name(): + env = { + "HTTP_HOST": "wiki.example.com", + "SERVER_NAME": 
"web0.example.com", + "SERVER_PORT": "80", + "SCRIPT_NAME": "", + "PATH_INFO": "", + "REQUEST_METHOD": "GET", + "wsgi.url_scheme": "http", + } + map = r.Map([r.Rule("/", endpoint="index", subdomain="wiki")]) + adapter = map.bind_to_environ(env, server_name="example.com") + assert adapter.match("/") == ("index", {}) + assert adapter.build("index", force_external=True) == "http://wiki.example.com/" + assert adapter.build("index") == "/" + + env["HTTP_HOST"] = "admin.example.com" + adapter = map.bind_to_environ(env, server_name="example.com") + assert adapter.build("index") == "http://wiki.example.com/" + + +def test_invalid_subdomain_warning(): + env = create_environ("/foo") + env["SERVER_NAME"] = env["HTTP_HOST"] = "foo.example.com" + m = r.Map([r.Rule("/foo", endpoint="foo")]) + with pytest.warns(UserWarning) as record: + a = m.bind_to_environ(env, server_name="bar.example.com") + assert a.subdomain == "" + assert len(record) == 1 + + +@pytest.mark.parametrize( + ("base", "name"), + (("http://localhost", "localhost:80"), ("https://localhost", "localhost:443")), +) +def test_server_name_match_default_port(base, name): + environ = create_environ("/foo", base_url=base) + map = r.Map([r.Rule("/foo", endpoint="foo")]) + adapter = map.bind_to_environ(environ, server_name=name) + assert adapter.match() == ("foo", {}) + + +def test_adapter_url_parameter_sorting(): + map = r.Map( + [r.Rule("/", endpoint="index")], sort_parameters=True, sort_key=lambda x: x[1] + ) + adapter = map.bind("localhost", "/") + assert ( + adapter.build("index", {"x": 20, "y": 10, "z": 30}, force_external=True) + == "http://localhost/?y=10&x=20&z=30" + ) + + +def test_request_direct_charset_bug(): + map = r.Map([r.Rule("/öäü/")]) + adapter = map.bind("localhost", "/") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/öäü") + assert excinfo.value.new_url == "http://localhost/%C3%B6%C3%A4%C3%BC/" + + +def test_request_redirect_default(): + map = r.Map([r.Rule("/foo", 
defaults={"bar": 42}), r.Rule("/foo/")]) + adapter = map.bind("localhost", "/") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/foo/42") + assert excinfo.value.new_url == "http://localhost/foo" + + +def test_request_redirect_default_subdomain(): + map = r.Map( + [ + r.Rule("/foo", defaults={"bar": 42}, subdomain="test"), + r.Rule("/foo/", subdomain="other"), + ] + ) + adapter = map.bind("localhost", "/", subdomain="other") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/foo/42") + assert excinfo.value.new_url == "http://test.localhost/foo" + + +def test_adapter_match_return_rule(): + rule = r.Rule("/foo/", endpoint="foo") + map = r.Map([rule]) + adapter = map.bind("localhost", "/") + assert adapter.match("/foo/", return_rule=True) == (rule, {}) + + +def test_server_name_interpolation(): + server_name = "example.invalid" + map = r.Map( + [r.Rule("/", endpoint="index"), r.Rule("/", endpoint="alt", subdomain="alt")] + ) + + env = create_environ("/", f"http://{server_name}/") + adapter = map.bind_to_environ(env, server_name=server_name) + assert adapter.match() == ("index", {}) + + env = create_environ("/", f"http://alt.{server_name}/") + adapter = map.bind_to_environ(env, server_name=server_name) + assert adapter.match() == ("alt", {}) + + env = create_environ("/", f"http://{server_name}/") + + with pytest.warns(UserWarning): + adapter = map.bind_to_environ(env, server_name="foo") + assert adapter.subdomain == "" + + +def test_rule_emptying(): + rule = r.Rule("/foo", {"meh": "muh"}, "x", ["POST"], False, "x", True, None) + rule2 = rule.empty() + assert rule.__dict__ == rule2.__dict__ + rule.methods.add("GET") + assert rule.__dict__ != rule2.__dict__ + rule.methods.discard("GET") + rule.defaults["meh"] = "aha" + assert rule.__dict__ != rule2.__dict__ + + +def test_rule_unhashable(): + rule = r.Rule("/foo", {"meh": "muh"}, "x", ["POST"], False, "x", True, None) + pytest.raises(TypeError, hash, rule) + + +def 
test_rule_templates(): + testcase = r.RuleTemplate( + [ + r.Submount( + "/test/$app", + [ + r.Rule("/foo/", endpoint="handle_foo"), + r.Rule("/bar/", endpoint="handle_bar"), + r.Rule("/baz/", endpoint="handle_baz"), + ], + ), + r.EndpointPrefix( + "${app}", + [ + r.Rule("/${app}-blah", endpoint="bar"), + r.Rule("/${app}-meh", endpoint="baz"), + ], + ), + r.Subdomain( + "$app", + [r.Rule("/blah", endpoint="x_bar"), r.Rule("/meh", endpoint="x_baz")], + ), + ] + ) + + url_map = r.Map( + [ + testcase(app="test1"), + testcase(app="test2"), + testcase(app="test3"), + testcase(app="test4"), + ] + ) + + out = sorted((x.rule, x.subdomain, x.endpoint) for x in url_map.iter_rules()) + + assert out == [ + ("/blah", "test1", "x_bar"), + ("/blah", "test2", "x_bar"), + ("/blah", "test3", "x_bar"), + ("/blah", "test4", "x_bar"), + ("/meh", "test1", "x_baz"), + ("/meh", "test2", "x_baz"), + ("/meh", "test3", "x_baz"), + ("/meh", "test4", "x_baz"), + ("/test/test1/bar/", "", "handle_bar"), + ("/test/test1/baz/", "", "handle_baz"), + ("/test/test1/foo/", "", "handle_foo"), + ("/test/test2/bar/", "", "handle_bar"), + ("/test/test2/baz/", "", "handle_baz"), + ("/test/test2/foo/", "", "handle_foo"), + ("/test/test3/bar/", "", "handle_bar"), + ("/test/test3/baz/", "", "handle_baz"), + ("/test/test3/foo/", "", "handle_foo"), + ("/test/test4/bar/", "", "handle_bar"), + ("/test/test4/baz/", "", "handle_baz"), + ("/test/test4/foo/", "", "handle_foo"), + ("/test1-blah", "", "test1bar"), + ("/test1-meh", "", "test1baz"), + ("/test2-blah", "", "test2bar"), + ("/test2-meh", "", "test2baz"), + ("/test3-blah", "", "test3bar"), + ("/test3-meh", "", "test3baz"), + ("/test4-blah", "", "test4bar"), + ("/test4-meh", "", "test4baz"), + ] + + +def test_non_string_parts(): + m = r.Map([r.Rule("/", endpoint="foo")]) + a = m.bind("example.com") + assert a.build("foo", {"foo": 42}) == "/42" + + +def test_complex_routing_rules(): + m = r.Map( + [ + r.Rule("/", endpoint="index"), + r.Rule("/", 
endpoint="an_int"), + r.Rule("/", endpoint="a_string"), + r.Rule("/foo/", endpoint="nested"), + r.Rule("/foobar/", endpoint="nestedbar"), + r.Rule("/foo//", endpoint="nested_show"), + r.Rule("/foo//edit", endpoint="nested_edit"), + r.Rule("/users/", endpoint="users", defaults={"page": 1}), + r.Rule("/users/page/", endpoint="users"), + r.Rule("/foox", endpoint="foox"), + r.Rule("//", endpoint="barx_path_path"), + ] + ) + a = m.bind("example.com") + + assert a.match("/") == ("index", {}) + assert a.match("/42") == ("an_int", {"blub": 42}) + assert a.match("/blub") == ("a_string", {"blub": "blub"}) + assert a.match("/foo/") == ("nested", {}) + assert a.match("/foobar/") == ("nestedbar", {}) + assert a.match("/foo/1/2/3/") == ("nested_show", {"testing": "1/2/3"}) + assert a.match("/foo/1/2/3/edit") == ("nested_edit", {"testing": "1/2/3"}) + assert a.match("/users/") == ("users", {"page": 1}) + assert a.match("/users/page/2") == ("users", {"page": 2}) + assert a.match("/foox") == ("foox", {}) + assert a.match("/1/2/3") == ("barx_path_path", {"bar": "1", "blub": "2/3"}) + + assert a.build("index") == "/" + assert a.build("an_int", {"blub": 42}) == "/42" + assert a.build("a_string", {"blub": "test"}) == "/test" + assert a.build("nested") == "/foo/" + assert a.build("nestedbar") == "/foobar/" + assert a.build("nested_show", {"testing": "1/2/3"}) == "/foo/1/2/3/" + assert a.build("nested_edit", {"testing": "1/2/3"}) == "/foo/1/2/3/edit" + assert a.build("users", {"page": 1}) == "/users/" + assert a.build("users", {"page": 2}) == "/users/page/2" + assert a.build("foox") == "/foox" + assert a.build("barx_path_path", {"bar": "1", "blub": "2/3"}) == "/1/2/3" + + +def test_default_converters(): + class MyMap(r.Map): + default_converters = r.Map.default_converters.copy() + default_converters["foo"] = r.UnicodeConverter + + assert isinstance(r.Map.default_converters, ImmutableDict) + m = MyMap( + [ + r.Rule("/a/", endpoint="a"), + r.Rule("/b/", endpoint="b"), + r.Rule("/c/", 
endpoint="c"), + ], + converters={"bar": r.UnicodeConverter}, + ) + a = m.bind("example.org", "/") + assert a.match("/a/1") == ("a", {"a": "1"}) + assert a.match("/b/2") == ("b", {"b": "2"}) + assert a.match("/c/3") == ("c", {"c": "3"}) + assert "foo" not in r.Map.default_converters + + +def test_uuid_converter(): + m = r.Map([r.Rule("/a/", endpoint="a")]) + a = m.bind("example.org", "/") + route, kwargs = a.match("/a/a8098c1a-f86e-11da-bd1a-00112444be1e") + assert type(kwargs["a_uuid"]) == uuid.UUID + + +def test_converter_with_tuples(): + """ + Regression test for https://github.com/pallets/werkzeug/issues/709 + """ + + class TwoValueConverter(r.BaseConverter): + part_isolating = False + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.regex = r"(\w\w+)/(\w\w+)" + + def to_python(self, two_values): + one, two = two_values.split("/") + return one, two + + def to_url(self, values): + return f"{values[0]}/{values[1]}" + + map = r.Map( + [r.Rule("//", endpoint="handler")], + converters={"two": TwoValueConverter}, + ) + a = map.bind("example.org", "/") + route, kwargs = a.match("/qwert/yuiop/") + assert kwargs["foo"] == ("qwert", "yuiop") + + +def test_anyconverter(): + m = r.Map( + [ + r.Rule("/", endpoint="no_dot"), + r.Rule("/", endpoint="yes_dot"), + ] + ) + a = m.bind("example.org", "/") + assert a.match("/a1") == ("no_dot", {"a": "a1"}) + assert a.match("/a2") == ("no_dot", {"a": "a2"}) + assert a.match("/a.1") == ("yes_dot", {"a": "a.1"}) + assert a.match("/a.2") == ("yes_dot", {"a": "a.2"}) + + +def test_any_converter_build_validates_value() -> None: + m = r.Map([r.Rule("/", endpoint="actor")]) + a = m.bind("localhost") + + assert a.build("actor", {"value": "patient"}) == "/patient" + assert a.build("actor", {"value": "provider"}) == "/provider" + + with pytest.raises(ValueError) as exc: + a.build("actor", {"value": "invalid"}) + + assert str(exc.value) == "'invalid' is not one of 'patient', 'provider'" + + 
+@pytest.mark.parametrize( + ("endpoint", "value", "expect"), + [ + ("int", 1, "/1"), + ("int", None, r.BuildError), + ("int", [1], TypeError), + ("list", [1], "/1"), + ("list", [1, None, 2], "/1.None.2"), + ("list", 1, TypeError), + ], +) +def test_build_values_dict(endpoint, value, expect): + class ListConverter(r.BaseConverter): + def to_url(self, value: t.Any) -> str: + return super().to_url(".".join(map(str, value))) + + url_map = r.Map( + [r.Rule("/", endpoint="int"), r.Rule("/", endpoint="list")], + converters={"list": ListConverter}, + ) + adapter = url_map.bind("localhost") + + if isinstance(expect, str): + assert adapter.build(endpoint, {"v": value}) == expect + else: + with pytest.raises(expect): + adapter.build(endpoint, {"v": value}) + + +@pytest.mark.parametrize( + ("endpoint", "value", "expect"), + [ + ("int", 1, "/1"), + ("int", [1], "/1"), + ("int", [], r.BuildError), + ("int", None, TypeError), + ("int", [None], TypeError), + ("list", 1, TypeError), + ("list", [1], TypeError), + ("list", [[1]], "/1"), + ("list", [1, None, 2], "/1.None.2"), + ], +) +def test_build_values_multidict(endpoint, value, expect): + class ListConverter(r.BaseConverter): + def to_url(self, value: t.Any) -> str: + return super().to_url(".".join(map(str, value))) + + url_map = r.Map( + [r.Rule("/", endpoint="int"), r.Rule("/", endpoint="list")], + converters={"list": ListConverter}, + ) + adapter = url_map.bind("localhost") + + if isinstance(expect, str): + assert adapter.build(endpoint, MultiDict({"v": value})) == expect + else: + with pytest.raises(expect): + adapter.build(endpoint, MultiDict({"v": value})) + + +@pytest.mark.parametrize( + ("value", "expect"), + [ + (None, ""), + ([None], ""), + ([None, None], ""), + ("", "?v="), + ([""], "?v="), + (0, "?v=0"), + (1.0, "?v=1.0"), + ([1, 2], "?v=1&v=2"), + ([1, None, 2], "?v=1&v=2"), + ([1, "", 2], "?v=1&v=&v=2"), + ], +) +def test_build_append_unknown_dict(value, expect): + map = r.Map([r.Rule("/", endpoint="a")]) + adapter 
= map.bind("localhost") + assert adapter.build("a", {"v": value}) == f"/{expect}" + assert adapter.build("a", {"v": value}, append_unknown=False) == "/" + + +@pytest.mark.parametrize( + ("value", "expect"), + [ + (None, ""), + ([None], ""), + ([None, None], ""), + ("", "?v="), + ([""], "?v="), + (0, "?v=0"), + (1.0, "?v=1.0"), + ([1, 2], "?v=1&v=2"), + ([1, None, 2], "?v=1&v=2"), + ([1, "", 2], "?v=1&v=&v=2"), + ], +) +def test_build_append_unknown_multidict(value, expect): + map = r.Map([r.Rule("/", endpoint="a")]) + adapter = map.bind("localhost") + assert adapter.build("a", MultiDict({"v": value})) == f"/{expect}" + assert adapter.build("a", MultiDict({"v": value}), append_unknown=False) == "/" + + +def test_build_drop_none(): + map = r.Map([r.Rule("/flob/", endpoint="endp")]) + adapter = map.bind("", "/") + params = {"flub": None, "flop": None} + with pytest.raises(r.BuildError): + x = adapter.build("endp", params) + assert not x + params = {"flub": "x", "flop": None} + url = adapter.build("endp", params) + assert "flop" not in url + + +def test_method_fallback(): + map = r.Map( + [ + r.Rule("/", endpoint="index", methods=["GET"]), + r.Rule("/", endpoint="hello_name", methods=["GET"]), + r.Rule("/select", endpoint="hello_select", methods=["POST"]), + r.Rule("/search_get", endpoint="search", methods=["GET"]), + r.Rule("/search_post", endpoint="search", methods=["POST"]), + ] + ) + adapter = map.bind("example.com") + assert adapter.build("index") == "/" + assert adapter.build("index", method="GET") == "/" + assert adapter.build("hello_name", {"name": "foo"}) == "/foo" + assert adapter.build("hello_select") == "/select" + assert adapter.build("hello_select", method="POST") == "/select" + assert adapter.build("search") == "/search_get" + assert adapter.build("search", method="GET") == "/search_get" + assert adapter.build("search", method="POST") == "/search_post" + + +def test_implicit_head(): + url_map = r.Map( + [ + r.Rule("/get", methods=["GET"], endpoint="a"), 
+ r.Rule("/post", methods=["POST"], endpoint="b"), + ] + ) + adapter = url_map.bind("example.org") + assert adapter.match("/get", method="HEAD") == ("a", {}) + pytest.raises(MethodNotAllowed, adapter.match, "/post", method="HEAD") + + +def test_pass_str_as_router_methods(): + with pytest.raises(TypeError): + r.Rule("/get", methods="GET") + + +def test_protocol_joining_bug(): + m = r.Map([r.Rule("/", endpoint="x")]) + a = m.bind("example.org") + assert a.build("x", {"foo": "x:y"}) == "/x:y" + assert a.build("x", {"foo": "x:y"}, force_external=True) == "http://example.org/x:y" + + +def test_allowed_methods_querying(): + m = r.Map( + [r.Rule("/", methods=["GET", "HEAD"]), r.Rule("/foo", methods=["POST"])] + ) + a = m.bind("example.org") + assert sorted(a.allowed_methods("/foo")) == ["GET", "HEAD", "POST"] + + +def test_external_building_with_port(): + map = r.Map([r.Rule("/", endpoint="index")]) + adapter = map.bind("example.org:5000", "/") + built_url = adapter.build("index", {}, force_external=True) + assert built_url == "http://example.org:5000/", built_url + + +def test_external_building_with_port_bind_to_environ(): + map = r.Map([r.Rule("/", endpoint="index")]) + adapter = map.bind_to_environ( + create_environ("/", "http://example.org:5000/"), server_name="example.org:5000" + ) + built_url = adapter.build("index", {}, force_external=True) + assert built_url == "http://example.org:5000/", built_url + + +def test_external_building_with_port_bind_to_environ_wrong_servername(): + map = r.Map([r.Rule("/", endpoint="index")]) + environ = create_environ("/", "http://example.org:5000/") + + with pytest.warns(UserWarning): + adapter = map.bind_to_environ(environ, server_name="example.org") + assert adapter.subdomain == "" + + +def test_converter_parser(): + args, kwargs = r.parse_converter_args("test, a=1, b=3.0") + + assert args == ("test",) + assert kwargs == {"a": 1, "b": 3.0} + + args, kwargs = r.parse_converter_args("") + assert not args and not kwargs + + args, 
kwargs = r.parse_converter_args("a, b, c,") + assert args == ("a", "b", "c") + assert not kwargs + + args, kwargs = r.parse_converter_args("True, False, None") + assert args == (True, False, None) + + args, kwargs = r.parse_converter_args('"foo", "bar"') + assert args == ("foo", "bar") + + +def test_alias_redirects(): + m = r.Map( + [ + r.Rule("/", endpoint="index"), + r.Rule("/index.html", endpoint="index", alias=True), + r.Rule("/users/", defaults={"page": 1}, endpoint="users"), + r.Rule( + "/users/index.html", defaults={"page": 1}, alias=True, endpoint="users" + ), + r.Rule("/users/page/", endpoint="users"), + r.Rule("/users/page-.html", alias=True, endpoint="users"), + ] + ) + a = m.bind("example.com") + + def ensure_redirect(path, new_url, args=None): + with pytest.raises(r.RequestRedirect) as excinfo: + a.match(path, query_args=args) + assert excinfo.value.new_url == f"http://example.com{new_url}" + + ensure_redirect("/index.html", "/") + ensure_redirect("/users/index.html", "/users/") + ensure_redirect("/users/page-2.html", "/users/page/2") + ensure_redirect("/users/page-1.html", "/users/") + ensure_redirect("/users/page-1.html", "/users/?foo=bar", {"foo": "bar"}) + + assert a.build("index") == "/" + assert a.build("users", {"page": 1}) == "/users/" + assert a.build("users", {"page": 2}) == "/users/page/2" + + +@pytest.mark.parametrize("prefix", ("", "/aaa")) +def test_double_defaults(prefix): + m = r.Map( + [ + r.Rule(f"{prefix}/", defaults={"foo": 1, "bar": False}, endpoint="x"), + r.Rule(f"{prefix}/", defaults={"bar": False}, endpoint="x"), + r.Rule(f"{prefix}/bar/", defaults={"foo": 1, "bar": True}, endpoint="x"), + r.Rule(f"{prefix}/bar/", defaults={"bar": True}, endpoint="x"), + ] + ) + a = m.bind("example.com") + + assert a.match(f"{prefix}/") == ("x", {"foo": 1, "bar": False}) + assert a.match(f"{prefix}/2") == ("x", {"foo": 2, "bar": False}) + assert a.match(f"{prefix}/bar/") == ("x", {"foo": 1, "bar": True}) + assert a.match(f"{prefix}/bar/2") == 
("x", {"foo": 2, "bar": True}) + + assert a.build("x", {"foo": 1, "bar": False}) == f"{prefix}/" + assert a.build("x", {"foo": 2, "bar": False}) == f"{prefix}/2" + assert a.build("x", {"bar": False}) == f"{prefix}/" + assert a.build("x", {"foo": 1, "bar": True}) == f"{prefix}/bar/" + assert a.build("x", {"foo": 2, "bar": True}) == f"{prefix}/bar/2" + assert a.build("x", {"bar": True}) == f"{prefix}/bar/" + + +def test_building_bytes(): + m = r.Map( + [ + r.Rule("/", endpoint="a"), + r.Rule("/", defaults={"b": b"\x01\x02\x03"}, endpoint="b"), + ] + ) + a = m.bind("example.org", "/") + assert a.build("a", {"a": b"\x01\x02\x03"}) == "/%01%02%03" + assert a.build("b") == "/%01%02%03" + + +def test_host_matching(): + m = r.Map( + [ + r.Rule("/", endpoint="index", host="www."), + r.Rule("/", endpoint="files", host="files."), + r.Rule("/foo/", defaults={"page": 1}, host="www.", endpoint="x"), + r.Rule("/", host="files.", endpoint="x"), + ], + host_matching=True, + ) + + a = m.bind("www.example.com") + assert a.match("/") == ("index", {"domain": "example.com"}) + assert a.match("/foo/") == ("x", {"domain": "example.com", "page": 1}) + + with pytest.raises(r.RequestRedirect) as excinfo: + a.match("/foo") + assert excinfo.value.new_url == "http://www.example.com/foo/" + + a = m.bind("files.example.com") + assert a.match("/") == ("files", {"domain": "example.com"}) + assert a.match("/2") == ("x", {"domain": "example.com", "page": 2}) + + with pytest.raises(r.RequestRedirect) as excinfo: + a.match("/1") + assert excinfo.value.new_url == "http://www.example.com/foo/" + + +def test_host_matching_building(): + m = r.Map( + [ + r.Rule("/", endpoint="index", host="www.domain.com"), + r.Rule("/", endpoint="foo", host="my.domain.com"), + ], + host_matching=True, + ) + + www = m.bind("www.domain.com") + assert www.match("/") == ("index", {}) + assert www.build("index") == "/" + assert www.build("foo") == "http://my.domain.com/" + + my = m.bind("my.domain.com") + assert my.match("/") 
== ("foo", {}) + assert my.build("foo") == "/" + assert my.build("index") == "http://www.domain.com/" + + +def test_server_name_casing(): + m = r.Map([r.Rule("/", endpoint="index", subdomain="foo")]) + + env = create_environ() + env["SERVER_NAME"] = env["HTTP_HOST"] = "FOO.EXAMPLE.COM" + a = m.bind_to_environ(env, server_name="example.com") + assert a.match("/") == ("index", {}) + + env = create_environ() + env["SERVER_NAME"] = "127.0.0.1" + env["SERVER_PORT"] = "5000" + del env["HTTP_HOST"] + + with pytest.warns(UserWarning): + a = m.bind_to_environ(env, server_name="example.com") + + with pytest.raises(NotFound): + a.match() + + +def test_redirect_request_exception_code(): + exc = r.RequestRedirect("http://www.google.com/") + exc.code = 307 + env = create_environ() + assert exc.get_response(env).status_code == exc.code + + +def test_redirect_path_quoting(): + url_map = r.Map( + [ + r.Rule("/", defaults={"page": 1}, endpoint="category"), + r.Rule("//page/", endpoint="category"), + ] + ) + adapter = url_map.bind("example.com") + + with pytest.raises(r.RequestRedirect) as excinfo: + adapter.match("/foo bar/page/1") + response = excinfo.value.get_response({}) + assert response.headers["location"] == "http://example.com/foo%20bar" + + +def test_unicode_rules(): + m = r.Map( + [r.Rule("/войти/", endpoint="enter"), r.Rule("/foo+bar/", endpoint="foobar")] + ) + a = m.bind("☃.example.com") + with pytest.raises(r.RequestRedirect) as excinfo: + a.match("/войти") + assert ( + excinfo.value.new_url + == "http://xn--n3h.example.com/%D0%B2%D0%BE%D0%B9%D1%82%D0%B8/" + ) + + endpoint, values = a.match("/войти/") + assert endpoint == "enter" + assert values == {} + + with pytest.raises(r.RequestRedirect) as excinfo: + a.match("/foo+bar") + assert excinfo.value.new_url == "http://xn--n3h.example.com/foo+bar/" + + endpoint, values = a.match("/foo+bar/") + assert endpoint == "foobar" + assert values == {} + + url = a.build("enter", {}, force_external=True) + assert url == 
"http://xn--n3h.example.com/%D0%B2%D0%BE%D0%B9%D1%82%D0%B8/" + + url = a.build("foobar", {}, force_external=True) + assert url == "http://xn--n3h.example.com/foo+bar/" + + +def test_empty_path_info(): + m = r.Map([r.Rule("/", endpoint="index")]) + + b = m.bind("example.com", script_name="/approot") + with pytest.raises(r.RequestRedirect) as excinfo: + b.match("") + assert excinfo.value.new_url == "http://example.com/approot/" + + a = m.bind("example.com") + with pytest.raises(r.RequestRedirect) as excinfo: + a.match("") + assert excinfo.value.new_url == "http://example.com/" + + +def test_both_bind_and_match_path_info_are_none(): + m = r.Map([r.Rule("/", endpoint="index")]) + ma = m.bind("example.org") + assert ma.match() == ("index", {}) + + +def test_map_repr(): + m = r.Map([r.Rule("/wat", endpoint="enter"), r.Rule("/woop", endpoint="foobar")]) + rv = repr(m) + assert rv == "Map([ enter>, foobar>])" + + +def test_empty_subclass_rules_with_custom_kwargs(): + class CustomRule(r.Rule): + def __init__(self, string=None, custom=None, *args, **kwargs): + self.custom = custom + super().__init__(string, *args, **kwargs) + + rule1 = CustomRule("/foo", endpoint="bar") + try: + rule2 = rule1.empty() + assert rule1.rule == rule2.rule + except TypeError as e: # raised without fix in PR #675 + raise e + + +def test_finding_closest_match_by_endpoint(): + m = r.Map( + [ + r.Rule("/foo/", endpoint="users.here"), + r.Rule("/wat/", endpoint="admin.users"), + r.Rule("/woop", endpoint="foo.users"), + ] + ) + adapter = m.bind("example.com") + assert ( + r.BuildError("admin.user", None, None, adapter).suggested.endpoint + == "admin.users" + ) + + +def test_finding_closest_match_by_values(): + rule_id = r.Rule("/user/id//", endpoint="users") + rule_slug = r.Rule("/user//", endpoint="users") + rule_random = r.Rule("/user/emails//", endpoint="users") + m = r.Map([rule_id, rule_slug, rule_random]) + adapter = m.bind("example.com") + assert r.BuildError("x", {"slug": ""}, None, 
adapter).suggested == rule_slug + + +def test_finding_closest_match_by_method(): + post = r.Rule("/post/", endpoint="foobar", methods=["POST"]) + get = r.Rule("/get/", endpoint="foobar", methods=["GET"]) + put = r.Rule("/put/", endpoint="foobar", methods=["PUT"]) + m = r.Map([post, get, put]) + adapter = m.bind("example.com") + assert r.BuildError("invalid", {}, "POST", adapter).suggested == post + assert r.BuildError("invalid", {}, "GET", adapter).suggested == get + assert r.BuildError("invalid", {}, "PUT", adapter).suggested == put + + +def test_finding_closest_match_when_none_exist(): + m = r.Map([]) + assert not r.BuildError("invalid", {}, None, m.bind("test.com")).suggested + + +def test_error_message_without_suggested_rule(): + m = r.Map([r.Rule("/foo/", endpoint="world", methods=["GET"])]) + adapter = m.bind("example.com") + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("urks") + assert str(excinfo.value).startswith("Could not build url for endpoint 'urks'.") + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("world", method="POST") + assert str(excinfo.value).startswith( + "Could not build url for endpoint 'world' ('POST')." + ) + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("urks", values={"user_id": 5}) + assert str(excinfo.value).startswith( + "Could not build url for endpoint 'urks' with values ['user_id']." + ) + + +def test_error_message_suggestion(): + m = r.Map([r.Rule("/foo//", endpoint="world", methods=["GET"])]) + adapter = m.bind("example.com") + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("helloworld") + assert "Did you mean 'world' instead?" in str(excinfo.value) + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("world") + assert "Did you forget to specify values ['id']?" 
in str(excinfo.value) + assert "Did you mean to use methods" not in str(excinfo.value) + + with pytest.raises(r.BuildError) as excinfo: + adapter.build("world", {"id": 2}, method="POST") + assert "Did you mean to use methods ['GET', 'HEAD']?" in str(excinfo.value) + + +def test_no_memory_leak_from_Rule_builder(): + """See #1520""" + + # generate a bunch of objects that *should* get collected + for _ in range(100): + r.Map([r.Rule("/a/")]) + + # ensure that the garbage collection has had a chance to collect cyclic + # objects + for _ in range(5): + gc.collect() + + # assert they got collected! + count = sum(1 for obj in gc.get_objects() if isinstance(obj, r.Rule)) + assert count == 0 + + +def test_build_url_with_arg_self(): + map = r.Map([r.Rule("/foo/", endpoint="foo")]) + adapter = map.bind("example.org", "/", subdomain="blah") + + ret = adapter.build("foo", {"self": "bar"}) + assert ret == "http://example.org/foo/bar" + + +def test_build_url_with_arg_keyword(): + map = r.Map([r.Rule("/foo/", endpoint="foo")]) + adapter = map.bind("example.org", "/", subdomain="blah") + + ret = adapter.build("foo", {"class": "bar"}) + assert ret == "http://example.org/foo/bar" + + +def test_build_url_same_endpoint_multiple_hosts(): + m = r.Map( + [ + r.Rule("/", endpoint="index", host="alpha.example.com"), + r.Rule("/", endpoint="index", host="beta.example.com"), + r.Rule("/", endpoint="gamma", host="gamma.example.com"), + ], + host_matching=True, + ) + + alpha = m.bind("alpha.example.com") + assert alpha.build("index") == "/" + assert alpha.build("gamma") == "http://gamma.example.com/" + + alpha_case = m.bind("AlPhA.ExAmPlE.CoM") + assert alpha_case.build("index") == "/" + assert alpha_case.build("gamma") == "http://gamma.example.com/" + + beta = m.bind("beta.example.com") + assert beta.build("index") == "/" + + beta_case = m.bind("BeTa.ExAmPlE.CoM") + assert beta_case.build("index") == "/" + + +def test_rule_websocket_methods(): + with pytest.raises(ValueError): + r.Rule("/ws", 
endpoint="ws", websocket=True, methods=["post"]) + with pytest.raises(ValueError): + r.Rule( + "/ws", + endpoint="ws", + websocket=True, + methods=["get", "head", "options", "post"], + ) + r.Rule("/ws", endpoint="ws", websocket=True, methods=["get", "head", "options"]) + + +def test_path_weighting(): + m = r.Map( + [ + r.Rule("//c", endpoint="simple"), + r.Rule("///", endpoint="complex"), + ] + ) + a = m.bind("localhost", path_info="/a/b/c") + + assert a.match() == ("simple", {"path": "a/b"}) + + +def test_newline_match(): + m = r.Map([r.Rule("/hello", endpoint="hello")]) + a = m.bind("localhost") + + with pytest.raises(NotFound): + a.match("/hello\n") + + +def test_weighting(): + m = r.Map( + [ + r.Rule("/", endpoint="int"), + r.Rule("/", endpoint="uuid"), + ] + ) + a = m.bind("localhost") + + assert a.match("/2b5b0911-fdcf-4dd2-921b-28ace88db8a0") == ( + "uuid", + {"value": uuid.UUID("2b5b0911-fdcf-4dd2-921b-28ace88db8a0")}, + ) + + +def test_strict_slashes_false(): + map = r.Map( + [ + r.Rule("/path1", endpoint="leaf_path", strict_slashes=False), + r.Rule("/path2/", endpoint="branch_path", strict_slashes=False), + ], + ) + + adapter = map.bind("example.org", "/") + + assert adapter.match("/path1", method="GET") == ("leaf_path", {}) + assert adapter.match("/path1/", method="GET") == ("leaf_path", {}) + assert adapter.match("/path2", method="GET") == ("branch_path", {}) + assert adapter.match("/path2/", method="GET") == ("branch_path", {}) + + +def test_invalid_rule(): + with pytest.raises(ValueError): + map_ = r.Map([r.Rule("/", endpoint="test")]) + map_.bind("localhost") + + +def test_multiple_converters_per_part(): + map_ = r.Map( + [ + r.Rule("/v.", endpoint="version"), + ], + ) + adapter = map_.bind("localhost") + assert adapter.match("/v1.2") == ("version", {"major": 1, "minor": 2}) + + +def test_static_regex_escape(): + map_ = r.Map( + [ + r.Rule("/.", endpoint="dotted"), + ], + ) + adapter = map_.bind("localhost") + assert adapter.match("/.2") == 
("dotted", {"value": 2}) + with pytest.raises(NotFound): + adapter.match("/a2") + + +class RegexConverter(r.BaseConverter): + def __init__(self, url_map, *items): + super().__init__(url_map) + self.regex = items[0] + + +def test_regex(): + map_ = r.Map( + [ + r.Rule(r"/", endpoint="regex"), + ], + converters={"regex": RegexConverter}, + ) + adapter = map_.bind("localhost") + assert adapter.match("/asdfsa.asdfs") == ("regex", {"value": "asdfsa.asdfs"}) diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..3e797fc --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,49 @@ +import os +import posixpath + +import pytest + +from werkzeug.security import check_password_hash +from werkzeug.security import generate_password_hash +from werkzeug.security import safe_join + + +def test_password_hashing(): + hash0 = generate_password_hash("default") + assert check_password_hash(hash0, "default") + assert hash0.startswith("pbkdf2:sha256:260000$") + + hash1 = generate_password_hash("default", "sha1") + hash2 = generate_password_hash("default", method="sha1") + assert hash1 != hash2 + assert check_password_hash(hash1, "default") + assert check_password_hash(hash2, "default") + assert hash1.startswith("sha1$") + assert hash2.startswith("sha1$") + + with pytest.raises(ValueError): + generate_password_hash("default", "sha1", salt_length=0) + + fakehash = generate_password_hash("default", method="plain") + assert fakehash == "plain$$default" + assert check_password_hash(fakehash, "default") + + +def test_safe_join(): + assert safe_join("foo", "bar/baz") == posixpath.join("foo", "bar/baz") + assert safe_join("foo", "../bar/baz") is None + if os.name == "nt": + assert safe_join("foo", "foo\\bar") is None + + +def test_safe_join_os_sep(): + import werkzeug.security as sec + + prev_value = sec._os_alt_seps + sec._os_alt_seps = "*" + assert safe_join("foo", "bar/baz*") is None + sec._os_alt_steps = prev_value + + +def 
test_safe_join_empty_trusted(): + assert safe_join("", "c:test.txt") == "./c:test.txt" diff --git a/tests/test_send_file.py b/tests/test_send_file.py new file mode 100644 index 0000000..fc4299a --- /dev/null +++ b/tests/test_send_file.py @@ -0,0 +1,209 @@ +import datetime +import io +import pathlib + +import pytest + +from werkzeug.exceptions import NotFound +from werkzeug.http import http_date +from werkzeug.test import EnvironBuilder +from werkzeug.utils import send_file +from werkzeug.utils import send_from_directory + +res_path = pathlib.Path(__file__).parent / "res" +html_path = res_path / "index.html" +txt_path = res_path / "test.txt" + +environ = EnvironBuilder().get_environ() + + +@pytest.mark.parametrize("path", [html_path, str(html_path)]) +def test_path(path): + rv = send_file(path, environ) + assert rv.mimetype == "text/html" + assert rv.direct_passthrough + rv.direct_passthrough = False + assert rv.data == html_path.read_bytes() + rv.close() + + +def test_x_sendfile(): + rv = send_file(html_path, environ, use_x_sendfile=True) + assert rv.headers["x-sendfile"] == str(html_path) + assert rv.data == b"" + rv.close() + + +def test_last_modified(): + last_modified = datetime.datetime(1999, 1, 1, tzinfo=datetime.timezone.utc) + rv = send_file(txt_path, environ, last_modified=last_modified) + assert rv.last_modified == last_modified + rv.close() + + +@pytest.mark.parametrize( + "file_factory", [lambda: txt_path.open("rb"), lambda: io.BytesIO(b"test")] +) +def test_object(file_factory): + rv = send_file(file_factory(), environ, mimetype="text/plain", use_x_sendfile=True) + rv.direct_passthrough = False + assert rv.data + assert rv.mimetype == "text/plain" + assert "x-sendfile" not in rv.headers + rv.close() + + +def test_object_without_mimetype(): + with pytest.raises(TypeError, match="detect the MIME type"): + send_file(io.BytesIO(b"test"), environ) + + +def test_object_mimetype_from_name(): + rv = send_file(io.BytesIO(b"test"), environ, 
download_name="test.txt") + assert rv.mimetype == "text/plain" + rv.close() + + +@pytest.mark.parametrize( + "file_factory", [lambda: txt_path.open(), lambda: io.StringIO("test")] +) +def test_text_mode_fails(file_factory): + with file_factory() as f, pytest.raises(ValueError, match="binary mode"): + send_file(f, environ, mimetype="text/plain") + + +@pytest.mark.parametrize( + ("as_attachment", "value"), [(False, "inline"), (True, "attachment")] +) +def test_disposition_name(as_attachment, value): + rv = send_file(txt_path, environ, as_attachment=as_attachment) + assert rv.headers["Content-Disposition"] == f"{value}; filename=test.txt" + rv.close() + + +def test_object_attachment_requires_name(): + with pytest.raises(TypeError, match="attachment"): + send_file( + io.BytesIO(b"test"), environ, mimetype="text/plain", as_attachment=True + ) + + rv = send_file( + io.BytesIO(b"test"), environ, as_attachment=True, download_name="test.txt" + ) + assert rv.headers["Content-Disposition"] == "attachment; filename=test.txt" + rv.close() + + +@pytest.mark.parametrize( + ("name", "ascii", "utf8"), + ( + ("index.html", "index.html", None), + ( + "Ñandú/pingüino.txt", + '"Nandu/pinguino.txt"', + "%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt", + ), + # latin-1 isn't ascii, should be quoted + ("Vögel.txt", "Vogel.txt", "V%C3%B6gel.txt"), + # ":/" are not safe in filename* value + ("те:/ст", '":/"', "%D1%82%D0%B5%3A%2F%D1%81%D1%82"), + ), +) +def test_non_ascii_name(name, ascii, utf8): + rv = send_file(html_path, environ, as_attachment=True, download_name=name) + rv.close() + content_disposition = rv.headers["Content-Disposition"] + assert f"filename={ascii}" in content_disposition + + if utf8: + assert f"filename*=UTF-8''{utf8}" in content_disposition + else: + assert "filename*=UTF-8''" not in content_disposition + + +def test_no_cache_conditional_default(): + rv = send_file( + txt_path, + EnvironBuilder( + headers={"If-Modified-Since": http_date(datetime.datetime(2020, 7, 12))} + 
).get_environ(), + last_modified=datetime.datetime(2020, 7, 11), + ) + rv.close() + assert "no-cache" in rv.headers["Cache-Control"] + assert not rv.cache_control.public + assert not rv.cache_control.max_age + assert not rv.expires + assert rv.status_code == 304 + + +@pytest.mark.parametrize(("value", "public"), [(0, False), (60, True)]) +def test_max_age(value, public): + rv = send_file(txt_path, environ, max_age=value) + rv.close() + assert ("no-cache" in rv.headers["Cache-Control"]) != public + assert rv.cache_control.public == public + assert rv.cache_control.max_age == value + assert rv.expires + assert rv.status_code == 200 + + +def test_etag(): + rv = send_file(txt_path, environ) + rv.close() + assert rv.headers["ETag"].count("-") == 2 + rv = send_file(txt_path, environ, etag=False) + rv.close() + assert "ETag" not in rv.headers + rv = send_file(txt_path, environ, etag="unique") + rv.close() + assert rv.headers["ETag"] == '"unique"' + + +@pytest.mark.parametrize("as_attachment", (True, False)) +def test_content_encoding(as_attachment): + rv = send_file( + txt_path, environ, download_name="logo.svgz", as_attachment=as_attachment + ) + rv.close() + assert rv.mimetype == "image/svg+xml" + assert rv.content_encoding == ("gzip" if not as_attachment else None) + + +@pytest.mark.parametrize( + ("directory", "path"), + [(str(res_path), "test.txt"), (res_path, pathlib.Path("test.txt"))], +) +def test_from_directory(directory, path): + rv = send_from_directory(directory, path, environ) + rv.direct_passthrough = False + assert rv.data.strip() == b"FOUND" + rv.close() + + +@pytest.mark.parametrize("path", ["../res/test.txt", "nothing.txt", "null\x00.txt"]) +def test_from_directory_not_found(path): + with pytest.raises(NotFound): + send_from_directory(res_path, path, environ) + + +def test_root_path(tmp_path): + # This is a private API, it should only be used by Flask. 
+ d = tmp_path / "d" + d.mkdir() + (d / "test.txt").write_bytes(b"test") + rv = send_file("d/test.txt", environ, _root_path=tmp_path) + rv.direct_passthrough = False + assert rv.data == b"test" + rv.close() + rv = send_from_directory("d", "test.txt", environ, _root_path=tmp_path) + rv.direct_passthrough = False + assert rv.data == b"test" + rv.close() + + +def test_max_age_callable(): + # This is a private API, it should only be used by Flask. + rv = send_file(txt_path, environ, max_age=lambda p: 10) + rv.close() + assert rv.cache_control.max_age == 10 diff --git a/tests/test_serving.py b/tests/test_serving.py new file mode 100644 index 0000000..0494828 --- /dev/null +++ b/tests/test_serving.py @@ -0,0 +1,288 @@ +import http.client +import json +import os +import shutil +import socket +import ssl +import sys +from io import BytesIO +from pathlib import Path + +import pytest + +from werkzeug import run_simple +from werkzeug._reloader import _find_stat_paths +from werkzeug._reloader import _find_watchdog_paths +from werkzeug._reloader import _get_args_for_reloading +from werkzeug.datastructures import FileStorage +from werkzeug.serving import make_ssl_devcert +from werkzeug.test import stream_encode_multipart + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.parametrize( + "kwargs", + [ + pytest.param({}, id="http"), + pytest.param({"ssl_context": "adhoc"}, id="https"), + pytest.param({"use_reloader": True}, id="reloader"), + pytest.param( + {"hostname": "unix"}, + id="unix socket", + marks=pytest.mark.skipif( + not hasattr(socket, "AF_UNIX"), reason="requires unix socket support" + ), + ), + ], +) +@pytest.mark.dev_server +def test_server(tmp_path, dev_server, kwargs: dict): + if kwargs.get("hostname") == "unix": + kwargs["hostname"] = f"unix://{tmp_path / 'test.sock'}" + + client = dev_server(**kwargs) + r = client.request() + assert r.status == 200 + assert r.json["PATH_INFO"] == "/" + + 
+@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_untrusted_host(standard_app): + r = standard_app.request( + "http://missing.test:1337/index.html#ignore", + headers={"x-base-url": standard_app.url}, + ) + assert r.json["HTTP_HOST"] == "missing.test:1337" + assert r.json["PATH_INFO"] == "/index.html" + host, _, port = r.json["HTTP_X_BASE_URL"].rpartition(":") + assert r.json["SERVER_NAME"] == host.partition("http://")[2] + assert r.json["SERVER_PORT"] == port + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_double_slash_path(standard_app): + r = standard_app.request("//double-slash") + assert "double-slash" not in r.json["HTTP_HOST"] + assert r.json["PATH_INFO"] == "/double-slash" + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_500_error(standard_app): + r = standard_app.request("/crash") + assert r.status == 500 + assert b"Internal Server Error" in r.data + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_ssl_dev_cert(tmp_path, dev_server): + client = dev_server(ssl_context=make_ssl_devcert(tmp_path)) + r = client.request() + assert r.json["wsgi.url_scheme"] == "https" + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_ssl_object(dev_server): + client = dev_server(ssl_context="custom") + r = client.request() + assert r.json["wsgi.url_scheme"] == "https" + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.parametrize("reloader_type", ["stat", "watchdog"]) +@pytest.mark.skipif( + os.name == "nt" and "CI" in os.environ, reason="unreliable on Windows during CI" +) +@pytest.mark.dev_server +def test_reloader_sys_path(tmp_path, dev_server, reloader_type): + """This tests the general 
behavior of the reloader. It also tests + that fixing an import error triggers a reload, not just Python + retrying the failed import. + """ + real_path = tmp_path / "real_app.py" + real_path.write_text("syntax error causes import error") + + client = dev_server("reloader", reloader_type=reloader_type) + assert client.request().status == 500 + + shutil.copyfile(Path(__file__).parent / "live_apps" / "standard_app.py", real_path) + client.wait_for_log(f" * Detected change in {str(real_path)!r}, reloading") + client.wait_for_reload() + assert client.request().status == 200 + + +def test_windows_get_args_for_reloading(monkeypatch, tmp_path): + argv = [str(tmp_path / "test.exe"), "run"] + monkeypatch.setattr("sys.executable", str(tmp_path / "python.exe")) + monkeypatch.setattr("sys.argv", argv) + monkeypatch.setattr("__main__.__package__", None) + monkeypatch.setattr("os.name", "nt") + rv = _get_args_for_reloading() + assert rv == argv + + +@pytest.mark.parametrize("find", [_find_stat_paths, _find_watchdog_paths]) +def test_exclude_patterns(find): + # Imported paths under sys.prefix will be included by default. + paths = find(set(), set()) + assert any(p.startswith(sys.prefix) for p in paths) + # Those paths should be excluded due to the pattern. + paths = find(set(), {f"{sys.prefix}*"}) + assert not any(p.startswith(sys.prefix) for p in paths) + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_wrong_protocol(standard_app): + """An HTTPS request to an HTTP server doesn't show a traceback. 
+ https://github.com/pallets/werkzeug/pull/838 + """ + conn = http.client.HTTPSConnection(standard_app.addr) + + with pytest.raises(ssl.SSLError): + conn.request("GET", f"https://{standard_app.addr}") + + assert "Traceback" not in standard_app.log.read() + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_content_type_and_length(standard_app): + r = standard_app.request() + assert "CONTENT_TYPE" not in r.json + assert "CONTENT_LENGTH" not in r.json + + r = standard_app.request(body=b"{}", headers={"content-type": "application/json"}) + assert r.json["CONTENT_TYPE"] == "application/json" + assert r.json["CONTENT_LENGTH"] == "2" + + +def test_port_is_int(): + with pytest.raises(TypeError, match="port must be an integer"): + run_simple("127.0.0.1", "5000", None) + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.parametrize("send_length", [False, True]) +@pytest.mark.dev_server +def test_chunked_request(monkeypatch, dev_server, send_length): + stream, length, boundary = stream_encode_multipart( + { + "value": "this is text", + "file": FileStorage( + BytesIO(b"this is a file"), + filename="test.txt", + content_type="text/plain", + ), + } + ) + client = dev_server("data") + # Small block size to produce multiple chunks. + conn = client.connect(blocksize=128) + conn.putrequest("POST", "/") + conn.putheader("Transfer-Encoding", "chunked") + conn.putheader("Content-Type", f"multipart/form-data; boundary={boundary}") + + # Sending the content-length header with chunked is invalid, but if + # a client does send it the server should ignore it. Previously the + # multipart parser would crash. Python's higher-level functions + # won't send the header, which is why we use conn.put in this test. 
+ if send_length: + conn.putheader("Content-Length", "invalid") + expect_content_len = "invalid" + else: + expect_content_len = None + + conn.endheaders(stream, encode_chunked=True) + r = conn.getresponse() + data = json.load(r) + r.close() + assert data["form"]["value"] == "this is text" + assert data["files"]["file"] == "this is a file" + environ = data["environ"] + assert environ["HTTP_TRANSFER_ENCODING"] == "chunked" + assert environ.get("CONTENT_LENGTH") == expect_content_len + assert environ["wsgi.input_terminated"] + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_multiple_headers_concatenated(standard_app): + """A header key can be sent multiple times. The server will join all + the values with commas. + + https://tools.ietf.org/html/rfc3875#section-4.1.18 + """ + # conn.request doesn't support multiple values. + conn = standard_app.connect() + conn.putrequest("GET", "/") + conn.putheader("XYZ", "a ") # trailing space is preserved + conn.putheader("X-Ignore-1", "ignore value") + conn.putheader("XYZ", " b") # leading space is collapsed + conn.putheader("X-Ignore-2", "ignore value") + conn.putheader("XYZ", "c ") + conn.putheader("X-Ignore-3", "ignore value") + conn.putheader("XYZ", "d") + conn.endheaders() + r = conn.getresponse() + data = json.load(r) + r.close() + assert data["HTTP_XYZ"] == "a ,b,c ,d" + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_multiline_header_folding(standard_app): + """A header value can be split over multiple lines with a leading + tab. The server will remove the newlines and preserve the tabs. + + https://tools.ietf.org/html/rfc2616#section-2.2 + """ + # conn.request doesn't support multiline values. 
+ conn = standard_app.connect() + conn.putrequest("GET", "/") + conn.putheader("XYZ", "first", "second", "third") + conn.endheaders() + r = conn.getresponse() + data = json.load(r) + r.close() + assert data["HTTP_XYZ"] == "first\tsecond\tthird" + + +@pytest.mark.parametrize("endpoint", ["", "crash"]) +@pytest.mark.dev_server +def test_streaming_close_response(dev_server, endpoint): + """When using HTTP/1.0, chunked encoding is not supported. Fall + back to Connection: close, but this allows no reliable way to + distinguish between complete and truncated responses. + """ + r = dev_server("streaming").request("/" + endpoint) + assert r.getheader("connection") == "close" + assert r.data == "".join(str(x) + "\n" for x in range(5)).encode() + + +@pytest.mark.dev_server +def test_streaming_chunked_response(dev_server): + """When using HTTP/1.1, use Transfer-Encoding: chunked for streamed + responses, since it can distinguish the end of the response without + closing the connection. + + https://tools.ietf.org/html/rfc2616#section-3.6.1 + """ + r = dev_server("streaming", threaded=True).request("/") + assert r.getheader("transfer-encoding") == "chunked" + assert r.data == "".join(str(x) + "\n" for x in range(5)).encode() + + +@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning") +@pytest.mark.dev_server +def test_streaming_chunked_truncation(dev_server): + """When using HTTP/1.1, chunked encoding allows the client to detect + content truncated by a prematurely closed connection. 
+ """ + with pytest.raises(http.client.IncompleteRead): + dev_server("streaming", threaded=True).request("/crash") diff --git a/tests/test_test.py b/tests/test_test.py new file mode 100644 index 0000000..02d637e --- /dev/null +++ b/tests/test_test.py @@ -0,0 +1,871 @@ +import json +import sys +from functools import partial +from io import BytesIO + +import pytest + +from werkzeug.datastructures import Authorization +from werkzeug.datastructures import FileStorage +from werkzeug.datastructures import Headers +from werkzeug.datastructures import MultiDict +from werkzeug.formparser import parse_form_data +from werkzeug.http import parse_authorization_header +from werkzeug.test import Client +from werkzeug.test import ClientRedirectError +from werkzeug.test import create_environ +from werkzeug.test import EnvironBuilder +from werkzeug.test import run_wsgi_app +from werkzeug.test import stream_encode_multipart +from werkzeug.utils import redirect +from werkzeug.wrappers import Request +from werkzeug.wrappers import Response + + +def cookie_app(environ, start_response): + """A WSGI application which sets a cookie, and returns as a response any + cookie which exists. 
+ """ + response = Response(environ.get("HTTP_COOKIE", "No Cookie"), mimetype="text/plain") + response.set_cookie("test", "test") + return response(environ, start_response) + + +def redirect_loop_app(environ, start_response): + response = redirect("http://localhost/some/redirect/") + return response(environ, start_response) + + +def redirect_with_get_app(environ, start_response): + req = Request(environ) + if req.url not in ( + "http://localhost/", + "http://localhost/first/request", + "http://localhost/some/redirect/", + ): + raise AssertionError(f'redirect_demo_app() did not expect URL "{req.url}"') + if "/some/redirect" not in req.url: + response = redirect("http://localhost/some/redirect/") + else: + response = Response(f"current url: {req.url}") + return response(environ, start_response) + + +def external_redirect_demo_app(environ, start_response): + response = redirect("http://example.com/") + return response(environ, start_response) + + +def external_subdomain_redirect_demo_app(environ, start_response): + if "test.example.com" in environ["HTTP_HOST"]: + response = Response("redirected successfully to subdomain") + else: + response = redirect("http://test.example.com/login") + return response(environ, start_response) + + +def multi_value_post_app(environ, start_response): + req = Request(environ) + assert req.form["field"] == "val1", req.form["field"] + assert req.form.getlist("field") == ["val1", "val2"], req.form.getlist("field") + response = Response("ok") + return response(environ, start_response) + + +def test_cookie_forging(): + c = Client(cookie_app) + c.set_cookie("localhost", "foo", "bar") + response = c.open() + assert response.text == "foo=bar" + + +def test_set_cookie_app(): + c = Client(cookie_app) + response = c.open() + assert "Set-Cookie" in response.headers + + +def test_cookiejar_stores_cookie(): + c = Client(cookie_app) + c.open() + assert "test" in c.cookie_jar._cookies["localhost.local"]["/"] + + +def test_no_initial_cookie(): + c = 
Client(cookie_app) + response = c.open() + assert response.text == "No Cookie" + + +def test_resent_cookie(): + c = Client(cookie_app) + c.open() + response = c.open() + assert response.text == "test=test" + + +def test_disable_cookies(): + c = Client(cookie_app, use_cookies=False) + c.open() + response = c.open() + assert response.text == "No Cookie" + + +def test_cookie_for_different_path(): + c = Client(cookie_app) + c.open("/path1") + response = c.open("/path2") + assert response.text == "test=test" + + +def test_environ_builder_basics(): + b = EnvironBuilder() + assert b.content_type is None + b.method = "POST" + assert b.content_type is None + b.form["test"] = "normal value" + assert b.content_type == "application/x-www-form-urlencoded" + b.files.add_file("test", BytesIO(b"test contents"), "test.txt") + assert b.files["test"].content_type == "text/plain" + b.form["test_int"] = 1 + assert b.content_type == "multipart/form-data" + + req = b.get_request() + b.close() + + assert req.url == "http://localhost/" + assert req.method == "POST" + assert req.form["test"] == "normal value" + assert req.files["test"].content_type == "text/plain" + assert req.files["test"].filename == "test.txt" + assert req.files["test"].read() == b"test contents" + req.close() + + +def test_environ_builder_data(): + b = EnvironBuilder(data="foo") + assert b.input_stream.getvalue() == b"foo" + b = EnvironBuilder(data=b"foo") + assert b.input_stream.getvalue() == b"foo" + + b = EnvironBuilder(data={"foo": "bar"}) + assert b.form["foo"] == "bar" + b = EnvironBuilder(data={"foo": ["bar1", "bar2"]}) + assert b.form.getlist("foo") == ["bar1", "bar2"] + + def check_list_content(b, length): + foo = b.files.getlist("foo") + assert len(foo) == length + for obj in foo: + assert isinstance(obj, FileStorage) + + b = EnvironBuilder(data={"foo": BytesIO()}) + check_list_content(b, 1) + b = EnvironBuilder(data={"foo": [BytesIO(), BytesIO()]}) + check_list_content(b, 2) + + b = 
EnvironBuilder(data={"foo": (BytesIO(),)}) + check_list_content(b, 1) + b = EnvironBuilder(data={"foo": [(BytesIO(),), (BytesIO(),)]}) + check_list_content(b, 2) + + +def test_environ_builder_json(): + @Request.application + def app(request): + assert request.content_type == "application/json" + return Response(json.loads(request.get_data(as_text=True))["foo"]) + + c = Client(app) + response = c.post("/", json={"foo": "bar"}) + assert response.text == "bar" + + with pytest.raises(TypeError): + c.post("/", json={"foo": "bar"}, data={"baz": "qux"}) + + +def test_environ_builder_headers(): + b = EnvironBuilder( + environ_base={"HTTP_USER_AGENT": "Foo/0.1"}, + environ_overrides={"wsgi.version": (1, 1)}, + ) + b.headers["X-Beat-My-Horse"] = "very well sir" + env = b.get_environ() + assert env["HTTP_USER_AGENT"] == "Foo/0.1" + assert env["HTTP_X_BEAT_MY_HORSE"] == "very well sir" + assert env["wsgi.version"] == (1, 1) + + b.headers["User-Agent"] = "Bar/1.0" + env = b.get_environ() + assert env["HTTP_USER_AGENT"] == "Bar/1.0" + + +def test_environ_builder_headers_content_type(): + b = EnvironBuilder(headers={"Content-Type": "text/plain"}) + env = b.get_environ() + assert env["CONTENT_TYPE"] == "text/plain" + assert "HTTP_CONTENT_TYPE" not in env + b = EnvironBuilder(content_type="text/html", headers={"Content-Type": "text/plain"}) + env = b.get_environ() + assert env["CONTENT_TYPE"] == "text/html" + assert "HTTP_CONTENT_TYPE" not in env + b = EnvironBuilder() + env = b.get_environ() + assert "CONTENT_TYPE" not in env + assert "HTTP_CONTENT_TYPE" not in env + + +def test_envrion_builder_multiple_headers(): + h = Headers() + h.add("FOO", "bar") + h.add("FOO", "baz") + b = EnvironBuilder(headers=h) + env = b.get_environ() + assert env["HTTP_FOO"] == "bar, baz" + + +def test_environ_builder_paths(): + b = EnvironBuilder(path="/foo", base_url="http://example.com/") + assert b.base_url == "http://example.com/" + assert b.path == "/foo" + assert b.script_root == "" + assert 
b.host == "example.com" + + b = EnvironBuilder(path="/foo", base_url="http://example.com/bar") + assert b.base_url == "http://example.com/bar/" + assert b.path == "/foo" + assert b.script_root == "/bar" + assert b.host == "example.com" + + b.host = "localhost" + assert b.base_url == "http://localhost/bar/" + b.base_url = "http://localhost:8080/" + assert b.host == "localhost:8080" + assert b.server_name == "localhost" + assert b.server_port == 8080 + + b.host = "foo.invalid" + b.url_scheme = "https" + b.script_root = "/test" + env = b.get_environ() + assert env["SERVER_NAME"] == "foo.invalid" + assert env["SERVER_PORT"] == "443" + assert env["SCRIPT_NAME"] == "/test" + assert env["PATH_INFO"] == "/foo" + assert env["HTTP_HOST"] == "foo.invalid" + assert env["wsgi.url_scheme"] == "https" + assert b.base_url == "https://foo.invalid/test/" + + +def test_environ_builder_content_type(): + builder = EnvironBuilder() + assert builder.content_type is None + builder.method = "POST" + assert builder.content_type is None + builder.method = "PUT" + assert builder.content_type is None + builder.method = "PATCH" + assert builder.content_type is None + builder.method = "DELETE" + assert builder.content_type is None + builder.method = "GET" + assert builder.content_type is None + builder.form["foo"] = "bar" + assert builder.content_type == "application/x-www-form-urlencoded" + builder.files.add_file("data", BytesIO(b"foo"), "test.txt") + assert builder.content_type == "multipart/form-data" + req = builder.get_request() + builder.close() + assert req.form["foo"] == "bar" + assert req.files["data"].read() == b"foo" + req.close() + + +def test_basic_auth(): + builder = EnvironBuilder(auth=("username", "password")) + request = builder.get_request() + auth = parse_authorization_header(request.headers["Authorization"]) + assert auth.username == "username" + assert auth.password == "password" + + +def test_auth_object(): + builder = EnvironBuilder( + auth=Authorization("digest", 
{"username": "u", "password": "p"}) + ) + request = builder.get_request() + assert request.headers["Authorization"].startswith("Digest ") + + +def test_environ_builder_stream_switch(): + d = MultiDict(dict(foo="bar", blub="blah", hu="hum")) + for use_tempfile in False, True: + stream, length, boundary = stream_encode_multipart( + d, use_tempfile, threshold=150 + ) + assert isinstance(stream, BytesIO) != use_tempfile + + form = parse_form_data( + { + "wsgi.input": stream, + "CONTENT_LENGTH": str(length), + "CONTENT_TYPE": f'multipart/form-data; boundary="{boundary}"', + } + )[1] + assert form == d + stream.close() + + +def test_environ_builder_unicode_file_mix(): + for use_tempfile in False, True: + f = FileStorage(BytesIO(rb"\N{SNOWMAN}"), "snowman.txt") + d = MultiDict(dict(f=f, s="\N{SNOWMAN}")) + stream, length, boundary = stream_encode_multipart( + d, use_tempfile, threshold=150 + ) + assert isinstance(stream, BytesIO) != use_tempfile + + _, form, files = parse_form_data( + { + "wsgi.input": stream, + "CONTENT_LENGTH": str(length), + "CONTENT_TYPE": f'multipart/form-data; boundary="{boundary}"', + } + ) + assert form["s"] == "\N{SNOWMAN}" + assert files["f"].name == "f" + assert files["f"].filename == "snowman.txt" + assert files["f"].read() == rb"\N{SNOWMAN}" + stream.close() + files["f"].close() + + +def test_create_environ(): + env = create_environ("/foo?bar=baz", "http://example.org/") + expected = { + "wsgi.multiprocess": False, + "wsgi.version": (1, 0), + "wsgi.run_once": False, + "wsgi.errors": sys.stderr, + "wsgi.multithread": False, + "wsgi.url_scheme": "http", + "SCRIPT_NAME": "", + "SERVER_NAME": "example.org", + "REQUEST_METHOD": "GET", + "HTTP_HOST": "example.org", + "PATH_INFO": "/foo", + "SERVER_PORT": "80", + "SERVER_PROTOCOL": "HTTP/1.1", + "QUERY_STRING": "bar=baz", + } + for key, value in iter(expected.items()): + assert env[key] == value + assert env["wsgi.input"].read(0) == b"" + assert create_environ("/foo", 
"http://example.com/")["SCRIPT_NAME"] == "" + + +def test_create_environ_query_string_error(): + with pytest.raises(ValueError): + create_environ("/foo?bar=baz", query_string={"a": "b"}) + + +def test_builder_from_environ(): + environ = create_environ( + "/ㄱ", + base_url="https://example.com/base", + query_string={"name": "Werkzeug"}, + data={"foo": "ㄴ"}, + headers={"X-Foo": "ㄷ"}, + ) + builder = EnvironBuilder.from_environ(environ) + + try: + new_environ = builder.get_environ() + finally: + builder.close() + + assert new_environ == environ + + +def test_file_closing(): + closed = [] + + class SpecialInput: + def read(self, size): + return "" + + def close(self): + closed.append(self) + + create_environ(data={"foo": SpecialInput()}) + assert len(closed) == 1 + builder = EnvironBuilder() + builder.files.add_file("blah", SpecialInput()) + builder.close() + assert len(closed) == 2 + + +def test_follow_redirect(): + env = create_environ("/", base_url="http://localhost") + c = Client(redirect_with_get_app) + response = c.open(environ_overrides=env, follow_redirects=True) + assert response.status == "200 OK" + assert response.text == "current url: http://localhost/some/redirect/" + + # Test that the :cls:`Client` is aware of user defined response wrappers + c = Client(redirect_with_get_app) + resp = c.get("/", follow_redirects=True) + assert resp.status_code == 200 + assert resp.text == "current url: http://localhost/some/redirect/" + + # test with URL other than '/' to make sure redirected URL's are correct + c = Client(redirect_with_get_app) + resp = c.get("/first/request", follow_redirects=True) + assert resp.status_code == 200 + assert resp.text == "current url: http://localhost/some/redirect/" + + +def test_follow_local_redirect(): + class LocalResponse(Response): + autocorrect_location_header = False + + def local_redirect_app(environ, start_response): + req = Request(environ) + if "/from/location" in req.url: + response = redirect("/to/location", 
Response=LocalResponse) + else: + response = Response(f"current path: {req.path}") + return response(environ, start_response) + + c = Client(local_redirect_app) + resp = c.get("/from/location", follow_redirects=True) + assert resp.status_code == 200 + assert resp.text == "current path: /to/location" + + +@pytest.mark.parametrize( + ("code", "keep"), ((302, False), (301, False), (307, True), (308, True)) +) +def test_follow_redirect_body(code, keep): + @Request.application + def app(request): + if request.url == "http://localhost/some/redirect/": + assert request.method == "POST" if keep else "GET" + assert request.headers["X-Foo"] == "bar" + + if keep: + assert request.form["foo"] == "bar" + else: + assert not request.form + + return Response(f"current url: {request.url}") + + return redirect("http://localhost/some/redirect/", code=code) + + c = Client(app) + response = c.post( + "/", follow_redirects=True, data={"foo": "bar"}, headers={"X-Foo": "bar"} + ) + assert response.status_code == 200 + assert response.text == "current url: http://localhost/some/redirect/" + + +def test_follow_external_redirect(): + env = create_environ("/", base_url="http://localhost") + c = Client(external_redirect_demo_app) + pytest.raises( + RuntimeError, lambda: c.get(environ_overrides=env, follow_redirects=True) + ) + + +def test_follow_external_redirect_on_same_subdomain(): + env = create_environ("/", base_url="http://example.com") + c = Client(external_subdomain_redirect_demo_app, allow_subdomain_redirects=True) + c.get(environ_overrides=env, follow_redirects=True) + + # check that this does not work for real external domains + env = create_environ("/", base_url="http://localhost") + pytest.raises( + RuntimeError, lambda: c.get(environ_overrides=env, follow_redirects=True) + ) + + # check that subdomain redirects fail if no `allow_subdomain_redirects` is applied + c = Client(external_subdomain_redirect_demo_app) + pytest.raises( + RuntimeError, lambda: c.get(environ_overrides=env, 
follow_redirects=True) + ) + + +def test_follow_redirect_loop(): + c = Client(redirect_loop_app) + with pytest.raises(ClientRedirectError): + c.get("/", follow_redirects=True) + + +def test_follow_redirect_non_root_base_url(): + @Request.application + def app(request): + if request.path == "/redirect": + return redirect("done") + + return Response(request.path) + + c = Client(app) + response = c.get( + "/redirect", base_url="http://localhost/other", follow_redirects=True + ) + assert response.text == "/done" + + +def test_follow_redirect_exhaust_intermediate(): + class Middleware: + def __init__(self, app): + self.app = app + self.active = 0 + + def __call__(self, environ, start_response): + # Test client must exhaust response stream, otherwise the + # cleanup code that decrements this won't have run by the + # time the next request is started. + assert not self.active + self.active += 1 + try: + yield from self.app(environ, start_response) + finally: + self.active -= 1 + + app = Middleware(redirect_with_get_app) + client = Client(Middleware(redirect_with_get_app)) + response = client.get("/", follow_redirects=True, buffered=False) + assert response.text == "current url: http://localhost/some/redirect/" + assert not app.active + + +def test_redirects_are_tracked(): + @Request.application + def app(request): + if request.path == "/first": + return redirect("/second") + + if request.path == "/second": + return redirect("/third") + + return Response("done") + + c = Client(app) + response = c.get("/first", follow_redirects=True) + assert response.text == "done" + assert len(response.history) == 2 + + assert response.history[-1].request.path == "/second" + assert response.history[-1].status_code == 302 + assert response.history[-1].location == "/third" + assert len(response.history[-1].history) == 1 + assert response.history[-1].history[-1] is response.history[-2] + + assert response.history[-2].request.path == "/first" + assert response.history[-2].status_code == 302 + 
assert response.history[-2].location == "/second" + assert len(response.history[-2].history) == 0 + + +def test_cookie_across_redirect(): + @Request.application + def app(request): + if request.path == "/": + return Response(request.cookies.get("auth", "out")) + + if request.path == "/in": + rv = redirect("/") + rv.set_cookie("auth", "in") + return rv + + if request.path == "/out": + rv = redirect("/") + rv.delete_cookie("auth") + return rv + + c = Client(app) + assert c.get("/").text == "out" + assert c.get("/in", follow_redirects=True).text == "in" + assert c.get("/").text == "in" + assert c.get("/out", follow_redirects=True).text == "out" + assert c.get("/").text == "out" + + +def test_path_info_script_name_unquoting(): + def test_app(environ, start_response): + start_response("200 OK", [("Content-Type", "text/plain")]) + return [f"{environ['PATH_INFO']}\n{environ['SCRIPT_NAME']}"] + + c = Client(test_app) + resp = c.get("/foo%40bar") + assert resp.text == "/foo@bar\n" + c = Client(test_app) + resp = c.get("/foo%40bar", "http://localhost/bar%40baz") + assert resp.text == "/foo@bar\n/bar@baz" + + +def test_multi_value_submit(): + c = Client(multi_value_post_app) + data = {"field": ["val1", "val2"]} + resp = c.post("/", data=data) + assert resp.status_code == 200 + c = Client(multi_value_post_app) + data = MultiDict({"field": ["val1", "val2"]}) + resp = c.post("/", data=data) + assert resp.status_code == 200 + + +def test_iri_support(): + b = EnvironBuilder("/föö-bar", base_url="http://☃.net/") + assert b.path == "/f%C3%B6%C3%B6-bar" + assert b.base_url == "http://xn--n3h.net/" + + +@pytest.mark.parametrize("buffered", (True, False)) +@pytest.mark.parametrize("iterable", (True, False)) +def test_run_wsgi_apps(buffered, iterable): + leaked_data = [] + + def simple_app(environ, start_response): + start_response("200 OK", [("Content-Type", "text/html")]) + return ["Hello World!"] + + def yielding_app(environ, start_response): + start_response("200 OK", 
[("Content-Type", "text/html")]) + yield "Hello " + yield "World!" + + def late_start_response(environ, start_response): + yield "Hello " + yield "World" + start_response("200 OK", [("Content-Type", "text/html")]) + yield "!" + + def depends_on_close(environ, start_response): + leaked_data.append("harhar") + start_response("200 OK", [("Content-Type", "text/html")]) + + class Rv: + def __iter__(self): + yield "Hello " + yield "World" + yield "!" + + def close(self): + assert leaked_data.pop() == "harhar" + + return Rv() + + for app in (simple_app, yielding_app, late_start_response, depends_on_close): + if iterable: + app = iterable_middleware(app) + app_iter, status, headers = run_wsgi_app(app, {}, buffered=buffered) + assert status == "200 OK" + assert list(headers) == [("Content-Type", "text/html")] + assert "".join(app_iter) == "Hello World!" + + if hasattr(app_iter, "close"): + app_iter.close() + assert not leaked_data + + +@pytest.mark.parametrize("buffered", (True, False)) +@pytest.mark.parametrize("iterable", (True, False)) +def test_lazy_start_response_empty_response_app(buffered, iterable): + class app: + def __init__(self, environ, start_response): + self.start_response = start_response + + def __iter__(self): + return self + + def __next__(self): + self.start_response("200 OK", [("Content-Type", "text/html")]) + raise StopIteration + + if iterable: + app = iterable_middleware(app) + app_iter, status, headers = run_wsgi_app(app, {}, buffered=buffered) + assert status == "200 OK" + assert list(headers) == [("Content-Type", "text/html")] + assert "".join(app_iter) == "" + + +def test_run_wsgi_app_closing_iterator(): + got_close = [] + + class CloseIter: + def __init__(self): + self.iterated = False + + def __iter__(self): + return self + + def close(self): + got_close.append(None) + + def __next__(self): + if self.iterated: + raise StopIteration() + self.iterated = True + return "bar" + + def bar(environ, start_response): + start_response("200 OK", 
[("Content-Type", "text/plain")]) + return CloseIter() + + app_iter, status, headers = run_wsgi_app(bar, {}) + assert status == "200 OK" + assert list(headers) == [("Content-Type", "text/plain")] + assert next(app_iter) == "bar" + pytest.raises(StopIteration, partial(next, app_iter)) + app_iter.close() + + assert run_wsgi_app(bar, {}, True)[0] == ["bar"] + + assert len(got_close) == 2 + + +def iterable_middleware(app): + """Guarantee that the app returns an iterable""" + + def inner(environ, start_response): + rv = app(environ, start_response) + + class Iterable: + def __iter__(self): + return iter(rv) + + if hasattr(rv, "close"): + + def close(self): + rv.close() + + return Iterable() + + return inner + + +def test_multiple_cookies(): + @Request.application + def test_app(request): + response = Response(repr(sorted(request.cookies.items()))) + response.set_cookie("test1", b"foo") + response.set_cookie("test2", b"bar") + return response + + client = Client(test_app) + resp = client.get("/") + assert resp.text == "[]" + resp = client.get("/") + assert resp.text == repr([("test1", "foo"), ("test2", "bar")]) + + +def test_correct_open_invocation_on_redirect(): + class MyClient(Client): + counter = 0 + + def open(self, *args, **kwargs): + self.counter += 1 + env = kwargs.setdefault("environ_overrides", {}) + env["werkzeug._foo"] = self.counter + return Client.open(self, *args, **kwargs) + + @Request.application + def test_app(request): + return Response(str(request.environ["werkzeug._foo"])) + + c = MyClient(test_app, response_wrapper=Response) + assert c.get("/").text == "1" + assert c.get("/").text == "2" + assert c.get("/").text == "3" + + +def test_correct_encoding(): + req = Request.from_values("/\N{SNOWMAN}", "http://example.com/foo") + assert req.script_root == "/foo" + assert req.path == "/\N{SNOWMAN}" + + +def test_full_url_requests_with_args(): + base = "http://example.com/" + + @Request.application + def test_app(request): + return 
Response(request.args["x"]) + + client = Client(test_app) + resp = client.get("/?x=42", base) + assert resp.text == "42" + resp = client.get("http://www.example.com/?x=23", base) + assert resp.text == "23" + + +def test_delete_requests_with_form(): + @Request.application + def test_app(request): + return Response(request.form.get("x", None)) + + client = Client(test_app) + resp = client.delete("/", data={"x": 42}) + assert resp.text == "42" + + +def test_post_with_file_descriptor(tmpdir): + c = Client(Response()) + f = tmpdir.join("some-file.txt") + f.write("foo") + with open(f.strpath) as data: + resp = c.post("/", data=data) + assert resp.status_code == 200 + with open(f.strpath, mode="rb") as data: + resp = c.post("/", data=data) + assert resp.status_code == 200 + + +def test_content_type(): + @Request.application + def test_app(request): + return Response(request.content_type) + + client = Client(test_app) + + resp = client.get("/", data=b"testing", mimetype="text/css") + assert resp.text == "text/css; charset=utf-8" + + resp = client.get("/", data=b"testing", mimetype="application/octet-stream") + assert resp.text == "application/octet-stream" + + +def test_raw_request_uri(): + @Request.application + def app(request): + path_info = request.path + request_uri = request.environ["REQUEST_URI"] + return Response("\n".join((path_info, request_uri))) + + client = Client(app) + response = client.get("/hello%2fworld") + data = response.text + assert data == "/hello/world\n/hello%2fworld" + + response = client.get("/?a=b") + assert response.text == "/\n/?a=b" + + response = client.get("/%3f?") # escaped ? in path, and empty query string + assert response.text == "/?\n/%3f?" 
+ + +def no_response_headers_app(environ, start_response): + """A WSGI application which returns a resposne with no headers.""" + response = Response("Response") + response.headers.clear() + return response(environ, start_response) + + +def test_no_content_type_header_addition(): + c = Client(no_response_headers_app) + response = c.open() + assert response.headers == Headers([("Content-Length", "8")]) diff --git a/tests/test_urls.py b/tests/test_urls.py new file mode 100644 index 0000000..a409709 --- /dev/null +++ b/tests/test_urls.py @@ -0,0 +1,385 @@ +import io + +import pytest + +from werkzeug import urls +from werkzeug.datastructures import OrderedMultiDict + + +def test_parsing(): + url = urls.url_parse("http://anon:hunter2@[2001:db8:0:1]:80/a/b/c") + assert url.netloc == "anon:hunter2@[2001:db8:0:1]:80" + assert url.username == "anon" + assert url.password == "hunter2" + assert url.port == 80 + assert url.ascii_host == "2001:db8:0:1" + + assert url.get_file_location() == (None, None) # no file scheme + + +@pytest.mark.parametrize("implicit_format", (True, False)) +@pytest.mark.parametrize("localhost", ("127.0.0.1", "::1", "localhost")) +def test_fileurl_parsing_windows(implicit_format, localhost, monkeypatch): + if implicit_format: + pathformat = None + monkeypatch.setattr("os.name", "nt") + else: + pathformat = "windows" + monkeypatch.delattr("os.name") # just to make sure it won't get used + + url = urls.url_parse("file:///C:/Documents and Settings/Foobar/stuff.txt") + assert url.netloc == "" + assert url.scheme == "file" + assert url.get_file_location(pathformat) == ( + None, + r"C:\Documents and Settings\Foobar\stuff.txt", + ) + + url = urls.url_parse("file://///server.tld/file.txt") + assert url.get_file_location(pathformat) == ("server.tld", r"file.txt") + + url = urls.url_parse("file://///server.tld") + assert url.get_file_location(pathformat) == ("server.tld", "") + + url = urls.url_parse(f"file://///{localhost}") + assert 
url.get_file_location(pathformat) == (None, "") + + url = urls.url_parse(f"file://///{localhost}/file.txt") + assert url.get_file_location(pathformat) == (None, r"file.txt") + + +def test_replace(): + url = urls.url_parse("http://de.wikipedia.org/wiki/Troll") + assert url.replace(query="foo=bar") == urls.url_parse( + "http://de.wikipedia.org/wiki/Troll?foo=bar" + ) + assert url.replace(scheme="https") == urls.url_parse( + "https://de.wikipedia.org/wiki/Troll" + ) + + +def test_quoting(): + assert urls.url_quote("\xf6\xe4\xfc") == "%C3%B6%C3%A4%C3%BC" + assert urls.url_unquote(urls.url_quote('#%="\xf6')) == '#%="\xf6' + assert urls.url_quote_plus("foo bar") == "foo+bar" + assert urls.url_unquote_plus("foo+bar") == "foo bar" + assert urls.url_quote_plus("foo+bar") == "foo%2Bbar" + assert urls.url_unquote_plus("foo%2Bbar") == "foo+bar" + assert urls.url_encode({b"a": None, b"b": b"foo bar"}) == "b=foo+bar" + assert urls.url_encode({"a": None, "b": "foo bar"}) == "b=foo+bar" + assert ( + urls.url_fix("http://de.wikipedia.org/wiki/Elf (Begriffsklärung)") + == "http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)" + ) + assert urls.url_quote_plus(42) == "42" + assert urls.url_quote(b"\xff") == "%FF" + + +def test_bytes_unquoting(): + assert ( + urls.url_unquote(urls.url_quote('#%="\xf6', charset="latin1"), charset=None) + == b'#%="\xf6' + ) + + +def test_url_decoding(): + x = urls.url_decode(b"foo=42&bar=23&uni=H%C3%A4nsel") + assert x["foo"] == "42" + assert x["bar"] == "23" + assert x["uni"] == "Hänsel" + + x = urls.url_decode(b"foo=42;bar=23;uni=H%C3%A4nsel", separator=b";") + assert x["foo"] == "42" + assert x["bar"] == "23" + assert x["uni"] == "Hänsel" + + x = urls.url_decode(b"%C3%9Ch=H%C3%A4nsel") + assert x["Üh"] == "Hänsel" + + +def test_url_bytes_decoding(): + x = urls.url_decode(b"foo=42&bar=23&uni=H%C3%A4nsel", charset=None) + assert x[b"foo"] == b"42" + assert x[b"bar"] == b"23" + assert x[b"uni"] == "Hänsel".encode() + + +def 
test_stream_decoding_string_fails(): + pytest.raises(TypeError, urls.url_decode_stream, "testing") + + +def test_url_encoding(): + assert urls.url_encode({"foo": "bar 45"}) == "foo=bar+45" + d = {"foo": 1, "bar": 23, "blah": "Hänsel"} + assert urls.url_encode(d, sort=True) == "bar=23&blah=H%C3%A4nsel&foo=1" + assert ( + urls.url_encode(d, sort=True, separator=";") == "bar=23;blah=H%C3%A4nsel;foo=1" + ) + + +def test_sorted_url_encode(): + assert ( + urls.url_encode( + {"a": 42, "b": 23, 1: 1, 2: 2}, sort=True, key=lambda i: str(i[0]) + ) + == "1=1&2=2&a=42&b=23" + ) + assert ( + urls.url_encode( + {"A": 1, "a": 2, "B": 3, "b": 4}, + sort=True, + key=lambda x: x[0].lower() + x[0], + ) + == "A=1&a=2&B=3&b=4" + ) + + +def test_streamed_url_encoding(): + out = io.StringIO() + urls.url_encode_stream({"foo": "bar 45"}, out) + assert out.getvalue() == "foo=bar+45" + + d = {"foo": 1, "bar": 23, "blah": "Hänsel"} + out = io.StringIO() + urls.url_encode_stream(d, out, sort=True) + assert out.getvalue() == "bar=23&blah=H%C3%A4nsel&foo=1" + out = io.StringIO() + urls.url_encode_stream(d, out, sort=True, separator=";") + assert out.getvalue() == "bar=23;blah=H%C3%A4nsel;foo=1" + + gen = urls.url_encode_stream(d, sort=True) + assert next(gen) == "bar=23" + assert next(gen) == "blah=H%C3%A4nsel" + assert next(gen) == "foo=1" + pytest.raises(StopIteration, lambda: next(gen)) + + +def test_url_fixing(): + x = urls.url_fix("http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)") + assert x == "http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)" + + x = urls.url_fix("http://just.a.test/$-_.+!*'(),") + assert x == "http://just.a.test/$-_.+!*'()," + + x = urls.url_fix("http://höhöhö.at/höhöhö/hähähä") + assert x == r"http://xn--hhh-snabb.at/h%C3%B6h%C3%B6h%C3%B6/h%C3%A4h%C3%A4h%C3%A4" + + +def test_url_fixing_filepaths(): + x = urls.url_fix(r"file://C:\Users\Administrator\My Documents\ÑÈáÇíí") + assert x == ( + r"file:///C%3A/Users/Administrator/My%20Documents/" + 
r"%C3%91%C3%88%C3%A1%C3%87%C3%AD%C3%AD" + ) + + a = urls.url_fix(r"file:/C:/") + b = urls.url_fix(r"file://C:/") + c = urls.url_fix(r"file:///C:/") + assert a == b == c == r"file:///C%3A/" + + x = urls.url_fix(r"file://host/sub/path") + assert x == r"file://host/sub/path" + + x = urls.url_fix(r"file:///") + assert x == r"file:///" + + +def test_url_fixing_qs(): + x = urls.url_fix(b"http://example.com/?foo=%2f%2f") + assert x == "http://example.com/?foo=%2f%2f" + + x = urls.url_fix( + "http://acronyms.thefreedictionary.com/" + "Algebraic+Methods+of+Solving+the+Schr%C3%B6dinger+Equation" + ) + assert x == ( + "http://acronyms.thefreedictionary.com/" + "Algebraic+Methods+of+Solving+the+Schr%C3%B6dinger+Equation" + ) + + +def test_iri_support(): + assert urls.uri_to_iri("http://xn--n3h.net/") == "http://\u2603.net/" + assert ( + urls.uri_to_iri(b"http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th") + == "http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th" + ) + assert urls.iri_to_uri("http://☃.net/") == "http://xn--n3h.net/" + assert ( + urls.iri_to_uri("http://üser:pässword@☃.net/påth") + == "http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th" + ) + + assert ( + urls.uri_to_iri("http://test.com/%3Fmeh?foo=%26%2F") + == "http://test.com/%3Fmeh?foo=%26%2F" + ) + + # this should work as well, might break on 2.4 because of a broken + # idna codec + assert urls.uri_to_iri(b"/foo") == "/foo" + assert urls.iri_to_uri("/foo") == "/foo" + + assert ( + urls.iri_to_uri("http://föö.com:8080/bam/baz") + == "http://xn--f-1gaa.com:8080/bam/baz" + ) + + +def test_iri_safe_conversion(): + assert urls.iri_to_uri("magnet:?foo=bar") == "magnet:?foo=bar" + assert urls.iri_to_uri("itms-service://?foo=bar") == "itms-service:?foo=bar" + assert ( + urls.iri_to_uri("itms-service://?foo=bar", safe_conversion=True) + == "itms-service://?foo=bar" + ) + + +def test_iri_safe_quoting(): + uri = "http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25" + iri = "http://föö.com/%2F%25?q=ö&x=%3D%25#%25" + 
assert urls.uri_to_iri(uri) == iri + assert urls.iri_to_uri(urls.uri_to_iri(uri)) == uri + + +def test_ordered_multidict_encoding(): + d = OrderedMultiDict() + d.add("foo", 1) + d.add("foo", 2) + d.add("foo", 3) + d.add("bar", 0) + d.add("foo", 4) + assert urls.url_encode(d) == "foo=1&foo=2&foo=3&bar=0&foo=4" + + +def test_multidict_encoding(): + d = OrderedMultiDict() + d.add("2013-10-10T23:26:05.657975+0000", "2013-10-10T23:26:05.657975+0000") + assert ( + urls.url_encode(d) + == "2013-10-10T23%3A26%3A05.657975%2B0000=2013-10-10T23%3A26%3A05.657975%2B0000" + ) + + +def test_url_unquote_plus_unicode(): + # was broken in 0.6 + assert urls.url_unquote_plus("\x6d") == "\x6d" + + +def test_quoting_of_local_urls(): + rv = urls.iri_to_uri("/foo\x8f") + assert rv == "/foo%C2%8F" + + +def test_url_attributes(): + rv = urls.url_parse("http://foo%3a:bar%3a@[::1]:80/123?x=y#frag") + assert rv.scheme == "http" + assert rv.auth == "foo%3a:bar%3a" + assert rv.username == "foo:" + assert rv.password == "bar:" + assert rv.raw_username == "foo%3a" + assert rv.raw_password == "bar%3a" + assert rv.host == "::1" + assert rv.port == 80 + assert rv.path == "/123" + assert rv.query == "x=y" + assert rv.fragment == "frag" + + rv = urls.url_parse("http://\N{SNOWMAN}.com/") + assert rv.host == "\N{SNOWMAN}.com" + assert rv.ascii_host == "xn--n3h.com" + + +def test_url_attributes_bytes(): + rv = urls.url_parse(b"http://foo%3a:bar%3a@[::1]:80/123?x=y#frag") + assert rv.scheme == b"http" + assert rv.auth == b"foo%3a:bar%3a" + assert rv.username == "foo:" + assert rv.password == "bar:" + assert rv.raw_username == b"foo%3a" + assert rv.raw_password == b"bar%3a" + assert rv.host == b"::1" + assert rv.port == 80 + assert rv.path == b"/123" + assert rv.query == b"x=y" + assert rv.fragment == b"frag" + + +def test_url_joining(): + assert urls.url_join("/foo", "/bar") == "/bar" + assert urls.url_join("http://example.com/foo", "/bar") == "http://example.com/bar" + assert urls.url_join("file:///tmp/", 
"test.html") == "file:///tmp/test.html" + assert urls.url_join("file:///tmp/x", "test.html") == "file:///tmp/test.html" + assert urls.url_join("file:///tmp/x", "../../../x.html") == "file:///x.html" + + +def test_partial_unencoded_decode(): + ref = "foo=정상처리".encode("euc-kr") + x = urls.url_decode(ref, charset="euc-kr") + assert x["foo"] == "정상처리" + + +def test_iri_to_uri_idempotence_ascii_only(): + uri = "http://www.idempoten.ce" + uri = urls.iri_to_uri(uri) + assert urls.iri_to_uri(uri) == uri + + +def test_iri_to_uri_idempotence_non_ascii(): + uri = "http://\N{SNOWMAN}/\N{SNOWMAN}" + uri = urls.iri_to_uri(uri) + assert urls.iri_to_uri(uri) == uri + + +def test_uri_to_iri_idempotence_ascii_only(): + uri = "http://www.idempoten.ce" + uri = urls.uri_to_iri(uri) + assert urls.uri_to_iri(uri) == uri + + +def test_uri_to_iri_idempotence_non_ascii(): + uri = "http://xn--n3h/%E2%98%83" + uri = urls.uri_to_iri(uri) + assert urls.uri_to_iri(uri) == uri + + +def test_iri_to_uri_to_iri(): + iri = "http://föö.com/" + uri = urls.iri_to_uri(iri) + assert urls.uri_to_iri(uri) == iri + + +def test_uri_to_iri_to_uri(): + uri = "http://xn--f-rgao.com/%C3%9E" + iri = urls.uri_to_iri(uri) + assert urls.iri_to_uri(iri) == uri + + +def test_uri_iri_normalization(): + uri = "http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93" + iri = "http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713" + + tests = [ + "http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713", + "http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}", + b"http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93", + "http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93", + "http://föñ.com/\u2610/fred?utf8=%E2%9C%93", + b"http://xn--f-rgao.com/\xe2\x98\x90/fred?utf8=\xe2\x9c\x93", + ] + + for test in tests: + assert urls.uri_to_iri(test) == iri + assert urls.iri_to_uri(test) == uri + assert urls.uri_to_iri(urls.iri_to_uri(test)) == iri + assert urls.iri_to_uri(urls.uri_to_iri(test)) == uri + assert urls.uri_to_iri(urls.uri_to_iri(test)) == 
iri + assert urls.iri_to_uri(urls.iri_to_uri(test)) == uri + + +def test_uri_to_iri_dont_unquote_space(): + assert urls.uri_to_iri("abc%20def") == "abc%20def" + + +def test_iri_to_uri_dont_quote_reserved(): + assert urls.iri_to_uri("/path[bracket]?(paren)") == "/path[bracket]?(paren)" diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..ed8d8d0 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,316 @@ +import inspect +from datetime import datetime + +import pytest + +from werkzeug import Request +from werkzeug import utils +from werkzeug.datastructures import Headers +from werkzeug.http import http_date +from werkzeug.http import parse_date +from werkzeug.test import Client +from werkzeug.wrappers import Response + + +def test_redirect(): + resp = utils.redirect("/füübär") + assert resp.headers["Location"] == "/f%C3%BC%C3%BCb%C3%A4r" + assert resp.status_code == 302 + assert resp.get_data() == ( + b"\n" + b"\n" + b"Redirecting...\n" + b"

    Redirecting...

    \n" + b"

    Redirecting...

    \n" + b"

    You should be redirected automatically to the target URL: " + b'http://\xe2\x98\x83.net/. ' + b"If not, click the link.\n" + ) + + resp = utils.redirect("http://example.com/", 305) + assert resp.headers["Location"] == "http://example.com/" + assert resp.status_code == 305 + assert resp.get_data() == ( + b"\n" + b"\n" + b"Redirecting...\n" + b"

    Redirecting...

    \n" + b"

    You should be redirected automatically to the target URL: " + b'http://example.com/. ' + b"If not, click the link.\n" + ) + + +def test_redirect_xss(): + location = 'http://example.com/?xss=">' + resp = utils.redirect(location) + assert b"" not in resp.get_data() + + location = 'http://example.com/?xss="onmouseover="alert(1)' + resp = utils.redirect(location) + assert ( + b'href="http://example.com/?xss="onmouseover="alert(1)"' not in resp.get_data() + ) + + +def test_redirect_with_custom_response_class(): + class MyResponse(Response): + pass + + location = "http://example.com/redirect" + resp = utils.redirect(location, Response=MyResponse) + + assert isinstance(resp, MyResponse) + assert resp.headers["Location"] == location + + +def test_cached_property(): + foo = [] + + class A: + def prop(self): + foo.append(42) + return 42 + + prop = utils.cached_property(prop) + + a = A() + p = a.prop + q = a.prop + assert p == q == 42 + assert foo == [42] + + foo = [] + + class A: + def _prop(self): + foo.append(42) + return 42 + + prop = utils.cached_property(_prop, name="prop") + del _prop + + a = A() + p = a.prop + q = a.prop + assert p == q == 42 + assert foo == [42] + + +def test_can_set_cached_property(): + class A: + @utils.cached_property + def _prop(self): + return "cached_property return value" + + a = A() + a._prop = "value" + assert a._prop == "value" + + +def test_invalidate_cached_property(): + accessed = 0 + + class A: + @utils.cached_property + def prop(self): + nonlocal accessed + accessed += 1 + return 42 + + a = A() + p = a.prop + q = a.prop + assert p == q == 42 + assert accessed == 1 + + a.prop = 16 + assert a.prop == 16 + assert accessed == 1 + + del a.prop + r = a.prop + assert r == 42 + assert accessed == 2 + + +def test_inspect_treats_cached_property_as_property(): + class A: + @utils.cached_property + def _prop(self): + return "cached_property return value" + + attrs = inspect.classify_class_attrs(A) + for attr in attrs: + if attr.name == 
"_prop": + break + assert attr.kind == "property" + + +def test_environ_property(): + class A: + environ = {"string": "abc", "number": "42"} + + string = utils.environ_property("string") + missing = utils.environ_property("missing", "spam") + read_only = utils.environ_property("number") + number = utils.environ_property("number", load_func=int) + broken_number = utils.environ_property("broken_number", load_func=int) + date = utils.environ_property( + "date", None, parse_date, http_date, read_only=False + ) + foo = utils.environ_property("foo") + + a = A() + assert a.string == "abc" + assert a.missing == "spam" + + def test_assign(): + a.read_only = "something" + + pytest.raises(AttributeError, test_assign) + assert a.number == 42 + assert a.broken_number is None + assert a.date is None + a.date = datetime(2008, 1, 22, 10, 0, 0, 0) + assert a.environ["date"] == "Tue, 22 Jan 2008 10:00:00 GMT" + + +def test_import_string(): + from datetime import date + from werkzeug.debug import DebuggedApplication + + assert utils.import_string("datetime.date") is date + assert utils.import_string("datetime.date") is date + assert utils.import_string("datetime:date") is date + assert utils.import_string("XXXXXXXXXXXX", True) is None + assert utils.import_string("datetime.XXXXXXXXXXXX", True) is None + assert ( + utils.import_string("werkzeug.debug.DebuggedApplication") is DebuggedApplication + ) + pytest.raises(ImportError, utils.import_string, "XXXXXXXXXXXXXXXX") + pytest.raises(ImportError, utils.import_string, "datetime.XXXXXXXXXX") + + +def test_import_string_provides_traceback(tmpdir, monkeypatch): + monkeypatch.syspath_prepend(str(tmpdir)) + # Couple of packages + dir_a = tmpdir.mkdir("a") + dir_b = tmpdir.mkdir("b") + # Totally packages, I promise + dir_a.join("__init__.py").write("") + dir_b.join("__init__.py").write("") + # 'aa.a' that depends on 'bb.b', which in turn has a broken import + dir_a.join("aa.py").write("from b import bb") + dir_b.join("bb.py").write("from os 
import a_typo") + + # Do we get all the useful information in the traceback? + with pytest.raises(ImportError) as baz_exc: + utils.import_string("a.aa") + traceback = "".join(str(line) for line in baz_exc.traceback) + assert "bb.py':1" in traceback # a bit different than typical python tb + assert "from os import a_typo" in traceback + + +def test_import_string_attribute_error(tmpdir, monkeypatch): + monkeypatch.syspath_prepend(str(tmpdir)) + tmpdir.join("foo_test.py").write("from bar_test import value") + tmpdir.join("bar_test.py").write("raise AttributeError('bad')") + + with pytest.raises(AttributeError) as info: + utils.import_string("foo_test") + + assert "bad" in str(info.value) + + with pytest.raises(AttributeError) as info: + utils.import_string("bar_test") + + assert "bad" in str(info.value) + + +def test_find_modules(): + assert list(utils.find_modules("werkzeug.debug")) == [ + "werkzeug.debug.console", + "werkzeug.debug.repr", + "werkzeug.debug.tbtools", + ] + + +def test_header_set_duplication_bug(): + headers = Headers([("Content-Type", "text/html"), ("Foo", "bar"), ("Blub", "blah")]) + headers["blub"] = "hehe" + headers["blafasel"] = "humm" + assert headers == Headers( + [ + ("Content-Type", "text/html"), + ("Foo", "bar"), + ("blub", "hehe"), + ("blafasel", "humm"), + ] + ) + + +@pytest.mark.parametrize( + ("path", "base_url", "absolute_location"), + [ + ("foo", "http://example.org/app", "http://example.org/app/foo/"), + ("/foo", "http://example.org/app", "http://example.org/app/foo/"), + ("/foo/bar", "http://example.org/", "http://example.org/foo/bar/"), + ("/foo/bar", "http://example.org/app", "http://example.org/app/foo/bar/"), + ("/foo?baz", "http://example.org/", "http://example.org/foo/?baz"), + ("/foo/", "http://example.org/", "http://example.org/foo/"), + ("/foo/", "http://example.org/app", "http://example.org/app/foo/"), + ("/", "http://example.org/", "http://example.org/"), + ("/", "http://example.org/app", "http://example.org/app/"), + ], 
+) +@pytest.mark.parametrize("autocorrect", [False, True]) +def test_append_slash_redirect(autocorrect, path, base_url, absolute_location): + @Request.application + def app(request): + rv = utils.append_slash_redirect(request.environ) + rv.autocorrect_location_header = autocorrect + return rv + + client = Client(app) + response = client.get(path, base_url=base_url) + assert response.status_code == 308 + + if not autocorrect: + assert response.headers["Location"].count("/") == 1 + else: + assert response.headers["Location"] == absolute_location + + +def test_cached_property_doc(): + @utils.cached_property + def foo(): + """testing""" + return 42 + + assert foo.__doc__ == "testing" + assert foo.__name__ == "foo" + assert foo.__module__ == __name__ + + +def test_secure_filename(): + assert utils.secure_filename("My cool movie.mov") == "My_cool_movie.mov" + assert utils.secure_filename("../../../etc/passwd") == "etc_passwd" + assert ( + utils.secure_filename("i contain cool \xfcml\xe4uts.txt") + == "i_contain_cool_umlauts.txt" + ) + assert utils.secure_filename("__filename__") == "filename" + assert utils.secure_filename("foo$&^*)bar") == "foobar" diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py new file mode 100644 index 0000000..b769a38 --- /dev/null +++ b/tests/test_wrappers.py @@ -0,0 +1,1389 @@ +import contextlib +import json +import os +from datetime import datetime +from datetime import timedelta +from datetime import timezone +from io import BytesIO + +import pytest + +from werkzeug import Response +from werkzeug import wrappers +from werkzeug.datastructures import Accept +from werkzeug.datastructures import CharsetAccept +from werkzeug.datastructures import CombinedMultiDict +from werkzeug.datastructures import Headers +from werkzeug.datastructures import ImmutableList +from werkzeug.datastructures import ImmutableMultiDict +from werkzeug.datastructures import ImmutableOrderedMultiDict +from werkzeug.datastructures import LanguageAccept +from 
werkzeug.datastructures import MIMEAccept +from werkzeug.datastructures import MultiDict +from werkzeug.exceptions import BadRequest +from werkzeug.exceptions import RequestedRangeNotSatisfiable +from werkzeug.exceptions import SecurityError +from werkzeug.http import COEP +from werkzeug.http import COOP +from werkzeug.http import generate_etag +from werkzeug.test import Client +from werkzeug.test import create_environ +from werkzeug.test import run_wsgi_app +from werkzeug.wsgi import LimitedStream +from werkzeug.wsgi import wrap_file + + +@wrappers.Request.application +def request_demo_app(request): + assert "werkzeug.request" in request.environ + return Response() + + +def assert_environ(environ, method): + assert environ["REQUEST_METHOD"] == method + assert environ["PATH_INFO"] == "/" + assert environ["SCRIPT_NAME"] == "" + assert environ["SERVER_NAME"] == "localhost" + assert environ["wsgi.version"] == (1, 0) + assert environ["wsgi.url_scheme"] == "http" + + +def test_base_request(): + client = Client(request_demo_app) + + # get requests + response = client.get("/?foo=bar&foo=hehe") + request = response.request + assert request.args == MultiDict([("foo", "bar"), ("foo", "hehe")]) + assert request.form == MultiDict() + assert request.data == b"" + assert_environ(request.environ, "GET") + + # post requests with form data + response = client.post( + "/?blub=blah", + data="foo=blub+hehe&blah=42", + content_type="application/x-www-form-urlencoded", + ) + request = response.request + assert request.args == MultiDict([("blub", "blah")]) + assert request.form == MultiDict([("foo", "blub hehe"), ("blah", "42")]) + assert request.data == b"" + # currently we do not guarantee that the values are ordered correctly + # for post data. 
+ # assert response['form_as_list'] == [('foo', ['blub hehe']), ('blah', ['42'])] + assert_environ(request.environ, "POST") + + # patch requests with form data + response = client.patch( + "/?blub=blah", + data="foo=blub+hehe&blah=42", + content_type="application/x-www-form-urlencoded", + ) + request = response.request + assert request.args == MultiDict([("blub", "blah")]) + assert request.form == MultiDict([("foo", "blub hehe"), ("blah", "42")]) + assert request.data == b"" + assert_environ(request.environ, "PATCH") + + # post requests with json data + json = b'{"foo": "bar", "blub": "blah"}' + response = client.post("/?a=b", data=json, content_type="application/json") + request = response.request + assert request.data == json + assert request.args == MultiDict([("a", "b")]) + assert request.form == MultiDict() + + +def test_query_string_is_bytes(): + req = wrappers.Request.from_values("/?foo=%2f") + assert req.query_string == b"foo=%2f" + + +def test_request_repr(): + req = wrappers.Request.from_values("/foobar") + assert "" == repr(req) + req = wrappers.Request.from_values("/привет") + assert "" == repr(req) + + +def test_access_route(): + req = wrappers.Request.from_values( + headers={"X-Forwarded-For": "192.168.1.2, 192.168.1.1"}, + environ_base={"REMOTE_ADDR": "192.168.1.3"}, + ) + assert req.access_route == ["192.168.1.2", "192.168.1.1"] + assert req.remote_addr == "192.168.1.3" + + req = wrappers.Request.from_values(environ_base={"REMOTE_ADDR": "192.168.1.3"}) + assert list(req.access_route) == ["192.168.1.3"] + + +def test_url_request_descriptors(): + req = wrappers.Request.from_values("/bar?foo=baz", "http://example.com/test") + assert req.path == "/bar" + assert req.full_path == "/bar?foo=baz" + assert req.script_root == "/test" + assert req.url == "http://example.com/test/bar?foo=baz" + assert req.base_url == "http://example.com/test/bar" + assert req.url_root == "http://example.com/test/" + assert req.host_url == "http://example.com/" + assert req.host 
== "example.com" + assert req.scheme == "http" + + req = wrappers.Request.from_values("/bar?foo=baz", "https://example.com/test") + assert req.scheme == "https" + + +def test_url_request_descriptors_query_quoting(): + next = "http%3A%2F%2Fwww.example.com%2F%3Fnext%3D%2Fbaz%23my%3Dhash" + req = wrappers.Request.from_values(f"/bar?next={next}", "http://example.com/") + assert req.path == "/bar" + assert req.full_path == f"/bar?next={next}" + assert req.url == f"http://example.com/bar?next={next}" + + +def test_url_request_descriptors_hosts(): + req = wrappers.Request.from_values("/bar?foo=baz", "http://example.com/test") + req.trusted_hosts = ["example.com"] + assert req.path == "/bar" + assert req.full_path == "/bar?foo=baz" + assert req.script_root == "/test" + assert req.url == "http://example.com/test/bar?foo=baz" + assert req.base_url == "http://example.com/test/bar" + assert req.url_root == "http://example.com/test/" + assert req.host_url == "http://example.com/" + assert req.host == "example.com" + assert req.scheme == "http" + + req = wrappers.Request.from_values("/bar?foo=baz", "https://example.com/test") + assert req.scheme == "https" + + req = wrappers.Request.from_values("/bar?foo=baz", "http://example.com/test") + req.trusted_hosts = ["example.org"] + pytest.raises(SecurityError, lambda: req.url) + pytest.raises(SecurityError, lambda: req.base_url) + pytest.raises(SecurityError, lambda: req.url_root) + pytest.raises(SecurityError, lambda: req.host_url) + pytest.raises(SecurityError, lambda: req.host) + + +def test_authorization(): + request = wrappers.Request.from_values( + headers={"Authorization": "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="} + ) + a = request.authorization + assert a.type == "basic" + assert a.username == "Aladdin" + assert a.password == "open sesame" + + +def test_authorization_with_unicode(): + request = wrappers.Request.from_values( + headers={"Authorization": "Basic 0YDRg9GB0YHQutC40IE60JHRg9C60LLRiw=="} + ) + a = request.authorization + 
assert a.type == "basic" + assert a.username == "русскиЁ" + assert a.password == "Буквы" + + +def test_request_application(): + @wrappers.Request.application + def application(request): + return wrappers.Response("Hello World!") + + @wrappers.Request.application + def failing_application(request): + raise BadRequest() + + resp = wrappers.Response.from_app(application, create_environ()) + assert resp.data == b"Hello World!" + assert resp.status_code == 200 + + resp = wrappers.Response.from_app(failing_application, create_environ()) + assert b"Bad Request" in resp.data + assert resp.status_code == 400 + + +def test_request_access_control(): + request = wrappers.Request.from_values( + headers={ + "Origin": "https://palletsprojects.com", + "Access-Control-Request-Headers": "X-A, X-B", + "Access-Control-Request-Method": "PUT", + } + ) + assert request.origin == "https://palletsprojects.com" + assert request.access_control_request_headers == {"X-A", "X-B"} + assert request.access_control_request_method == "PUT" + + +def test_response_access_control(): + response = wrappers.Response("Hello World") + assert response.access_control_allow_credentials is False + response.access_control_allow_credentials = True + response.access_control_allow_headers = ["X-A", "X-B"] + assert response.headers["Access-Control-Allow-Credentials"] == "true" + assert set(response.headers["Access-Control-Allow-Headers"].split(", ")) == { + "X-A", + "X-B", + } + + +def test_base_response(): + response = wrappers.Response("öäü") + assert response.get_data() == "öäü".encode() + + # writing + response = wrappers.Response("foo") + response.stream.write("bar") + assert response.get_data() == b"foobar" + + # set cookie + response = wrappers.Response() + response.set_cookie( + "foo", + value="bar", + max_age=60, + expires=0, + path="/blub", + domain="example.org", + samesite="Strict", + ) + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + 
"foo=bar; Domain=example.org;" + " Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=60;" + " Path=/blub; SameSite=Strict", + ), + ] + + # delete cookie + response = wrappers.Response() + response.delete_cookie("foo") + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + "foo=; Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=0; Path=/", + ), + ] + + # close call forwarding + closed = [] + + class Iterable: + def __next__(self): + raise StopIteration() + + def __iter__(self): + return self + + def close(self): + closed.append(True) + + response = wrappers.Response(Iterable()) + response.call_on_close(lambda: closed.append(True)) + app_iter, status, headers = run_wsgi_app(response, create_environ(), buffered=True) + assert status == "200 OK" + assert "".join(app_iter) == "" + assert len(closed) == 2 + + # with statement + del closed[:] + response = wrappers.Response(Iterable()) + with response: + pass + assert len(closed) == 1 + + +@pytest.mark.parametrize( + ("status_code", "expected_status"), + [ + (200, "200 OK"), + (404, "404 NOT FOUND"), + (588, "588 UNKNOWN"), + (999, "999 UNKNOWN"), + ], +) +def test_response_set_status_code(status_code, expected_status): + response = wrappers.Response() + response.status_code = status_code + assert response.status_code == status_code + assert response.status == expected_status + + +@pytest.mark.parametrize( + ("status", "expected_status_code", "expected_status"), + [ + ("404", 404, "404 NOT FOUND"), + ("588", 588, "588 UNKNOWN"), + ("999", 999, "999 UNKNOWN"), + ("200 OK", 200, "200 OK"), + ("999 WTF", 999, "999 WTF"), + ("wtf", 0, "0 wtf"), + ("200 TEA POT", 200, "200 TEA POT"), + (200, 200, "200 OK"), + (400, 400, "400 BAD REQUEST"), + ], +) +def test_response_set_status(status, expected_status_code, expected_status): + response = wrappers.Response() + response.status = status + assert response.status_code == expected_status_code + assert response.status == 
expected_status + + response = wrappers.Response(status=status) + assert response.status_code == expected_status_code + assert response.status == expected_status + + +def test_response_init_status_empty_string(): + # invalid status codes + with pytest.raises(ValueError) as info: + wrappers.Response(None, "") + + assert "Empty status argument" in str(info.value) + + +def test_response_init_status_tuple(): + with pytest.raises(TypeError) as info: + wrappers.Response(None, tuple()) + + assert "Invalid status argument" in str(info.value) + + +def test_type_forcing(): + def wsgi_application(environ, start_response): + start_response("200 OK", [("Content-Type", "text/html")]) + return ["Hello World!"] + + base_response = wrappers.Response("Hello World!", content_type="text/html") + + class SpecialResponse(wrappers.Response): + def foo(self): + return 42 + + # good enough for this simple application, but don't ever use that in + # real world examples! + fake_env = {} + + for orig_resp in wsgi_application, base_response: + response = SpecialResponse.force_type(orig_resp, fake_env) + assert response.__class__ is SpecialResponse + assert response.foo() == 42 + assert response.get_data() == b"Hello World!" 
+ assert response.content_type == "text/html" + + # without env, no arbitrary conversion + pytest.raises(TypeError, SpecialResponse.force_type, wsgi_application) + + +def test_accept(): + request = wrappers.Request( + { + "HTTP_ACCEPT": "text/xml,application/xml,application/xhtml+xml," + "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5", + "HTTP_ACCEPT_CHARSET": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "HTTP_ACCEPT_ENCODING": "gzip,deflate", + "HTTP_ACCEPT_LANGUAGE": "en-us,en;q=0.5", + "SERVER_NAME": "eggs", + "SERVER_PORT": "80", + } + ) + assert request.accept_mimetypes == MIMEAccept( + [ + ("text/xml", 1), + ("application/xml", 1), + ("application/xhtml+xml", 1), + ("image/png", 1), + ("text/html", 0.9), + ("text/plain", 0.8), + ("*/*", 0.5), + ] + ) + assert request.accept_charsets == CharsetAccept( + [("ISO-8859-1", 1), ("utf-8", 0.7), ("*", 0.7)] + ) + assert request.accept_encodings == Accept([("gzip", 1), ("deflate", 1)]) + assert request.accept_languages == LanguageAccept([("en-us", 1), ("en", 0.5)]) + + request = wrappers.Request( + {"HTTP_ACCEPT": "", "SERVER_NAME": "example.org", "SERVER_PORT": "80"} + ) + assert request.accept_mimetypes == MIMEAccept() + + +def test_etag_request(): + request = wrappers.Request( + { + "HTTP_CACHE_CONTROL": "no-store, no-cache", + "HTTP_IF_MATCH": 'W/"foo", bar, "baz"', + "HTTP_IF_NONE_MATCH": 'W/"foo", bar, "baz"', + "HTTP_IF_MODIFIED_SINCE": "Tue, 22 Jan 2008 11:18:44 GMT", + "HTTP_IF_UNMODIFIED_SINCE": "Tue, 22 Jan 2008 11:18:44 GMT", + "SERVER_NAME": "eggs", + "SERVER_PORT": "80", + } + ) + assert request.cache_control.no_store + assert request.cache_control.no_cache + + for etags in request.if_match, request.if_none_match: + assert etags("bar") + assert etags.contains_raw('W/"foo"') + assert etags.contains_weak("foo") + assert not etags.contains("foo") + + dt = datetime(2008, 1, 22, 11, 18, 44, tzinfo=timezone.utc) + assert request.if_modified_since == dt + assert request.if_unmodified_since == dt + + +def 
test_user_agent(): + user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0" + request = wrappers.Request({"HTTP_USER_AGENT": user_agent}) + assert request.user_agent.to_header() == user_agent + assert str(request.user_agent) == user_agent + assert request.user_agent.string == user_agent + + +def test_stream_wrapping(): + class LowercasingStream: + def __init__(self, stream): + self._stream = stream + + def read(self, size=-1): + return self._stream.read(size).lower() + + def readline(self, size=-1): + return self._stream.readline(size).lower() + + data = b"foo=Hello+World" + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + req.stream = LowercasingStream(req.stream) + assert req.form["foo"] == "hello world" + + +def test_data_descriptor_triggers_parsing(): + data = b"foo=Hello+World" + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + + assert req.data == b"" + assert req.form["foo"] == "Hello World" + + +def test_get_data_method_parsing_caching_behavior(): + data = b"foo=Hello+World" + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + + # get_data() caches, so form stays available + assert req.get_data() == data + assert req.form["foo"] == "Hello World" + assert req.get_data() == data + + # here we access the form data first, caching is bypassed + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + assert req.form["foo"] == "Hello World" + assert req.get_data() == b"" + + # Another case is uncached get data which trashes everything + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + assert req.get_data(cache=False) == data + assert req.get_data(cache=False) == b"" + assert 
req.form == {} + + # Or we can implicitly start the form parser which is similar to + # the old .data behavior + req = wrappers.Request.from_values( + "/", method="POST", data=data, content_type="application/x-www-form-urlencoded" + ) + assert req.get_data(parse_form_data=True) == b"" + assert req.form["foo"] == "Hello World" + + +def test_etag_response(): + response = wrappers.Response("Hello World") + assert response.get_etag() == (None, None) + response.add_etag() + assert response.get_etag() == ("0a4d55a8d778e5022fab701977c5d840bbc486d0", False) + assert not response.cache_control + response.cache_control.must_revalidate = True + response.cache_control.max_age = 60 + response.headers["Content-Length"] = len(response.get_data()) + assert response.headers["Cache-Control"] in ( + "must-revalidate, max-age=60", + "max-age=60, must-revalidate", + ) + + assert "date" not in response.headers + env = create_environ() + env.update({"REQUEST_METHOD": "GET", "HTTP_IF_NONE_MATCH": response.get_etag()[0]}) + response.make_conditional(env) + assert "date" in response.headers + + # after the thing is invoked by the server as wsgi application + # (we're emulating this here), there must not be any entity + # headers left and the status code would have to be 304 + resp = wrappers.Response.from_app(response, env) + assert resp.status_code == 304 + assert "content-length" not in resp.headers + + # make sure date is not overriden + response = wrappers.Response("Hello World") + response.date = 1337 + d = response.date + response.make_conditional(env) + assert response.date == d + + # make sure content length is only set if missing + response = wrappers.Response("Hello World") + response.content_length = 999 + response.make_conditional(env) + assert response.content_length == 999 + + +def test_etag_response_412(): + response = wrappers.Response("Hello World") + assert response.get_etag() == (None, None) + response.add_etag() + assert response.get_etag() == 
("0a4d55a8d778e5022fab701977c5d840bbc486d0", False) + assert not response.cache_control + response.cache_control.must_revalidate = True + response.cache_control.max_age = 60 + response.headers["Content-Length"] = len(response.get_data()) + assert response.headers["Cache-Control"] in ( + "must-revalidate, max-age=60", + "max-age=60, must-revalidate", + ) + + assert "date" not in response.headers + env = create_environ() + env.update( + {"REQUEST_METHOD": "GET", "HTTP_IF_MATCH": f"{response.get_etag()[0]}xyz"} + ) + response.make_conditional(env) + assert "date" in response.headers + + # after the thing is invoked by the server as wsgi application + # (we're emulating this here), there must not be any entity + # headers left and the status code would have to be 412 + resp = wrappers.Response.from_app(response, env) + assert resp.status_code == 412 + # Make sure there is a body still + assert resp.data != b"" + + # make sure date is not overriden + response = wrappers.Response("Hello World") + response.date = 1337 + d = response.date + response.make_conditional(env) + assert response.date == d + + # make sure content length is only set if missing + response = wrappers.Response("Hello World") + response.content_length = 999 + response.make_conditional(env) + assert response.content_length == 999 + + +def test_range_request_basic(): + env = create_environ() + response = wrappers.Response("Hello World") + env["HTTP_RANGE"] = "bytes=0-4" + response.make_conditional(env, accept_ranges=True, complete_length=11) + assert response.status_code == 206 + assert response.headers["Accept-Ranges"] == "bytes" + assert response.headers["Content-Range"] == "bytes 0-4/11" + assert response.headers["Content-Length"] == "5" + assert response.data == b"Hello" + + +def test_range_request_out_of_bound(): + env = create_environ() + response = wrappers.Response("Hello World") + env["HTTP_RANGE"] = "bytes=6-666" + response.make_conditional(env, accept_ranges=True, complete_length=11) + assert 
response.status_code == 206 + assert response.headers["Accept-Ranges"] == "bytes" + assert response.headers["Content-Range"] == "bytes 6-10/11" + assert response.headers["Content-Length"] == "5" + assert response.data == b"World" + + +def test_range_request_with_file(): + env = create_environ() + resources = os.path.join(os.path.dirname(__file__), "res") + fname = os.path.join(resources, "test.txt") + with open(fname, "rb") as f: + fcontent = f.read() + with open(fname, "rb") as f: + response = wrappers.Response(wrap_file(env, f)) + env["HTTP_RANGE"] = "bytes=0-0" + response.make_conditional( + env, accept_ranges=True, complete_length=len(fcontent) + ) + assert response.status_code == 206 + assert response.headers["Accept-Ranges"] == "bytes" + assert response.headers["Content-Range"] == f"bytes 0-0/{len(fcontent)}" + assert response.headers["Content-Length"] == "1" + assert response.data == fcontent[:1] + + +def test_range_request_with_complete_file(): + env = create_environ() + resources = os.path.join(os.path.dirname(__file__), "res") + fname = os.path.join(resources, "test.txt") + with open(fname, "rb") as f: + fcontent = f.read() + with open(fname, "rb") as f: + fsize = os.path.getsize(fname) + response = wrappers.Response(wrap_file(env, f)) + env["HTTP_RANGE"] = f"bytes=0-{fsize - 1}" + response.make_conditional(env, accept_ranges=True, complete_length=fsize) + assert response.status_code == 206 + assert response.headers["Accept-Ranges"] == "bytes" + assert response.headers["Content-Range"] == f"bytes 0-{fsize - 1}/{fsize}" + assert response.headers["Content-Length"] == str(fsize) + assert response.data == fcontent + + +@pytest.mark.parametrize("value", [None, 0]) +def test_range_request_without_complete_length(value): + env = create_environ(headers={"Range": "bytes=0-10"}) + response = wrappers.Response("Hello World") + response.make_conditional(env, accept_ranges=True, complete_length=value) + assert response.status_code == 200 + assert response.data == 
b"Hello World" + + +def test_invalid_range_request(): + env = create_environ() + response = wrappers.Response("Hello World") + env["HTTP_RANGE"] = "bytes=-" + with pytest.raises(RequestedRangeNotSatisfiable): + response.make_conditional(env, accept_ranges=True, complete_length=11) + + +def test_etag_response_freezing(): + response = Response("Hello World") + response.freeze() + assert response.get_etag() == (str(generate_etag(b"Hello World")), False) + + +def test_authenticate(): + resp = wrappers.Response() + resp.www_authenticate.type = "basic" + resp.www_authenticate.realm = "Testing" + assert resp.headers["WWW-Authenticate"] == 'Basic realm="Testing"' + resp.www_authenticate.realm = None + resp.www_authenticate.type = None + assert "WWW-Authenticate" not in resp.headers + + +def test_authenticate_quoted_qop(): + # Example taken from https://github.com/pallets/werkzeug/issues/633 + resp = wrappers.Response() + resp.www_authenticate.set_digest("REALM", "NONCE", qop=("auth", "auth-int")) + + actual = set(f"{resp.headers['WWW-Authenticate']},".split()) + expected = set('Digest nonce="NONCE", realm="REALM", qop="auth, auth-int",'.split()) + assert actual == expected + + resp.www_authenticate.set_digest("REALM", "NONCE", qop=("auth",)) + + actual = set(f"{resp.headers['WWW-Authenticate']},".split()) + expected = set('Digest nonce="NONCE", realm="REALM", qop="auth",'.split()) + assert actual == expected + + +def test_response_stream(): + response = wrappers.Response() + response.stream.write("Hello ") + response.stream.write("World!") + assert response.response == ["Hello ", "World!"] + assert response.get_data() == b"Hello World!" 
+ + +def test_common_response_descriptors(): + response = wrappers.Response() + response.mimetype = "text/html" + assert response.mimetype == "text/html" + assert response.content_type == "text/html; charset=utf-8" + assert response.mimetype_params == {"charset": "utf-8"} + response.mimetype_params["x-foo"] = "yep" + del response.mimetype_params["charset"] + assert response.content_type == "text/html; x-foo=yep" + + now = datetime.now(timezone.utc).replace(microsecond=0) + + assert response.content_length is None + response.content_length = "42" + assert response.content_length == 42 + + for attr in "date", "expires": + assert getattr(response, attr) is None + setattr(response, attr, now) + assert getattr(response, attr) == now + + assert response.age is None + age_td = timedelta(days=1, minutes=3, seconds=5) + response.age = age_td + assert response.age == age_td + response.age = 42 + assert response.age == timedelta(seconds=42) + + assert response.retry_after is None + response.retry_after = now + assert response.retry_after == now + + assert not response.vary + response.vary.add("Cookie") + response.vary.add("Content-Language") + assert "cookie" in response.vary + assert response.vary.to_header() == "Cookie, Content-Language" + response.headers["Vary"] = "Content-Encoding" + assert response.vary.as_set() == {"content-encoding"} + + response.allow.update(["GET", "POST"]) + assert response.headers["Allow"] == "GET, POST" + + response.content_language.add("en-US") + response.content_language.add("fr") + assert response.headers["Content-Language"] == "en-US, fr" + + +def test_common_request_descriptors(): + request = wrappers.Request.from_values( + content_type="text/html; charset=utf-8", + content_length="23", + headers={ + "Referer": "http://www.example.com/", + "Date": "Sat, 28 Feb 2009 19:04:35 GMT", + "Max-Forwards": "10", + "Pragma": "no-cache", + "Content-Encoding": "gzip", + "Content-MD5": "9a3bc6dbc47a70db25b84c6e5867a072", + }, + ) + + assert 
request.content_type == "text/html; charset=utf-8" + assert request.mimetype == "text/html" + assert request.mimetype_params == {"charset": "utf-8"} + assert request.content_length == 23 + assert request.referrer == "http://www.example.com/" + assert request.date == datetime(2009, 2, 28, 19, 4, 35, tzinfo=timezone.utc) + assert request.max_forwards == 10 + assert "no-cache" in request.pragma + assert request.content_encoding == "gzip" + assert request.content_md5 == "9a3bc6dbc47a70db25b84c6e5867a072" + + +def test_request_mimetype_always_lowercase(): + request = wrappers.Request.from_values(content_type="APPLICATION/JSON") + assert request.mimetype == "application/json" + + +def test_shallow_mode(): + request = wrappers.Request( + {"QUERY_STRING": "foo=bar", "SERVER_NAME": "eggs", "SERVER_PORT": "80"}, + shallow=True, + ) + assert request.args["foo"] == "bar" + pytest.raises(RuntimeError, lambda: request.stream) + pytest.raises(RuntimeError, lambda: request.data) + pytest.raises(RuntimeError, lambda: request.form) + + +def test_form_parsing_failed(): + data = b"--blah\r\n" + request = wrappers.Request.from_values( + input_stream=BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + assert not request.files + assert not request.form + + # Bad Content-Type + data = b"test" + request = wrappers.Request.from_values( + input_stream=BytesIO(data), + content_length=len(data), + content_type=", ", + method="POST", + ) + assert not request.form + + +def test_file_closing(): + data = ( + b"--foo\r\n" + b'Content-Disposition: form-data; name="foo"; filename="foo.txt"\r\n' + b"Content-Type: text/plain; charset=utf-8\r\n\r\n" + b"file contents, just the contents\r\n" + b"--foo--" + ) + req = wrappers.Request.from_values( + input_stream=BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + foo = req.files["foo"] + assert foo.mimetype == "text/plain" + 
assert foo.filename == "foo.txt" + + assert foo.closed is False + req.close() + assert foo.closed is True + + +def test_file_closing_with(): + data = ( + b"--foo\r\n" + b'Content-Disposition: form-data; name="foo"; filename="foo.txt"\r\n' + b"Content-Type: text/plain; charset=utf-8\r\n\r\n" + b"file contents, just the contents\r\n" + b"--foo--" + ) + req = wrappers.Request.from_values( + input_stream=BytesIO(data), + content_length=len(data), + content_type="multipart/form-data; boundary=foo", + method="POST", + ) + with req: + foo = req.files["foo"] + assert foo.mimetype == "text/plain" + assert foo.filename == "foo.txt" + + assert foo.closed is True + + +def test_url_charset_reflection(): + req = wrappers.Request.from_values() + req.charset = "utf-7" + assert req.url_charset == "utf-7" + + +def test_response_streamed(): + r = wrappers.Response() + assert not r.is_streamed + r = wrappers.Response("Hello World") + assert not r.is_streamed + r = wrappers.Response(["foo", "bar"]) + assert not r.is_streamed + + def gen(): + if 0: + yield None + + r = wrappers.Response(gen()) + assert r.is_streamed + + +def test_response_iter_wrapping(): + def uppercasing(iterator): + for item in iterator: + yield item.upper() + + def generator(): + yield "foo" + yield "bar" + + req = wrappers.Request.from_values() + resp = wrappers.Response(generator()) + del resp.headers["Content-Length"] + resp.response = uppercasing(resp.iter_encoded()) + actual_resp = wrappers.Response.from_app(resp, req.environ, buffered=True) + assert actual_resp.get_data() == b"FOOBAR" + + +def test_response_freeze(): + def generate(): + yield "foo" + yield "bar" + + resp = wrappers.Response(generate()) + resp.freeze() + assert resp.response == [b"foo", b"bar"] + assert resp.headers["content-length"] == "6" + + +def test_response_content_length_uses_encode(): + r = wrappers.Response("你好") + assert r.calculate_content_length() == 6 + + +def test_other_method_payload(): + data = b"Hello World" + req = 
wrappers.Request.from_values( + input_stream=BytesIO(data), + content_length=len(data), + content_type="text/plain", + method="WHAT_THE_FUCK", + ) + assert req.get_data() == data + assert isinstance(req.stream, LimitedStream) + + +def test_urlfication(): + resp = wrappers.Response() + resp.headers["Location"] = "http://üser:pässword@☃.net/påth" + resp.headers["Content-Location"] = "http://☃.net/" + headers = resp.get_wsgi_headers(create_environ()) + assert headers["location"] == "http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th" + assert headers["content-location"] == "http://xn--n3h.net/" + + +def test_new_response_iterator_behavior(): + req = wrappers.Request.from_values() + resp = wrappers.Response("Hello Wörld!") + + def get_content_length(resp): + headers = resp.get_wsgi_headers(req.environ) + return headers.get("content-length", type=int) + + def generate_items(): + yield "Hello " + yield "Wörld!" + + # werkzeug encodes when set to `data` now, which happens + # if a string is passed to the response object. 
+ assert resp.response == ["Hello Wörld!".encode()] + assert resp.get_data() == "Hello Wörld!".encode() + assert get_content_length(resp) == 13 + assert not resp.is_streamed + assert resp.is_sequence + + # try the same for manual assignment + resp.set_data("Wörd") + assert resp.response == ["Wörd".encode()] + assert resp.get_data() == "Wörd".encode() + assert get_content_length(resp) == 5 + assert not resp.is_streamed + assert resp.is_sequence + + # automatic generator sequence conversion + resp.response = generate_items() + assert resp.is_streamed + assert not resp.is_sequence + assert resp.get_data() == "Hello Wörld!".encode() + assert resp.response == [b"Hello ", "Wörld!".encode()] + assert not resp.is_streamed + assert resp.is_sequence + + # automatic generator sequence conversion + resp.response = generate_items() + resp.implicit_sequence_conversion = False + assert resp.is_streamed + assert not resp.is_sequence + pytest.raises(RuntimeError, lambda: resp.get_data()) + resp.make_sequence() + assert resp.get_data() == "Hello Wörld!".encode() + assert resp.response == [b"Hello ", "Wörld!".encode()] + assert not resp.is_streamed + assert resp.is_sequence + + # stream makes it a list no matter how the conversion is set + for val in True, False: + resp.implicit_sequence_conversion = val + resp.response = ("foo", "bar") + assert resp.is_sequence + resp.stream.write("baz") + assert resp.response == ["foo", "bar", "baz"] + + +def test_form_data_ordering(): + class MyRequest(wrappers.Request): + parameter_storage_class = ImmutableOrderedMultiDict + + req = MyRequest.from_values("/?foo=1&bar=0&foo=3") + assert list(req.args) == ["foo", "bar"] + assert list(req.args.items(multi=True)) == [ + ("foo", "1"), + ("bar", "0"), + ("foo", "3"), + ] + assert isinstance(req.args, ImmutableOrderedMultiDict) + assert isinstance(req.values, CombinedMultiDict) + assert req.values["foo"] == "1" + assert req.values.getlist("foo") == ["1", "3"] + + +def test_values(): + r = 
wrappers.Request.from_values( + method="POST", query_string={"a": "1"}, data={"a": "2", "b": "2"} + ) + assert r.values["a"] == "1" + assert r.values["b"] == "2" + + # form should not be combined for GET method + r = wrappers.Request.from_values( + method="GET", query_string={"a": "1"}, data={"a": "2", "b": "2"} + ) + assert r.values["a"] == "1" + assert "b" not in r.values + + +def test_storage_classes(): + class MyRequest(wrappers.Request): + dict_storage_class = dict + list_storage_class = list + parameter_storage_class = dict + + req = MyRequest.from_values("/?foo=baz", headers={"Cookie": "foo=bar"}) + assert type(req.cookies) is dict + assert req.cookies == {"foo": "bar"} + assert type(req.access_route) is list + + assert type(req.args) is dict + assert type(req.values) is CombinedMultiDict + assert req.values["foo"] == "baz" + + req = wrappers.Request.from_values(headers={"Cookie": "foo=bar;foo=baz"}) + assert type(req.cookies) is ImmutableMultiDict + assert req.cookies.to_dict() == {"foo": "bar"} + + # it is possible to have multiple cookies with the same name + assert req.cookies.getlist("foo") == ["bar", "baz"] + assert type(req.access_route) is ImmutableList + + MyRequest.list_storage_class = tuple + req = MyRequest.from_values() + assert type(req.access_route) is tuple + + +def test_response_headers_passthrough(): + headers = Headers() + resp = wrappers.Response(headers=headers) + assert resp.headers is headers + + +def test_response_304_no_content_length(): + resp = wrappers.Response("Test", status=304) + env = create_environ() + assert "content-length" not in resp.get_wsgi_headers(env) + + +def test_ranges(): + # basic range stuff + req = wrappers.Request.from_values() + assert req.range is None + req = wrappers.Request.from_values(headers={"Range": "bytes=0-499"}) + assert req.range.ranges == [(0, 500)] + + resp = wrappers.Response() + resp.content_range = req.range.make_content_range(1000) + assert resp.content_range.units == "bytes" + assert 
resp.content_range.start == 0 + assert resp.content_range.stop == 500 + assert resp.content_range.length == 1000 + assert resp.headers["Content-Range"] == "bytes 0-499/1000" + + resp.content_range.unset() + assert "Content-Range" not in resp.headers + + resp.headers["Content-Range"] = "bytes 0-499/1000" + assert resp.content_range.units == "bytes" + assert resp.content_range.start == 0 + assert resp.content_range.stop == 500 + assert resp.content_range.length == 1000 + + +def test_csp(): + resp = wrappers.Response() + resp.content_security_policy.default_src = "'self'" + assert resp.headers["Content-Security-Policy"] == "default-src 'self'" + resp.content_security_policy.script_src = "'self' palletsprojects.com" + assert ( + resp.headers["Content-Security-Policy"] + == "default-src 'self'; script-src 'self' palletsprojects.com" + ) + + resp.content_security_policy = None + assert "Content-Security-Policy" not in resp.headers + + +def test_auto_content_length(): + resp = wrappers.Response("Hello World!") + assert resp.content_length == 12 + + resp = wrappers.Response(["Hello World!"]) + assert resp.content_length is None + assert resp.get_wsgi_headers({})["Content-Length"] == "12" + + +def test_stream_content_length(): + resp = wrappers.Response() + resp.stream.writelines(["foo", "bar", "baz"]) + assert resp.get_wsgi_headers({})["Content-Length"] == "9" + + resp = wrappers.Response() + resp.make_conditional({"REQUEST_METHOD": "GET"}) + resp.stream.writelines(["foo", "bar", "baz"]) + assert resp.get_wsgi_headers({})["Content-Length"] == "9" + + resp = wrappers.Response("foo") + resp.stream.writelines(["bar", "baz"]) + assert resp.get_wsgi_headers({})["Content-Length"] == "9" + + +def test_disabled_auto_content_length(): + class MyResponse(wrappers.Response): + automatically_set_content_length = False + + resp = MyResponse("Hello World!") + assert resp.content_length is None + + resp = MyResponse(["Hello World!"]) + assert resp.content_length is None + assert 
"Content-Length" not in resp.get_wsgi_headers({}) + + resp = MyResponse() + resp.make_conditional({"REQUEST_METHOD": "GET"}) + assert resp.content_length is None + assert "Content-Length" not in resp.get_wsgi_headers({}) + + +@pytest.mark.parametrize( + ("auto", "location", "expect"), + ( + (False, "/test", "/test"), + (True, "/test", "http://localhost/test"), + (True, "test", "http://localhost/a/b/test"), + (True, "./test", "http://localhost/a/b/test"), + (True, "../test", "http://localhost/a/test"), + ), +) +def test_location_header_autocorrect(monkeypatch, auto, location, expect): + monkeypatch.setattr(wrappers.Response, "autocorrect_location_header", auto) + env = create_environ("/a/b/c") + resp = wrappers.Response("Hello World!") + resp.headers["Location"] = location + assert resp.get_wsgi_headers(env)["Location"] == expect + + +def test_204_and_1XX_response_has_no_content_length(): + response = wrappers.Response(status=204) + assert response.content_length is None + + headers = response.get_wsgi_headers(create_environ()) + assert "Content-Length" not in headers + + response = wrappers.Response(status=100) + assert response.content_length is None + + headers = response.get_wsgi_headers(create_environ()) + assert "Content-Length" not in headers + + +def test_malformed_204_response_has_no_content_length(): + # flask-restful can generate a malformed response when doing `return '', 204` + response = wrappers.Response(status=204) + response.set_data(b"test") + assert response.content_length == 4 + + env = create_environ() + app_iter, status, headers = response.get_wsgi_response(env) + assert status == "204 NO CONTENT" + assert "Content-Length" not in headers + assert b"".join(app_iter) == b"" # ensure data will not be sent + + +def test_modified_url_encoding(): + class ModifiedRequest(wrappers.Request): + url_charset = "euc-kr" + + req = ModifiedRequest.from_values(query_string={"foo": "정상처리"}, charset="euc-kr") + assert req.args["foo"] == "정상처리" + + +def 
test_request_method_case_sensitivity(): + req = wrappers.Request( + {"REQUEST_METHOD": "get", "SERVER_NAME": "eggs", "SERVER_PORT": "80"} + ) + assert req.method == "GET" + + +def test_write_length(): + response = wrappers.Response() + length = response.stream.write(b"bar") + assert length == 3 + + +def test_stream_zip(): + import zipfile + + response = wrappers.Response() + with contextlib.closing(zipfile.ZipFile(response.stream, mode="w")) as z: + z.writestr("foo", b"bar") + + buffer = BytesIO(response.get_data()) + with contextlib.closing(zipfile.ZipFile(buffer, mode="r")) as z: + assert z.namelist() == ["foo"] + assert z.read("foo") == b"bar" + + +class TestSetCookie: + def test_secure(self): + response = wrappers.Response() + response.set_cookie( + "foo", + value="bar", + max_age=60, + expires=0, + path="/blub", + domain="example.org", + secure=True, + samesite=None, + ) + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + "foo=bar; Domain=example.org;" + " Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=60;" + " Secure; Path=/blub", + ), + ] + + def test_httponly(self): + response = wrappers.Response() + response.set_cookie( + "foo", + value="bar", + max_age=60, + expires=0, + path="/blub", + domain="example.org", + secure=False, + httponly=True, + samesite=None, + ) + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + "foo=bar; Domain=example.org;" + " Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=60;" + " HttpOnly; Path=/blub", + ), + ] + + def test_secure_and_httponly(self): + response = wrappers.Response() + response.set_cookie( + "foo", + value="bar", + max_age=60, + expires=0, + path="/blub", + domain="example.org", + secure=True, + httponly=True, + samesite=None, + ) + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + "foo=bar; Domain=example.org;" + " 
Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=60;" + " Secure; HttpOnly; Path=/blub", + ), + ] + + def test_samesite(self): + response = wrappers.Response() + response.set_cookie( + "foo", + value="bar", + max_age=60, + expires=0, + path="/blub", + domain="example.org", + secure=False, + samesite="strict", + ) + assert response.headers.to_wsgi_list() == [ + ("Content-Type", "text/plain; charset=utf-8"), + ( + "Set-Cookie", + "foo=bar; Domain=example.org;" + " Expires=Thu, 01 Jan 1970 00:00:00 GMT; Max-Age=60;" + " Path=/blub; SameSite=Strict", + ), + ] + + +class TestJSON: + def test_request(self): + value = {"ä": "b"} + request = wrappers.Request.from_values(json=value) + assert request.json == value + assert request.get_data() + + def test_response(self): + value = {"ä": "b"} + response = wrappers.Response( + response=json.dumps(value), content_type="application/json" + ) + assert response.json == value + + def test_bad_content_type(self): + value = [1, 2, 3] + request = wrappers.Request.from_values(json=value, content_type="text/plain") + + with pytest.raises(BadRequest): + request.get_json() + + assert request.get_json(silent=True) is None + assert request.get_json(force=True) == value + + def test_bad_data(self): + request = wrappers.Request.from_values( + data=b'{"a":}', content_type="application/json" + ) + assert request.get_json(silent=True) is None + + with pytest.raises(BadRequest): + request.get_json() + + def test_cache_disabled(self): + value = [1, 2, 3] + request = wrappers.Request.from_values(json=value) + assert request.get_json(cache=False) == [1, 2, 3] + assert not request.get_data() + + with pytest.raises(BadRequest): + request.get_json() + + +def test_response_coop(): + response = wrappers.Response("Hello World") + assert response.cross_origin_opener_policy is COOP.UNSAFE_NONE + response.cross_origin_opener_policy = COOP.SAME_ORIGIN + assert response.headers["Cross-Origin-Opener-Policy"] == "same-origin" + + +def test_response_coep(): + 
response = wrappers.Response("Hello World") + assert response.cross_origin_embedder_policy is COEP.UNSAFE_NONE + response.cross_origin_embedder_policy = COEP.REQUIRE_CORP + assert response.headers["Cross-Origin-Embedder-Policy"] == "require-corp" diff --git a/tests/test_wsgi.py b/tests/test_wsgi.py new file mode 100644 index 0000000..b0f71bc --- /dev/null +++ b/tests/test_wsgi.py @@ -0,0 +1,419 @@ +import io +import json +import os + +import pytest + +from werkzeug import wsgi +from werkzeug.exceptions import BadRequest +from werkzeug.exceptions import ClientDisconnected +from werkzeug.test import Client +from werkzeug.test import create_environ +from werkzeug.test import run_wsgi_app +from werkzeug.wrappers import Response +from werkzeug.wsgi import _RangeWrapper +from werkzeug.wsgi import ClosingIterator +from werkzeug.wsgi import wrap_file + + +@pytest.mark.parametrize( + ("environ", "expect"), + ( + pytest.param({"HTTP_HOST": "spam"}, "spam", id="host"), + pytest.param({"HTTP_HOST": "spam:80"}, "spam", id="host, strip http port"), + pytest.param( + {"wsgi.url_scheme": "https", "HTTP_HOST": "spam:443"}, + "spam", + id="host, strip https port", + ), + pytest.param({"HTTP_HOST": "spam:8080"}, "spam:8080", id="host, custom port"), + pytest.param( + {"HTTP_HOST": "spam", "SERVER_NAME": "eggs", "SERVER_PORT": "80"}, + "spam", + id="prefer host", + ), + pytest.param( + {"SERVER_NAME": "eggs", "SERVER_PORT": "80"}, + "eggs", + id="name, ignore http port", + ), + pytest.param( + {"wsgi.url_scheme": "https", "SERVER_NAME": "eggs", "SERVER_PORT": "443"}, + "eggs", + id="name, ignore https port", + ), + pytest.param( + {"SERVER_NAME": "eggs", "SERVER_PORT": "8080"}, + "eggs:8080", + id="name, custom port", + ), + pytest.param( + {"HTTP_HOST": "ham", "HTTP_X_FORWARDED_HOST": "eggs"}, + "ham", + id="ignore x-forwarded-host", + ), + ), +) +def test_get_host(environ, expect): + environ.setdefault("wsgi.url_scheme", "http") + assert wsgi.get_host(environ) == expect + + +def 
test_get_host_validate_trusted_hosts(): + env = {"SERVER_NAME": "example.org", "SERVER_PORT": "80", "wsgi.url_scheme": "http"} + assert wsgi.get_host(env, trusted_hosts=[".example.org"]) == "example.org" + pytest.raises(BadRequest, wsgi.get_host, env, trusted_hosts=["example.com"]) + env["SERVER_PORT"] = "8080" + assert wsgi.get_host(env, trusted_hosts=[".example.org:8080"]) == "example.org:8080" + pytest.raises(BadRequest, wsgi.get_host, env, trusted_hosts=[".example.com"]) + env = {"HTTP_HOST": "example.org", "wsgi.url_scheme": "http"} + assert wsgi.get_host(env, trusted_hosts=[".example.org"]) == "example.org" + pytest.raises(BadRequest, wsgi.get_host, env, trusted_hosts=["example.com"]) + + +def test_responder(): + def foo(environ, start_response): + return Response(b"Test") + + client = Client(wsgi.responder(foo)) + response = client.get("/") + assert response.status_code == 200 + assert response.data == b"Test" + + +def test_path_info_and_script_name_fetching(): + env = create_environ("/\N{SNOWMAN}", "http://example.com/\N{COMET}/") + assert wsgi.get_path_info(env) == "/\N{SNOWMAN}" + assert wsgi.get_path_info(env, charset=None) == "/\N{SNOWMAN}".encode() + + +def test_limited_stream(): + class RaisingLimitedStream(wsgi.LimitedStream): + def on_exhausted(self): + raise BadRequest("input stream exhausted") + + io_ = io.BytesIO(b"123456") + stream = RaisingLimitedStream(io_, 3) + assert stream.read() == b"123" + pytest.raises(BadRequest, stream.read) + + io_ = io.BytesIO(b"123456") + stream = RaisingLimitedStream(io_, 3) + assert stream.tell() == 0 + assert stream.read(1) == b"1" + assert stream.tell() == 1 + assert stream.read(1) == b"2" + assert stream.tell() == 2 + assert stream.read(1) == b"3" + assert stream.tell() == 3 + pytest.raises(BadRequest, stream.read) + + io_ = io.BytesIO(b"123456\nabcdefg") + stream = wsgi.LimitedStream(io_, 9) + assert stream.readline() == b"123456\n" + assert stream.readline() == b"ab" + + io_ = io.BytesIO(b"123456\nabcdefg") + 
stream = wsgi.LimitedStream(io_, 9) + assert stream.readlines() == [b"123456\n", b"ab"] + + io_ = io.BytesIO(b"123456\nabcdefg") + stream = wsgi.LimitedStream(io_, 9) + assert stream.readlines(2) == [b"12"] + assert stream.readlines(2) == [b"34"] + assert stream.readlines() == [b"56\n", b"ab"] + + io_ = io.BytesIO(b"123456\nabcdefg") + stream = wsgi.LimitedStream(io_, 9) + assert stream.readline(100) == b"123456\n" + + io_ = io.BytesIO(b"123456\nabcdefg") + stream = wsgi.LimitedStream(io_, 9) + assert stream.readlines(100) == [b"123456\n", b"ab"] + + io_ = io.BytesIO(b"123456") + stream = wsgi.LimitedStream(io_, 3) + assert stream.read(1) == b"1" + assert stream.read(1) == b"2" + assert stream.read() == b"3" + assert stream.read() == b"" + + io_ = io.BytesIO(b"123456") + stream = wsgi.LimitedStream(io_, 3) + assert stream.read(-1) == b"123" + + io_ = io.BytesIO(b"123456") + stream = wsgi.LimitedStream(io_, 0) + assert stream.read(-1) == b"" + + io_ = io.StringIO("123456") + stream = wsgi.LimitedStream(io_, 0) + assert stream.read(-1) == "" + + io_ = io.StringIO("123\n456\n") + stream = wsgi.LimitedStream(io_, 8) + assert list(stream) == ["123\n", "456\n"] + + +def test_limited_stream_json_load(): + stream = wsgi.LimitedStream(io.BytesIO(b'{"hello": "test"}'), 17) + # flask.json adapts bytes to text with TextIOWrapper + # this expects stream.readable() to exist and return true + stream = io.TextIOWrapper(io.BufferedReader(stream), "UTF-8") + data = json.load(stream) + assert data == {"hello": "test"} + + +def test_limited_stream_disconnection(): + io_ = io.BytesIO(b"A bit of content") + + # disconnect detection on out of bytes + stream = wsgi.LimitedStream(io_, 255) + with pytest.raises(ClientDisconnected): + stream.read() + + # disconnect detection because file close + io_ = io.BytesIO(b"x" * 255) + io_.close() + stream = wsgi.LimitedStream(io_, 255) + with pytest.raises(ClientDisconnected): + stream.read() + + +def test_get_host_fallback(): + assert ( + 
wsgi.get_host( + { + "SERVER_NAME": "foobar.example.com", + "wsgi.url_scheme": "http", + "SERVER_PORT": "80", + } + ) + == "foobar.example.com" + ) + assert ( + wsgi.get_host( + { + "SERVER_NAME": "foobar.example.com", + "wsgi.url_scheme": "http", + "SERVER_PORT": "81", + } + ) + == "foobar.example.com:81" + ) + + +def test_get_current_url_unicode(): + env = create_environ(query_string="foo=bar&baz=blah&meh=\xcf") + rv = wsgi.get_current_url(env) + assert rv == "http://localhost/?foo=bar&baz=blah&meh=\xcf" + + +def test_get_current_url_invalid_utf8(): + env = create_environ() + # set the query string *after* wsgi dance, so \xcf is invalid + env["QUERY_STRING"] = "foo=bar&baz=blah&meh=\xcf" + rv = wsgi.get_current_url(env) + # it remains percent-encoded + assert rv == "http://localhost/?foo=bar&baz=blah&meh=%CF" + + +def test_multi_part_line_breaks(): + data = "abcdef\r\nghijkl\r\nmnopqrstuvwxyz\r\nABCDEFGHIJK" + test_stream = io.StringIO(data) + lines = list(wsgi.make_line_iter(test_stream, limit=len(data), buffer_size=16)) + assert lines == ["abcdef\r\n", "ghijkl\r\n", "mnopqrstuvwxyz\r\n", "ABCDEFGHIJK"] + + data = "abc\r\nThis line is broken by the buffer length.\r\nFoo bar baz" + test_stream = io.StringIO(data) + lines = list(wsgi.make_line_iter(test_stream, limit=len(data), buffer_size=24)) + assert lines == [ + "abc\r\n", + "This line is broken by the buffer length.\r\n", + "Foo bar baz", + ] + + +def test_multi_part_line_breaks_bytes(): + data = b"abcdef\r\nghijkl\r\nmnopqrstuvwxyz\r\nABCDEFGHIJK" + test_stream = io.BytesIO(data) + lines = list(wsgi.make_line_iter(test_stream, limit=len(data), buffer_size=16)) + assert lines == [ + b"abcdef\r\n", + b"ghijkl\r\n", + b"mnopqrstuvwxyz\r\n", + b"ABCDEFGHIJK", + ] + + data = b"abc\r\nThis line is broken by the buffer length.\r\nFoo bar baz" + test_stream = io.BytesIO(data) + lines = list(wsgi.make_line_iter(test_stream, limit=len(data), buffer_size=24)) + assert lines == [ + b"abc\r\n", + b"This line is broken by 
the buffer length.\r\n", + b"Foo bar baz", + ] + + +def test_multi_part_line_breaks_problematic(): + data = "abc\rdef\r\nghi" + for _ in range(1, 10): + test_stream = io.StringIO(data) + lines = list(wsgi.make_line_iter(test_stream, limit=len(data), buffer_size=4)) + assert lines == ["abc\r", "def\r\n", "ghi"] + + +def test_iter_functions_support_iterators(): + data = ["abcdef\r\nghi", "jkl\r\nmnopqrstuvwxyz\r", "\nABCDEFGHIJK"] + lines = list(wsgi.make_line_iter(data)) + assert lines == ["abcdef\r\n", "ghijkl\r\n", "mnopqrstuvwxyz\r\n", "ABCDEFGHIJK"] + + +def test_make_chunk_iter(): + data = ["abcdefXghi", "jklXmnopqrstuvwxyzX", "ABCDEFGHIJK"] + rv = list(wsgi.make_chunk_iter(data, "X")) + assert rv == ["abcdef", "ghijkl", "mnopqrstuvwxyz", "ABCDEFGHIJK"] + + data = "abcdefXghijklXmnopqrstuvwxyzXABCDEFGHIJK" + test_stream = io.StringIO(data) + rv = list(wsgi.make_chunk_iter(test_stream, "X", limit=len(data), buffer_size=4)) + assert rv == ["abcdef", "ghijkl", "mnopqrstuvwxyz", "ABCDEFGHIJK"] + + +def test_make_chunk_iter_bytes(): + data = [b"abcdefXghi", b"jklXmnopqrstuvwxyzX", b"ABCDEFGHIJK"] + rv = list(wsgi.make_chunk_iter(data, "X")) + assert rv == [b"abcdef", b"ghijkl", b"mnopqrstuvwxyz", b"ABCDEFGHIJK"] + + data = b"abcdefXghijklXmnopqrstuvwxyzXABCDEFGHIJK" + test_stream = io.BytesIO(data) + rv = list(wsgi.make_chunk_iter(test_stream, "X", limit=len(data), buffer_size=4)) + assert rv == [b"abcdef", b"ghijkl", b"mnopqrstuvwxyz", b"ABCDEFGHIJK"] + + data = b"abcdefXghijklXmnopqrstuvwxyzXABCDEFGHIJK" + test_stream = io.BytesIO(data) + rv = list( + wsgi.make_chunk_iter( + test_stream, "X", limit=len(data), buffer_size=4, cap_at_buffer=True + ) + ) + assert rv == [ + b"abcd", + b"ef", + b"ghij", + b"kl", + b"mnop", + b"qrst", + b"uvwx", + b"yz", + b"ABCD", + b"EFGH", + b"IJK", + ] + + +def test_lines_longer_buffer_size(): + data = "1234567890\n1234567890\n" + for bufsize in range(1, 15): + lines = list( + wsgi.make_line_iter(io.StringIO(data), limit=len(data), 
buffer_size=bufsize) + ) + assert lines == ["1234567890\n", "1234567890\n"] + + +def test_lines_longer_buffer_size_cap(): + data = "1234567890\n1234567890\n" + for bufsize in range(1, 15): + lines = list( + wsgi.make_line_iter( + io.StringIO(data), + limit=len(data), + buffer_size=bufsize, + cap_at_buffer=True, + ) + ) + assert len(lines[0]) == bufsize or lines[0].endswith("\n") + + +def test_range_wrapper(): + response = Response(b"Hello World") + range_wrapper = _RangeWrapper(response.response, 6, 4) + assert next(range_wrapper) == b"Worl" + + response = Response(b"Hello World") + range_wrapper = _RangeWrapper(response.response, 1, 0) + with pytest.raises(StopIteration): + next(range_wrapper) + + response = Response(b"Hello World") + range_wrapper = _RangeWrapper(response.response, 6, 100) + assert next(range_wrapper) == b"World" + + response = Response(x for x in (b"He", b"ll", b"o ", b"Wo", b"rl", b"d")) + range_wrapper = _RangeWrapper(response.response, 6, 4) + assert not range_wrapper.seekable + assert next(range_wrapper) == b"Wo" + assert next(range_wrapper) == b"rl" + + response = Response(x for x in (b"He", b"ll", b"o W", b"o", b"rld")) + range_wrapper = _RangeWrapper(response.response, 6, 4) + assert next(range_wrapper) == b"W" + assert next(range_wrapper) == b"o" + assert next(range_wrapper) == b"rl" + with pytest.raises(StopIteration): + next(range_wrapper) + + response = Response(x for x in (b"Hello", b" World")) + range_wrapper = _RangeWrapper(response.response, 1, 1) + assert next(range_wrapper) == b"e" + with pytest.raises(StopIteration): + next(range_wrapper) + + resources = os.path.join(os.path.dirname(__file__), "res") + env = create_environ() + with open(os.path.join(resources, "test.txt"), "rb") as f: + response = Response(wrap_file(env, f)) + range_wrapper = _RangeWrapper(response.response, 1, 2) + assert range_wrapper.seekable + assert next(range_wrapper) == b"OU" + with pytest.raises(StopIteration): + next(range_wrapper) + + with 
open(os.path.join(resources, "test.txt"), "rb") as f: + response = Response(wrap_file(env, f)) + range_wrapper = _RangeWrapper(response.response, 2) + assert next(range_wrapper) == b"UND\n" + with pytest.raises(StopIteration): + next(range_wrapper) + + +def test_closing_iterator(): + class Namespace: + got_close = False + got_additional = False + + class Response: + def __init__(self, environ, start_response): + self.start = start_response + + # Return a generator instead of making the object its own + # iterator. This ensures that ClosingIterator calls close on + # the iterable (the object), not the iterator. + def __iter__(self): + self.start("200 OK", [("Content-Type", "text/plain")]) + yield "some content" + + def close(self): + Namespace.got_close = True + + def additional(): + Namespace.got_additional = True + + def app(environ, start_response): + return ClosingIterator(Response(environ, start_response), additional) + + app_iter, status, headers = run_wsgi_app(app, create_environ(), buffered=True) + + assert "".join(app_iter) == "some content" + assert Namespace.got_close + assert Namespace.got_additional diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..056ca0d --- /dev/null +++ b/tox.ini @@ -0,0 +1,24 @@ +[tox] +envlist = + py3{11,10,9,8,7},pypy3{8,7} + style + typing + docs +skip_missing_interpreters = true + +[testenv] +deps = -r requirements/tests.txt +commands = pytest -v --tb=short --basetemp={envtmpdir} {posargs} + +[testenv:style] +deps = pre-commit +skip_install = true +commands = pre-commit run --all-files --show-diff-on-failure + +[testenv:typing] +deps = -r requirements/typing.txt +commands = mypy + +[testenv:docs] +deps = -r requirements/docs.txt +commands = sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html

    You should be redirected automatically to the target URL: " + b'/f\xc3\xbc\xc3\xbcb\xc3\xa4r. ' + b"If not, click the link.\n" + ) + + resp = utils.redirect("http://☃.net/", 307) + assert resp.headers["Location"] == "http://xn--n3h.net/" + assert resp.status_code == 307 + assert resp.get_data() == ( + b"\n" + b"\n" + b"Redirecting...\n" + b"