bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565)

* bpo-43926: Cleaner metadata with PEP 566 JSON support.

* Add blurb

* Add versionchanged and versionadded declarations for changes to metadata.

* Use descriptor for PEP 566
This commit is contained in:
Jason R. Coombs 2021-05-02 17:03:40 -04:00 committed by GitHub
parent 0ad1e0384c
commit 37e0c7850d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 254 additions and 26 deletions

View File

@ -170,6 +170,19 @@ the values are returned unparsed from the distribution metadata::
>>> wheel_metadata['Requires-Python'] # doctest: +SKIP >>> wheel_metadata['Requires-Python'] # doctest: +SKIP
'>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*' '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
``PackageMetadata`` also presents a ``json`` attribute that returns
all the metadata in a JSON-compatible form per :PEP:`566`::
>>> wheel_metadata.json['requires_python']
'>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
.. versionchanged:: 3.10
The ``Description`` is now included in the metadata when presented
through the payload. Line continuation characters have been removed.
.. versionadded:: 3.10
The ``json`` attribute was added.
.. _version: .. _version:

View File

@ -14,6 +14,7 @@
import posixpath import posixpath
import collections import collections
from . import _adapters, _meta
from ._collections import FreezableDefaultDict, Pair from ._collections import FreezableDefaultDict, Pair
from ._functools import method_cache from ._functools import method_cache
from ._itertools import unique_everseen from ._itertools import unique_everseen
@ -22,7 +23,7 @@
from importlib import import_module from importlib import import_module
from importlib.abc import MetaPathFinder from importlib.abc import MetaPathFinder
from itertools import starmap from itertools import starmap
from typing import Any, List, Mapping, Optional, Protocol, TypeVar, Union from typing import List, Mapping, Optional, Union
__all__ = [ __all__ = [
@ -385,25 +386,6 @@ def __repr__(self):
return '<FileHash mode: {} value: {}>'.format(self.mode, self.value) return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
_T = TypeVar("_T")


class PackageMetadata(Protocol):
    """Structural interface expected of a distribution's metadata object."""

    def __len__(self) -> int:
        """Support ``len(metadata)``."""

    def __contains__(self, item: str) -> bool:
        """Support ``item in metadata``."""

    def __getitem__(self, key: str) -> str:
        """Support ``metadata[key]``."""

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return all values associated with a possibly multi-valued key.
        """
class Distribution: class Distribution:
"""A Python distribution package.""" """A Python distribution package."""
@ -488,7 +470,7 @@ def _local(cls, root='.'):
return PathDistribution(zipfile.Path(meta.build_as_zip(builder))) return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))
@property @property
def metadata(self) -> PackageMetadata: def metadata(self) -> _meta.PackageMetadata:
"""Return the parsed metadata for this Distribution. """Return the parsed metadata for this Distribution.
The returned object will have keys that name the various bits of The returned object will have keys that name the various bits of
@ -502,7 +484,7 @@ def metadata(self) -> PackageMetadata:
# (which points to the egg-info file) attribute unchanged. # (which points to the egg-info file) attribute unchanged.
or self.read_text('') or self.read_text('')
) )
return email.message_from_string(text) return _adapters.Message(email.message_from_string(text))
@property @property
def name(self): def name(self):
@ -829,7 +811,7 @@ def distributions(**kwargs):
return Distribution.discover(**kwargs) return Distribution.discover(**kwargs)
def metadata(distribution_name) -> PackageMetadata: def metadata(distribution_name) -> _meta.PackageMetadata:
"""Get the metadata for the named package. """Get the metadata for the named package.
:param distribution_name: The name of the distribution package to query. :param distribution_name: The name of the distribution package to query.

View File

@ -0,0 +1,67 @@
import re
import textwrap
import email.message
from ._text import FoldedCase
class Message(email.message.Message):
    """
    Adapter over an already-parsed ``email.message.Message``.

    On construction the header list is rebuilt so that multi-line values
    are dedented and a non-empty payload is also exposed as a
    ``Description`` header.  The ``json`` property presents the headers
    as a plain dict.
    """

    multiple_use_keys = set(
        map(
            FoldedCase,
            [
                'Classifier',
                'Obsoletes-Dist',
                'Platform',
                'Project-URL',
                'Provides-Dist',
                'Provides-Extra',
                'Requires-Dist',
                'Requires-External',
                'Supported-Platform',
            ],
        )
    )
    """
    Keys that may be indicated multiple times per PEP 566.
    """

    def __new__(cls, orig: email.message.Message):
        """Create an instance that adopts the attribute state of *orig*."""
        res = super().__new__(cls)
        # Copy the parsed state instead of parsing the text a second time.
        vars(res).update(vars(orig))
        return res

    def __init__(self, *args, **kwargs):
        # The base initializer is intentionally not called: the instance
        # state was already populated from the original message in __new__.
        self._headers = self._repair_headers()

    # suppress spurious error from mypy
    def __iter__(self):
        return super().__iter__()

    def _repair_headers(self):
        """
        Return a new list of ``(key, value)`` header pairs with multi-line
        values dedented and, when a payload is present, a trailing
        ``('Description', payload)`` entry.
        """

        def redent(value):
            "Correct for RFC822 indentation"
            if not value or '\n' not in value:
                return value
            # The first line carries no indent; pad it to match the
            # continuation lines so dedent strips a common prefix.
            return textwrap.dedent(' ' * 8 + value)

        headers = [(key, redent(value)) for key, value in vars(self)['_headers']]
        if self._payload:
            headers.append(('Description', self.get_payload()))
        return headers

    @property
    def json(self):
        """
        Convert PackageMetadata to a JSON-compatible format
        per PEP 0566.

        Keys are lower-cased with ``-`` replaced by ``_``.  Keys listed in
        ``multiple_use_keys`` map to a list of all their values, and
        ``Keywords`` maps to a list of whitespace-separated words.
        """

        def transform(key):
            value = self.get_all(key) if key in self.multiple_use_keys else self[key]
            if key == 'Keywords':
                # str.split() ignores leading/trailing whitespace and gives
                # [] for an empty value, so no empty-string keywords leak out.
                value = value.split()
            tk = key.lower().replace('-', '_')
            return tk, value

        return dict(map(transform, map(FoldedCase, self)))

View File

@ -0,0 +1,29 @@
from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union
_T = TypeVar("_T")


class PackageMetadata(Protocol):
    """Structural interface expected of a distribution's metadata object."""

    def __len__(self) -> int:
        """Support ``len(metadata)``."""

    def __contains__(self, item: str) -> bool:
        """Support ``item in metadata``."""

    def __getitem__(self, key: str) -> str:
        """Support ``metadata[key]``."""

    def __iter__(self) -> Iterator[str]:
        """Support iteration over the metadata."""

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return all values associated with a possibly multi-valued key.
        """

    @property
    def json(self) -> Dict[str, Union[str, List[str]]]:
        """
        A JSON-compatible form of the metadata.
        """

View File

@ -0,0 +1,99 @@
import re
from ._functools import method_cache
# from jaraco.text 3.5
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use in_:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False

    All ordering operators ignore case, so they agree with ``==``.

    >>> FoldedCase('hello') <= 'Hello'
    True
    >>> FoldedCase('hello') >= 'HELLO'
    True

    Comparing with a non-string is simply unequal rather than an error.

    >>> FoldedCase('hello') == 42
    False
    >>> FoldedCase('hello') != 42
    True
    """

    # Every comparison returns NotImplemented for non-string operands so
    # that Python applies its normal fallback (unequal for ==/!=, TypeError
    # for ordering) instead of failing on a missing ``lower`` attribute.

    def __lt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() < other.lower()

    def __le__(self, other):
        # Defined explicitly: the inherited str.__le__ is case-sensitive.
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() <= other.lower()

    def __gt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() > other.lower()

    def __ge__(self, other):
        # Defined explicitly: the inherited str.__ge__ is case-sensitive.
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() >= other.lower()

    def __eq__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() == other.lower()

    def __ne__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() != other.lower()

    def __hash__(self):
        # Hash the folded form so values that compare equal hash equal.
        return hash(self.lower())

    def __contains__(self, other):
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    def index(self, sub):
        """Return the index of *sub* within self, ignoring case."""
        return self.lower().index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        """Split on the literal *splitter*, matched case-insensitively."""
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)

View File

@ -1,5 +1,6 @@
import os import os
import sys import sys
import copy
import shutil import shutil
import pathlib import pathlib
import tempfile import tempfile
@ -108,6 +109,16 @@ def setUp(self):
super(DistInfoPkg, self).setUp() super(DistInfoPkg, self).setUp()
build_files(DistInfoPkg.files, self.site_dir) build_files(DistInfoPkg.files, self.site_dir)
def make_uppercase(self):
    """
    Replace the installed dist-info directory with a copy whose
    METADATA text has been upper-cased.
    """
    dist_info = "distinfo_pkg-1.0.0.dist-info"
    shutil.rmtree(self.site_dir / dist_info)
    # Work on a deep copy so the shared class-level definition is untouched.
    uppercased = copy.deepcopy(DistInfoPkg.files)
    uppercased[dist_info]["METADATA"] = uppercased[dist_info]["METADATA"].upper()
    build_files(uppercased, self.site_dir)
class DistInfoPkgWithDot(OnSysPath, SiteDir): class DistInfoPkgWithDot(OnSysPath, SiteDir):
files: FilesDef = { files: FilesDef = {

View File

@ -125,7 +125,7 @@ def pkg_with_non_ascii_description(site_dir):
metadata_dir.mkdir() metadata_dir.mkdir()
metadata = metadata_dir / 'METADATA' metadata = metadata_dir / 'METADATA'
with metadata.open('w', encoding='utf-8') as fp: with metadata.open('w', encoding='utf-8') as fp:
fp.write('Description: pôrˈtend\n') fp.write('Description: pôrˈtend')
return 'portend' return 'portend'
@staticmethod @staticmethod
@ -145,7 +145,7 @@ def pkg_with_non_ascii_description_egg_info(site_dir):
pôrˈtend pôrˈtend
""" """
).lstrip() ).strip()
) )
return 'portend' return 'portend'
@ -157,7 +157,7 @@ def test_metadata_loads(self):
def test_metadata_loads_egg_info(self): def test_metadata_loads_egg_info(self):
pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir) pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir)
meta = metadata(pkg_name) meta = metadata(pkg_name)
assert meta.get_payload() == 'pôrˈtend\n' assert meta['Description'] == 'pôrˈtend'
class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase): class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):

View File

@ -231,6 +231,29 @@ def test_more_complex_deps_requires_text(self):
assert deps == expected assert deps == expected
def test_as_json(self):
    """Check the JSON form of the dist-info package's metadata."""
    as_json = metadata('distinfo-pkg').json
    assert 'name' in as_json
    assert as_json['keywords'] == ['sample', 'package']
    assert as_json['description'].startswith('Once upon a time\nThere was')
    assert len(as_json['requires_dist']) == 2
def test_as_json_egg_info(self):
    """Check the JSON form of the egg-info package's metadata."""
    as_json = metadata('egginfo-pkg').json
    assert 'name' in as_json
    assert as_json['keywords'] == ['sample', 'package']
    assert as_json['description'].startswith('Once upon a time\nThere was')
    assert len(as_json['classifier']) == 2
def test_as_json_odd_case(self):
    """Check the JSON form after the metadata has been rewritten in upper case."""
    self.make_uppercase()
    as_json = metadata('distinfo-pkg').json
    assert 'name' in as_json
    assert len(as_json['requires_dist']) == 2
    assert as_json['keywords'] == ['SAMPLE', 'PACKAGE']
class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase): class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase):
def test_name_normalization(self): def test_name_normalization(self):

View File

@ -0,0 +1,4 @@
In ``importlib.metadata``, provide a uniform interface to ``Description``,
allow for any field to be encoded with multiline values, remove continuation
lines from multiline values, and add a ``.json`` property for easy access to
the PEP 566 JSON-compatible form. Sync with ``importlib_metadata 4.0``.