cpython/Doc/tools/extensions/grammar_snippet.py

262 lines
9.1 KiB
Python

"""Support for documenting Python's grammar."""
from __future__ import annotations
import re
from typing import TYPE_CHECKING
from docutils import nodes
from docutils.parsers.rst import directives
from sphinx import addnodes
from sphinx.domains.std import token_xrefs
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import make_id
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Sequence
from typing import Any, Final
from docutils.nodes import Node
from sphinx.application import Sphinx
from sphinx.util.typing import ExtensionMetadata
class snippet_string_node(nodes.inline): # noqa: N801 (snake_case is fine)
"""Node for a string literal in a grammar snippet."""
def __init__(
self,
rawsource: str = '',
text: str = '',
*children: Node,
**attributes: Any,
) -> None:
super().__init__(rawsource, text, *children, **attributes)
# Use the Pygments highlight class for `Literal.String.Other`
self['classes'].append('sx')
class GrammarSnippetBase(SphinxDirective):
"""Common functionality for GrammarSnippetDirective & CompatProductionList."""
# The option/argument handling is left to the individual classes.
grammar_re: Final = re.compile(
r"""
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
(?=:) # ... followed by a colon
|
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
|
(?P<single_quoted>'[^']*') # string in 'quotes'
|
(?P<double_quoted>"[^"]*") # string in "quotes"
""",
re.VERBOSE,
)
def make_grammar_snippet(
self, options: dict[str, Any], content: Sequence[str]
) -> list[addnodes.productionlist]:
"""Create a literal block from options & content."""
group_name = options['group']
node_location = self.get_location()
production_nodes = []
for rawsource, production_defs in self.production_definitions(content):
production = self.make_production(
rawsource,
production_defs,
group_name=group_name,
location=node_location,
)
production_nodes.append(production)
node = addnodes.productionlist(
'',
*production_nodes,
support_smartquotes=False,
classes=['highlight'],
)
self.set_source_info(node)
return [node]
def production_definitions(
self, lines: Iterable[str], /
) -> Iterator[tuple[str, list[tuple[str, str]]]]:
"""Yield pairs of rawsource and production content dicts."""
production_lines: list[str] = []
production_content: list[tuple[str, str]] = []
for line in lines:
# If this line is the start of a new rule (text in the column 1),
# emit the current production and start a new one.
if not line[:1].isspace():
rawsource = '\n'.join(production_lines)
production_lines.clear()
if production_content:
yield rawsource, production_content
production_content = []
# Append the current line for the raw source
production_lines.append(line)
# Parse the line into constituent parts
last_pos = 0
for match in self.grammar_re.finditer(line):
# Handle text between matches
if match.start() > last_pos:
unmatched_text = line[last_pos : match.start()]
production_content.append(('text', unmatched_text))
last_pos = match.end()
# Handle matches.
# After filtering None (non-matches), exactly one groupdict()
# entry should remain.
[(re_group_name, content)] = (
(re_group_name, content)
for re_group_name, content in match.groupdict().items()
if content is not None
)
production_content.append((re_group_name, content))
production_content.append(('text', line[last_pos:] + '\n'))
# Emit the final production
if production_content:
rawsource = '\n'.join(production_lines)
yield rawsource, production_content
def make_production(
self,
rawsource: str,
production_defs: list[tuple[str, str]],
*,
group_name: str,
location: str,
) -> addnodes.production:
"""Create a production node from a list of parts."""
production_node = addnodes.production(rawsource)
for re_group_name, content in production_defs:
match re_group_name:
case 'rule_name':
production_node += self.make_name_target(
name=content,
production_group=group_name,
location=location,
)
case 'rule_ref':
production_node += token_xrefs(content, group_name)
case 'single_quoted' | 'double_quoted':
production_node += snippet_string_node('', content)
case 'text':
production_node += nodes.Text(content)
case _:
raise ValueError(f'unhandled match: {re_group_name!r}')
return production_node
def make_name_target(
self,
*,
name: str,
production_group: str,
location: str,
) -> addnodes.literal_strong:
"""Make a link target for the given production."""
# Cargo-culted magic to make `name_node` a link target
# similar to Sphinx `production`.
# This needs to be the same as what Sphinx does
# to avoid breaking existing links.
name_node = addnodes.literal_strong(name, name)
prefix = f'grammar-token-{production_group}'
node_id = make_id(self.env, self.state.document, prefix, name)
name_node['ids'].append(node_id)
self.state.document.note_implicit_target(name_node, name_node)
obj_name = f'{production_group}:{name}' if production_group else name
std = self.env.domains.standard_domain
std.note_object('token', obj_name, node_id, location=location)
return name_node
class GrammarSnippetDirective(GrammarSnippetBase):
"""Transform a grammar-snippet directive to a Sphinx literal_block
That is, turn something like:
.. grammar-snippet:: file
:group: python-grammar
file: (NEWLINE | statement)*
into something similar to Sphinx productionlist, but better suited
for our needs:
- Instead of `::=`, use a colon, as in `Grammar/python.gram`
- Show the listing almost as is, with no auto-aligment.
The only special character is the backtick, which marks tokens.
Unlike Sphinx's productionlist, this directive supports options.
The "group" must be given as a named option.
The content must be preceded by a blank line (like with most ReST
directives).
"""
has_content = True
option_spec = {
'group': directives.unchanged_required,
}
# We currently ignore arguments.
required_arguments = 0
optional_arguments = 1
final_argument_whitespace = True
def run(self) -> list[addnodes.productionlist]:
return self.make_grammar_snippet(self.options, self.content)
class CompatProductionList(GrammarSnippetBase):
"""Create grammar snippets from reST productionlist syntax
This is intended to be a transitional directive, used while we switch
from productionlist to grammar-snippet.
It makes existing docs that use the ReST syntax look like grammar-snippet,
as much as possible.
"""
has_content = False
required_arguments = 1
optional_arguments = 0
final_argument_whitespace = True
option_spec = {}
def run(self) -> list[addnodes.productionlist]:
# The "content" of a productionlist is actually the first and only
# argument. The first line is the group; the rest is the content lines.
lines = self.arguments[0].splitlines()
group = lines[0].strip()
options = {'group': group}
# We assume there's a colon in each line; align on it.
align_column = max(line.index(':') for line in lines[1:]) + 1
content = []
for line in lines[1:]:
rule_name, _colon, text = line.partition(':')
rule_name = rule_name.strip()
if rule_name:
name_part = rule_name + ':'
else:
name_part = ''
content.append(f'{name_part:<{align_column}}{text}')
return self.make_grammar_snippet(options, content)
def setup(app: Sphinx) -> ExtensionMetadata:
app.add_directive('grammar-snippet', GrammarSnippetDirective)
app.add_directive_to_domain(
'std', 'productionlist', CompatProductionList, override=True
)
return {
'version': '1.0',
'parallel_read_safe': True,
'parallel_write_safe': True,
}