mirror of https://github.com/python/cpython.git
Final set of changes by Fred before 1.4beta3
This commit is contained in:
parent
d8a6d1c2e7
commit
8206fb9c4c
|
@ -3,6 +3,10 @@ parser.dvi: parser.tex ../../Doc/libparser.tex
|
||||||
|
|
||||||
# Use a new name for this; the included file uses 'clean' already....
|
# Use a new name for this; the included file uses 'clean' already....
|
||||||
clean-parser:
|
clean-parser:
|
||||||
rm -f *.log *.aux *.dvi *.pyc
|
rm -f *.log *.aux *.dvi *.pyc *.ps
|
||||||
|
|
||||||
|
dist:
|
||||||
|
(cd ../..; \
|
||||||
|
tar cf - `cat Demo/parser/FILES` | gzip >parsermodule-1.4.tar.gz)
|
||||||
|
|
||||||
include ../../Doc/Makefile
|
include ../../Doc/Makefile
|
||||||
|
|
|
@ -4,12 +4,29 @@ to the Python Library Reference for more information.
|
||||||
Files:
|
Files:
|
||||||
------
|
------
|
||||||
|
|
||||||
|
FILES -- list of files associated with the parser module.
|
||||||
|
|
||||||
|
README -- this file.
|
||||||
|
|
||||||
example.py -- module that uses the `parser' module to extract
|
example.py -- module that uses the `parser' module to extract
|
||||||
information from the parse tree of Python source
|
information from the parse tree of Python source
|
||||||
code.
|
code.
|
||||||
|
|
||||||
|
docstring.py -- sample source file containing only a module docstring.
|
||||||
|
|
||||||
|
simple.py -- sample source containing a "short form" definition.
|
||||||
|
|
||||||
source.py -- sample source code used to demonstrate ability to
|
source.py -- sample source code used to demonstrate ability to
|
||||||
handle nested constructs easily using the functions
|
handle nested constructs easily using the functions
|
||||||
and classes in example.py.
|
and classes in example.py.
|
||||||
|
|
||||||
|
pprint.py -- function to pretty-print Python values.
|
||||||
|
|
||||||
|
test_parser.py -- program to put the parser module through its paces.
|
||||||
|
|
||||||
|
parser.tex -- LaTeX driver file for formatting the parser module
|
||||||
|
documentation separately from the library reference.
|
||||||
|
|
||||||
|
Makefile -- `make' rule set to format the parser module manual.
|
||||||
|
|
||||||
Enjoy!
|
Enjoy!
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
"""Simple code to extract class & function docstrings from a module.
|
"""Simple code to extract class & function docstrings from a module.
|
||||||
|
|
||||||
|
This code is used as an example in the library reference manual in the
|
||||||
|
section on using the parser module. Refer to the manual for a thorough
|
||||||
|
discussion of the operation of this code.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import symbol
|
import symbol
|
||||||
|
@ -23,12 +25,35 @@ def get_docs(fileName):
|
||||||
return ModuleInfo(tup, basename)
|
return ModuleInfo(tup, basename)
|
||||||
|
|
||||||
|
|
||||||
class DefnInfo:
|
class SuiteInfoBase:
|
||||||
_docstring = ''
|
_docstring = ''
|
||||||
_name = ''
|
_name = ''
|
||||||
|
|
||||||
def __init__(self, tree):
|
def __init__(self, tree = None):
|
||||||
self._name = tree[2][1]
|
self._class_info = {}
|
||||||
|
self._function_info = {}
|
||||||
|
if tree:
|
||||||
|
self._extract_info(tree)
|
||||||
|
|
||||||
|
def _extract_info(self, tree):
|
||||||
|
# extract docstring
|
||||||
|
if len(tree) == 2:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||||
|
else:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||||
|
if found:
|
||||||
|
self._docstring = eval(vars['docstring'])
|
||||||
|
# discover inner definitions
|
||||||
|
for node in tree[1:]:
|
||||||
|
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||||
|
if found:
|
||||||
|
cstmt = vars['compound']
|
||||||
|
if cstmt[0] == symbol.funcdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._function_info[name] = FunctionInfo(cstmt)
|
||||||
|
elif cstmt[0] == symbol.classdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._class_info[name] = ClassInfo(cstmt)
|
||||||
|
|
||||||
def get_docstring(self):
|
def get_docstring(self):
|
||||||
return self._docstring
|
return self._docstring
|
||||||
|
@ -36,38 +61,21 @@ def get_docstring(self):
|
||||||
def get_name(self):
|
def get_name(self):
|
||||||
return self._name
|
return self._name
|
||||||
|
|
||||||
class SuiteInfoBase(DefnInfo):
|
|
||||||
def __init__(self):
|
|
||||||
self._class_info = {}
|
|
||||||
self._function_info = {}
|
|
||||||
|
|
||||||
def get_class_names(self):
|
def get_class_names(self):
|
||||||
return self._class_info.keys()
|
return self._class_info.keys()
|
||||||
|
|
||||||
def get_class_info(self, name):
|
def get_class_info(self, name):
|
||||||
return self._class_info[name]
|
return self._class_info[name]
|
||||||
|
|
||||||
def _extract_info(self, tree):
|
def __getitem__(self, name):
|
||||||
if len(tree) >= 4:
|
try:
|
||||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
return self._class_info[name]
|
||||||
if found:
|
except KeyError:
|
||||||
self._docstring = eval(vars['docstring'])
|
return self._function_info[name]
|
||||||
for node in tree[1:]:
|
|
||||||
if (node[0] == symbol.stmt
|
|
||||||
and node[1][0] == symbol.compound_stmt):
|
|
||||||
if node[1][1][0] == symbol.funcdef:
|
|
||||||
name = node[1][1][2][1]
|
|
||||||
self._function_info[name] = \
|
|
||||||
FunctionInfo(node[1][1])
|
|
||||||
elif node[1][1][0] == symbol.classdef:
|
|
||||||
name = node[1][1][2][1]
|
|
||||||
self._class_info[name] = ClassInfo(node[1][1])
|
|
||||||
|
|
||||||
|
|
||||||
class SuiteInfo(SuiteInfoBase):
|
class SuiteFuncInfo:
|
||||||
def __init__(self, tree):
|
# Mixin class providing access to function names and info.
|
||||||
SuiteInfoBase.__init__(self)
|
|
||||||
self._extract_info(tree)
|
|
||||||
|
|
||||||
def get_function_names(self):
|
def get_function_names(self):
|
||||||
return self._function_info.keys()
|
return self._function_info.keys()
|
||||||
|
@ -76,23 +84,16 @@ def get_function_info(self, name):
|
||||||
return self._function_info[name]
|
return self._function_info[name]
|
||||||
|
|
||||||
|
|
||||||
class FunctionInfo(SuiteInfo):
|
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
|
||||||
def __init__(self, tree):
|
def __init__(self, tree = None):
|
||||||
DefnInfo.__init__(self, tree)
|
self._name = tree[2][1]
|
||||||
suite = tree[-1]
|
SuiteInfoBase.__init__(self, tree and tree[-1] or None)
|
||||||
if len(suite) >= 4:
|
|
||||||
found, vars = match(DOCSTRING_STMT_PATTERN, suite[3])
|
|
||||||
if found:
|
|
||||||
self._docstring = eval(vars['docstring'])
|
|
||||||
SuiteInfoBase.__init__(self)
|
|
||||||
self._extract_info(suite)
|
|
||||||
|
|
||||||
|
|
||||||
class ClassInfo(SuiteInfoBase):
|
class ClassInfo(SuiteInfoBase):
|
||||||
def __init__(self, tree):
|
def __init__(self, tree = None):
|
||||||
SuiteInfoBase.__init__(self)
|
self._name = tree[2][1]
|
||||||
DefnInfo.__init__(self, tree)
|
SuiteInfoBase.__init__(self, tree and tree[-1] or None)
|
||||||
self._extract_info(tree[-1])
|
|
||||||
|
|
||||||
def get_method_names(self):
|
def get_method_names(self):
|
||||||
return self._function_info.keys()
|
return self._function_info.keys()
|
||||||
|
@ -101,19 +102,40 @@ def get_method_info(self, name):
|
||||||
return self._function_info[name]
|
return self._function_info[name]
|
||||||
|
|
||||||
|
|
||||||
class ModuleInfo(SuiteInfo):
|
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
|
||||||
def __init__(self, tree, name="<string>"):
|
def __init__(self, tree = None, name = "<string>"):
|
||||||
self._name = name
|
self._name = name
|
||||||
SuiteInfo.__init__(self, tree)
|
SuiteInfoBase.__init__(self, tree)
|
||||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
|
if tree:
|
||||||
if found:
|
found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
|
||||||
self._docstring = vars["docstring"]
|
if found:
|
||||||
|
self._docstring = vars["docstring"]
|
||||||
|
|
||||||
|
|
||||||
from types import ListType, TupleType
|
from types import ListType, TupleType
|
||||||
|
|
||||||
def match(pattern, data, vars=None):
|
def match(pattern, data, vars=None):
|
||||||
"""
|
"""Match `data' to `pattern', with variable extraction.
|
||||||
|
|
||||||
|
pattern
|
||||||
|
Pattern to match against, possibly containing variables.
|
||||||
|
|
||||||
|
data
|
||||||
|
Data to be checked and against which variables are extracted.
|
||||||
|
|
||||||
|
vars
|
||||||
|
Dictionary of variables which have already been found. If not
|
||||||
|
provided, an empty dictionary is created.
|
||||||
|
|
||||||
|
The `pattern' value may contain variables of the form ['varname'] which
|
||||||
|
are allowed to match anything. The value that is matched is returned as
|
||||||
|
part of a dictionary which maps 'varname' to the matched value. 'varname'
|
||||||
|
is not required to be a string object, but using strings makes patterns
|
||||||
|
and the code which uses them more readable.
|
||||||
|
|
||||||
|
This function returns two values: a boolean indicating whether a match
|
||||||
|
was found and a dictionary mapping variable names to their associated
|
||||||
|
values.
|
||||||
"""
|
"""
|
||||||
if vars is None:
|
if vars is None:
|
||||||
vars = {}
|
vars = {}
|
||||||
|
@ -131,6 +153,15 @@ def match(pattern, data, vars=None):
|
||||||
return same, vars
|
return same, vars
|
||||||
|
|
||||||
|
|
||||||
|
# This pattern identifies compound statements, allowing them to be readily
|
||||||
|
# differentiated from simple statements.
|
||||||
|
#
|
||||||
|
COMPOUND_STMT_PATTERN = (
|
||||||
|
symbol.stmt,
|
||||||
|
(symbol.compound_stmt, ['compound'])
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# This pattern will match a 'stmt' node which *might* represent a docstring;
|
# This pattern will match a 'stmt' node which *might* represent a docstring;
|
||||||
# docstrings require that the statement which provides the docstring be the
|
# docstrings require that the statement which provides the docstring be the
|
||||||
# first statement in the class or function, which this pattern does not check.
|
# first statement in the class or function, which this pattern does not check.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# pprint.py
|
# pprint.py
|
||||||
#
|
#
|
||||||
# Author: Fred L. Drake, Jr.
|
# Author: Fred L. Drake, Jr.
|
||||||
# fdrake@vt.edu
|
# fdrake@cnri.reston.va.us, fdrake@intr.net
|
||||||
#
|
#
|
||||||
# This is a simple little module I wrote to make life easier. I didn't
|
# This is a simple little module I wrote to make life easier. I didn't
|
||||||
# see anything quite like it in the library, though I may have overlooked
|
# see anything quite like it in the library, though I may have overlooked
|
||||||
|
@ -9,35 +9,29 @@
|
||||||
# tuples with fairly non-descriptive content. This is modelled very much
|
# tuples with fairly non-descriptive content. This is modelled very much
|
||||||
# after Lisp/Scheme - style pretty-printing of lists. If you find it
|
# after Lisp/Scheme - style pretty-printing of lists. If you find it
|
||||||
# useful, thank small children who sleep at night.
|
# useful, thank small children who sleep at night.
|
||||||
#
|
|
||||||
|
|
||||||
"""Support to pretty-print lists, tuples, & dictionaries recursively.
|
"""Support to pretty-print lists, tuples, & dictionaries recursively.
|
||||||
Very simple, but at least somewhat useful, especially in debugging
|
Very simple, but useful, especially in debugging data structures.
|
||||||
data structures.
|
|
||||||
|
|
||||||
INDENT_PER_LEVEL -- Amount of indentation to use for each new
|
Constants
|
||||||
recursive level. The default is 1. This
|
---------
|
||||||
must be a non-negative integer, and may be
|
|
||||||
set by the caller before calling pprint().
|
|
||||||
|
|
||||||
MAX_WIDTH -- Maximum width of the display. This is only
|
INDENT_PER_LEVEL
|
||||||
used if the representation *can* be kept
|
Amount of indentation to use for each new recursive level. The
|
||||||
less than MAX_WIDTH characters wide. May
|
default is 1. This must be a non-negative integer, and may be set
|
||||||
be set by the user before calling pprint().
|
by the caller before calling pprint().
|
||||||
|
|
||||||
TAB_WIDTH -- The width represented by a single tab. This
|
MAX_WIDTH
|
||||||
value is typically 8, but 4 is the default
|
Maximum width of the display. This is only used if the
|
||||||
under MacOS. Can be changed by the user if
|
representation *can* be kept less than MAX_WIDTH characters wide.
|
||||||
desired, but is probably not a good idea.
|
May be set by the user before calling pprint().
|
||||||
|
|
||||||
pprint(seq [, stream]) -- The pretty-printer. This takes a Python
|
TAB_WIDTH
|
||||||
object (presumably a sequence, but that
|
The width represented by a single tab. This value is typically 8,
|
||||||
doesn't matter) and an optional output
|
but 4 is the default under MacOS. Can be changed by the user if
|
||||||
stream. See the function documentation
|
desired, but is probably not a good idea.
|
||||||
for details.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
INDENT_PER_LEVEL = 1
|
INDENT_PER_LEVEL = 1
|
||||||
|
|
||||||
MAX_WIDTH = 80
|
MAX_WIDTH = 80
|
||||||
|
@ -46,46 +40,45 @@
|
||||||
TAB_WIDTH = (os.name == 'mac' and 4) or 8
|
TAB_WIDTH = (os.name == 'mac' and 4) or 8
|
||||||
del os
|
del os
|
||||||
|
|
||||||
|
from types import DictType, ListType, TupleType
|
||||||
|
|
||||||
|
|
||||||
def _indentation(cols):
|
def _indentation(cols):
|
||||||
"Create tabbed indentation string COLS columns wide."
|
"""Create tabbed indentation string.
|
||||||
|
|
||||||
# This is used to reduce the byte-count for the output, allowing
|
|
||||||
# files created using this module to use as little external storage
|
|
||||||
# as possible. This is primarily intended to minimize impact on
|
|
||||||
# a user's quota when storing resource files, or for creating output
|
|
||||||
# intended for transmission.
|
|
||||||
|
|
||||||
|
cols
|
||||||
|
Width of the indentation, in columns.
|
||||||
|
"""
|
||||||
return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ')
|
return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def pprint(seq, stream = None, indent = 0, allowance = 0):
|
def pprint(seq, stream = None, indent = 0, allowance = 0):
|
||||||
"""Pretty-print a list, tuple, or dictionary.
|
"""Pretty-print a list, tuple, or dictionary.
|
||||||
|
|
||||||
pprint(seq [, stream]) ==> None
|
seq
|
||||||
|
List, tuple, or dictionary object to be pretty-printed. Other
|
||||||
|
object types are permitted but are not specially interpreted.
|
||||||
|
|
||||||
If STREAM is provided, output is written to that stream, otherwise
|
stream
|
||||||
sys.stdout is used. Indentation is done according to
|
Output stream. If not provided, `sys.stdout' is used. This
|
||||||
INDENT_PER_LEVEL, which may be set to any non-negative integer
|
parameter must support the `write()' method with a single
|
||||||
before calling this function. The output written on the stream is
|
parameter, which will always be a string. It may be a
|
||||||
a perfectly valid representation of the Python object passed in,
|
`StringIO.StringIO' object if the result is needed as a
|
||||||
with indentation to suite human-readable interpretation. The
|
string.
|
||||||
output can be used as input without error, given readable
|
|
||||||
representations of all sequence elements are available via repr().
|
Indentation is done according to `INDENT_PER_LEVEL', which may be
|
||||||
Output is restricted to MAX_WIDTH columns where possible. The
|
set to any non-negative integer before calling this function. The
|
||||||
STREAM parameter must support the write() method with a single
|
output written on the stream is a perfectly valid representation
|
||||||
parameter, which will always be a string. The output stream may be
|
of the Python object passed in, with indentation to assist
|
||||||
a StringIO.StringIO object if the result is needed as a string.
|
human-readable interpretation. The output can be used as input
|
||||||
|
without error, given readable representations of all elements are
|
||||||
|
available via `repr()'. Output is restricted to `MAX_WIDTH'
|
||||||
|
columns where possible.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if stream is None:
|
if stream is None:
|
||||||
import sys
|
import sys
|
||||||
stream = sys.stdout
|
stream = sys.stdout
|
||||||
|
|
||||||
from types import DictType, ListType, TupleType
|
|
||||||
|
|
||||||
rep = `seq`
|
rep = `seq`
|
||||||
typ = type(seq)
|
typ = type(seq)
|
||||||
sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
|
sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
|
||||||
|
@ -140,4 +133,4 @@ def pprint(seq, stream = None, indent = 0, allowance = 0):
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# end of pprint.py
|
# end of file
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
def f(): "maybe a docstring"
|
|
@ -236,19 +236,25 @@ to the descriptions of each function for detailed information.
|
||||||
\subsection{AST Objects}
|
\subsection{AST Objects}
|
||||||
|
|
||||||
AST objects (returned by \code{expr()}, \code{suite()}, and
|
AST objects (returned by \code{expr()}, \code{suite()}, and
|
||||||
\code{tuple2ast()}, described above) have no methods of their own.
|
\code{sequence2ast()}, described above) have no methods of their own.
|
||||||
Some of the functions defined which accept an AST object as their
|
Some of the functions defined which accept an AST object as their
|
||||||
first argument may change to object methods in the future.
|
first argument may change to object methods in the future.
|
||||||
|
|
||||||
Ordered and equality comparisons are supported between AST objects.
|
Ordered and equality comparisons are supported between AST objects.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Example}
|
\subsection{Examples}
|
||||||
|
|
||||||
The parser module allows operations to be performed on the parse tree
|
The parser module allows operations to be performed on the parse tree
|
||||||
of Python source code before the bytecode is generated, and provides
|
of Python source code before the bytecode is generated, and provides
|
||||||
for inspection of the parse tree for information gathering purposes as
|
for inspection of the parse tree for information gathering purposes as
|
||||||
well. While many useful operations may take place between parsing and
|
well. Two examples are presented. The simple example demonstrates
|
||||||
|
emulation of the \code{compile()} built-in function and the complex
|
||||||
|
example shows the use of a parse tree for information discovery.
|
||||||
|
|
||||||
|
\subsubsection{Emulation of {\tt compile()}}
|
||||||
|
|
||||||
|
While many useful operations may take place between parsing and
|
||||||
bytecode generation, the simplest operation is to do nothing. For
|
bytecode generation, the simplest operation is to do nothing. For
|
||||||
this purpose, using the \code{parser} module to produce an
|
this purpose, using the \code{parser} module to produce an
|
||||||
intermediate data structure is equivalent to the code
|
intermediate data structure is equivalent to the code
|
||||||
|
@ -273,6 +279,25 @@ as an AST object:
|
||||||
10
|
10
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
|
An application which needs both AST and code objects can package this
|
||||||
|
code into readily available functions:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
import parser
|
||||||
|
|
||||||
|
def load_suite(source_string):
|
||||||
|
ast = parser.suite(source_string)
|
||||||
|
code = parser.compileast(ast)
|
||||||
|
return ast, code
|
||||||
|
|
||||||
|
def load_expression(source_string):
|
||||||
|
ast = parser.expr(source_string)
|
||||||
|
code = parser.compileast(ast)
|
||||||
|
return ast, code
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\subsubsection{Information Discovery}
|
||||||
|
|
||||||
Some applications can benefit from access to the parse tree itself, and
|
Some applications can benefit from access to the parse tree itself, and
|
||||||
can take advantage of the intermediate data structure provided by the
|
can take advantage of the intermediate data structure provided by the
|
||||||
\code{parser} module. The remainder of this section of examples will
|
\code{parser} module. The remainder of this section of examples will
|
||||||
|
@ -293,6 +318,16 @@ operations on behalf of the caller. All source files mentioned here
|
||||||
which are not part of the Python installation are located in the
|
which are not part of the Python installation are located in the
|
||||||
\file{Demo/parser} directory of the distribution.
|
\file{Demo/parser} directory of the distribution.
|
||||||
|
|
||||||
|
The dynamic nature of Python allows the programmer a great deal of
|
||||||
|
flexibility, but most modules need only a limited measure of this when
|
||||||
|
defining classes, functions, and methods. In this example, the only
|
||||||
|
definitions that will be considered are those which are defined in the
|
||||||
|
top level of their context, e.g., a function defined by a \code{def}
|
||||||
|
statement at column zero of a module, but not a function defined
|
||||||
|
within a branch of an \code{if} ... \code{else} construct, though
|
||||||
|
there are some good reasons for doing so in some situations. Nesting
|
||||||
|
of definitions will be handled by the code developed in the example.
|
||||||
|
|
||||||
To construct the upper-level extraction methods, we need to know what
|
To construct the upper-level extraction methods, we need to know what
|
||||||
the parse tree structure looks like and how much of it we actually
|
the parse tree structure looks like and how much of it we actually
|
||||||
need to be concerned about. Python uses a moderately deep parse tree,
|
need to be concerned about. Python uses a moderately deep parse tree,
|
||||||
|
@ -300,7 +335,8 @@ so there are a large number of intermediate nodes. It is important to
|
||||||
read and understand the formal grammar used by Python. This is
|
read and understand the formal grammar used by Python. This is
|
||||||
specified in the file \file{Grammar/Grammar} in the distribution.
|
specified in the file \file{Grammar/Grammar} in the distribution.
|
||||||
Consider the simplest case of interest when searching for docstrings:
|
Consider the simplest case of interest when searching for docstrings:
|
||||||
a module consisting of a docstring and nothing else:
|
a module consisting of a docstring and nothing else. (See file
|
||||||
|
\file{docstring.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
"""Some documentation.
|
"""Some documentation.
|
||||||
|
@ -376,7 +412,7 @@ extraction, we can safely require that the tree be in tuple form
|
||||||
rather than list form, allowing a simple variable representation to be
|
rather than list form, allowing a simple variable representation to be
|
||||||
\code{['variable\_name']}. A simple recursive function can implement
|
\code{['variable\_name']}. A simple recursive function can implement
|
||||||
the pattern matching, returning a boolean and a dictionary of variable
|
the pattern matching, returning a boolean and a dictionary of variable
|
||||||
name to value mappings.
|
name to value mappings. (See file \file{example.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
from types import ListType, TupleType
|
from types import ListType, TupleType
|
||||||
|
@ -399,32 +435,36 @@ def match(pattern, data, vars=None):
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
Using this simple recursive pattern matching function and the symbolic
|
Using this simple recursive pattern matching function and the symbolic
|
||||||
node types, the pattern for the candidate docstring subtrees becomes:
|
node types, the pattern for the candidate docstring subtrees becomes
|
||||||
|
fairly readable. (See file \file{example.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
>>> DOCSTRING_STMT_PATTERN = (
|
import symbol
|
||||||
... symbol.stmt,
|
import token
|
||||||
... (symbol.simple_stmt,
|
|
||||||
... (symbol.small_stmt,
|
DOCSTRING_STMT_PATTERN = (
|
||||||
... (symbol.expr_stmt,
|
symbol.stmt,
|
||||||
... (symbol.testlist,
|
(symbol.simple_stmt,
|
||||||
... (symbol.test,
|
(symbol.small_stmt,
|
||||||
... (symbol.and_test,
|
(symbol.expr_stmt,
|
||||||
... (symbol.not_test,
|
(symbol.testlist,
|
||||||
... (symbol.comparison,
|
(symbol.test,
|
||||||
... (symbol.expr,
|
(symbol.and_test,
|
||||||
... (symbol.xor_expr,
|
(symbol.not_test,
|
||||||
... (symbol.and_expr,
|
(symbol.comparison,
|
||||||
... (symbol.shift_expr,
|
(symbol.expr,
|
||||||
... (symbol.arith_expr,
|
(symbol.xor_expr,
|
||||||
... (symbol.term,
|
(symbol.and_expr,
|
||||||
... (symbol.factor,
|
(symbol.shift_expr,
|
||||||
... (symbol.power,
|
(symbol.arith_expr,
|
||||||
... (symbol.atom,
|
(symbol.term,
|
||||||
... (token.STRING, ['docstring'])
|
(symbol.factor,
|
||||||
... )))))))))))))))),
|
(symbol.power,
|
||||||
... (token.NEWLINE, '')
|
(symbol.atom,
|
||||||
... ))
|
(token.STRING, ['docstring'])
|
||||||
|
)))))))))))))))),
|
||||||
|
(token.NEWLINE, '')
|
||||||
|
))
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
Using the \code{match()} function with this pattern, extracting the
|
Using the \code{match()} function with this pattern, extracting the
|
||||||
|
@ -453,6 +493,160 @@ sibling nodes to match without regard to number. A more elaborate
|
||||||
matching function could be used to overcome this limitation, but this
|
matching function could be used to overcome this limitation, but this
|
||||||
is sufficient for the example.
|
is sufficient for the example.
|
||||||
|
|
||||||
|
Given the ability to determine whether a statement might be a
|
||||||
|
docstring and extract the actual string from the statement, some work
|
||||||
|
needs to be performed to walk the parse tree for an entire module and
|
||||||
|
extract information about the names defined in each context of the
|
||||||
|
module and associate any docstrings with the names. The code to
|
||||||
|
perform this work is not complicated, but bears some explanation.
|
||||||
|
|
||||||
|
The public interface to the classes is straightforward and should
|
||||||
|
probably be somewhat more flexible. Each ``major'' block of the
|
||||||
|
module is described by an object providing several methods for inquiry
|
||||||
|
and a constructor which accepts at least the subtree of the complete
|
||||||
|
parse tree which it represents. The \code{ModuleInfo} constructor
|
||||||
|
accepts an optional \code{\var{name}} parameter since it cannot
|
||||||
|
otherwise determine the name of the module.
|
||||||
|
|
||||||
|
The public classes include \code{ClassInfo}, \code{FunctionInfo},
|
||||||
|
and \code{ModuleInfo}. All objects provide the
|
||||||
|
methods \code{get\_name()}, \code{get\_docstring()},
|
||||||
|
\code{get\_class\_names()}, and \code{get\_class\_info()}. The
|
||||||
|
\code{ClassInfo} objects support \code{get\_method\_names()} and
|
||||||
|
\code{get\_method\_info()} while the other classes provide
|
||||||
|
\code{get\_function\_names()} and \code{get\_function\_info()}.
|
||||||
|
|
||||||
|
Within each of the forms of code block that the public classes
|
||||||
|
represent, most of the required information is in the same form and is
|
||||||
|
accessed in the same way, with classes having the distinction that
|
||||||
|
functions defined at the top level are referred to as ``methods.''
|
||||||
|
Since the difference in nomenclature reflects a real semantic
|
||||||
|
distinction from functions defined outside of a class, our
|
||||||
|
implementation needs to maintain the same measure of distinction.
|
||||||
|
Hence, most of the functionality of the public classes can be
|
||||||
|
implemented in a common base class, \code{SuiteInfoBase}, with the
|
||||||
|
accessors for function and method information provided elsewhere.
|
||||||
|
Note that there is only one class which represents function and method
|
||||||
|
information; this mirrors the use of the \code{def} statement to
|
||||||
|
define both types of functions.
|
||||||
|
|
||||||
|
Most of the accessor functions are declared in \code{SuiteInfoBase}
|
||||||
|
and do not need to be overridden by subclasses. More importantly, the
|
||||||
|
extraction of most information from a parse tree is handled through a
|
||||||
|
method called by the \code{SuiteInfoBase} constructor. The example
|
||||||
|
code for most of the classes is clear when read alongside the formal
|
||||||
|
grammar, but the method which recursively creates new information
|
||||||
|
objects requires further examination. Here is the relevant part of
|
||||||
|
the \code{SuiteInfoBase} definition from \file{example.py}:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
class SuiteInfoBase:
|
||||||
|
_docstring = ''
|
||||||
|
_name = ''
|
||||||
|
|
||||||
|
def __init__(self, tree = None):
|
||||||
|
self._class_info = {}
|
||||||
|
self._function_info = {}
|
||||||
|
if tree:
|
||||||
|
self._extract_info(tree)
|
||||||
|
|
||||||
|
def _extract_info(self, tree):
|
||||||
|
# extract docstring
|
||||||
|
if len(tree) == 2:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||||
|
else:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||||
|
if found:
|
||||||
|
self._docstring = eval(vars['docstring'])
|
||||||
|
# discover inner definitions
|
||||||
|
for node in tree[1:]:
|
||||||
|
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||||
|
if found:
|
||||||
|
cstmt = vars['compound']
|
||||||
|
if cstmt[0] == symbol.funcdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._function_info[name] = FunctionInfo(cstmt)
|
||||||
|
elif cstmt[0] == symbol.classdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._class_info[name] = ClassInfo(cstmt)
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
After initializing some internal state, the constructor calls the
|
||||||
|
\code{\_extract\_info()} method. This method performs the bulk of the
|
||||||
|
information extraction which takes place in the entire example. The
|
||||||
|
extraction has two distinct phases: the location of the docstring for
|
||||||
|
the parse tree passed in, and the discovery of additional definitions
|
||||||
|
within the code block represented by the parse tree.
|
||||||
|
|
||||||
|
The initial \code{if} test determines whether the nested suite is of
|
||||||
|
the ``short form'' or the ``long form.'' The short form is used when
|
||||||
|
the code block is on the same line as the definition of the code
|
||||||
|
block, as in
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def square(x): "Square an argument."; return x ** 2
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
while the long form uses an indented block and allows nested
|
||||||
|
definitions:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def make_power(exp):
|
||||||
|
"Make a function that raises an argument to the exponent `exp'."
|
||||||
|
def raiser(x, y=exp):
|
||||||
|
return x ** y
|
||||||
|
return raiser
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
When the short form is used, the code block may contain a docstring as
|
||||||
|
the first, and possibly only, \code{small\_stmt} element. The
|
||||||
|
extraction of such a docstring is slightly different and requires only
|
||||||
|
a portion of the complete pattern used in the more common case. As
|
||||||
|
given in the code, the docstring will only be found if there is only
|
||||||
|
one \code{small\_stmt} node in the \code{simple\_stmt} node. Since most
|
||||||
|
functions and methods which use the short form do not provide a
|
||||||
|
docstring, this may be considered sufficient. The extraction of the
|
||||||
|
docstring proceeds using the \code{match()} function as described
|
||||||
|
above, and the value of the docstring is stored as an attribute of the
|
||||||
|
\code{SuiteInfoBase} object.
|
||||||
|
|
||||||
|
After docstring extraction, the method operates a simple definition discovery
|
||||||
|
algorithm on the \code{stmt} nodes of the \code{suite} node. The
|
||||||
|
special case of the short form is not tested; since there are no
|
||||||
|
\code{stmt} nodes in the short form, the algorithm will silently skip
|
||||||
|
the single \code{simple\_stmt} node and correctly not discover any
|
||||||
|
nested definitions.
|
||||||
|
|
||||||
|
Each statement in the code block being examined is categorized as being
|
||||||
|
a class definition, function definition (including methods), or
|
||||||
|
something else. For the definition statements, the name of the
|
||||||
|
element being defined is extracted and a representation object
|
||||||
|
appropriate to the definition is created with the defining subtree
|
||||||
|
passed as an argument to the constructor. The representation objects
|
||||||
|
are stored in instance variables and may be retrieved by name using
|
||||||
|
the appropriate accessor methods.
|
||||||
|
|
||||||
|
The public classes provide any accessors required which are more
|
||||||
|
specific than those provided by the \code{SuiteInfoBase} class, but
|
||||||
|
the real extraction algorithm remains common to all forms of code
|
||||||
|
blocks. A high-level function can be used to extract the complete set
|
||||||
|
of information from a source file:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def get_docs(fileName):
|
||||||
|
source = open(fileName).read()
|
||||||
|
import os
|
||||||
|
basename = os.path.basename(os.path.splitext(fileName)[0])
|
||||||
|
import parser
|
||||||
|
ast = parser.suite(source)
|
||||||
|
tup = parser.ast2tuple(ast)
|
||||||
|
return ModuleInfo(tup, basename)
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
This provides an easy-to-use interface to the documentation of a
|
||||||
|
module. If information is required which is not extracted by the code
|
||||||
|
of this example, the code may be extended at clearly defined points to
|
||||||
|
provide additional capabilities.
|
||||||
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
|
@ -236,19 +236,25 @@ to the descriptions of each function for detailed information.
|
||||||
\subsection{AST Objects}
|
\subsection{AST Objects}
|
||||||
|
|
||||||
AST objects (returned by \code{expr()}, \code{suite()}, and
|
AST objects (returned by \code{expr()}, \code{suite()}, and
|
||||||
\code{tuple2ast()}, described above) have no methods of their own.
|
\code{sequence2ast()}, described above) have no methods of their own.
|
||||||
Some of the functions defined which accept an AST object as their
|
Some of the functions defined which accept an AST object as their
|
||||||
first argument may change to object methods in the future.
|
first argument may change to object methods in the future.
|
||||||
|
|
||||||
Ordered and equality comparisons are supported between AST objects.
|
Ordered and equality comparisons are supported between AST objects.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Example}
|
\subsection{Examples}
|
||||||
|
|
||||||
The parser module allows operations to be performed on the parse tree
|
The parser module allows operations to be performed on the parse tree
|
||||||
of Python source code before the bytecode is generated, and provides
|
of Python source code before the bytecode is generated, and provides
|
||||||
for inspection of the parse tree for information gathering purposes as
|
for inspection of the parse tree for information gathering purposes as
|
||||||
well. While many useful operations may take place between parsing and
|
well. Two examples are presented. The simple example demonstrates
|
||||||
|
emulation of the \code{compile()} built-in function and the complex
|
||||||
|
example shows the use of a parse tree for information discovery.
|
||||||
|
|
||||||
|
\subsubsection{Emulation of {\tt compile()}}
|
||||||
|
|
||||||
|
While many useful operations may take place between parsing and
|
||||||
bytecode generation, the simplest operation is to do nothing. For
|
bytecode generation, the simplest operation is to do nothing. For
|
||||||
this purpose, using the \code{parser} module to produce an
|
this purpose, using the \code{parser} module to produce an
|
||||||
intermediate data structure is equivalent to the code
|
intermediate data structure is equivalent to the code
|
||||||
|
@ -273,6 +279,25 @@ as an AST object:
|
||||||
10
|
10
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
|
An application which needs both AST and code objects can package this
|
||||||
|
code into readily available functions:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
import parser
|
||||||
|
|
||||||
|
def load_suite(source_string):
|
||||||
|
ast = parser.suite(source_string)
|
||||||
|
code = parser.compileast(ast)
|
||||||
|
return ast, code
|
||||||
|
|
||||||
|
def load_expression(source_string):
|
||||||
|
ast = parser.expr(source_string)
|
||||||
|
code = parser.compileast(ast)
|
||||||
|
return ast, code
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
\subsubsection{Information Discovery}
|
||||||
|
|
||||||
Some applications can benefit from access to the parse tree itself, and
|
Some applications can benefit from access to the parse tree itself, and
|
||||||
can take advantage of the intermediate data structure provided by the
|
can take advantage of the intermediate data structure provided by the
|
||||||
\code{parser} module. The remainder of this section of examples will
|
\code{parser} module. The remainder of this section of examples will
|
||||||
|
@ -293,6 +318,16 @@ operations on behalf of the caller. All source files mentioned here
|
||||||
which are not part of the Python installation are located in the
|
which are not part of the Python installation are located in the
|
||||||
\file{Demo/parser} directory of the distribution.
|
\file{Demo/parser} directory of the distribution.
|
||||||
|
|
||||||
|
The dynamic nature of Python allows the programmer a great deal of
|
||||||
|
flexibility, but most modules need only a limited measure of this when
|
||||||
|
defining classes, functions, and methods. In this example, the only
|
||||||
|
definitions that will be considered are those which are defined in the
|
||||||
|
top level of their context, e.g., a function defined by a \code{def}
|
||||||
|
statement at column zero of a module, but not a function defined
|
||||||
|
within a branch of an \code{if} ... \code{else} construct, though
|
||||||
|
there are some good reasons for doing so in some situations. Nesting
|
||||||
|
of definitions will be handled by the code developed in the example.
|
||||||
|
|
||||||
To construct the upper-level extraction methods, we need to know what
|
To construct the upper-level extraction methods, we need to know what
|
||||||
the parse tree structure looks like and how much of it we actually
|
the parse tree structure looks like and how much of it we actually
|
||||||
need to be concerned about. Python uses a moderately deep parse tree,
|
need to be concerned about. Python uses a moderately deep parse tree,
|
||||||
|
@ -300,7 +335,8 @@ so there are a large number of intermediate nodes. It is important to
|
||||||
read and understand the formal grammar used by Python. This is
|
read and understand the formal grammar used by Python. This is
|
||||||
specified in the file \file{Grammar/Grammar} in the distribution.
|
specified in the file \file{Grammar/Grammar} in the distribution.
|
||||||
Consider the simplest case of interest when searching for docstrings:
|
Consider the simplest case of interest when searching for docstrings:
|
||||||
a module consisting of a docstring and nothing else:
|
a module consisting of a docstring and nothing else. (See file
|
||||||
|
\file{docstring.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
"""Some documentation.
|
"""Some documentation.
|
||||||
|
@ -376,7 +412,7 @@ extraction, we can safely require that the tree be in tuple form
|
||||||
rather than list form, allowing a simple variable representation to be
|
rather than list form, allowing a simple variable representation to be
|
||||||
\code{['variable\_name']}. A simple recursive function can implement
|
\code{['variable\_name']}. A simple recursive function can implement
|
||||||
the pattern matching, returning a boolean and a dictionary of variable
|
the pattern matching, returning a boolean and a dictionary of variable
|
||||||
name to value mappings.
|
name to value mappings. (See file \file{example.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
from types import ListType, TupleType
|
from types import ListType, TupleType
|
||||||
|
@ -399,32 +435,36 @@ def match(pattern, data, vars=None):
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
Using this simple recursive pattern matching function and the symbolic
|
Using this simple recursive pattern matching function and the symbolic
|
||||||
node types, the pattern for the candidate docstring subtrees becomes:
|
node types, the pattern for the candidate docstring subtrees becomes
|
||||||
|
fairly readable. (See file \file{example.py}.)
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
>>> DOCSTRING_STMT_PATTERN = (
|
import symbol
|
||||||
... symbol.stmt,
|
import token
|
||||||
... (symbol.simple_stmt,
|
|
||||||
... (symbol.small_stmt,
|
DOCSTRING_STMT_PATTERN = (
|
||||||
... (symbol.expr_stmt,
|
symbol.stmt,
|
||||||
... (symbol.testlist,
|
(symbol.simple_stmt,
|
||||||
... (symbol.test,
|
(symbol.small_stmt,
|
||||||
... (symbol.and_test,
|
(symbol.expr_stmt,
|
||||||
... (symbol.not_test,
|
(symbol.testlist,
|
||||||
... (symbol.comparison,
|
(symbol.test,
|
||||||
... (symbol.expr,
|
(symbol.and_test,
|
||||||
... (symbol.xor_expr,
|
(symbol.not_test,
|
||||||
... (symbol.and_expr,
|
(symbol.comparison,
|
||||||
... (symbol.shift_expr,
|
(symbol.expr,
|
||||||
... (symbol.arith_expr,
|
(symbol.xor_expr,
|
||||||
... (symbol.term,
|
(symbol.and_expr,
|
||||||
... (symbol.factor,
|
(symbol.shift_expr,
|
||||||
... (symbol.power,
|
(symbol.arith_expr,
|
||||||
... (symbol.atom,
|
(symbol.term,
|
||||||
... (token.STRING, ['docstring'])
|
(symbol.factor,
|
||||||
... )))))))))))))))),
|
(symbol.power,
|
||||||
... (token.NEWLINE, '')
|
(symbol.atom,
|
||||||
... ))
|
(token.STRING, ['docstring'])
|
||||||
|
)))))))))))))))),
|
||||||
|
(token.NEWLINE, '')
|
||||||
|
))
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
Using the \code{match()} function with this pattern, extracting the
|
Using the \code{match()} function with this pattern, extracting the
|
||||||
|
@ -453,6 +493,160 @@ sibling nodes to match without regard to number. A more elaborate
|
||||||
matching function could be used to overcome this limitation, but this
|
matching function could be used to overcome this limitation, but this
|
||||||
is sufficient for the example.
|
is sufficient for the example.
|
||||||
|
|
||||||
|
Given the ability to determine whether a statement might be a
|
||||||
|
docstring and extract the actual string from the statement, some work
|
||||||
|
needs to be performed to walk the parse tree for an entire module and
|
||||||
|
extract information about the names defined in each context of the
|
||||||
|
module and associate any docstrings with the names. The code to
|
||||||
|
perform this work is not complicated, but bears some explanation.
|
||||||
|
|
||||||
|
The public interface to the classes is straightforward and should
|
||||||
|
probably be somewhat more flexible. Each ``major'' block of the
|
||||||
|
module is described by an object providing several methods for inquiry
|
||||||
|
and a constructor which accepts at least the subtree of the complete
|
||||||
|
parse tree which it represents. The \code{ModuleInfo} constructor
|
||||||
|
accepts an optional \code{\var{name}} parameter since it cannot
|
||||||
|
otherwise determine the name of the module.
|
||||||
|
|
||||||
|
The public classes include \code{ClassInfo}, \code{FunctionInfo},
|
||||||
|
and \code{ModuleInfo}. All objects provide the
|
||||||
|
methods \code{get\_name()}, \code{get\_docstring()},
|
||||||
|
\code{get\_class\_names()}, and \code{get\_class\_info()}. The
|
||||||
|
\code{ClassInfo} objects support \code{get\_method\_names()} and
|
||||||
|
\code{get\_method\_info()} while the other classes provide
|
||||||
|
\code{get\_function\_names()} and \code{get\_function\_info()}.
|
||||||
|
|
||||||
|
Within each of the forms of code block that the public classes
|
||||||
|
represent, most of the required information is in the same form and is
|
||||||
|
accessed in the same way, with classes having the distinction that
|
||||||
|
functions defined at the top level are referred to as ``methods.''
|
||||||
|
Since the difference in nomenclature reflects a real semantic
|
||||||
|
distinction from functions defined outside of a class, our
|
||||||
|
implementation needs to maintain the same measure of distinction.
|
||||||
|
Hence, most of the functionality of the public classes can be
|
||||||
|
implemented in a common base class, \code{SuiteInfoBase}, with the
|
||||||
|
accessors for function and method information provided elsewhere.
|
||||||
|
Note that there is only one class which represents function and method
|
||||||
|
information; this mirrors the use of the \code{def} statement to
|
||||||
|
define both types of functions.
|
||||||
|
|
||||||
|
Most of the accessor functions are declared in \code{SuiteInfoBase}
|
||||||
|
and do not need to be overridden by subclasses. More importantly, the
|
||||||
|
extraction of most information from a parse tree is handled through a
|
||||||
|
method called by the \code{SuiteInfoBase} constructor. The example
|
||||||
|
code for most of the classes is clear when read alongside the formal
|
||||||
|
grammar, but the method which recursively creates new information
|
||||||
|
objects requires further examination. Here is the relevant part of
|
||||||
|
the \code{SuiteInfoBase} definition from \file{example.py}:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
class SuiteInfoBase:
|
||||||
|
_docstring = ''
|
||||||
|
_name = ''
|
||||||
|
|
||||||
|
def __init__(self, tree = None):
|
||||||
|
self._class_info = {}
|
||||||
|
self._function_info = {}
|
||||||
|
if tree:
|
||||||
|
self._extract_info(tree)
|
||||||
|
|
||||||
|
def _extract_info(self, tree):
|
||||||
|
# extract docstring
|
||||||
|
if len(tree) == 2:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||||
|
else:
|
||||||
|
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||||
|
if found:
|
||||||
|
self._docstring = eval(vars['docstring'])
|
||||||
|
# discover inner definitions
|
||||||
|
for node in tree[1:]:
|
||||||
|
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||||
|
if found:
|
||||||
|
cstmt = vars['compound']
|
||||||
|
if cstmt[0] == symbol.funcdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._function_info[name] = FunctionInfo(cstmt)
|
||||||
|
elif cstmt[0] == symbol.classdef:
|
||||||
|
name = cstmt[2][1]
|
||||||
|
self._class_info[name] = ClassInfo(cstmt)
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
After initializing some internal state, the constructor calls the
|
||||||
|
\code{\_extract\_info()} method. This method performs the bulk of the
|
||||||
|
information extraction which takes place in the entire example. The
|
||||||
|
extraction has two distinct phases: the location of the docstring for
|
||||||
|
the parse tree passed in, and the discovery of additional definitions
|
||||||
|
within the code block represented by the parse tree.
|
||||||
|
|
||||||
|
The initial \code{if} test determines whether the nested suite is of
|
||||||
|
the ``short form'' or the ``long form.'' The short form is used when
|
||||||
|
the code block is on the same line as the definition of the code
|
||||||
|
block, as in
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def square(x): "Square an argument."; return x ** 2
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
while the long form uses an indented block and allows nested
|
||||||
|
definitions:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def make_power(exp):
|
||||||
|
"Make a function that raises an argument to the exponent `exp'."
|
||||||
|
def raiser(x, y=exp):
|
||||||
|
return x ** y
|
||||||
|
return raiser
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
When the short form is used, the code block may contain a docstring as
|
||||||
|
the first, and possibly only, \code{small\_stmt} element. The
|
||||||
|
extraction of such a docstring is slightly different and requires only
|
||||||
|
a portion of the complete pattern used in the more common case. As
|
||||||
|
given in the code, the docstring will only be found if there is only
|
||||||
|
one \code{small\_stmt} node in the \code{simple\_stmt} node. Since most
|
||||||
|
functions and methods which use the short form do not provide a
|
||||||
|
docstring, this may be considered sufficient. The extraction of the
|
||||||
|
docstring proceeds using the \code{match()} function as described
|
||||||
|
above, and the value of the docstring is stored as an attribute of the
|
||||||
|
\code{SuiteInfoBase} object.
|
||||||
|
|
||||||
|
After docstring extraction, the method operates a simple definition discovery
|
||||||
|
algorithm on the \code{stmt} nodes of the \code{suite} node. The
|
||||||
|
special case of the short form is not tested; since there are no
|
||||||
|
\code{stmt} nodes in the short form, the algorithm will silently skip
|
||||||
|
the single \code{simple\_stmt} node and correctly not discover any
|
||||||
|
nested definitions.
|
||||||
|
|
||||||
|
Each statement in the code block being examined is categorized as being
|
||||||
|
a class definition, function definition (including methods), or
|
||||||
|
something else. For the definition statements, the name of the
|
||||||
|
element being defined is extracted and a representation object
|
||||||
|
appropriate to the definition is created with the defining subtree
|
||||||
|
passed as an argument to the constructor. The representation objects
|
||||||
|
are stored in instance variables and may be retrieved by name using
|
||||||
|
the appropriate accessor methods.
|
||||||
|
|
||||||
|
The public classes provide any accessors required which are more
|
||||||
|
specific than those provided by the \code{SuiteInfoBase} class, but
|
||||||
|
the real extraction algorithm remains common to all forms of code
|
||||||
|
blocks. A high-level function can be used to extract the complete set
|
||||||
|
of information from a source file:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
def get_docs(fileName):
|
||||||
|
source = open(fileName).read()
|
||||||
|
import os
|
||||||
|
basename = os.path.basename(os.path.splitext(fileName)[0])
|
||||||
|
import parser
|
||||||
|
ast = parser.suite(source)
|
||||||
|
tup = parser.ast2tuple(ast)
|
||||||
|
return ModuleInfo(tup, basename)
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
This provides an easy-to-use interface to the documentation of a
|
||||||
|
module. If information is required which is not extracted by the code
|
||||||
|
of this example, the code may be extended at clearly defined points to
|
||||||
|
provide additional capabilities.
|
||||||
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
Loading…
Reference in New Issue