"""ANTLR3 runtime package"""
|
|
|
|
# begin[licence]
|
|
#
|
|
# [The "BSD licence"]
|
|
# Copyright (c) 2005-2008 Terence Parr
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# 3. The name of the author may not be used to endorse or promote products
|
|
# derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# end[licensc]
|
|
|
|
from antlr3.constants import EOF
|
|
from antlr3.exceptions import NoViableAltException, BacktrackingFailed
|
|
|
|
|
|
class DFA(object):
    """@brief A DFA implemented as a set of transition tables.

    Any state that has a semantic predicate edge is special; those states
    are generated with if-then-else structures in a specialStateTransition()
    which is generated by cyclicDFA template.

    """
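
    # A generated recognizer typically builds one DFA instance per decision
    # from module-level lookup tables. Illustrative sketch only; the names
    # below (DFA7, DFA7_eot, ...) are hypothetical and not part of this
    # module:
    #
    #     self.dfa7 = self.DFA7(
    #         self, 7,
    #         eot=DFA7_eot, eof=DFA7_eof, min=DFA7_min, max=DFA7_max,
    #         accept=DFA7_accept, special=DFA7_special,
    #         transition=DFA7_transition
    #         )
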
    def __init__(
        self,
        recognizer, decisionNumber,
        eot, eof, min, max, accept, special, transition
        ):
        ## Which recognizer encloses this DFA? Needed to check backtracking
        self.recognizer = recognizer

        self.decisionNumber = decisionNumber
        self.eot = eot                # per-state target of the EOT ("else") edge, or -1
        self.eof = eof                # per-state target of the EOF edge, or -1
        self.min = min                # per-state smallest symbol with a normal edge
        self.max = max                # per-state largest symbol with a normal edge
        self.accept = accept          # per-state predicted alternative (>=1 for accept states)
        self.special = special        # per-state index for specialStateTransition(), or -1
        self.transition = transition  # per-state next-state table, indexed by symbol - min[s]

    def predict(self, input):
        """
        From the input stream, predict what alternative will succeed
        using this DFA (representing the covering regular approximation
        to the underlying CFL). Return an alternative number 1..n. Throw
        an exception upon error.
        """
        mark = input.mark()
        s = 0 # we always start at s0
        try:
            for _ in xrange(50000):
                #print "***Current state = %d" % s

                specialState = self.special[s]
                if specialState >= 0:
                    #print "is special"
                    s = self.specialStateTransition(specialState, input)
                    if s == -1:
                        self.noViableAlt(s, input)
                        return 0
                    input.consume()
                    continue

                if self.accept[s] >= 1:
                    #print "accept state for alt %d" % self.accept[s]
                    return self.accept[s]

                # look for a normal char transition
                c = input.LA(1)

                #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF')
                #print "range = %d..%d" % (self.min[s], self.max[s])

                if c >= self.min[s] and c <= self.max[s]:
                    # move to next state
                    snext = self.transition[s][c-self.min[s]]
                    #print "in range, next state = %d" % snext

                    if snext < 0:
                        #print "not a normal transition"
                        # was in range but not a normal transition
                        # must check EOT, which is like the else clause.
                        # eot[s]>=0 indicates that an EOT edge goes to another
                        # state.
                        if self.eot[s] >= 0: # EOT Transition to accept state?
                            #print "EOT trans to accept state %d" % self.eot[s]
                            s = self.eot[s]
                            input.consume()
                            # TODO: I had this as return accept[eot[s]]
                            # which assumed here that the EOT edge always
                            # went to an accept...faster to do this, but
                            # what about predicated edges coming from EOT
                            # target?
                            continue

                        #print "no viable alt"
                        self.noViableAlt(s, input)
                        return 0

                    s = snext
                    input.consume()
                    continue

                if self.eot[s] >= 0:
                    #print "EOT to %d" % self.eot[s]
                    s = self.eot[s]
                    input.consume()
                    continue

                # EOF Transition to accept state?
                if c == EOF and self.eof[s] >= 0:
                    #print "EOF Transition to accept state %d" \
                    #    % self.accept[self.eof[s]]
                    return self.accept[self.eof[s]]

                # not in range and not EOF/EOT, must be invalid symbol
                self.noViableAlt(s, input)
                return 0

            else:
                raise RuntimeError("DFA bang!")

        finally:
            input.rewind(mark)


    def noViableAlt(self, s, input):
        if self.recognizer._state.backtracking > 0:
            raise BacktrackingFailed

        nvae = NoViableAltException(
            self.getDescription(),
            self.decisionNumber,
            s,
            input
            )

        self.error(nvae)
        raise nvae


    def error(self, nvae):
        """A hook for debugging interface"""
        pass


    def specialStateTransition(self, s, input):
        # Overridden by the cyclicDFA template for DFAs that contain states
        # with semantic predicate edges; this default signals "no viable
        # transition".
        return -1


    def getDescription(self):
        return "n/a"


##     def specialTransition(self, state, symbol):
##         return 0

    def unpack(cls, string):
        """@brief Unpack the runlength encoded table data.

        Terence implemented packed table initializers, because Java has a
        size restriction on .class files and the lookup tables can grow
        pretty large. The generated JavaLexer.java of the Java.g example
        would be about 15MB with uncompressed array initializers.

        Python does not have any size restrictions, but the compilation of
        such large source files seems to be pretty memory hungry. The
        memory consumption of the Python process grew to >1.5GB when
        importing a 15MB lexer, eating all my swap space, and I was too
        impatient to see whether it could finish at all. With packed
        initializers that are unpacked at import time of the lexer module,
        everything works like a charm.

        """

        ret = []
        for i in range(len(string) / 2):
            (n, v) = ord(string[i*2]), ord(string[i*2+1])

            # Is there a bitwise operation to do this?
            if v == 0xFFFF:
                v = -1

            ret += [v] * n

        return ret

    unpack = classmethod(unpack)
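
# Generated lexers and parsers emit their lookup tables in this packed form
# and expand them at import time, roughly along these lines (illustrative
# sketch only; the DFA7_* identifiers are hypothetical):
#
#     DFA7_eot = DFA.unpack(u"\4\uffff")
#     DFA7_accept = DFA.unpack(u"\2\uffff\1\1\1\2")
#     DFA7_transition = [
#         DFA.unpack(u"\1\1\1\2"),
#         DFA.unpack(u""),
#         ]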