Fix recursive includes.

This commit is contained in:
smurf@smurf.noris.de 2005-01-22 05:39:56 +01:00
parent edb0f3fc8c
commit d486d174ed
2 changed files with 356 additions and 336 deletions

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
yapps2 (2.1.1-17) unstable; urgency=medium
* Fix recursive includes.
-- Matthias Urlichs <smurf@debian.org> Sat, 22 Jan 2005 03:03:51 +0100
yapps2 (2.1.1-16) unstable; urgency=medium
* Brown paper bag -- fix Python 2.4 stuff.

View file

@ -23,402 +23,416 @@ MIN_WINDOW=4096
# File lookup window
class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use.

    Note that this class deliberately carries the same name as the builtin
    ``SyntaxError``; inside this module the name refers to this class.

    Attributes:
        pos:     position of the offending input as handed in by the raiser
                 (the scanner passes its ``get_pos()`` tuple), or None
        msg:     human-readable description of what went wrong
        context: the parser Context active when the error was raised, or None
    """

    def __init__(self, pos=None, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
        self.context = context

    def __str__(self):
        # Without a position there is nothing useful to add to the name.
        if not self.pos:
            return 'SyntaxError'
        return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens."""
    pass
class Token(object):
    """Yapps token.

    This is a container for a scanned token.

    Attributes:
        type:  the terminal name the token was matched as
        value: the matched text
        pos:   None, or an indexable (file, line, column) triple
    """

    def __init__(self, type,value, pos=None):
        """Initialize a token."""
        self.type = type
        self.value = value
        self.pos = pos

    def __repr__(self):
        """Return '<type: value @ file:line.col>', omitting the parts of the
        position that are unset."""
        output = '<%s: %s' % (self.type, repr(self.value))
        if self.pos:
            output += " @ "
            if self.pos[0]:
                output += "%s:" % self.pos[0]
            if self.pos[1]:
                output += "%d" % self.pos[1]
            # column 0 is a valid column, so only None means "no column"
            if self.pos[2] is not None:
                output += ".%d" % self.pos[2]
        output += ">"
        return output
# Running counter used by Scanner.__init__ to build a "<f.N>" placeholder
# name for every scanner that is created without a filename.
in_name=0
class Scanner(object):
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(restrict) method is used to retrieve
    the next token.  It takes a restrict set that limits the set of
    tokens it is allowed to return.  In context sensitive mode, this
    restrict set guides the scanner.  In context insensitive mode, there
    is no restriction (the set is always the full set of tokens).

    A scanner can temporarily read from another input (see stack_input);
    the nested scanner lives in self.stack and is consulted first by
    token() and get_pos() until it is exhausted.
    """

    def __init__(self, patterns, ignore, input="",
            file=None,filename=None,stacked=False):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : {terminal: None or callable, ...}
          input : string
          file : object with a read(size) method that supplies more input,
                 or None when 'input' is all there is
          filename : name reported in positions; a "<f.N>" placeholder is
                 generated when it is empty
          stacked : true for a scanner created by stack_input(); such a
                 scanner raises StopIteration from token() at end of input

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.

        The 'ignore' value is either None or a callable, which is called
        with the scanner and the to-be-ignored match object; this can
        be used for include file or comment handling.
        """
        global in_name

        if not filename:
            filename="<f.%d>" % in_name
            in_name += 1

        self.input = input
        self.ignore = ignore
        self.file = file
        self.filename = filename
        self.pos = 0
        self.del_pos = 0 # skipped
        self.line = 1
        self.del_line = 0 # skipped
        self.col = 0
        self.tokens = []
        self.stack = None
        self.stacked = stacked

        self.last_read_token = None
        self.last_token = None
        self.last_types = None

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def stack_input(self, input="", file=None, filename=None):
        """Temporarily parse from a second file.

        If a nested input is already active the request is forwarded to
        it, so that includes can nest to any depth.  While forwarding, a
        missing filename is replaced by the integer 1 and a numeric one is
        incremented; the scanner that finally creates the nested scanner
        turns a numeric filename into "<str_N>".
        """
        # Already reading from somewhere else: Go on top of that, please.
        if self.stack:
            # autogenerate a recursion-level-identifying filename
            if not filename:
                filename = 1
            else:
                try:
                    filename += 1
                except TypeError:
                    # a real (string) filename: leave it alone
                    pass
            # now pass off to the include file
            self.stack.stack_input(input,file,filename)
        else:
            # "+= 0" only succeeds for numbers, i.e. for the autogenerated
            # recursion levels from the branch above.
            try:
                filename += 0
            except TypeError:
                pass
            else:
                filename = "<str_%d>" % filename

            # Note that the pattern+ignore are added by the generated
            # scanner code, whose constructor therefore takes
            # (input, file, filename, stacked=...) only.
            self.stack = self.__class__(input,file,filename, stacked=True)

    def get_pos(self):
        """Return a file/line/char tuple."""
        if self.stack: return self.stack.get_pos()

        return (self.filename, self.line+self.del_line, self.col)

#    def __repr__(self):
#        """Print the last few tokens that have been scanned in"""
#        output = ''
#        for t in self.tokens:
#            output += '%s\n' % (repr(t),)
#        return output

    def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
        """Print the line of 'text' that includes position 'p',
        along with a second line with a single caret (^) at position p.

        'pos' is a (file, line, column) tuple; 'length' is the length of
        the token found there, which moves the caret to the token's last
        character.  Positions belonging to another file are handed to the
        stacked scanner if there is one.  Output is written to 'out'.
        """
        # Output uses out.write() rather than the print statement so that
        # this method behaves the same on Python 2 and Python 3; the text
        # written is exactly what "print >>out, a, b" would produce.
        file,line,p = pos
        if file != self.filename:
            if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
            out.write("(%s: not in input buffer)\n" % file)
            return

        text = self.input
        p += length-1 # starts at pos 1

        origline=line
        line -= self.del_line  # lines already dropped from the buffer
        spos=0
        if line > 0:
            while 1:
                line = line - 1
                try:
                    cr = text.index("\n",spos)
                except ValueError:
                    if line:
                        # the requested line lies beyond the buffer
                        text = ""
                    else:
                        # the requested line is the last one and has no
                        # trailing newline: it runs to the end of the buffer
                        text = text[spos:]
                    break
                if line == 0:
                    text = text[spos:cr]
                    break
                spos = cr+1
        else:
            out.write("(%s:%d not in input buffer)\n" % (file,origline))
            return

        # Now try printing part of the line
        text = text[max(p-80, 0):p+80]
        p = p - max(p-80, 0)

        # Strip to the left
        i = text[:p].rfind('\n')
        j = text[:p].rfind('\r')
        if i < 0 or (0 <= j < i): i = j
        if 0 <= i < p:
            p = p - i - 1
            text = text[i+1:]

        # Strip to the right
        i = text.find('\n', p)
        j = text.find('\r', p)
        if i < 0 or (0 <= j < i): i = j
        if i >= 0:
            text = text[:i]

        # Now shorten the text
        while len(text) > 70 and p > 60:
            # Cut off 10 chars
            text = "..." + text[10:]
            p = p - 7

        # Now print the string, along with an indicator
        out.write('>  %s\n' % text)
        out.write('>  %s^\n' % (' '*p))

    def grab_input(self):
        """Get more input if possible.

        Reads another MIN_WINDOW chunk from self.file when fewer than
        MIN_WINDOW unread characters remain, and drops already-consumed
        text from the front of the buffer once enough of it piled up.
        """
        if not self.file: return
        if len(self.input) - self.pos >= MIN_WINDOW: return

        data = self.file.read(MIN_WINDOW)
        if data is None or data == "":
            # End of file: stop asking.  Normalise None to "" so that the
            # concatenation below cannot raise a TypeError.
            self.file = None
            data = ""

        # Drop bytes from the start, if necessary.
        if self.pos > 2*MIN_WINDOW:
            self.del_pos += MIN_WINDOW
            self.del_line += self.input[:MIN_WINDOW].count("\n")
            self.pos -= MIN_WINDOW
            self.input = self.input[MIN_WINDOW:] + data
        else:
            self.input = self.input + data

    def getchar(self):
        """Return the next character.

        Raises IndexError when there is no more input.
        """
        self.grab_input()

        c = self.input[self.pos]
        self.pos += 1
        return c

    def token(self, restrict, context=None):
        """Scan for another token.

        'restrict' is the collection of terminals acceptable here (empty
        means "any"); 'context' is attached to a SyntaxError if nothing
        matches.  Ignored terminals are skipped, calling their handler
        from self.ignore if one is set.  A stacked scanner raises
        StopIteration at the end of its input.
        """

        while 1:
            if self.stack:
                try:
                    return self.stack.token(restrict, context)
                except StopIteration:
                    # the included input is used up; continue with our own
                    self.stack = None

            # Keep looking for a token, ignoring any in self.ignore
            self.grab_input()

            # special handling for end-of-file
            if self.stacked and self.pos==len(self.input):
                raise StopIteration

            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            best_m = None
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and m.end()-m.start() > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = m.end()-m.start()
                    best_m = m

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.get_pos(), msg, context=context)

            ignore = best_pat in self.ignore
            value = self.input[self.pos:self.pos+best_match]
            if not ignore:
                # build the token before advancing, so that its position
                # is where the token starts
                tok=Token(type=best_pat, value=value, pos=self.get_pos())

            self.pos += best_match

            npos = value.rfind("\n")
            if npos > -1:
                self.col = best_match-npos
                self.line += value.count("\n")
            else:
                self.col += best_match

            # If we found something that isn't to be ignored, return it
            if not ignore:
                # keep only the last ten tokens
                if len(self.tokens) >= 10:
                    del self.tokens[0]
                self.tokens.append(tok)
                self.last_read_token = tok
                # print repr(tok)
                return tok
            else:
                ignore = self.ignore[best_pat]
                if ignore:
                    ignore(self, best_m)

    def peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        context = kw.get("context",None)
        if self.last_token is None:
            self.last_types = types
            self.last_token = self.token(types,context)
        elif self.last_types:
            # A token is already buffered; it can only be reused if it
            # was scanned with a restriction covering the requested types.
            for t in types:
                if t not in self.last_types:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.last_token.type

    def scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        context = kw.get("context",None)

        if self.last_token is None:
            tok = self.token([type],context)
        else:
            if self.last_types and type not in self.last_types:
                raise NotImplementedError("Unimplemented: restriction set changed")

            # consume the token buffered by peek()
            tok = self.last_token
            self.last_token = None
        if tok.type != type:
            if not self.last_types: self.last_types=[]
            raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
        return tok.value
class Parser(object):
    """Base class for Yapps-generated parsers.

    The parser holds a scanner in self._scanner and forwards all token
    requests to it.
    """

    def __init__(self, scanner):
        self._scanner = scanner

    def _stack(self, input="",file=None,filename=None):
        """Temporarily read from someplace else"""
        self._scanner.stack_input(input,file,filename)
        self._tok = None

    def _peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.peek(*types, **kw)

    def _scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        return self._scanner.scan(type, **kw)
class Context(object):
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.
    """

    def __init__(self, parent, scanner, rule, args=()):
        """Create a new context.

        Args:
          parent: Context object or None
          scanner: Scanner object
          rule: string (name of the rule)
          args: tuple listing parameters to the rule

        The context also remembers, in self.token, the token most recently
        read by the innermost stacked scanner at the time it is created.
        """
        self.parent = parent
        self.scanner = scanner
        self.rule = rule
        self.args = args
        # Follow the chain of stacked inputs down to the scanner that is
        # actually being read from; self.scanner stays the outermost one.
        while scanner.stack: scanner = scanner.stack
        self.token = scanner.last_read_token

    def __str__(self):
        """Return the chain of rule names, outermost first, joined by ' > '."""
        output = ''
        if self.parent: output = str(self.parent) + ' > '
        output += self.rule
        return output
def print_error(err, scanner):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages.

    Args:
      err: the error to report; its pos, msg and context attributes are used
      scanner: the scanner that was being read; supplies the current
               position if the error has none, and prints the source lines

    Everything is written to sys.stderr, apart from whatever the
    scanner's print_line_with_pointer() emits on its own default stream.
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    pos = err.pos
    if not pos:
        pos = scanner.get_pos()

    file_name, line_number, column_number = pos
    # sys.stderr.write() produces the same text as the print statement
    # and works unchanged on Python 2 and Python 3.
    sys.stderr.write('%s:%d:%d: %s\n' % (file_name, line_number, column_number, err.msg))

    scanner.print_line_with_pointer(pos)

    context = err.context
    token = None
    while context:
        sys.stderr.write('while parsing %s%s:\n' % (context.rule, tuple(context.args)))
        # A context without a token of its own reuses the token of the
        # nearest inner context that had one.
        if context.token:
            token = context.token
        if token:
            scanner.print_line_with_pointer(token.pos, length=len(token.value))
        context = context.parent
def wrap_error_reporter(parser, rule, *args,**kw):
    """Run one rule of a parser and report parse errors on sys.stderr.

    Args:
      parser: the parser object; 'rule' is looked up on it by name
      rule: name of the rule method to call
      *args, **kw: passed through to the rule method

    Returns the rule's result.  If the rule raises SyntaxError or
    NoMoreTokens the error is printed and None is returned.
    """
    try:
        return getattr(parser, rule)(*args,**kw)
    except SyntaxError:
        # sys.exc_info() instead of "except SyntaxError, e": that spelling
        # is accepted by every Python version.
        print_error(sys.exc_info()[1], parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))