Cumulative debian patches 2.1.1-17.2 (circa debian-specific stuff)

By Matthias Urlichs and debian project maintainers/contributors
This commit is contained in:
Mike Kazantsev 2013-01-19 09:12:02 +06:00 committed by fraggod@sacrilege
parent 24b6391eb3
commit 7f948bae0e
17 changed files with 3354 additions and 864 deletions

31
doc/yapps2.haux Normal file
View file

@ -0,0 +1,31 @@
\@addtocsec{htoc}{1}{0}{\@print{1}\quad{}Introduction}
\@addtocsec{htoc}{2}{0}{\@print{2}\quad{}Examples}
\@addtocsec{htoc}{3}{1}{\@print{2.1}\quad{}Introduction to Grammars}
\@addtocsec{htoc}{4}{1}{\@print{2.2}\quad{}Lisp Expressions}
\@addtocsec{htoc}{5}{1}{\@print{2.3}\quad{}Calculator}
\@addtocsec{htoc}{6}{1}{\@print{2.4}\quad{}Calculator with Memory}
\@addtocsec{htoc}{7}{0}{\@print{3}\quad{}Grammars}
\@addtocsec{htoc}{8}{1}{\@print{3.1}\quad{}Left Factoring}
\newlabel{sec:Left-Factoring}{{3.1}{X}}
\@addtocsec{htoc}{9}{1}{\@print{3.2}\quad{}Left Recursion}
\@addtocsec{htoc}{10}{1}{\@print{3.3}\quad{}Ambiguous Grammars}
\newlabel{sec:Ambiguous-Grammars}{{3.3}{X}}
\@addtocsec{htoc}{11}{0}{\@print{4}\quad{}Customization}
\@addtocsec{htoc}{12}{1}{\@print{4.1}\quad{}Customizing Parsers}
\@addtocsec{htoc}{13}{1}{\@print{4.2}\quad{}Customizing Scanners}
\@addtocsec{htoc}{14}{0}{\@print{5}\quad{}Parser Mechanics}
\@addtocsec{htoc}{15}{1}{\@print{5.1}\quad{}Parser Objects}
\newlabel{sec:Parser-Objects}{{5.1}{X}}
\@addtocsec{htoc}{16}{1}{\@print{5.2}\quad{}Context Sensitive Scanner}
\@addtocsec{htoc}{17}{1}{\@print{5.3}\quad{}Internal Variables}
\@addtocsec{htoc}{18}{1}{\@print{5.4}\quad{}Pre- and Post-Parser Code}
\@addtocsec{htoc}{19}{1}{\@print{5.5}\quad{}Representation of Grammars}
\@addtocsec{htoc}{20}{0}{\@print{A}\quad{}Grammar for Parsers}
\@addtocsec{htoc}{21}{0}{\@print{B}\quad{}Upgrading}
\@addtocsec{htoc}{22}{0}{\@print{C}\quad{}Troubleshooting}
\@addtocsec{htoc}{23}{0}{\@print{D}\quad{}History}
\@addtocsec{htoc}{24}{0}{\@print{E}\quad{}Debian Extensions}
\newlabel{sec:debian}{{E}{X}}
\@addtocsec{htoc}{25}{0}{\@print{F}\quad{}Future Extensions}
\newlabel{sec:future}{{F}{X}}
\@addtocsec{htoc}{26}{0}{\@print{G}\quad{}References}

1206
doc/yapps2.html Normal file

File diff suppressed because it is too large Load diff

36
doc/yapps2.htoc Normal file
View file

@ -0,0 +1,36 @@
\begin{tocenv}
\tocitem \@locref{htoc1}{\begin{@norefs}\@print{1}\quad{}Introduction\end{@norefs}}
\tocitem \@locref{htoc2}{\begin{@norefs}\@print{2}\quad{}Examples\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc3}{\begin{@norefs}\@print{2.1}\quad{}Introduction to Grammars\end{@norefs}}
\tocitem \@locref{htoc4}{\begin{@norefs}\@print{2.2}\quad{}Lisp Expressions\end{@norefs}}
\tocitem \@locref{htoc5}{\begin{@norefs}\@print{2.3}\quad{}Calculator\end{@norefs}}
\tocitem \@locref{htoc6}{\begin{@norefs}\@print{2.4}\quad{}Calculator with Memory\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc7}{\begin{@norefs}\@print{3}\quad{}Grammars\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc8}{\begin{@norefs}\@print{3.1}\quad{}Left Factoring\end{@norefs}}
\tocitem \@locref{htoc9}{\begin{@norefs}\@print{3.2}\quad{}Left Recursion\end{@norefs}}
\tocitem \@locref{htoc10}{\begin{@norefs}\@print{3.3}\quad{}Ambiguous Grammars\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc11}{\begin{@norefs}\@print{4}\quad{}Customization\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc12}{\begin{@norefs}\@print{4.1}\quad{}Customizing Parsers\end{@norefs}}
\tocitem \@locref{htoc13}{\begin{@norefs}\@print{4.2}\quad{}Customizing Scanners\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc14}{\begin{@norefs}\@print{5}\quad{}Parser Mechanics\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc15}{\begin{@norefs}\@print{5.1}\quad{}Parser Objects\end{@norefs}}
\tocitem \@locref{htoc16}{\begin{@norefs}\@print{5.2}\quad{}Context Sensitive Scanner\end{@norefs}}
\tocitem \@locref{htoc17}{\begin{@norefs}\@print{5.3}\quad{}Internal Variables\end{@norefs}}
\tocitem \@locref{htoc18}{\begin{@norefs}\@print{5.4}\quad{}Pre- and Post-Parser Code\end{@norefs}}
\tocitem \@locref{htoc19}{\begin{@norefs}\@print{5.5}\quad{}Representation of Grammars\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc20}{\begin{@norefs}\@print{A}\quad{}Grammar for Parsers\end{@norefs}}
\tocitem \@locref{htoc21}{\begin{@norefs}\@print{B}\quad{}Upgrading\end{@norefs}}
\tocitem \@locref{htoc22}{\begin{@norefs}\@print{C}\quad{}Troubleshooting\end{@norefs}}
\tocitem \@locref{htoc23}{\begin{@norefs}\@print{D}\quad{}History\end{@norefs}}
\tocitem \@locref{htoc24}{\begin{@norefs}\@print{E}\quad{}Debian Extensions\end{@norefs}}
\tocitem \@locref{htoc25}{\begin{@norefs}\@print{F}\quad{}Future Extensions\end{@norefs}}
\tocitem \@locref{htoc26}{\begin{@norefs}\@print{G}\quad{}References\end{@norefs}}
\end{tocenv}

1246
doc/yapps2.tex Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,27 +1,29 @@
# This calculator on ints supports the usual (numbers, add, subtract,
# multiply, divide), global variables (stored in a global variable in
# Python), and local variables (stored in an attribute passed around
# in the grammar).
globalvars = {} # We will store the calculator's variables here
def lookup(map, name):
for x, v in map:
for x,v in map:
if x == name: return v
if not globalvars.has_key(name):
print 'Undefined (defaulting to 0):', name
if not globalvars.has_key(name): print 'Undefined (defaulting to 0):', name
return globalvars.get(name, 0)
# Action attached to the `ignore: "[?]"` pattern of the Calculator parser:
# prompts with ">?> " and pushes the line typed by the user onto the scanner.
# The second argument (ign) is not used here.
def stack_input(scanner,ign):
"""Grab more input"""
scanner.stack_input(raw_input(">?> "))
%%
parser Calculator:
ignore: "[ \r\t\n]+"
ignore: "[?]" {{ stack_input }}
token END: "$"
token NUM: "[0-9]+"
token VAR: "[a-zA-Z_]+"
# Each line can either be an expression or an assignment statement
rule goal: expr<<[]>> END {{ return expr }}
rule goal: expr<<[]>> END {{ print '=', expr }}
{{ return expr }}
| "set" VAR expr<<[]>> END {{ globalvars[VAR] = expr }}
{{ print VAR, '=', expr }}
{{ return expr }}
# An expression is the sum and difference of factors
@ -40,33 +42,11 @@ parser Calculator:
rule term<<V>>:
NUM {{ return int(NUM) }}
| VAR {{ return lookup(V, VAR) }}
| "\\(" expr<<V>> "\\)" {{ return expr }}
| "\\(" expr "\\)" {{ return expr }}
| "let" VAR "=" expr<<V>> {{ V = [(VAR, expr)] + V }}
"in" expr<<V>> {{ return expr }}
%%
tests = [
('3', 3),
('2 * 3', 6),
('set x 5', 5),
('x', 5),
('x / 2', 2),
('x - 1', 4),
('let x = 3 in x + 1', 4),
('x', 5),
('x + let x = 3 in x', 8),
('(let x = 3 in x) + x', 8),
]
# Self-test: feeds every (expression, expected value) pair from `tests`
# through parse('goal', ...) and asserts the result; globalvars.clear()
# resets the calculator's global variables.
def run_tests():
for (expr, value) in tests:
assert parse('goal', expr) == value, 'Test parse(%r) == %s failed' % (expr, value)
globalvars.clear()
if __name__=='__main__':
run_tests()
print 'Welcome to the calculator sample for Yapps 2.'
print ' Enter either "<expression>" or "set <var> <expression>",'
print ' or just press return to exit. An expression can have'
@ -79,6 +59,6 @@ if __name__=='__main__':
try: s = raw_input('>>> ')
except EOFError: break
if not s.strip(): break
print '=', parse('goal', s)
parse('goal', s)
print 'Bye.'

View file

@ -1,5 +1,3 @@
# This parser can parse a simple subset of Lisp's syntax.
parser Lisp:
ignore: r'\s+'
token NUM: r'[0-9]+'

44
examples/notes Normal file
View file

@ -0,0 +1,44 @@
Hints
#####
Some additional hints for your edification.
Author: Matthias Urlichs <smurf@debian.org>
How to process C preprocessor codes:
====================================
I have added rudimentary include handling to the parser.
However, if you want to do anything fancy, like for instance whatever
the C preprocessor does, things get more complicated. Fortunately,
there's already a nice tool to handle C preprocessing -- CPP itself.
If you want to report errors correctly in that situation, do this:
def set_line(s,m):
"""Fixup the scanner's idea of the current line"""
# m is the match of the '# <number> "<name>"' ignore pattern used in the
# parser below: group(1) is the number, group(2) the quoted name.
s.filename = m.group(2)
line = int(m.group(1))
# Store the difference between the reported line and the scanner's own line.
s.del_line = line - s.line
%%
parser whatever:
ignore: '^#\s*(\d+)\s*"([^"\n]+)"\s*\n' {{ set_line }}
ignore: '^#.*\n'
[...]
%%
# Example driver: every command-line argument is piped through "cpp" and the
# resulting stream is handed to the scanner as its input file.
if __name__=='__main__':
import sys,os
for a in sys.argv[1:]:
f=os.popen("cpp "+repr(a),"r")
P = whatever(whateverScanner("", filename=a, file=f))
try: P.goal()
except runtime.SyntaxError, e:
# Print the error through the runtime helper, then exit with status 1.
runtime.print_error(e, P._scanner)
sys.exit(1)
f.close()

View file

@ -1,234 +0,0 @@
#!/usr/bin/python2
#
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Parser for Yapps grammars.
This file defines the grammar of Yapps grammars. Naturally, it is
implemented in Yapps. The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
"""
import sys, re
import parsetree
######################################################################
def cleanup_choice(rule, lst):
    """Collapse a list of alternatives into the simplest parse-tree node.

    rule -- name of the rule the alternatives belong to
    lst  -- list of already-built clause nodes

    Returns an empty parsetree.Sequence for no alternatives, the single
    node itself for one alternative, and a parsetree.Choice otherwise.
    """
    # The empty case used to call the undefined bare name `Sequence` with a
    # list argument; build it the same way cleanup_sequence does instead.
    if len(lst) == 0: return parsetree.Sequence(rule)
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))
def cleanup_sequence(rule, lst):
    """Return the lone element of lst, or wrap lst in a parsetree.Sequence.

    A one-element list is unwrapped; any other length (including zero)
    becomes a Sequence node for the given rule.
    """
    if len(lst) != 1:
        return parsetree.Sequence(rule, *lst)
    return lst[0]
def resolve_name(rule, tokens, id, args):
    """Turn an identifier used in a rule into a Terminal or NonTerminal node.

    rule   -- name of the rule being parsed
    tokens -- list of token definitions; the first item of each is its name
    id     -- the identifier found in the grammar
    args   -- parameter text attached to the identifier ('' when absent)
    """
    token_names = [entry[0] for entry in tokens]
    if id not in token_names:
        # Not a declared token, so assume it names another rule.
        return parsetree.NonTerminal(rule, id, args)
    if args:
        # Tokens take no parameters; warn and drop them.
        print('Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args))
    return parsetree.Terminal(rule, id)
# Begin -- grammar generated by Yapps
import sys, re
import yappsrt
# Scanner for Yapps grammar files (part of the code generated by Yapps).
class ParserDescriptionScanner(yappsrt.Scanner):
# Each entry pairs a terminal name with its compiled regular expression.
patterns = [
('"rule"', re.compile('rule')),
('"ignore"', re.compile('ignore')),
('"token"', re.compile('token')),
('"option"', re.compile('option')),
('":"', re.compile(':')),
('"parser"', re.compile('parser')),
('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
('#.*?\r?\n', re.compile('#.*?\r?\n')),
('EOF', re.compile('$')),
('ATTR', re.compile('<<.+?>>')),
('STMT', re.compile('{{.+?}}')),
('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
('LP', re.compile('\\(')),
('RP', re.compile('\\)')),
('LB', re.compile('\\[')),
('RB', re.compile('\\]')),
('OR', re.compile('[|]')),
('STAR', re.compile('[*]')),
('PLUS', re.compile('[+]')),
('QUEST', re.compile('[?]')),
('COLON', re.compile(':')),
]
# None is passed for the patterns argument (the class-level table above is
# used); the list names the ignored terminals: whitespace and '#' comments.
def __init__(self, str):
yappsrt.Scanner.__init__(self,None,['[ \t\r\n]+', '#.*?\r?\n'],str)
# Recursive-descent parser for Yapps grammar files (generated by Yapps).
# Each method below parses one rule of the grammar and receives the context
# of the calling rule as _parent.
class ParserDescription(yappsrt.Parser):
Context = yappsrt.Context
# Line number so far: 1 plus the number of newlines in the scanned input.
def LINENO(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'LINENO', [])
return 1 + self._scanner.get_input_scanned().count('\n')
# "parser" ID ":" Options Tokens Rules EOF -> parsetree.Generator
def Parser(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'Parser', [])
self._scan('"parser"')
ID = self._scan('ID')
self._scan('":"')
Options = self.Options(_context)
Tokens = self.Tokens(_context)
Rules = self.Rules(Tokens, _context)
EOF = self._scan('EOF')
return parsetree.Generator(ID,Options,Tokens,Rules)
# Zero or more `option: <string>`; returns a dict mapping each string to 1.
def Options(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'Options', [])
opt = {}
while self._peek() == '"option"':
self._scan('"option"')
self._scan('":"')
Str = self.Str(_context)
opt[Str] = 1
if self._peek() not in ['"option"', '"token"', '"ignore"', 'EOF', '"rule"']:
raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"option"', '"token"', '"ignore"', 'EOF', '"rule"']))
return opt
# Zero or more token/ignore declarations; returns a list of (ID, Str)
# pairs, with ignore patterns recorded under the name '#ignore'.
def Tokens(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'Tokens', [])
tok = []
while self._peek() in ['"token"', '"ignore"']:
_token = self._peek()
if _token == '"token"':
self._scan('"token"')
ID = self._scan('ID')
self._scan('":"')
Str = self.Str(_context)
tok.append( (ID,Str) )
elif _token == '"ignore"':
self._scan('"ignore"')
self._scan('":"')
Str = self.Str(_context)
tok.append( ('#ignore',Str) )
else:
raise yappsrt.SyntaxError(_token[0], 'Could not match Tokens')
if self._peek() not in ['"token"', '"ignore"', 'EOF', '"rule"']:
raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"token"', '"ignore"', 'EOF', '"rule"']))
return tok
# Zero or more `rule ID OptParam ":" ClauseA`; returns a list of
# (ID, OptParam, ClauseA) tuples.
def Rules(self, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'Rules', [tokens])
rul = []
while self._peek() == '"rule"':
LINENO = self.LINENO(_context)
self._scan('"rule"')
ID = self._scan('ID')
OptParam = self.OptParam(_context)
self._scan('":"')
ClauseA = self.ClauseA(ID, tokens, _context)
rul.append( (ID, OptParam, ClauseA) )
if self._peek() not in ['"rule"', 'EOF']:
raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"rule"', 'EOF']))
return rul
# One or more ClauseB alternatives separated by OR.
def ClauseA(self, rule, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'ClauseA', [rule, tokens])
ClauseB = self.ClauseB(rule, tokens, _context)
v = [ClauseB]
while self._peek() == 'OR':
OR = self._scan('OR')
ClauseB = self.ClauseB(rule, tokens, _context)
v.append(ClauseB)
if self._peek() not in ['OR', 'RP', 'RB', '"rule"', 'EOF']:
raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['OR', 'RP', 'RB', '"rule"', 'EOF']))
return cleanup_choice(rule, v)
# A (possibly empty) sequence of ClauseC items.
def ClauseB(self, rule, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'ClauseB', [rule, tokens])
v = []
while self._peek() in ['STR', 'ID', 'LP', 'LB', 'STMT']:
ClauseC = self.ClauseC(rule, tokens, _context)
v.append(ClauseC)
if self._peek() not in ['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']:
raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']))
return cleanup_sequence(rule, v)
# A ClauseD optionally followed by PLUS, STAR or QUEST.
def ClauseC(self, rule, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'ClauseC', [rule, tokens])
ClauseD = self.ClauseD(rule, tokens, _context)
_token = self._peek()
if _token == 'PLUS':
PLUS = self._scan('PLUS')
return parsetree.Plus(rule, ClauseD)
elif _token == 'STAR':
STAR = self._scan('STAR')
return parsetree.Star(rule, ClauseD)
elif _token == 'QUEST':
QUEST = self._scan('QUEST')
return parsetree.Option(rule, ClauseD)
elif _token not in ['"ignore"', '"token"', '"option"', '":"', '"parser"', 'ATTR', 'COLON']:
return ClauseD
else:
raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseC')
# An atom: string literal, identifier, ( group ), [ optional group ] or
# a {{ ... }} statement.
def ClauseD(self, rule, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'ClauseD', [rule, tokens])
_token = self._peek()
if _token == 'STR':
STR = self._scan('STR')
# NOTE: eval() turns the quoted literal from the grammar file into its value.
t = (STR, eval(STR,{},{}))
# An inline literal not seen before is put at the front of the token list.
if t not in tokens: tokens.insert( 0, t )
return parsetree.Terminal(rule, STR)
elif _token == 'ID':
ID = self._scan('ID')
OptParam = self.OptParam(_context)
return resolve_name(rule, tokens, ID, OptParam)
elif _token == 'LP':
LP = self._scan('LP')
ClauseA = self.ClauseA(rule, tokens, _context)
RP = self._scan('RP')
return ClauseA
elif _token == 'LB':
LB = self._scan('LB')
ClauseA = self.ClauseA(rule, tokens, _context)
RB = self._scan('RB')
return parsetree.Option(rule, ClauseA)
elif _token == 'STMT':
STMT = self._scan('STMT')
# Strip the surrounding "{{" and "}}".
return parsetree.Eval(rule, STMT[2:-2])
else:
raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseD')
# Returns the text between "<<" and ">>" when an ATTR follows, else ''.
def OptParam(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'OptParam', [])
_token = self._peek()
if _token == 'ATTR':
ATTR = self._scan('ATTR')
return ATTR[2:-2]
elif _token not in ['"ignore"', '"token"', '"option"', '"parser"', 'COLON']:
return ''
else:
raise yappsrt.SyntaxError(_token[0], 'Could not match OptParam')
# Scans one STR token and returns the value of the literal.
def Str(self, _parent=None):
_context = self.Context(_parent, self._scanner, self._pos, 'Str', [])
STR = self._scan('STR')
return eval(STR,{},{})
def parse(rule, text):
    """Parse text starting at the named rule and return the rule's result.

    A scanner is built over text, wrapped in a ParserDescription, and the
    call is routed through yappsrt.wrap_error_reporter.
    """
    scanner = ParserDescriptionScanner(text)
    parser = ParserDescription(scanner)
    return yappsrt.wrap_error_reporter(parser, rule)
# End -- grammar generated by Yapps

42
setup.py Normal file
View file

@ -0,0 +1,42 @@
#!/usr/bin/env python
"""Setup script for 'yapps'"""
from distutils.core import setup
# Short and long descriptions handed to setup() below.
description = "Yet Another Python Parser System"
long_description = \
"""
YAPPS is an easy to use parser generator that is written in Python and
generates Python code. There are several parser generator systems
already available for Python, but this parser has different goals:
Yapps is simple, very easy to use, and produces human-readable parsers.
It is not the fastest or most powerful parser. Yapps is designed to be
used when regular expressions are not enough and other parser systems
are too much: situations where you might otherwise write your own
recursive descent parser.
This package contains several upward-compatible enhancements to the
original YAPPS source:
- Handle stacked input ("include files")
- augmented ignore-able patterns (can parse multi-line C comments correctly)
- better error reporting
- read input incrementally
"""
# Package metadata; only the 'yapps' package is installed.
setup (name = "python-yapps",
version = "2.1.1",
description = description,
long_description = long_description,
author = "Amit J. Patel",
author_email = "amitp@cs.stanford.edu",
maintainer = "Matthias Urlichs",
maintainer_email = "smurf@debian.org",
url = "http://theory.stanford.edu/~amitp/yapps/",
license = 'MIT',
platforms = ['POSIX'],
keywords = ['parsing'],
packages = ['yapps'],
#cmdclass = {'bdist_rpm': MyBDist_RPM},
)

1
yapps/__init__.py Normal file
View file

@ -0,0 +1 @@
# empty

211
yapps/grammar.py Normal file
View file

@ -0,0 +1,211 @@
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Parser for Yapps grammars.
This file defines the grammar of Yapps grammars. Naturally, it is
implemented in Yapps. The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
"""
import sys, re
from yapps import parsetree
######################################################################
def cleanup_choice(rule, lst):
    """Collapse a list of alternatives into the simplest parse-tree node.

    rule -- name of the rule the alternatives belong to
    lst  -- list of already-built clause nodes

    Returns an empty parsetree.Sequence for no alternatives, the single
    node itself for one alternative, and a parsetree.Choice otherwise.
    """
    # The empty case used to call the undefined bare name `Sequence` with a
    # list argument; build it the same way cleanup_sequence does instead.
    if len(lst) == 0: return parsetree.Sequence(rule)
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))
def cleanup_sequence(rule, lst):
    """Return the lone element of lst, or wrap lst in a parsetree.Sequence.

    A one-element list is unwrapped; any other length (including zero)
    becomes a Sequence node for the given rule.
    """
    if len(lst) != 1:
        return parsetree.Sequence(rule, *lst)
    return lst[0]
def resolve_name(rule, tokens, id, args):
    """Turn an identifier used in a rule into a Terminal or NonTerminal node.

    rule   -- name of the rule being parsed
    tokens -- list of token definitions; the first item of each is its name
    id     -- the identifier found in the grammar
    args   -- parameter text attached to the identifier ('' when absent)
    """
    token_names = [entry[0] for entry in tokens]
    if id not in token_names:
        # Not a declared token, so assume it names another rule.
        return parsetree.NonTerminal(rule, id, args)
    if args:
        # Tokens take no parameters; warn and drop them.
        print('Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args))
    return parsetree.Terminal(rule, id)
# Begin -- grammar generated by Yapps
import sys, re
from yapps import runtime
# Scanner for Yapps grammar files (part of the code generated by Yapps).
class ParserDescriptionScanner(runtime.Scanner):
# Each entry pairs a terminal name with its compiled regular expression.
patterns = [
('"rule"', re.compile('rule')),
('"ignore"', re.compile('ignore')),
('"token"', re.compile('token')),
('"option"', re.compile('option')),
('":"', re.compile(':')),
('"parser"', re.compile('parser')),
('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
('#.*?\r?\n', re.compile('#.*?\r?\n')),
('EOF', re.compile('$')),
('ATTR', re.compile('<<.+?>>')),
('STMT', re.compile('{{.+?}}')),
('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
('LP', re.compile('\\(')),
('RP', re.compile('\\)')),
('LB', re.compile('\\[')),
('RB', re.compile('\\]')),
('OR', re.compile('[|]')),
('STAR', re.compile('[*]')),
('PLUS', re.compile('[+]')),
('QUEST', re.compile('[?]')),
('COLON', re.compile(':')),
]
# None is passed for the patterns argument (the class-level table above is
# used); the dict maps each ignored terminal (whitespace, '#' comments) to
# None, i.e. no action. Extra arguments are forwarded to the base class.
def __init__(self, str,*args,**kw):
runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw)
# Recursive-descent parser for Yapps grammar files (generated by Yapps).
# Each method below parses one rule of the grammar and receives the context
# of the calling rule as _parent; every _peek lists the terminals acceptable
# at that point.
class ParserDescription(runtime.Parser):
Context = runtime.Context
# "parser" ID ":" Options Tokens Rules EOF -> parsetree.Generator
def Parser(self, _parent=None):
_context = self.Context(_parent, self._scanner, 'Parser', [])
self._scan('"parser"', context=_context)
ID = self._scan('ID', context=_context)
self._scan('":"', context=_context)
Options = self.Options(_context)
Tokens = self.Tokens(_context)
Rules = self.Rules(Tokens, _context)
EOF = self._scan('EOF', context=_context)
return parsetree.Generator(ID,Options,Tokens,Rules)
# Zero or more `option: <string>`; returns a dict mapping each string to 1.
def Options(self, _parent=None):
_context = self.Context(_parent, self._scanner, 'Options', [])
opt = {}
while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"':
self._scan('"option"', context=_context)
self._scan('":"', context=_context)
Str = self.Str(_context)
opt[Str] = 1
return opt
# Zero or more token/ignore declarations. Tokens are recorded as (ID, Str);
# ignore patterns as ('#ignore', Str), extended with the statement text
# when a {{ ... }} block follows the pattern.
def Tokens(self, _parent=None):
_context = self.Context(_parent, self._scanner, 'Tokens', [])
tok = []
while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']:
_token = self._peek('"token"', '"ignore"', context=_context)
if _token == '"token"':
self._scan('"token"', context=_context)
ID = self._scan('ID', context=_context)
self._scan('":"', context=_context)
Str = self.Str(_context)
tok.append( (ID,Str) )
else: # == '"ignore"'
self._scan('"ignore"', context=_context)
self._scan('":"', context=_context)
Str = self.Str(_context)
ign = ('#ignore',Str)
if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT':
STMT = self._scan('STMT', context=_context)
# Strip the surrounding "{{" and "}}" and keep the statement as a third item.
ign = ign + (STMT[2:-2],)
tok.append( ign )
return tok
# Zero or more `rule ID OptParam ":" ClauseA`; returns a list of
# (ID, OptParam, ClauseA) tuples.
def Rules(self, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, 'Rules', [tokens])
rul = []
while self._peek('"rule"', 'EOF', context=_context) == '"rule"':
self._scan('"rule"', context=_context)
ID = self._scan('ID', context=_context)
OptParam = self.OptParam(_context)
self._scan('":"', context=_context)
ClauseA = self.ClauseA(ID, tokens, _context)
rul.append( (ID, OptParam, ClauseA) )
return rul
# One or more ClauseB alternatives separated by OR.
def ClauseA(self, rule, tokens, _parent=None):
_context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens])
ClauseB = self.ClauseB(rule,tokens, _context)
v = [ClauseB]
while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR':
OR = self._scan('OR', context=_context)
ClauseB = self.ClauseB(rule,tokens, _context)
v.append(ClauseB)
return cleanup_choice(rule,v)
# A (possibly empty) sequence of ClauseC items.
def ClauseB(self, rule,tokens, _parent=None):
_context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens])
v = []
while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']:
ClauseC = self.ClauseC(rule,tokens, _context)
v.append(ClauseC)
return cleanup_sequence(rule, v)
# A ClauseD optionally followed by PLUS, STAR or QUEST.
def ClauseC(self, rule,tokens, _parent=None):
_context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens])
ClauseD = self.ClauseD(rule,tokens, _context)
_token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context)
if _token == 'PLUS':
PLUS = self._scan('PLUS', context=_context)
return parsetree.Plus(rule, ClauseD)
elif _token == 'STAR':
STAR = self._scan('STAR', context=_context)
return parsetree.Star(rule, ClauseD)
elif _token == 'QUEST':
QUEST = self._scan('QUEST', context=_context)
return parsetree.Option(rule, ClauseD)
else:
return ClauseD
# An atom: string literal, identifier, ( group ), [ optional group ] or
# a {{ ... }} statement.
def ClauseD(self, rule,tokens, _parent=None):
_context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens])
_token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context)
if _token == 'STR':
STR = self._scan('STR', context=_context)
# NOTE: eval() turns the quoted literal from the grammar file into its value.
t = (STR, eval(STR,{},{}))
# An inline literal not seen before is put at the front of the token list.
if t not in tokens: tokens.insert( 0, t )
return parsetree.Terminal(rule, STR)
elif _token == 'ID':
ID = self._scan('ID', context=_context)
OptParam = self.OptParam(_context)
return resolve_name(rule,tokens, ID, OptParam)
elif _token == 'LP':
LP = self._scan('LP', context=_context)
ClauseA = self.ClauseA(rule,tokens, _context)
RP = self._scan('RP', context=_context)
return ClauseA
elif _token == 'LB':
LB = self._scan('LB', context=_context)
ClauseA = self.ClauseA(rule,tokens, _context)
RB = self._scan('RB', context=_context)
return parsetree.Option(rule, ClauseA)
else: # == 'STMT'
STMT = self._scan('STMT', context=_context)
# Strip the surrounding "{{" and "}}".
return parsetree.Eval(rule, STMT[2:-2])
# Returns the text between "<<" and ">>" when an ATTR follows, else ''.
def OptParam(self, _parent=None):
_context = self.Context(_parent, self._scanner, 'OptParam', [])
if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR':
ATTR = self._scan('ATTR', context=_context)
return ATTR[2:-2]
return ''
# Scans one STR token and returns the value of the literal.
def Str(self, _parent=None):
_context = self.Context(_parent, self._scanner, 'Str', [])
STR = self._scan('STR', context=_context)
return eval(STR,{},{})
def parse(rule, text):
    """Parse text starting at the named rule and return the rule's result.

    A scanner is built over text, wrapped in a ParserDescription, and the
    call is routed through runtime.wrap_error_reporter.
    """
    scanner = ParserDescriptionScanner(text)
    parser = ParserDescription(scanner)
    return runtime.wrap_error_reporter(parser, rule)
# End -- grammar generated by Yapps

View file

@ -1,5 +1,3 @@
#!/usr/bin/python2
#
# parsetree.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
@ -35,12 +33,18 @@ class Generator:
self.postparser = None
self.tokens = {} # Map from tokens to regexps
self.ignore = [] # List of token names to ignore in parsing
self.ignore = {} # List of token names to ignore in parsing, map to statements
self.terminals = [] # List of token names (to maintain ordering)
for n, t in tokens:
for t in tokens:
if len(t) == 3:
n,t,s = t
else:
n,t = t
s = None
if n == '#ignore':
n = t
self.ignore.append(n)
self.ignore[n] = s
if n in self.tokens.keys() and self.tokens[n] != t:
print >>sys.stderr, 'Warning: token %s defined more than once.' % n
self.tokens[n] = t
@ -199,7 +203,9 @@ class Generator:
a_set = (repr(a)[1:-1])
if self.equal_set(a, self.non_ignored_tokens()): a_set = ''
if self.has_option('context-insensitive-scanner'): a_set = ''
return 'self._peek(%s)' % a_set
if a_set: a_set += ","
return 'self._peek(%s context=_context)' % a_set
def peek_test(self, a, b):
"""Generate a call to test whether the next token (which could be any of
@ -252,31 +258,39 @@ class Generator:
print ' FOLLOW:', ', '.join(top.follow)
for x in top.get_children(): queue.append(x)
def repr_ignore(self):
    """Render self.ignore as the source text of a dict literal.

    Keys are written with repr(); a value of None is written as the
    literal None, any other value is inserted verbatim as source text.
    Every entry, including the last, is followed by a comma.
    """
    entries = []
    for pattern, action in self.ignore.iteritems():
        if action is None:
            action = repr(action)
        entries.append("%s:%s," % (repr(pattern), action))
    return "{" + "".join(entries) + "}"
def generate_output(self):
self.calculate()
self.write(self.preparser)
self.write("# Begin -- grammar generated by Yapps\n")
self.write("import sys, re\n")
self.write("import yappsrt\n")
self.write("from yapps import runtime\n")
self.write("\n")
self.write("class ", self.name, "Scanner(yappsrt.Scanner):\n")
self.write("class ", self.name, "Scanner(runtime.Scanner):\n")
self.write(" patterns = [\n")
for p in self.terminals:
self.write(" (%s, re.compile(%s)),\n" % (
repr(p), repr(self.tokens[p])))
self.write(" ]\n")
self.write(" def __init__(self, str):\n")
self.write(" yappsrt.Scanner.__init__(self,None,%s,str)\n" %
repr(self.ignore))
self.write(" def __init__(self, str,*args,**kw):\n")
self.write(" runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" %
self.repr_ignore())
self.write("\n")
self.write("class ", self.name, "(yappsrt.Parser):\n")
self.write(INDENT, "Context = yappsrt.Context\n")
self.write("class ", self.name, "(runtime.Parser):\n")
self.write(INDENT, "Context = runtime.Context\n")
for r in self.goals:
self.write(INDENT, "def ", r, "(self")
if self.params[r]: self.write(", ", self.params[r])
self.write(", _parent=None):\n")
self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, self._pos, %s, [%s])\n" %
self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" %
(repr(r), self.params.get(r, '')))
self.rules[r].output(self, INDENT+INDENT)
self.write("\n")
@ -284,7 +298,7 @@ class Generator:
self.write("\n")
self.write("def parse(rule, text):\n")
self.write(" P = ", self.name, "(", self.name, "Scanner(text))\n")
self.write(" return yappsrt.wrap_error_reporter(P, rule)\n")
self.write(" return runtime.wrap_error_reporter(P, rule)\n")
self.write("\n")
if self.postparser is not None:
self.write("# End -- grammar generated by Yapps\n")
@ -355,7 +369,7 @@ class Terminal(Node):
gen.write(indent)
if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token):
gen.write(self.token, " = ")
gen.write("self._scan(%s)\n" % repr(self.token))
gen.write("self._scan(%s, context=_context)\n" % repr(self.token))
class Eval(Node):
"""This class stores evaluation nodes, from {{ ... }} clauses."""
@ -547,7 +561,7 @@ class Choice(Node):
if tokens_unseen:
gen.write(indent, "else:\n")
gen.write(indent, INDENT, "raise yappsrt.SyntaxError(_token[0], ")
gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ")
gen.write("'Could not match ", self.rule, "')\n")
class Wrapper(Node):
@ -586,6 +600,13 @@ class Option(Wrapper):
gen.write(indent, "if %s:\n" %
gen.peek_test(self.first, self.child.first))
self.child.output(gen, indent+INDENT)
if gen.has_option('context-insensitive-scanner'):
gen.write(indent, "if %s:\n" %
gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
repr(self.first))
class Plus(Wrapper):
"""This class represents a 1-or-more repetition clause of the form A+"""
@ -613,6 +634,13 @@ class Plus(Wrapper):
gen.write(indent+INDENT, "if %s: break\n" %
gen.not_peek_test(union, self.child.first))
if gen.has_option('context-insensitive-scanner'):
gen.write(indent, "if %s:\n" %
gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
repr(self.first))
class Star(Wrapper):
"""This class represents a 0-or-more repetition clause of the form A*"""
def setup(self, gen):
@ -637,9 +665,9 @@ class Star(Wrapper):
self.child.output(gen, indent+INDENT)
# TODO: need to generate tests like this in lots of rules
# TODO: do we need to do this only when it's a context-insensitive scanner?
gen.write(indent, "if %s:\n" %
gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
gen.write(indent+INDENT, "raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
repr(self.first))
if gen.has_option('context-insensitive-scanner'):
gen.write(indent, "if %s:\n" %
gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
repr(self.first))

442
yapps/runtime.py Normal file
View file

@ -0,0 +1,442 @@
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Run time libraries needed to run parsers generated by Yapps.
This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.
"""
import sys, re
MIN_WINDOW=4096
# File lookup window
class SyntaxError(Exception):
    """Exception raised when the parser meets a token it cannot accept.

    Attributes:
      pos     -- where the error was found (None when unknown)
      msg     -- human-readable description, "Bad Token" by default
      context -- the parse context that was active, if any
    """

    def __init__(self, pos=None, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
        self.context = context

    def __str__(self):
        # Without a position there is nothing useful to add to the name.
        if self.pos:
            return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)
        return 'SyntaxError'
class NoMoreTokens(Exception):
    """Signals that the parser asked for a token but none were left."""
class Token(object):
    """A single scanned token.

    Holds the terminal name (``type``), the matched text (``value``) and,
    optionally, a ``(filename, line, column)`` position.
    """

    def __init__(self, type,value, pos=None):
        """Store the token's type, matched text and position."""
        self.type, self.value, self.pos = type, value, pos

    def __repr__(self):
        """Render as ``<TYPE: 'text' @ file:line.col>``; position parts are optional."""
        pieces = ['<%s: %s' % (self.type, repr(self.value))]
        if self.pos:
            pieces.append(" @ ")
            # Empty file names and zero line numbers are left out.
            if self.pos[0]:
                pieces.append("%s:" % self.pos[0])
            if self.pos[1]:
                pieces.append("%d" % self.pos[1])
            if self.pos[2] is not None:
                pieces.append(".%d" % self.pos[2])
        pieces.append(">")
        return "".join(pieces)
# Running counter used by Scanner.__init__ to invent a "<f.N>" name for each
# scanner that is created without a filename.
in_name=0
class Scanner(object):
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token() method scans for the next token.  It
    takes a restrict set that limits the set of tokens it is allowed to
    return.  In context sensitive mode, this restrict set guides the
    scanner.  In context insensitive mode, there is no restriction (the
    set is always the full set of tokens).

    A scanner can temporarily read from a second input (see
    stack_input()); while such a nested scanner exists in ``self.stack``
    most methods delegate to it.
    """

    def __init__(self, patterns, ignore, input="",
                 file=None, filename=None, stacked=False):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : {terminal: None or callable, ...}
          input : string holding the (initial) text to scan
          file : optional file-like object that more text is read from
          filename : name used in reported positions; a "<f.N>" name is
              generated when it is empty
          stacked : true if this scanner was created by stack_input()

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.

        A non-None 'ignore' value is called with the scanner and the
        to-be-ignored match object; this can be used for include file
        or comment handling.
        """
        global in_name
        if not filename:
            filename = "<f.%d>" % in_name
            in_name += 1

        self.input = input
        self.ignore = ignore
        self.file = file
        self.filename = filename
        self.pos = 0        # index of the next unread character in self.input
        self.del_pos = 0    # characters already dropped from the buffer start
        self.line = 1       # current line, relative to the start of the buffer
        self.del_line = 0   # lines that precede the start of the buffer
        self.col = 0
        self.tokens = []    # the most recently returned tokens (at most 10)
        self.stack = None   # nested scanner created by stack_input(), if any
        self.stacked = stacked

        self.last_read_token = None
        self.last_token = None   # one-token lookahead filled in by peek()
        self.last_types = None   # restriction set used for that lookahead

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def stack_input(self, input="", file=None, filename=None):
        """Temporarily parse from a second file."""
        # Already reading from somewhere else: Go on top of that, please.
        if self.stack:
            # autogenerate a recursion-level-identifying filename
            if not filename:
                filename = 1
            else:
                try:
                    filename += 1
                except TypeError:
                    pass
            # now pass off to the include file
            self.stack.stack_input(input, file, filename)
        else:
            # A numeric "filename" is a nesting level: turn it into a name.
            try:
                filename += 0
            except TypeError:
                pass
            else:
                filename = "<str_%d>" % filename

            # Note that the pattern+ignore are added by the generated
            # scanner code
            self.stack = self.__class__(input, file, filename, stacked=True)

    def get_pos(self):
        """Return a file/line/char tuple."""
        if self.stack: return self.stack.get_pos()

        return (self.filename, self.line+self.del_line, self.col)

    def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
        """Print the input line that contains position 'pos',
        along with a second line with a single caret (^) under it.

        'pos' is a (filename, line, column) tuple, 'length' the length
        of the token found there and 'out' the stream written to.
        """
        if out is None:
            # "print >>None" meant standard output; keep accepting that.
            out = sys.stdout

        fname, line, p = pos
        if fname != self.filename:
            if self.stack:
                return self.stack.print_line_with_pointer(pos, length=length, out=out)
            out.write("(%s: not in input buffer)\n" % fname)
            return

        text = self.input
        p += length-1 # starts at pos 1

        origline = line
        line -= self.del_line   # make the line number buffer-relative
        spos = 0
        if line > 0:
            while 1:
                line = line - 1
                try:
                    cr = text.index("\n", spos)
                except ValueError:
                    if line:
                        # The wanted line lies beyond the buffered text.
                        text = ""
                    else:
                        # The wanted line is the last one and has no
                        # trailing newline: it runs to the end of the buffer.
                        text = text[spos:]
                    break
                if line == 0:
                    text = text[spos:cr]
                    break
                spos = cr+1
        else:
            out.write("(%s:%d not in input buffer)\n" % (fname, origline))
            return

        # Now try printing part of the line
        text = text[max(p-80, 0):p+80]
        p = p - max(p-80, 0)

        # Strip to the left
        i = text[:p].rfind('\n')
        j = text[:p].rfind('\r')
        if i < 0 or (0 <= j < i): i = j
        if 0 <= i < p:
            p = p - i - 1
            text = text[i+1:]

        # Strip to the right
        i = text.find('\n', p)
        j = text.find('\r', p)
        if i < 0 or (0 <= j < i): i = j
        if i >= 0:
            text = text[:i]

        # Now shorten the text
        while len(text) > 70 and p > 60:
            # Cut off 10 chars
            text = "..." + text[10:]
            p = p - 7

        # Now print the string, along with an indicator
        out.write('>  ' + text + '\n')
        out.write('>  ' + ' '*p + '^' + '\n')

    def grab_input(self):
        """Get more input if possible."""
        if not self.file: return
        if len(self.input) - self.pos >= MIN_WINDOW: return

        data = self.file.read(MIN_WINDOW)
        if data is None or data == "":
            # End of input: stop reading, and never append None below.
            self.file = None
            data = ""

        # Drop bytes from the start, if necessary.
        if self.pos > 2*MIN_WINDOW:
            dropped_lines = self.input[:MIN_WINDOW].count("\n")
            self.del_pos += MIN_WINDOW
            self.del_line += dropped_lines
            # self.line is buffer-relative (get_pos() adds del_line back),
            # so it has to shrink by what del_line just gained.
            self.line -= dropped_lines
            self.pos -= MIN_WINDOW
            self.input = self.input[MIN_WINDOW:] + data
        else:
            self.input = self.input + data

    def getchar(self):
        """Return the next character."""
        self.grab_input()

        c = self.input[self.pos]
        self.pos += 1
        return c

    def token(self, restrict, context=None):
        """Scan for another token.

        'restrict' is the set of acceptable terminals (empty means any);
        ignored terminals are always allowed.  Raises SyntaxError when
        nothing matches, and StopIteration when a stacked scanner
        reaches the end of its input.
        """
        while 1:
            if self.stack:
                try:
                    return self.stack.token(restrict, context)
                except StopIteration:
                    # The nested input is exhausted: resume our own.
                    self.stack = None

            # Keep looking for a token, ignoring any in self.ignore
            self.grab_input()

            # special handling for end-of-file
            if self.stacked and self.pos==len(self.input):
                raise StopIteration

            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            best_m = None
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and m.end()-m.start() > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = m.end()-m.start()
                    best_m = m

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.get_pos(), msg, context=context)

            ignore = best_pat in self.ignore
            value = self.input[self.pos:self.pos+best_match]
            if not ignore:
                # Build the token first: its position is where it starts.
                tok = Token(type=best_pat, value=value, pos=self.get_pos())

            # Advance past the match and update line/column bookkeeping.
            self.pos += best_match
            npos = value.rfind("\n")
            if npos > -1:
                self.col = best_match-npos
                self.line += value.count("\n")
            else:
                self.col += best_match

            # If we found something that isn't to be ignored, return it
            if not ignore:
                if len(self.tokens) >= 10:
                    del self.tokens[0]
                self.tokens.append(tok)
                self.last_read_token = tok
                return tok
            else:
                # Give the terminal's ignore hook (if any) a chance to act.
                ignore = self.ignore[best_pat]
                if ignore:
                    ignore(self, best_m)

    def peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        context = kw.get("context", None)
        if self.last_token is None:
            self.last_types = types
            self.last_token = self.token(types, context)
        elif self.last_types:
            # The lookahead token was scanned under a restriction; a later
            # peek may only ask for terminals from that same set.
            for t in types:
                if t not in self.last_types:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.last_token.type

    def scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        context = kw.get("context", None)

        if self.last_token is None:
            tok = self.token([type], context)
        else:
            if self.last_types and type not in self.last_types:
                raise NotImplementedError("Unimplemented: restriction set changed")

            # Consume the token that peek() already fetched.
            tok = self.last_token
            self.last_token = None
        if tok.type != type:
            if not self.last_types: self.last_types=[]
            raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
        return tok.value
class Parser(object):
    """Common base of the parsers Yapps generates.

    It only holds the scanner and forwards lookahead and consume
    requests to it.
    """

    def __init__(self, scanner):
        self._scanner = scanner

    def _stack(self, input="",file=None,filename=None):
        """Make the scanner read from another input for a while."""
        scanner = self._scanner
        scanner.stack_input(input, file, filename)
        self._tok = None

    def _peek(self, *types, **kw):
        """Return the type of the lookahead token.

        Any positional arguments name the token types that are allowed;
        keyword arguments are handed to the scanner unchanged.
        """
        scanner = self._scanner
        return scanner.peek(*types, **kw)

    def _scan(self, type, **kw):
        """Consume a token of the given type and return its text."""
        scanner = self._scanner
        return scanner.scan(type, **kw)
class Context(object):
    """One frame of the parser's call stack.

    Each rule invocation builds a Context pointing at the Context of
    the rule that called it; the resulting chain is used when errors
    are reported.
    """

    def __init__(self, parent, scanner, rule, args=()):
        """Record a rule invocation.

        Args:
          parent: Context of the calling rule, or None
          scanner: Scanner object
          rule: string (name of the rule)
          args: tuple listing parameters to the rule
        """
        self.parent, self.scanner = parent, scanner
        self.rule, self.args = rule, args

        # Remember the last token read by the innermost stacked scanner.
        innermost = scanner
        while innermost.stack:
            innermost = innermost.stack
        self.token = innermost.last_read_token

    def __str__(self):
        """Return the chain of rule names, outermost first, joined by ' > '."""
        prefix = ''
        if self.parent:
            prefix = str(self.parent) + ' > '
        return prefix + self.rule
def print_error(err, scanner, max_ctx=None):
    """Write a human-readable report for a syntax error to stderr.

    The report consists of a "file:line:col: message" header, the
    offending input line, and then one "while parsing ..." entry per
    frame of the error's context chain.  A non-zero 'max_ctx' caps the
    number of context frames shown.
    """
    # NOTE: this function assumes 80 columns :-(
    # Use the error's own position, or the scanner's if it has none.
    where = err.pos
    if not where:
        where = scanner.get_pos()

    file_name, line_number, column_number = where
    sys.stderr.write('%s:%d:%d: %s\n' % (file_name, line_number, column_number, err.msg))
    scanner.print_line_with_pointer(where)

    last_token = None
    frame = err.context
    while frame:
        sys.stderr.write('while parsing %s%s:\n' % (frame.rule, tuple(frame.args)))
        # A frame without a token of its own reuses the previous frame's.
        if frame.token:
            last_token = frame.token
        if last_token:
            scanner.print_line_with_pointer(last_token.pos, length=len(last_token.value))
        frame = frame.parent
        if max_ctx:
            max_ctx -= 1
            if not max_ctx:
                break
def wrap_error_reporter(parser, rule, *args,**kw):
    """Run the parser method named 'rule' and report parse failures.

    Extra positional and keyword arguments are passed to the rule.
    Returns the rule's result; when a SyntaxError or NoMoreTokens is
    raised, a message is written to stderr and None is returned.
    """
    try:
        return getattr(parser, rule)(*args, **kw)
    except SyntaxError:
        print_error(sys.exc_info()[1], parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))
    return None

View file

@ -1,4 +1,4 @@
#!/usr/bin/python2
#!/usr/bin/python
#
# Yapps 2 - yet another python parser system
@ -13,7 +13,7 @@
import sys, re
import yappsrt, parsetree
from yapps import runtime, parsetree
def generate(inputfilename, outputfilename='', dump=0, **flags):
"""Generate a grammar, given an input filename (X.g)
@ -40,11 +40,12 @@ def generate(inputfilename, outputfilename='', dump=0, **flags):
if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):]
# Create the parser and scanner and parse the text
scanner = grammar.ParserDescriptionScanner(s)
if preparser: scanner.first_line_number = 1 + preparser.count('\n')
scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename)
if preparser: scanner.del_line += preparser.count('\n')
parser = grammar.ParserDescription(scanner)
t = yappsrt.wrap_error_reporter(parser, 'Parser')
if t is None: return # Failure
t = runtime.wrap_error_reporter(parser, 'Parser')
if t is None: return 1 # Failure
if preparser is not None: t.preparser = preparser
if postparser is not None: t.postparser = postparser
@ -63,6 +64,7 @@ def generate(inputfilename, outputfilename='', dump=0, **flags):
else:
t.output = open(outputfilename, 'w')
t.generate_output()
return 0
if __name__ == '__main__':
import doctest
@ -106,6 +108,6 @@ if __name__ == '__main__':
if use_devel_grammar:
import yapps_grammar as grammar
else:
import grammar
from yapps import grammar
generate(*tuple(args), **flags)
sys.exit(generate(*tuple(args), **flags))

View file

@ -1,7 +1,6 @@
#!/usr/bin/python2
#
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
@ -19,7 +18,7 @@ by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
"""
import sys, re
import parsetree
from yapps import parsetree
######################################################################
def cleanup_choice(rule, lst):
@ -33,9 +32,9 @@ def cleanup_sequence(rule, lst):
def resolve_name(rule, tokens, id, args):
if id in [x[0] for x in tokens]:
# It's a token
if args:
print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
# It's a token
if args:
print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
return parsetree.Terminal(rule, id)
else:
# It's a name, so assume it's a nonterminal
@ -43,7 +42,6 @@ def resolve_name(rule, tokens, id, args):
%%
parser ParserDescription:
option: "context-insensitive-scanner"
ignore: "[ \t\r\n]+"
ignore: "#.*?\r?\n"
@ -62,9 +60,6 @@ parser ParserDescription:
token QUEST: '[?]'
token COLON: ':'
rule LINENO: # This is a pseudotoken. It matches nothing; returns the line number
{{ return 1 + self._scanner.get_input_scanned().count('\n') }}
rule Parser: "parser" ID ":"
Options
Tokens
@ -79,48 +74,48 @@ parser ParserDescription:
rule Tokens: {{ tok = [] }}
(
"token" ID ":" Str {{ tok.append( (ID,Str) ) }}
| "ignore" ":" Str {{ tok.append( ('#ignore',Str) ) }}
| "ignore"
":" Str {{ ign = ('#ignore',Str) }}
( STMT {{ ign = ign + (STMT[2:-2],) }} )?
{{ tok.append( ign ) }}
)*
{{ return tok }}
rule Rules<<tokens>>:
{{ rul = [] }}
( LINENO
(
"rule" ID OptParam ":" ClauseA<<ID, tokens>>
# TODO: save LINENO somewhere?
{{ rul.append( (ID, OptParam, ClauseA) ) }}
)*
{{ return rul }}
rule ClauseA<<rule, tokens>>:
ClauseB<<rule, tokens>>
ClauseB<<rule,tokens>>
{{ v = [ClauseB] }}
( OR ClauseB<<rule, tokens>> {{ v.append(ClauseB) }} )*
{{ return cleanup_choice(rule, v) }}
( OR ClauseB<<rule,tokens>> {{ v.append(ClauseB) }} )*
{{ return cleanup_choice(rule,v) }}
rule ClauseB<<rule, tokens>>:
rule ClauseB<<rule,tokens>>:
{{ v = [] }}
( ClauseC<<rule, tokens>> {{ v.append(ClauseC) }} )*
( ClauseC<<rule,tokens>> {{ v.append(ClauseC) }} )*
{{ return cleanup_sequence(rule, v) }}
rule ClauseC<<rule, tokens>>:
ClauseD<<rule, tokens>>
rule ClauseC<<rule,tokens>>:
ClauseD<<rule,tokens>>
( PLUS {{ return parsetree.Plus(rule, ClauseD) }}
| STAR {{ return parsetree.Star(rule, ClauseD) }}
| QUEST {{ return parsetree.Option(rule, ClauseD) }}
| {{ return ClauseD }} )
rule ClauseD<<rule, tokens>>:
rule ClauseD<<rule,tokens>>:
STR {{ t = (STR, eval(STR,{},{})) }}
{{ if t not in tokens: tokens.insert( 0, t ) }}
{{ return parsetree.Terminal(rule, STR) }}
| ID OptParam {{ return resolve_name(rule, tokens, ID, OptParam) }}
| LP ClauseA<<rule, tokens>> RP {{ return ClauseA }}
| LB ClauseA<<rule, tokens>> RB {{ return parsetree.Option(rule, ClauseA) }}
| ID OptParam {{ return resolve_name(rule,tokens, ID, OptParam) }}
| LP ClauseA<<rule,tokens>> RP {{ return ClauseA }}
| LB ClauseA<<rule,tokens>> RB {{ return parsetree.Option(rule, ClauseA) }}
| STMT {{ return parsetree.Eval(rule, STMT[2:-2]) }}
rule OptParam:
ATTR {{ return ATTR[2:-2] }}
| {{ return '' }}
rule OptParam: [ ATTR {{ return ATTR[2:-2] }} ] {{ return '' }}
rule Str: STR {{ return eval(STR,{},{}) }}
%%

View file

@ -1,234 +0,0 @@
#!/usr/bin/python2
#
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Parser for Yapps grammars.
This file defines the grammar of Yapps grammars. Naturally, it is
implemented in Yapps. The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
"""
import sys, re
import parsetree
######################################################################
def cleanup_choice(rule, lst):
    """Collapse a list of alternatives into the simplest parse-tree node.

    Returns an empty parsetree.Sequence for no alternatives, the sole
    alternative itself when there is exactly one, and a
    parsetree.Choice over all of them otherwise.
    """
    # The empty case used an unqualified 'Sequence' (a NameError in this
    # module) and passed the empty list as if it were a child node.
    if len(lst) == 0: return parsetree.Sequence(rule)
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))
def cleanup_sequence(rule, lst):
    """Return the single item of 'lst' as is, else wrap the items in a Sequence."""
    if len(lst) != 1:
        return parsetree.Sequence(rule, *tuple(lst))
    return lst[0]
def resolve_name(rule, tokens, id, args):
    """Turn an identifier used in a rule body into a parse-tree node.

    If 'id' names one of the declared 'tokens' (a list whose entries
    start with the token name) a Terminal is returned and any 'args'
    are dropped with a warning on stdout; otherwise the identifier is
    taken to be a rule name and a NonTerminal carrying 'args' is
    returned.
    """
    token_names = [entry[0] for entry in tokens]
    if id not in token_names:
        # It's a name, so assume it's a nonterminal
        return parsetree.NonTerminal(rule, id, args)

    # It's a token
    if args:
        sys.stdout.write('Warning: ignoring parameters on TOKEN %s<<%s>>\n' % (id, args))
    return parsetree.Terminal(rule, id)
# Begin -- grammar generated by Yapps
import sys, re
import yappsrt
class ParserDescriptionScanner(yappsrt.Scanner):
    """Scanner for Yapps grammar files (this class is generated by Yapps)."""
    # (terminal name, compiled regex) pairs.  The order is significant: the
    # scanner keeps the longest match and, between matches of equal length,
    # the entry that comes first in this list.
    patterns = [
        ('"rule"', re.compile('rule')),
        ('"ignore"', re.compile('ignore')),
        ('"token"', re.compile('token')),
        ('"option"', re.compile('option')),
        ('":"', re.compile(':')),
        ('"parser"', re.compile('parser')),
        ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
        ('#.*?\r?\n', re.compile('#.*?\r?\n')),
        ('EOF', re.compile('$')),
        ('ATTR', re.compile('<<.+?>>')),
        ('STMT', re.compile('{{.+?}}')),
        ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
        ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
        ('LP', re.compile('\\(')),
        ('RP', re.compile('\\)')),
        ('LB', re.compile('\\[')),
        ('RB', re.compile('\\]')),
        ('OR', re.compile('[|]')),
        ('STAR', re.compile('[*]')),
        ('PLUS', re.compile('[+]')),
        ('QUEST', re.compile('[?]')),
        ('COLON', re.compile(':')),
    ]
    def __init__(self, str):
        """Create a scanner over the grammar text 'str'."""
        # patterns=None makes the base class use the precompiled class-level
        # table above; the two listed terminals (whitespace and '#' comments)
        # are the ones to be ignored.
        yappsrt.Scanner.__init__(self,None,['[ \t\r\n]+', '#.*?\r?\n'],str)
class ParserDescription(yappsrt.Parser):
    """Recursive-descent parser for Yapps grammar files (generated by Yapps).

    Every method parses one grammar rule.  Each takes the caller's
    Context as '_parent' and creates its own Context for error reports.
    """
    Context = yappsrt.Context
    def LINENO(self, _parent=None):
        """Pseudo-rule: consume nothing and return 1 + the number of newlines scanned so far."""
        _context = self.Context(_parent, self._scanner, self._pos, 'LINENO', [])
        return 1 + self._scanner.get_input_scanned().count('\n')

    def Parser(self, _parent=None):
        """Parse '"parser" ID ":" Options Tokens Rules EOF' and return a parsetree.Generator."""
        _context = self.Context(_parent, self._scanner, self._pos, 'Parser', [])
        self._scan('"parser"')
        ID = self._scan('ID')
        self._scan('":"')
        Options = self.Options(_context)
        Tokens = self.Tokens(_context)
        Rules = self.Rules(Tokens, _context)
        EOF = self._scan('EOF')
        return parsetree.Generator(ID,Options,Tokens,Rules)

    def Options(self, _parent=None):
        """Parse zero or more '"option" ":" Str' lines; return a dict mapping each option string to 1."""
        _context = self.Context(_parent, self._scanner, self._pos, 'Options', [])
        opt = {}
        while self._peek() == '"option"':
            self._scan('"option"')
            self._scan('":"')
            Str = self.Str(_context)
            opt[Str] = 1
        # After the loop the next token must be one that may follow Options.
        if self._peek() not in ['"option"', '"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"option"', '"token"', '"ignore"', 'EOF', '"rule"']))
        return opt

    def Tokens(self, _parent=None):
        """Parse the token and ignore declarations.

        Returns a list of (ID, regex string) pairs; ignore declarations
        are recorded under the name '#ignore'.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'Tokens', [])
        tok = []
        while self._peek() in ['"token"', '"ignore"']:
            _token = self._peek()
            if _token == '"token"':
                self._scan('"token"')
                ID = self._scan('ID')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( (ID,Str) )
            elif _token == '"ignore"':
                self._scan('"ignore"')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( ('#ignore',Str) )
            else:
                # NOTE(review): _token is the token type string returned by
                # _peek(), so _token[0] is its first character, which does
                # not look like a valid charpos -- confirm against the generator.
                raise yappsrt.SyntaxError(_token[0], 'Could not match Tokens')
        if self._peek() not in ['"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"token"', '"ignore"', 'EOF', '"rule"']))
        return tok

    def Rules(self, tokens, _parent=None):
        """Parse zero or more '"rule" ID OptParam ":" ClauseA' definitions.

        Returns a list of (ID, OptParam, ClauseA) tuples.  'tokens' is
        passed on to ClauseA.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'Rules', [tokens])
        rul = []
        while self._peek() == '"rule"':
            # The line number is computed here but not stored anywhere.
            LINENO = self.LINENO(_context)
            self._scan('"rule"')
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            self._scan('":"')
            ClauseA = self.ClauseA(ID, tokens, _context)
            rul.append( (ID, OptParam, ClauseA) )
        if self._peek() not in ['"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"rule"', 'EOF']))
        return rul

    def ClauseA(self, rule, tokens, _parent=None):
        """Parse ClauseB alternatives separated by OR; return cleanup_choice() of them."""
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseA', [rule, tokens])
        ClauseB = self.ClauseB(rule, tokens, _context)
        v = [ClauseB]
        while self._peek() == 'OR':
            OR = self._scan('OR')
            ClauseB = self.ClauseB(rule, tokens, _context)
            v.append(ClauseB)
        if self._peek() not in ['OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_choice(rule, v)

    def ClauseB(self, rule, tokens, _parent=None):
        """Parse a (possibly empty) run of ClauseC items; return cleanup_sequence() of them."""
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseB', [rule, tokens])
        v = []
        while self._peek() in ['STR', 'ID', 'LP', 'LB', 'STMT']:
            ClauseC = self.ClauseC(rule, tokens, _context)
            v.append(ClauseC)
        if self._peek() not in ['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_sequence(rule, v)

    def ClauseC(self, rule, tokens, _parent=None):
        """Parse a ClauseD with an optional PLUS, STAR or QUEST suffix.

        Returns parsetree.Plus, parsetree.Star or parsetree.Option
        around the ClauseD, or the bare ClauseD when no suffix follows.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseC', [rule, tokens])
        ClauseD = self.ClauseD(rule, tokens, _context)
        _token = self._peek()
        if _token == 'PLUS':
            PLUS = self._scan('PLUS')
            return parsetree.Plus(rule, ClauseD)
        elif _token == 'STAR':
            STAR = self._scan('STAR')
            return parsetree.Star(rule, ClauseD)
        elif _token == 'QUEST':
            QUEST = self._scan('QUEST')
            return parsetree.Option(rule, ClauseD)
        elif _token not in ['"ignore"', '"token"', '"option"', '":"', '"parser"', 'ATTR', 'COLON']:
            return ClauseD
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseC')

    def ClauseD(self, rule, tokens, _parent=None):
        """Parse one primary item of a rule body.

        A string literal becomes a Terminal (and is inserted at the
        front of 'tokens' if it is not there yet), an identifier is
        resolved by resolve_name(), '( ... )' yields the inner ClauseA,
        '[ ... ]' yields an Option around it, and a STMT yields an Eval
        of the text with its first and last two characters removed.
        """
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseD', [rule, tokens])
        _token = self._peek()
        if _token == 'STR':
            STR = self._scan('STR')
            # The literal is evaluated with empty namespaces to get its value.
            t = (STR, eval(STR,{},{}))
            if t not in tokens: tokens.insert( 0, t )
            return parsetree.Terminal(rule, STR)
        elif _token == 'ID':
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            return resolve_name(rule, tokens, ID, OptParam)
        elif _token == 'LP':
            LP = self._scan('LP')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RP = self._scan('RP')
            return ClauseA
        elif _token == 'LB':
            LB = self._scan('LB')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RB = self._scan('RB')
            return parsetree.Option(rule, ClauseA)
        elif _token == 'STMT':
            STMT = self._scan('STMT')
            return parsetree.Eval(rule, STMT[2:-2])
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseD')

    def OptParam(self, _parent=None):
        """Parse an optional ATTR; return its text without the surrounding '<<' and '>>', or ''."""
        _context = self.Context(_parent, self._scanner, self._pos, 'OptParam', [])
        _token = self._peek()
        if _token == 'ATTR':
            ATTR = self._scan('ATTR')
            return ATTR[2:-2]
        elif _token not in ['"ignore"', '"token"', '"option"', '"parser"', 'COLON']:
            return ''
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match OptParam')

    def Str(self, _parent=None):
        """Parse a STR token and return the value of the string literal."""
        _context = self.Context(_parent, self._scanner, self._pos, 'Str', [])
        STR = self._scan('STR')
        return eval(STR,{},{})
def parse(rule, text):
    """Parse 'text' starting at the rule named 'rule', reporting errors on stderr."""
    scanner = ParserDescriptionScanner(text)
    P = ParserDescription(scanner)
    return yappsrt.wrap_error_reporter(P, rule)
# End -- grammar generated by Yapps

View file

@ -1,304 +0,0 @@
#
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Run time libraries needed to run parsers generated by Yapps.
This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.
"""
# TODO: it should be possible to embed yappsrt into the generated
# grammar to make a standalone module.
import sys, re
class SyntaxError(Exception):
    """Raised when the scanner or parser meets a token it cannot accept.

    Attributes:
        charpos: character offset of the error, negative when unknown.
        msg: human-readable description of the problem.
        context: the parse context active when the error occurred, or None.
    """

    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.charpos, self.msg, self.context = charpos, msg, context

    def __str__(self):
        # A negative offset means "unknown": report the bare class name.
        if self.charpos >= 0:
            return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg)
        return 'SyntaxError'
class NoMoreTokens(Exception):
    """Signals that the parser asked for a token but none were left."""
class Scanner:
    """Yapps scanner.

    Tokens are scanned on demand and kept in self.tokens as
    (begin char pos, end char pos, token name, matched text) tuples;
    token(i) returns the i-th of them.  Each scan may be limited by a
    restrict set: in context sensitive mode this set guides the
    scanner, in context insensitive mode there is no restriction.
    """

    def __init__(self, patterns, ignore, input):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : [terminal,...]
          input : string

        With patterns=None the subclass is expected to provide
        self.patterns : [(terminal, compiled regex), ...] itself.  The
        parameter takes uncompiled regexes, the attribute compiled ones.
        """
        self.input = input
        self.ignore = ignore
        self.pos = 0
        self.first_line_number = 1
        self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
        self.restrictions = []

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def get_token_pos(self):
        """Return how many tokens have been scanned so far."""
        return len(self.tokens)

    def get_char_pos(self):
        """Return the current character offset in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Return the start offset of token 'i' (default: the last one scanned)."""
        if self.pos == 0:
            return 0
        if i is None:
            i = -1
        return self.tokens[i][0]

    def get_line_number(self):
        """Return the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        scanned = self.get_input_scanned()
        return self.first_line_number + scanned.count('\n')

    def get_column_number(self):
        """Return the column number of the current position in the input text."""
        scanned = self.get_input_scanned()
        last_newline = scanned.rfind('\n') # may be -1, but that's okay in this case
        return len(scanned) - (last_newline+1)

    def get_input_scanned(self):
        """Return the part of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Return the part of the input that has not been tokenized yet."""
        return self.input[self.pos:]

    def token(self, i, restrict=None):
        """Get the i'th token in the input.

        If i is one past the end, then scan for another token.

        Args:

        restrict : [token, ...] or None; if restrict is None, then any
        token is allowed.  You may call token(i) more than once.
        However, the restrict set may never be larger than what was
        passed in on the first call to token(i).
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i >= len(self.tokens):
            raise NoMoreTokens()

        # Make sure the restriction is more restricted.  This
        # invariant is needed to avoid ruining tokenization at
        # position i+1 and higher.
        if restrict and self.restrictions[i]:
            allowed = self.restrictions[i]
            for r in restrict:
                if r not in allowed:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.tokens[i]

    def __repr__(self):
        """Show the last 10 tokens that have been scanned in."""
        lines = []
        for t in self.tokens[-10:]:
            lines.append('\n (@%s) %s = %s' % (t[0],t[2],repr(t[3])))
        return ''.join(lines)

    def scan(self, restrict):
        """Scan one more token, appending it to self.tokens and the
        restriction it was scanned under to self.restrictions."""
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_pat = '(error)'
            best_match = -1
            for name, regexp in self.patterns:
                # Skip terminals that are neither requested nor ignorable.
                if restrict and name not in restrict and name not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if not m:
                    continue
                width = len(m.group(0))
                if width > best_match:
                    # We got a match that's better than the previous one
                    best_pat, best_match = name, width

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                else:
                    msg = 'Bad Token'
                raise SyntaxError(self.pos, msg)

            start = self.pos
            end = start + best_match
            self.pos = end
            if best_pat in self.ignore:
                # This token should be ignored ..
                continue

            # Create a token with this data
            token = (start, end, best_pat, self.input[start:end])
            # Only add this token if it's not in the list
            # (to prevent looping)
            if not self.tokens or token != self.tokens[-1]:
                self.tokens.append(token)
                self.restrictions.append(restrict)
            return
class Parser:
    """Common base of the parsers Yapps generates.

    Keeps the scanner and the index of the next token to consume.
    """

    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Return the type of the lookahead token.

        Any arguments name the token types that are allowed.
        """
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Consume a token of the given type and return its text."""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] == type:
            self._pos = 1 + self._pos
            return tok[3]
        raise SyntaxError(tok[0], 'Trying to find '+type+' :'+ ' ,'.join(self._scanner.restrictions[self._pos]))
class Context:
    """One frame of the parser's call stack.

    Each rule invocation builds a Context pointing at the Context of
    the rule that called it; the resulting chain is used when errors
    are reported.
    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Record a rule invocation.

        Args:
          parent: Context of the calling rule, or None
          scanner: Scanner object
          tokenpos: integer (scanner token position)
          rule: string (name of the rule)
          args: tuple listing parameters to the rule
        """
        self.parent, self.scanner = parent, scanner
        self.tokenpos = tokenpos
        self.rule, self.args = rule, args

    def __str__(self):
        """Return the chain of rule names, outermost first, joined by ' > '."""
        prefix = ''
        if self.parent:
            prefix = str(self.parent) + ' > '
        return prefix + self.rule
def print_line_with_pointer(text, p):
"""Print the line of 'text' that includes position 'p',
along with a second line with a single caret (^) at position p"""
# TODO: separate out the logic for determining the line/character
# location from the logic for determining how to display an
# 80-column line to stderr.
# Now try printing part of the line
text = text[max(p-80, 0):p+80]
p = p - max(p-80, 0)
# Strip to the left
i = text[:p].rfind('\n')
j = text[:p].rfind('\r')
if i < 0 or (0 <= j < i): i = j
if 0 <= i < p:
p = p - i - 1
text = text[i+1:]
# Strip to the right
i = text.find('\n', p)
j = text.find('\r', p)
if i < 0 or (0 <= j < i): i = j
if i >= 0:
text = text[:i]
# Now shorten the text
while len(text) > 70 and p > 60:
# Cut off 10 chars
text = "..." + text[10:]
p = p - 7
# Now print the string, along with an indicator
print >>sys.stderr, '> ',text
print >>sys.stderr, '> ',' '*p + '^'
def print_error(input, err, scanner):
    """Write a human-readable report for a syntax error to stderr.

    The report is a "line:col: message" header followed either by the
    input line at err.charpos (when the error has no context) or by
    one "while parsing ..." entry, with its input line, per frame of
    the error's context chain.
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    header = '%d:%d: %s' % (scanner.get_line_number(),
                            scanner.get_column_number(),
                            err.msg)
    sys.stderr.write(header + '\n')

    frame = err.context
    if not frame:
        print_line_with_pointer(input, err.charpos)
    while frame:
        # TODO: add line number
        sys.stderr.write('while parsing %s%s:\n' % (frame.rule, tuple(frame.args)))
        print_line_with_pointer(input, frame.scanner.get_prev_char_pos(frame.tokenpos))
        frame = frame.parent
def wrap_error_reporter(parser, rule):
    """Run the parser method named 'rule' and report parse failures.

    Returns the rule's result; when a SyntaxError or NoMoreTokens is
    raised, a message is written to stderr and None is returned.
    """
    try:
        return getattr(parser, rule)()
    except SyntaxError:
        scanner = parser._scanner
        print_error(scanner.input, sys.exc_info()[1], scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))
    return None