Cumulative Debian patches for 2.1.1-17.2 (minus the Debian-specific packaging)
By Matthias Urlichs and Debian project maintainers/contributors
parent 24b6391eb3
commit 7f948bae0e
17 changed files with 3354 additions and 864 deletions
31  doc/yapps2.haux  Normal file
@@ -0,0 +1,31 @@
\@addtocsec{htoc}{1}{0}{\@print{1}\quad{}Introduction}
\@addtocsec{htoc}{2}{0}{\@print{2}\quad{}Examples}
\@addtocsec{htoc}{3}{1}{\@print{2.1}\quad{}Introduction to Grammars}
\@addtocsec{htoc}{4}{1}{\@print{2.2}\quad{}Lisp Expressions}
\@addtocsec{htoc}{5}{1}{\@print{2.3}\quad{}Calculator}
\@addtocsec{htoc}{6}{1}{\@print{2.4}\quad{}Calculator with Memory}
\@addtocsec{htoc}{7}{0}{\@print{3}\quad{}Grammars}
\@addtocsec{htoc}{8}{1}{\@print{3.1}\quad{}Left Factoring}
\newlabel{sec:Left-Factoring}{{3.1}{X}}
\@addtocsec{htoc}{9}{1}{\@print{3.2}\quad{}Left Recursion}
\@addtocsec{htoc}{10}{1}{\@print{3.3}\quad{}Ambiguous Grammars}
\newlabel{sec:Ambiguous-Grammars}{{3.3}{X}}
\@addtocsec{htoc}{11}{0}{\@print{4}\quad{}Customization}
\@addtocsec{htoc}{12}{1}{\@print{4.1}\quad{}Customizing Parsers}
\@addtocsec{htoc}{13}{1}{\@print{4.2}\quad{}Customizing Scanners}
\@addtocsec{htoc}{14}{0}{\@print{5}\quad{}Parser Mechanics}
\@addtocsec{htoc}{15}{1}{\@print{5.1}\quad{}Parser Objects}
\newlabel{sec:Parser-Objects}{{5.1}{X}}
\@addtocsec{htoc}{16}{1}{\@print{5.2}\quad{}Context Sensitive Scanner}
\@addtocsec{htoc}{17}{1}{\@print{5.3}\quad{}Internal Variables}
\@addtocsec{htoc}{18}{1}{\@print{5.4}\quad{}Pre- and Post-Parser Code}
\@addtocsec{htoc}{19}{1}{\@print{5.5}\quad{}Representation of Grammars}
\@addtocsec{htoc}{20}{0}{\@print{A}\quad{}Grammar for Parsers}
\@addtocsec{htoc}{21}{0}{\@print{B}\quad{}Upgrading}
\@addtocsec{htoc}{22}{0}{\@print{C}\quad{}Troubleshooting}
\@addtocsec{htoc}{23}{0}{\@print{D}\quad{}History}
\@addtocsec{htoc}{24}{0}{\@print{E}\quad{}Debian Extensions}
\newlabel{sec:debian}{{E}{X}}
\@addtocsec{htoc}{25}{0}{\@print{F}\quad{}Future Extensions}
\newlabel{sec:future}{{F}{X}}
\@addtocsec{htoc}{26}{0}{\@print{G}\quad{}References}
1206  doc/yapps2.html  Normal file
File diff suppressed because it is too large.
36  doc/yapps2.htoc  Normal file
@@ -0,0 +1,36 @@
\begin{tocenv}
\tocitem \@locref{htoc1}{\begin{@norefs}\@print{1}\quad{}Introduction\end{@norefs}}
\tocitem \@locref{htoc2}{\begin{@norefs}\@print{2}\quad{}Examples\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc3}{\begin{@norefs}\@print{2.1}\quad{}Introduction to Grammars\end{@norefs}}
\tocitem \@locref{htoc4}{\begin{@norefs}\@print{2.2}\quad{}Lisp Expressions\end{@norefs}}
\tocitem \@locref{htoc5}{\begin{@norefs}\@print{2.3}\quad{}Calculator\end{@norefs}}
\tocitem \@locref{htoc6}{\begin{@norefs}\@print{2.4}\quad{}Calculator with Memory\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc7}{\begin{@norefs}\@print{3}\quad{}Grammars\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc8}{\begin{@norefs}\@print{3.1}\quad{}Left Factoring\end{@norefs}}
\tocitem \@locref{htoc9}{\begin{@norefs}\@print{3.2}\quad{}Left Recursion\end{@norefs}}
\tocitem \@locref{htoc10}{\begin{@norefs}\@print{3.3}\quad{}Ambiguous Grammars\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc11}{\begin{@norefs}\@print{4}\quad{}Customization\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc12}{\begin{@norefs}\@print{4.1}\quad{}Customizing Parsers\end{@norefs}}
\tocitem \@locref{htoc13}{\begin{@norefs}\@print{4.2}\quad{}Customizing Scanners\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc14}{\begin{@norefs}\@print{5}\quad{}Parser Mechanics\end{@norefs}}
\begin{tocenv}
\tocitem \@locref{htoc15}{\begin{@norefs}\@print{5.1}\quad{}Parser Objects\end{@norefs}}
\tocitem \@locref{htoc16}{\begin{@norefs}\@print{5.2}\quad{}Context Sensitive Scanner\end{@norefs}}
\tocitem \@locref{htoc17}{\begin{@norefs}\@print{5.3}\quad{}Internal Variables\end{@norefs}}
\tocitem \@locref{htoc18}{\begin{@norefs}\@print{5.4}\quad{}Pre- and Post-Parser Code\end{@norefs}}
\tocitem \@locref{htoc19}{\begin{@norefs}\@print{5.5}\quad{}Representation of Grammars\end{@norefs}}
\end{tocenv}
\tocitem \@locref{htoc20}{\begin{@norefs}\@print{A}\quad{}Grammar for Parsers\end{@norefs}}
\tocitem \@locref{htoc21}{\begin{@norefs}\@print{B}\quad{}Upgrading\end{@norefs}}
\tocitem \@locref{htoc22}{\begin{@norefs}\@print{C}\quad{}Troubleshooting\end{@norefs}}
\tocitem \@locref{htoc23}{\begin{@norefs}\@print{D}\quad{}History\end{@norefs}}
\tocitem \@locref{htoc24}{\begin{@norefs}\@print{E}\quad{}Debian Extensions\end{@norefs}}
\tocitem \@locref{htoc25}{\begin{@norefs}\@print{F}\quad{}Future Extensions\end{@norefs}}
\tocitem \@locref{htoc26}{\begin{@norefs}\@print{G}\quad{}References\end{@norefs}}
\end{tocenv}
1246  doc/yapps2.tex  Normal file
File diff suppressed because it is too large.
@@ -1,27 +1,29 @@
# This calculator on ints supports the usual (numbers, add, subtract,
# multiply, divide), global variables (stored in a global variable in
# Python), and local variables (stored in an attribute passed around
# in the grammar).

globalvars = {}       # We will store the calculator's variables here

def lookup(map, name):
    for x, v in map:
    for x,v in map:
        if x == name: return v
    if not globalvars.has_key(name):
        print 'Undefined (defaulting to 0):', name
    if not globalvars.has_key(name): print 'Undefined (defaulting to 0):', name
    return globalvars.get(name, 0)

def stack_input(scanner,ign):
    """Grab more input"""
    scanner.stack_input(raw_input(">?> "))

%%
parser Calculator:
    ignore: "[ \r\t\n]+"
    ignore: "[?]" {{ stack_input }}

    token END: "$"
    token NUM: "[0-9]+"
    token VAR: "[a-zA-Z_]+"

    # Each line can either be an expression or an assignment statement
    rule goal: expr<<[]>> END          {{ return expr }}
    rule goal: expr<<[]>> END          {{ print '=', expr }}
                                       {{ return expr }}
        | "set" VAR expr<<[]>> END     {{ globalvars[VAR] = expr }}
                                       {{ print VAR, '=', expr }}
                                       {{ return expr }}

    # An expression is the sum and difference of factors

@@ -40,33 +42,11 @@ parser Calculator:
    rule term<<V>>:
        NUM                            {{ return int(NUM) }}
        | VAR                          {{ return lookup(V, VAR) }}
        | "\\(" expr<<V>> "\\)"        {{ return expr }}
        | "\\(" expr "\\)"             {{ return expr }}
        | "let" VAR "=" expr<<V>>      {{ V = [(VAR, expr)] + V }}
          "in" expr<<V>>               {{ return expr }}
%%

tests = [
    ('3', 3),
    ('2 * 3', 6),
    ('set x 5', 5),
    ('x', 5),
    ('x / 2', 2),
    ('x - 1', 4),
    ('let x = 3 in x + 1', 4),
    ('x', 5),
    ('x + let x = 3 in x', 8),
    ('(let x = 3 in x) + x', 8),
]

def run_tests():
    for (expr, value) in tests:
        assert parse('goal', expr) == value, 'Test parse(%r) == %s failed' % (expr, value)
    globalvars.clear()


if __name__=='__main__':
    run_tests()

    print 'Welcome to the calculator sample for Yapps 2.'
    print '  Enter either "<expression>" or "set <var> <expression>",'
    print '  or just press return to exit.  An expression can have'

@@ -79,6 +59,6 @@ if __name__=='__main__':
        try: s = raw_input('>>> ')
        except EOFError: break
        if not s.strip(): break
        print '=', parse('goal', s)
        parse('goal', s)
    print 'Bye.'
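The hunks above wire the new stacked-input feature into the calculator: '?' is an ignored pattern whose attached {{ stack_input }} statement prompts for, and splices in, additional input. A hedged sketch of driving it, assuming the grammar has been compiled to a hypothetical calculator.py module with the updated yapps2 (module name is an assumption, not part of the commit):

    from calculator import parse

    # While scanning '1 + ?', the '?' token is skipped as ignored, but its
    # statement calls scanner.stack_input(raw_input(">?> ")), so whatever
    # you type at the ">?> " prompt is scanned before '1 +' is completed.
    parse('goal', '1 + ?')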
@@ -1,5 +1,3 @@
# This parser can parse a simple subset of Lisp's syntax.

parser Lisp:
    ignore: r'\s+'
    token NUM: r'[0-9]+'
44  examples/notes  Normal file
@@ -0,0 +1,44 @@
Hints
#####

Some additional hints for your edification.

Author: Matthias Urlichs <smurf@debian.org>

How to process C preprocessor code:
====================================

I have added rudimentary include handling to the parser.

However, if you want to do anything fancy, like for instance whatever
the C preprocessor does, things get more complicated. Fortunately,
there's already a nice tool to handle C preprocessing -- CPP itself.

If you want to report errors correctly in that situation, do this:

def set_line(s,m):
    """Fixup the scanner's idea of the current line"""
    s.filename = m.group(2)
    line = int(m.group(1))
    s.del_line = line - s.line

%%
parser whatever:
    ignore: '^#\s*(\d+)\s*"([^"\n]+)"\s*\n' {{ set_line }}
    ignore: '^#.*\n'

    [...]
%%
if __name__=='__main__':
    import sys,os
    for a in sys.argv[1:]:
        f=os.popen("cpp "+repr(a),"r")

        P = whatever(whateverScanner("", filename=a, file=f))
        try: P.goal()
        except runtime.SyntaxError, e:
            runtime.print_error(e, P._scanner)
            sys.exit(1)

        f.close()
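For reference, the line markers that the first ignore pattern is written to match look like this in typical cpp output (a hedged example; exact marker flags vary by compiler):

    # 1 "foo.c"

Here m.group(1) is the line number and m.group(2) the file name that set_line() copies into the scanner; any other preprocessor directive falls through to the second, plain '^#.*\n' ignore pattern.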
234  grammar.py
@@ -1,234 +0,0 @@
#!/usr/bin/python2
#
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Parser for Yapps grammars.

This file defines the grammar of Yapps grammars.  Naturally, it is
implemented in Yapps.  The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g.  (Holy circularity, Batman!)

"""

import sys, re
import parsetree

######################################################################
def cleanup_choice(rule, lst):
    if len(lst) == 0: return Sequence(rule, [])
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))

def cleanup_sequence(rule, lst):
    if len(lst) == 1: return lst[0]
    return parsetree.Sequence(rule, *tuple(lst))

def resolve_name(rule, tokens, id, args):
    if id in [x[0] for x in tokens]:
        # It's a token
        if args:
            print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
        return parsetree.Terminal(rule, id)
    else:
        # It's a name, so assume it's a nonterminal
        return parsetree.NonTerminal(rule, id, args)


# Begin -- grammar generated by Yapps
import sys, re
import yappsrt

class ParserDescriptionScanner(yappsrt.Scanner):
    patterns = [
        ('"rule"', re.compile('rule')),
        ('"ignore"', re.compile('ignore')),
        ('"token"', re.compile('token')),
        ('"option"', re.compile('option')),
        ('":"', re.compile(':')),
        ('"parser"', re.compile('parser')),
        ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
        ('#.*?\r?\n', re.compile('#.*?\r?\n')),
        ('EOF', re.compile('$')),
        ('ATTR', re.compile('<<.+?>>')),
        ('STMT', re.compile('{{.+?}}')),
        ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
        ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
        ('LP', re.compile('\\(')),
        ('RP', re.compile('\\)')),
        ('LB', re.compile('\\[')),
        ('RB', re.compile('\\]')),
        ('OR', re.compile('[|]')),
        ('STAR', re.compile('[*]')),
        ('PLUS', re.compile('[+]')),
        ('QUEST', re.compile('[?]')),
        ('COLON', re.compile(':')),
    ]
    def __init__(self, str):
        yappsrt.Scanner.__init__(self,None,['[ \t\r\n]+', '#.*?\r?\n'],str)

class ParserDescription(yappsrt.Parser):
    Context = yappsrt.Context
    def LINENO(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'LINENO', [])
        return 1 + self._scanner.get_input_scanned().count('\n')

    def Parser(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Parser', [])
        self._scan('"parser"')
        ID = self._scan('ID')
        self._scan('":"')
        Options = self.Options(_context)
        Tokens = self.Tokens(_context)
        Rules = self.Rules(Tokens, _context)
        EOF = self._scan('EOF')
        return parsetree.Generator(ID,Options,Tokens,Rules)

    def Options(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Options', [])
        opt = {}
        while self._peek() == '"option"':
            self._scan('"option"')
            self._scan('":"')
            Str = self.Str(_context)
            opt[Str] = 1
        if self._peek() not in ['"option"', '"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"option"', '"token"', '"ignore"', 'EOF', '"rule"']))
        return opt

    def Tokens(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Tokens', [])
        tok = []
        while self._peek() in ['"token"', '"ignore"']:
            _token = self._peek()
            if _token == '"token"':
                self._scan('"token"')
                ID = self._scan('ID')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( (ID,Str) )
            elif _token == '"ignore"':
                self._scan('"ignore"')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( ('#ignore',Str) )
            else:
                raise yappsrt.SyntaxError(_token[0], 'Could not match Tokens')
        if self._peek() not in ['"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"token"', '"ignore"', 'EOF', '"rule"']))
        return tok

    def Rules(self, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Rules', [tokens])
        rul = []
        while self._peek() == '"rule"':
            LINENO = self.LINENO(_context)
            self._scan('"rule"')
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            self._scan('":"')
            ClauseA = self.ClauseA(ID, tokens, _context)
            rul.append( (ID, OptParam, ClauseA) )
        if self._peek() not in ['"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"rule"', 'EOF']))
        return rul

    def ClauseA(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseA', [rule, tokens])
        ClauseB = self.ClauseB(rule, tokens, _context)
        v = [ClauseB]
        while self._peek() == 'OR':
            OR = self._scan('OR')
            ClauseB = self.ClauseB(rule, tokens, _context)
            v.append(ClauseB)
        if self._peek() not in ['OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_choice(rule, v)

    def ClauseB(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseB', [rule, tokens])
        v = []
        while self._peek() in ['STR', 'ID', 'LP', 'LB', 'STMT']:
            ClauseC = self.ClauseC(rule, tokens, _context)
            v.append(ClauseC)
        if self._peek() not in ['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_sequence(rule, v)

    def ClauseC(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseC', [rule, tokens])
        ClauseD = self.ClauseD(rule, tokens, _context)
        _token = self._peek()
        if _token == 'PLUS':
            PLUS = self._scan('PLUS')
            return parsetree.Plus(rule, ClauseD)
        elif _token == 'STAR':
            STAR = self._scan('STAR')
            return parsetree.Star(rule, ClauseD)
        elif _token == 'QUEST':
            QUEST = self._scan('QUEST')
            return parsetree.Option(rule, ClauseD)
        elif _token not in ['"ignore"', '"token"', '"option"', '":"', '"parser"', 'ATTR', 'COLON']:
            return ClauseD
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseC')

    def ClauseD(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseD', [rule, tokens])
        _token = self._peek()
        if _token == 'STR':
            STR = self._scan('STR')
            t = (STR, eval(STR,{},{}))
            if t not in tokens: tokens.insert( 0, t )
            return parsetree.Terminal(rule, STR)
        elif _token == 'ID':
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            return resolve_name(rule, tokens, ID, OptParam)
        elif _token == 'LP':
            LP = self._scan('LP')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RP = self._scan('RP')
            return ClauseA
        elif _token == 'LB':
            LB = self._scan('LB')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RB = self._scan('RB')
            return parsetree.Option(rule, ClauseA)
        elif _token == 'STMT':
            STMT = self._scan('STMT')
            return parsetree.Eval(rule, STMT[2:-2])
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseD')

    def OptParam(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'OptParam', [])
        _token = self._peek()
        if _token == 'ATTR':
            ATTR = self._scan('ATTR')
            return ATTR[2:-2]
        elif _token not in ['"ignore"', '"token"', '"option"', '"parser"', 'COLON']:
            return ''
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match OptParam')

    def Str(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Str', [])
        STR = self._scan('STR')
        return eval(STR,{},{})


def parse(rule, text):
    P = ParserDescription(ParserDescriptionScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps
42  setup.py  Normal file
@@ -0,0 +1,42 @@
#!/usr/bin/env python

"""Setup script for 'yapps'"""

from distutils.core import setup

description = "Yet Another Python Parser System"
long_description = \
"""
YAPPS is an easy to use parser generator that is written in Python and
generates Python code. There are several parser generator systems
already available for Python, but this parser has different goals:
Yapps is simple, very easy to use, and produces human-readable parsers.

It is not the fastest or most powerful parser. Yapps is designed to be
used when regular expressions are not enough and other parser systems
are too much: situations where you might otherwise write your own
recursive descent parser.

This package contains several upward-compatible enhancements to the
original YAPPS source:
- Handle stacked input ("include files")
- augmented ignore-able patterns (can parse multi-line C comments correctly)
- better error reporting
- read input incrementally
"""

setup (name = "python-yapps",
       version = "2.1.1",
       description = description,
       long_description = long_description,
       author = "Amit J. Patel",
       author_email = "amitp@cs.stanford.edu",
       maintainer = "Matthias Urlichs",
       maintainer_email = "smurf@debian.org",
       url = "http://theory.stanford.edu/~amitp/yapps/",
       license = 'MIT',
       platforms = ['POSIX'],
       keywords = ['parsing'],
       packages = ['yapps'],
       #cmdclass = {'bdist_rpm': MyBDist_RPM},
       )
1  yapps/__init__.py  Normal file
@@ -0,0 +1 @@
# empty
211  yapps/grammar.py  Normal file
@@ -0,0 +1,211 @@
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Parser for Yapps grammars.

This file defines the grammar of Yapps grammars.  Naturally, it is
implemented in Yapps.  The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g.  (Holy circularity, Batman!)

"""

import sys, re
from yapps import parsetree

######################################################################
def cleanup_choice(rule, lst):
    if len(lst) == 0: return Sequence(rule, [])
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))

def cleanup_sequence(rule, lst):
    if len(lst) == 1: return lst[0]
    return parsetree.Sequence(rule, *tuple(lst))

def resolve_name(rule, tokens, id, args):
    if id in [x[0] for x in tokens]:
        # It's a token
        if args:
            print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
        return parsetree.Terminal(rule, id)
    else:
        # It's a name, so assume it's a nonterminal
        return parsetree.NonTerminal(rule, id, args)


# Begin -- grammar generated by Yapps
import sys, re
from yapps import runtime

class ParserDescriptionScanner(runtime.Scanner):
    patterns = [
        ('"rule"', re.compile('rule')),
        ('"ignore"', re.compile('ignore')),
        ('"token"', re.compile('token')),
        ('"option"', re.compile('option')),
        ('":"', re.compile(':')),
        ('"parser"', re.compile('parser')),
        ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
        ('#.*?\r?\n', re.compile('#.*?\r?\n')),
        ('EOF', re.compile('$')),
        ('ATTR', re.compile('<<.+?>>')),
        ('STMT', re.compile('{{.+?}}')),
        ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
        ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
        ('LP', re.compile('\\(')),
        ('RP', re.compile('\\)')),
        ('LB', re.compile('\\[')),
        ('RB', re.compile('\\]')),
        ('OR', re.compile('[|]')),
        ('STAR', re.compile('[*]')),
        ('PLUS', re.compile('[+]')),
        ('QUEST', re.compile('[?]')),
        ('COLON', re.compile(':')),
    ]
    def __init__(self, str,*args,**kw):
        runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw)

class ParserDescription(runtime.Parser):
    Context = runtime.Context
    def Parser(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Parser', [])
        self._scan('"parser"', context=_context)
        ID = self._scan('ID', context=_context)
        self._scan('":"', context=_context)
        Options = self.Options(_context)
        Tokens = self.Tokens(_context)
        Rules = self.Rules(Tokens, _context)
        EOF = self._scan('EOF', context=_context)
        return parsetree.Generator(ID,Options,Tokens,Rules)

    def Options(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Options', [])
        opt = {}
        while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"':
            self._scan('"option"', context=_context)
            self._scan('":"', context=_context)
            Str = self.Str(_context)
            opt[Str] = 1
        return opt

    def Tokens(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Tokens', [])
        tok = []
        while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']:
            _token = self._peek('"token"', '"ignore"', context=_context)
            if _token == '"token"':
                self._scan('"token"', context=_context)
                ID = self._scan('ID', context=_context)
                self._scan('":"', context=_context)
                Str = self.Str(_context)
                tok.append( (ID,Str) )
            else: # == '"ignore"'
                self._scan('"ignore"', context=_context)
                self._scan('":"', context=_context)
                Str = self.Str(_context)
                ign = ('#ignore',Str)
                if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT':
                    STMT = self._scan('STMT', context=_context)
                    ign = ign + (STMT[2:-2],)
                tok.append( ign )
        return tok

    def Rules(self, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Rules', [tokens])
        rul = []
        while self._peek('"rule"', 'EOF', context=_context) == '"rule"':
            self._scan('"rule"', context=_context)
            ID = self._scan('ID', context=_context)
            OptParam = self.OptParam(_context)
            self._scan('":"', context=_context)
            ClauseA = self.ClauseA(ID, tokens, _context)
            rul.append( (ID, OptParam, ClauseA) )
        return rul

    def ClauseA(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens])
        ClauseB = self.ClauseB(rule,tokens, _context)
        v = [ClauseB]
        while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR':
            OR = self._scan('OR', context=_context)
            ClauseB = self.ClauseB(rule,tokens, _context)
            v.append(ClauseB)
        return cleanup_choice(rule,v)

    def ClauseB(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens])
        v = []
        while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']:
            ClauseC = self.ClauseC(rule,tokens, _context)
            v.append(ClauseC)
        return cleanup_sequence(rule, v)

    def ClauseC(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens])
        ClauseD = self.ClauseD(rule,tokens, _context)
        _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context)
        if _token == 'PLUS':
            PLUS = self._scan('PLUS', context=_context)
            return parsetree.Plus(rule, ClauseD)
        elif _token == 'STAR':
            STAR = self._scan('STAR', context=_context)
            return parsetree.Star(rule, ClauseD)
        elif _token == 'QUEST':
            QUEST = self._scan('QUEST', context=_context)
            return parsetree.Option(rule, ClauseD)
        else:
            return ClauseD

    def ClauseD(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens])
        _token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context)
        if _token == 'STR':
            STR = self._scan('STR', context=_context)
            t = (STR, eval(STR,{},{}))
            if t not in tokens: tokens.insert( 0, t )
            return parsetree.Terminal(rule, STR)
        elif _token == 'ID':
            ID = self._scan('ID', context=_context)
            OptParam = self.OptParam(_context)
            return resolve_name(rule,tokens, ID, OptParam)
        elif _token == 'LP':
            LP = self._scan('LP', context=_context)
            ClauseA = self.ClauseA(rule,tokens, _context)
            RP = self._scan('RP', context=_context)
            return ClauseA
        elif _token == 'LB':
            LB = self._scan('LB', context=_context)
            ClauseA = self.ClauseA(rule,tokens, _context)
            RB = self._scan('RB', context=_context)
            return parsetree.Option(rule, ClauseA)
        else: # == 'STMT'
            STMT = self._scan('STMT', context=_context)
            return parsetree.Eval(rule, STMT[2:-2])

    def OptParam(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'OptParam', [])
        if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR':
            ATTR = self._scan('ATTR', context=_context)
            return ATTR[2:-2]
        return ''

    def Str(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Str', [])
        STR = self._scan('STR', context=_context)
        return eval(STR,{},{})


def parse(rule, text):
    P = ParserDescription(ParserDescriptionScanner(text))
    return runtime.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps
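A hedged sketch of how this bootstrap module gets used (the inline grammar below is illustrative, not taken from the commit):

    from yapps import grammar

    # parse() wraps ParserDescription with runtime.wrap_error_reporter;
    # on success it returns a parsetree.Generator ready for output.
    t = grammar.parse('Parser', '''
    parser Hello:
        token WORD: "[a-zA-Z]+"
        rule greeting: WORD {{ return WORD }}
    ''')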
@@ -1,5 +1,3 @@
#!/usr/bin/python2
#
# parsetree.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#

@@ -35,12 +33,18 @@ class Generator:
        self.postparser = None

        self.tokens = {} # Map from tokens to regexps
        self.ignore = [] # List of token names to ignore in parsing
        self.ignore = {} # List of token names to ignore in parsing, map to statements
        self.terminals = [] # List of token names (to maintain ordering)
        for n, t in tokens:
        for t in tokens:
            if len(t) == 3:
                n,t,s = t
            else:
                n,t = t
                s = None

            if n == '#ignore':
                n = t
                self.ignore.append(n)
                self.ignore[n] = s
            if n in self.tokens.keys() and self.tokens[n] != t:
                print >>sys.stderr, 'Warning: token %s defined more than once.' % n
            self.tokens[n] = t

@@ -199,7 +203,9 @@ class Generator:
        a_set = (repr(a)[1:-1])
        if self.equal_set(a, self.non_ignored_tokens()): a_set = ''
        if self.has_option('context-insensitive-scanner'): a_set = ''
        return 'self._peek(%s)' % a_set
        if a_set: a_set += ","

        return 'self._peek(%s context=_context)' % a_set

    def peek_test(self, a, b):
        """Generate a call to test whether the next token (which could be any of

@@ -252,31 +258,39 @@ class Generator:
            print ' FOLLOW:', ', '.join(top.follow)
            for x in top.get_children(): queue.append(x)

    def repr_ignore(self):
        out="{"
        for t,s in self.ignore.iteritems():
            if s is None: s=repr(s)
            out += "%s:%s," % (repr(t),s)
        out += "}"
        return out

    def generate_output(self):
        self.calculate()
        self.write(self.preparser)
        self.write("# Begin -- grammar generated by Yapps\n")
        self.write("import sys, re\n")
        self.write("import yappsrt\n")
        self.write("from yapps import runtime\n")
        self.write("\n")
        self.write("class ", self.name, "Scanner(yappsrt.Scanner):\n")
        self.write("class ", self.name, "Scanner(runtime.Scanner):\n")
        self.write("    patterns = [\n")
        for p in self.terminals:
            self.write("        (%s, re.compile(%s)),\n" % (
                repr(p), repr(self.tokens[p])))
        self.write("    ]\n")
        self.write("    def __init__(self, str):\n")
        self.write("        yappsrt.Scanner.__init__(self,None,%s,str)\n" %
                   repr(self.ignore))
        self.write("    def __init__(self, str,*args,**kw):\n")
        self.write("        runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" %
                   self.repr_ignore())
        self.write("\n")

        self.write("class ", self.name, "(yappsrt.Parser):\n")
        self.write(INDENT, "Context = yappsrt.Context\n")
        self.write("class ", self.name, "(runtime.Parser):\n")
        self.write(INDENT, "Context = runtime.Context\n")
        for r in self.goals:
            self.write(INDENT, "def ", r, "(self")
            if self.params[r]: self.write(", ", self.params[r])
            self.write(", _parent=None):\n")
            self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, self._pos, %s, [%s])\n" %
            self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" %
                       (repr(r), self.params.get(r, '')))
            self.rules[r].output(self, INDENT+INDENT)
            self.write("\n")

@@ -284,7 +298,7 @@ class Generator:
        self.write("\n")
        self.write("def parse(rule, text):\n")
        self.write("    P = ", self.name, "(", self.name, "Scanner(text))\n")
        self.write("    return yappsrt.wrap_error_reporter(P, rule)\n")
        self.write("    return runtime.wrap_error_reporter(P, rule)\n")
        self.write("\n")
        if self.postparser is not None:
            self.write("# End -- grammar generated by Yapps\n")

@@ -355,7 +369,7 @@ class Terminal(Node):
        gen.write(indent)
        if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token):
            gen.write(self.token, " = ")
        gen.write("self._scan(%s)\n" % repr(self.token))
        gen.write("self._scan(%s, context=_context)\n" % repr(self.token))

class Eval(Node):
    """This class stores evaluation nodes, from {{ ... }} clauses."""

@@ -547,7 +561,7 @@ class Choice(Node):

        if tokens_unseen:
            gen.write(indent, "else:\n")
            gen.write(indent, INDENT, "raise yappsrt.SyntaxError(_token[0], ")
            gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ")
            gen.write("'Could not match ", self.rule, "')\n")

class Wrapper(Node):

@@ -586,6 +600,13 @@ class Option(Wrapper):
        gen.write(indent, "if %s:\n" %
                  gen.peek_test(self.first, self.child.first))
        self.child.output(gen, indent+INDENT)

        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))


class Plus(Wrapper):
    """This class represents a 1-or-more repetition clause of the form A+"""

@@ -613,6 +634,13 @@ class Plus(Wrapper):
        gen.write(indent+INDENT, "if %s: break\n" %
                  gen.not_peek_test(union, self.child.first))

        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))


class Star(Wrapper):
    """This class represents a 0-or-more repetition clause of the form A*"""
    def setup(self, gen):

@@ -637,9 +665,9 @@ class Star(Wrapper):
        self.child.output(gen, indent+INDENT)

        # TODO: need to generate tests like this in lots of rules
        # TODO: do we need to do this only when it's a context-insensitive scanner?
        gen.write(indent, "if %s:\n" %
                  gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
        gen.write(indent+INDENT, "raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                  repr(self.first))
        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))
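To make the repr_ignore() change concrete, here is a hedged reconstruction (not copied from real generated output) of the scanner boilerplate the updated generator emits for a grammar that attaches a statement to an ignore pattern, as the calculator example does:

    import re
    from yapps import runtime

    def stack_input(scanner, match):
        scanner.stack_input(raw_input(">?> "))

    class CalculatorScanner(runtime.Scanner):
        patterns = [
            ('[ \r\t\n]+', re.compile('[ \r\t\n]+')),
            ('[?]', re.compile('[?]')),
            ('NUM', re.compile('[0-9]+')),
        ]
        def __init__(self, str,*args,**kw):
            # ignore is now a dict: plain patterns map to None, while
            # '[?]' maps to the stack_input statement from the grammar
            runtime.Scanner.__init__(self,None,{'[ \r\t\n]+':None,'[?]':stack_input,},str,*args,**kw)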
442  yapps/runtime.py  Normal file
@@ -0,0 +1,442 @@
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Run time libraries needed to run parsers generated by Yapps.

This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.

"""

import sys, re

MIN_WINDOW=4096
# File lookup window

class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    def __init__(self, pos=None, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
        self.context = context

    def __str__(self):
        if not self.pos: return 'SyntaxError'
        else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    pass

class Token(object):
    """Yapps token.

    This is a container for a scanned token.
    """

    def __init__(self, type,value, pos=None):
        """Initialize a token."""
        self.type = type
        self.value = value
        self.pos = pos

    def __repr__(self):
        output = '<%s: %s' % (self.type, repr(self.value))
        if self.pos:
            output += " @ "
            if self.pos[0]:
                output += "%s:" % self.pos[0]
            if self.pos[1]:
                output += "%d" % self.pos[1]
            if self.pos[2] is not None:
                output += ".%d" % self.pos[2]
        output += ">"
        return output

in_name=0
class Scanner(object):
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(i) method is used to retrieve the
    i-th token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    """

    def __init__(self, patterns, ignore, input="",
            file=None,filename=None,stacked=False):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : {terminal:None, ...}
          input : string

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.

        The 'ignore' value is either None or a callable, which is called
        with the scanner and the to-be-ignored match object; this can
        be used for include file or comment handling.
        """

        if not filename:
            global in_name
            filename="<f.%d>" % in_name
            in_name += 1

        self.input = input
        self.ignore = ignore
        self.file = file
        self.filename = filename
        self.pos = 0
        self.del_pos = 0 # skipped
        self.line = 1
        self.del_line = 0 # skipped
        self.col = 0
        self.tokens = []
        self.stack = None
        self.stacked = stacked

        self.last_read_token = None
        self.last_token = None
        self.last_types = None

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def stack_input(self, input="", file=None, filename=None):
        """Temporarily parse from a second file."""

        # Already reading from somewhere else: Go on top of that, please.
        if self.stack:
            # autogenerate a recursion-level-identifying filename
            if not filename:
                filename = 1
            else:
                try:
                    filename += 1
                except TypeError:
                    pass
            # now pass off to the include file
            self.stack.stack_input(input,file,filename)
        else:

            try:
                filename += 0
            except TypeError:
                pass
            else:
                filename = "<str_%d>" % filename

#           self.stack = object.__new__(self.__class__)
#           Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)

            # Note that the pattern+ignore are added by the generated
            # scanner code
            self.stack = self.__class__(input,file,filename, stacked=True)

    def get_pos(self):
        """Return a file/line/char tuple."""
        if self.stack: return self.stack.get_pos()

        return (self.filename, self.line+self.del_line, self.col)

#   def __repr__(self):
#       """Print the last few tokens that have been scanned in"""
#       output = ''
#       for t in self.tokens:
#           output += '%s\n' % (repr(t),)
#       return output

    def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
        """Print the line of 'text' that includes position 'p',
        along with a second line with a single caret (^) at position p"""

        file,line,p = pos
        if file != self.filename:
            if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
            print >>out, "(%s: not in input buffer)" % file
            return

        text = self.input
        p += length-1 # starts at pos 1

        origline=line
        line -= self.del_line
        spos=0
        if line > 0:
            while 1:
                line = line - 1
                try:
                    cr = text.index("\n",spos)
                except ValueError:
                    if line:
                        text = ""
                    break
                if line == 0:
                    text = text[spos:cr]
                    break
                spos = cr+1
        else:
            print >>out, "(%s:%d not in input buffer)" % (file,origline)
            return

        # Now try printing part of the line
        text = text[max(p-80, 0):p+80]
        p = p - max(p-80, 0)

        # Strip to the left
        i = text[:p].rfind('\n')
        j = text[:p].rfind('\r')
        if i < 0 or (0 <= j < i): i = j
        if 0 <= i < p:
            p = p - i - 1
            text = text[i+1:]

        # Strip to the right
        i = text.find('\n', p)
        j = text.find('\r', p)
        if i < 0 or (0 <= j < i): i = j
        if i >= 0:
            text = text[:i]

        # Now shorten the text
        while len(text) > 70 and p > 60:
            # Cut off 10 chars
            text = "..." + text[10:]
            p = p - 7

        # Now print the string, along with an indicator
        print >>out, '> ',text
        print >>out, '> ',' '*p + '^'

    def grab_input(self):
        """Get more input if possible."""
        if not self.file: return
        if len(self.input) - self.pos >= MIN_WINDOW: return

        data = self.file.read(MIN_WINDOW)
        if data is None or data == "":
            self.file = None

        # Drop bytes from the start, if necessary.
        if self.pos > 2*MIN_WINDOW:
            self.del_pos += MIN_WINDOW
            self.del_line += self.input[:MIN_WINDOW].count("\n")
            self.pos -= MIN_WINDOW
            self.input = self.input[MIN_WINDOW:] + data
        else:
            self.input = self.input + data

    def getchar(self):
        """Return the next character."""
        self.grab_input()

        c = self.input[self.pos]
        self.pos += 1
        return c

    def token(self, restrict, context=None):
        """Scan for another token."""

        while 1:
            if self.stack:
                try:
                    return self.stack.token(restrict, context)
                except StopIteration:
                    self.stack = None

            # Keep looking for a token, ignoring any in self.ignore
            self.grab_input()

            # special handling for end-of-file
            if self.stacked and self.pos==len(self.input):
                raise StopIteration

            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            best_m = None
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and m.end()-m.start() > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = m.end()-m.start()
                    best_m = m

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.get_pos(), msg, context=context)

            ignore = best_pat in self.ignore
            value = self.input[self.pos:self.pos+best_match]
            if not ignore:
                tok=Token(type=best_pat, value=value, pos=self.get_pos())

            self.pos += best_match

            npos = value.rfind("\n")
            if npos > -1:
                self.col = best_match-npos
                self.line += value.count("\n")
            else:
                self.col += best_match

            # If we found something that isn't to be ignored, return it
            if not ignore:
                if len(self.tokens) >= 10:
                    del self.tokens[0]
                self.tokens.append(tok)
                self.last_read_token = tok
                # print repr(tok)
                return tok
            else:
                ignore = self.ignore[best_pat]
                if ignore:
                    ignore(self, best_m)

    def peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        context = kw.get("context",None)
        if self.last_token is None:
            self.last_types = types
            self.last_token = self.token(types,context)
        elif self.last_types:
            for t in types:
                if t not in self.last_types:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.last_token.type

    def scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        context = kw.get("context",None)

        if self.last_token is None:
            tok = self.token([type],context)
        else:
            if self.last_types and type not in self.last_types:
                raise NotImplementedError("Unimplemented: restriction set changed")

            tok = self.last_token
            self.last_token = None
        if tok.type != type:
            if not self.last_types: self.last_types=[]
            raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
        return tok.value

class Parser(object):
    """Base class for Yapps-generated parsers.

    """

    def __init__(self, scanner):
        self._scanner = scanner

    def _stack(self, input="",file=None,filename=None):
        """Temporarily read from someplace else"""
        self._scanner.stack_input(input,file,filename)
        self._tok = None

    def _peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.peek(*types, **kw)

    def _scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        return self._scanner.scan(type, **kw)

class Context(object):
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.

    """

    def __init__(self, parent, scanner, rule, args=()):
        """Create a new context.

        Args:
          parent: Context object or None
          scanner: Scanner object
          rule: string (name of the rule)
          args: tuple listing parameters to the rule

        """
        self.parent = parent
        self.scanner = scanner
        self.rule = rule
        self.args = args
        while scanner.stack: scanner = scanner.stack
        self.token = scanner.last_read_token

    def __str__(self):
        output = ''
        if self.parent: output = str(self.parent) + ' > '
        output += self.rule
        return output

def print_error(err, scanner, max_ctx=None):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    pos = err.pos
    if not pos:
        pos = scanner.get_pos()

    file_name, line_number, column_number = pos
    print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg)

    scanner.print_line_with_pointer(pos)

    context = err.context
    token = None
    while context:
        print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
        if context.token:
            token = context.token
        if token:
            scanner.print_line_with_pointer(token.pos, length=len(token.value))
        context = context.parent
        if max_ctx:
            max_ctx = max_ctx-1
            if not max_ctx:
                break

def wrap_error_reporter(parser, rule, *args,**kw):
    try:
        return getattr(parser, rule)(*args,**kw)
    except SyntaxError, e:
        print_error(e, parser._scanner)
    except NoMoreTokens:
        print >>sys.stderr, 'Could not complete parsing; stopped around here:'
        print >>sys.stderr, parser._scanner
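For orientation, a minimal hedged sketch of driving the new runtime.Scanner directly (the token names and input are made up; generated parsers do this through _peek/_scan):

    from yapps import runtime

    patterns = [
        ('WS',  '[ \t\r\n]+'),
        ('NUM', '[0-9]+'),
        ('ID',  '[a-zA-Z_]+'),
    ]
    # the ignore dict maps a token name to an optional callback;
    # None means the match is silently skipped
    s = runtime.Scanner(patterns, {'WS': None}, "foo 42 bar")
    print s.token(['ID', 'NUM'])   # first non-ignored token: ID 'foo'
    print s.token(['ID', 'NUM'])   # next: NUM '42'

Each returned Token carries a (file, line, column) position tuple, which print_error() uses to point a caret at the offending input.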
18  yapps2.py
@@ -1,4 +1,4 @@
#!/usr/bin/python2
#!/usr/bin/python

#
# Yapps 2 - yet another python parser system

@@ -13,7 +13,7 @@
import sys, re

import yappsrt, parsetree
from yapps import runtime, parsetree

def generate(inputfilename, outputfilename='', dump=0, **flags):
    """Generate a grammar, given an input filename (X.g)

@@ -40,11 +40,12 @@ def generate(inputfilename, outputfilename='', dump=0, **flags):
    if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):]

    # Create the parser and scanner and parse the text
    scanner = grammar.ParserDescriptionScanner(s)
    if preparser: scanner.first_line_number = 1 + preparser.count('\n')
    scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename)
    if preparser: scanner.del_line += preparser.count('\n')

    parser = grammar.ParserDescription(scanner)
    t = yappsrt.wrap_error_reporter(parser, 'Parser')
    if t is None: return # Failure
    t = runtime.wrap_error_reporter(parser, 'Parser')
    if t is None: return 1 # Failure
    if preparser is not None: t.preparser = preparser
    if postparser is not None: t.postparser = postparser

@@ -63,6 +64,7 @@ def generate(inputfilename, outputfilename='', dump=0, **flags):
    else:
        t.output = open(outputfilename, 'w')
    t.generate_output()
    return 0

if __name__ == '__main__':
    import doctest

@@ -106,6 +108,6 @@ if __name__ == '__main__':
    if use_devel_grammar:
        import yapps_grammar as grammar
    else:
        import grammar
        from yapps import grammar

    generate(*tuple(args), **flags)
    sys.exit(generate(*tuple(args), **flags))
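A hedged sketch of the updated entry point in use (file names are illustrative):

    import sys
    import yapps2

    # generate() now returns 0 on success and 1 when parsing the grammar
    # fails, so callers can propagate the result as an exit status.
    sys.exit(yapps2.generate('calculator.g', 'calculator.py'))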
@ -1,7 +1,6 @@
|
|||
#!/usr/bin/python2
|
||||
#
|
||||
# grammar.py, part of Yapps 2 - yet another python parser system
|
||||
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
|
||||
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
|
||||
#
|
||||
# This version of the Yapps 2 grammar can be distributed under the
|
||||
# terms of the MIT open source license, either found in the LICENSE
|
||||
|
|
@ -19,7 +18,7 @@ by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
|
|||
"""
|
||||
|
||||
import sys, re
|
||||
import parsetree
|
||||
from yapps import parsetree
|
||||
|
||||
######################################################################
|
||||
def cleanup_choice(rule, lst):
|
||||
|
|
@ -33,9 +32,9 @@ def cleanup_sequence(rule, lst):
|
|||
|
||||
def resolve_name(rule, tokens, id, args):
|
||||
if id in [x[0] for x in tokens]:
|
||||
# It's a token
|
||||
if args:
|
||||
print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
|
||||
# It's a token
|
||||
if args:
|
||||
print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
|
||||
return parsetree.Terminal(rule, id)
|
||||
else:
|
||||
# It's a name, so assume it's a nonterminal
|
||||
|
|
@ -43,7 +42,6 @@ def resolve_name(rule, tokens, id, args):
|
|||
|
||||
%%
|
||||
parser ParserDescription:
|
||||
option: "context-insensitive-scanner"
|
||||
|
||||
ignore: "[ \t\r\n]+"
|
||||
ignore: "#.*?\r?\n"
|
||||
|
|
@ -62,9 +60,6 @@ parser ParserDescription:
|
|||
token QUEST: '[?]'
|
||||
token COLON: ':'
|
||||
|
||||
rule LINENO: # This is a pseudotoken. It matches nothing; returns the line number
|
||||
{{ return 1 + self._scanner.get_input_scanned().count('\n') }}
|
||||
|
||||
rule Parser: "parser" ID ":"
|
||||
Options
|
||||
Tokens
|
||||
|
|
@ -79,48 +74,48 @@ parser ParserDescription:
|
|||
rule Tokens: {{ tok = [] }}
|
||||
(
|
||||
"token" ID ":" Str {{ tok.append( (ID,Str) ) }}
|
||||
| "ignore" ":" Str {{ tok.append( ('#ignore',Str) ) }}
|
||||
| "ignore"
|
||||
":" Str {{ ign = ('#ignore',Str) }}
|
||||
( STMT {{ ign = ign + (STMT[2:-2],) }} )?
|
||||
{{ tok.append( ign ) }}
|
||||
)*
|
||||
{{ return tok }}
|
||||
|
||||
rule Rules<<tokens>>:
|
||||
{{ rul = [] }}
|
||||
( LINENO
|
||||
(
|
||||
"rule" ID OptParam ":" ClauseA<<ID, tokens>>
|
||||
# TODO: save LINENO somewhere?
|
||||
{{ rul.append( (ID, OptParam, ClauseA) ) }}
|
||||
)*
|
||||
{{ return rul }}
|
||||
|
||||
rule ClauseA<<rule, tokens>>:
|
||||
ClauseB<<rule, tokens>>
|
||||
ClauseB<<rule,tokens>>
|
||||
{{ v = [ClauseB] }}
|
||||
( OR ClauseB<<rule, tokens>> {{ v.append(ClauseB) }} )*
|
||||
{{ return cleanup_choice(rule, v) }}
|
||||
( OR ClauseB<<rule,tokens>> {{ v.append(ClauseB) }} )*
|
||||
{{ return cleanup_choice(rule,v) }}
|
||||
|
||||
rule ClauseB<<rule, tokens>>:
|
||||
rule ClauseB<<rule,tokens>>:
|
||||
{{ v = [] }}
|
||||
( ClauseC<<rule, tokens>> {{ v.append(ClauseC) }} )*
|
||||
( ClauseC<<rule,tokens>> {{ v.append(ClauseC) }} )*
|
||||
{{ return cleanup_sequence(rule, v) }}
|
||||
|
||||
-    rule ClauseC<<rule, tokens>>:
-        ClauseD<<rule, tokens>>
+    rule ClauseC<<rule,tokens>>:
+        ClauseD<<rule,tokens>>
        ( PLUS {{ return parsetree.Plus(rule, ClauseD) }}
        | STAR {{ return parsetree.Star(rule, ClauseD) }}
        | QUEST {{ return parsetree.Option(rule, ClauseD) }}
        | {{ return ClauseD }} )

-    rule ClauseD<<rule, tokens>>:
+    rule ClauseD<<rule,tokens>>:
        STR {{ t = (STR, eval(STR,{},{})) }}
            {{ if t not in tokens: tokens.insert( 0, t ) }}
            {{ return parsetree.Terminal(rule, STR) }}
-        | ID OptParam {{ return resolve_name(rule, tokens, ID, OptParam) }}
-        | LP ClauseA<<rule, tokens>> RP {{ return ClauseA }}
-        | LB ClauseA<<rule, tokens>> RB {{ return parsetree.Option(rule, ClauseA) }}
+        | ID OptParam {{ return resolve_name(rule,tokens, ID, OptParam) }}
+        | LP ClauseA<<rule,tokens>> RP {{ return ClauseA }}
+        | LB ClauseA<<rule,tokens>> RB {{ return parsetree.Option(rule, ClauseA) }}
        | STMT {{ return parsetree.Eval(rule, STMT[2:-2]) }}

-    rule OptParam:
-        ATTR {{ return ATTR[2:-2] }}
-        | {{ return '' }}
+    rule OptParam: [ ATTR {{ return ATTR[2:-2] }} ] {{ return '' }}
    rule Str: STR {{ return eval(STR,{},{}) }}
%%
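The "+" lines in the Tokens rule above are the Debian extension that lets an ignore declaration carry an inline statement. A minimal sketch of a grammar using it -- the parser name, token, and counter attribute are hypothetical, and the statement is presumably run when the ignored text is skipped (see the Debian Extensions appendix of the manual):

    parser CommentCounter:
        ignore: "[ \t\r\n]+"
        ignore: "#.*?\r?\n" {{ self.comments = getattr(self, 'comments', 0) + 1 }}
        token NUM: "[0-9]+"

        rule top: NUM {{ return int(NUM) }}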
234
yapps_grammar.py
@@ -1,234 +0,0 @@
#!/usr/bin/python2
#
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Parser for Yapps grammars.

This file defines the grammar of Yapps grammars. Naturally, it is
implemented in Yapps. The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)

"""

import sys, re
import parsetree

######################################################################
def cleanup_choice(rule, lst):
    if len(lst) == 0: return Sequence(rule, [])
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))

def cleanup_sequence(rule, lst):
    if len(lst) == 1: return lst[0]
    return parsetree.Sequence(rule, *tuple(lst))

def resolve_name(rule, tokens, id, args):
    if id in [x[0] for x in tokens]:
        # It's a token
        if args:
            print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
        return parsetree.Terminal(rule, id)
    else:
        # It's a name, so assume it's a nonterminal
        return parsetree.NonTerminal(rule, id, args)

# Begin -- grammar generated by Yapps
import sys, re
import yappsrt

class ParserDescriptionScanner(yappsrt.Scanner):
    patterns = [
        ('"rule"', re.compile('rule')),
        ('"ignore"', re.compile('ignore')),
        ('"token"', re.compile('token')),
        ('"option"', re.compile('option')),
        ('":"', re.compile(':')),
        ('"parser"', re.compile('parser')),
        ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
        ('#.*?\r?\n', re.compile('#.*?\r?\n')),
        ('EOF', re.compile('$')),
        ('ATTR', re.compile('<<.+?>>')),
        ('STMT', re.compile('{{.+?}}')),
        ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
        ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
        ('LP', re.compile('\\(')),
        ('RP', re.compile('\\)')),
        ('LB', re.compile('\\[')),
        ('RB', re.compile('\\]')),
        ('OR', re.compile('[|]')),
        ('STAR', re.compile('[*]')),
        ('PLUS', re.compile('[+]')),
        ('QUEST', re.compile('[?]')),
        ('COLON', re.compile(':')),
    ]
    def __init__(self, str):
        yappsrt.Scanner.__init__(self,None,['[ \t\r\n]+', '#.*?\r?\n'],str)

class ParserDescription(yappsrt.Parser):
    Context = yappsrt.Context
    def LINENO(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'LINENO', [])
        return 1 + self._scanner.get_input_scanned().count('\n')

    def Parser(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Parser', [])
        self._scan('"parser"')
        ID = self._scan('ID')
        self._scan('":"')
        Options = self.Options(_context)
        Tokens = self.Tokens(_context)
        Rules = self.Rules(Tokens, _context)
        EOF = self._scan('EOF')
        return parsetree.Generator(ID,Options,Tokens,Rules)

    def Options(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Options', [])
        opt = {}
        while self._peek() == '"option"':
            self._scan('"option"')
            self._scan('":"')
            Str = self.Str(_context)
            opt[Str] = 1
        if self._peek() not in ['"option"', '"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"option"', '"token"', '"ignore"', 'EOF', '"rule"']))
        return opt

    def Tokens(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Tokens', [])
        tok = []
        while self._peek() in ['"token"', '"ignore"']:
            _token = self._peek()
            if _token == '"token"':
                self._scan('"token"')
                ID = self._scan('ID')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( (ID,Str) )
            elif _token == '"ignore"':
                self._scan('"ignore"')
                self._scan('":"')
                Str = self.Str(_context)
                tok.append( ('#ignore',Str) )
            else:
                raise yappsrt.SyntaxError(_token[0], 'Could not match Tokens')
        if self._peek() not in ['"token"', '"ignore"', 'EOF', '"rule"']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"token"', '"ignore"', 'EOF', '"rule"']))
        return tok

    def Rules(self, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Rules', [tokens])
        rul = []
        while self._peek() == '"rule"':
            LINENO = self.LINENO(_context)
            self._scan('"rule"')
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            self._scan('":"')
            ClauseA = self.ClauseA(ID, tokens, _context)
            rul.append( (ID, OptParam, ClauseA) )
        if self._peek() not in ['"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['"rule"', 'EOF']))
        return rul

    def ClauseA(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseA', [rule, tokens])
        ClauseB = self.ClauseB(rule, tokens, _context)
        v = [ClauseB]
        while self._peek() == 'OR':
            OR = self._scan('OR')
            ClauseB = self.ClauseB(rule, tokens, _context)
            v.append(ClauseB)
        if self._peek() not in ['OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_choice(rule, v)

    def ClauseB(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseB', [rule, tokens])
        v = []
        while self._peek() in ['STR', 'ID', 'LP', 'LB', 'STMT']:
            ClauseC = self.ClauseC(rule, tokens, _context)
            v.append(ClauseC)
        if self._peek() not in ['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF']))
        return cleanup_sequence(rule, v)

    def ClauseC(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseC', [rule, tokens])
        ClauseD = self.ClauseD(rule, tokens, _context)
        _token = self._peek()
        if _token == 'PLUS':
            PLUS = self._scan('PLUS')
            return parsetree.Plus(rule, ClauseD)
        elif _token == 'STAR':
            STAR = self._scan('STAR')
            return parsetree.Star(rule, ClauseD)
        elif _token == 'QUEST':
            QUEST = self._scan('QUEST')
            return parsetree.Option(rule, ClauseD)
        elif _token not in ['"ignore"', '"token"', '"option"', '":"', '"parser"', 'ATTR', 'COLON']:
            return ClauseD
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseC')

    def ClauseD(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'ClauseD', [rule, tokens])
        _token = self._peek()
        if _token == 'STR':
            STR = self._scan('STR')
            t = (STR, eval(STR,{},{}))
            if t not in tokens: tokens.insert( 0, t )
            return parsetree.Terminal(rule, STR)
        elif _token == 'ID':
            ID = self._scan('ID')
            OptParam = self.OptParam(_context)
            return resolve_name(rule, tokens, ID, OptParam)
        elif _token == 'LP':
            LP = self._scan('LP')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RP = self._scan('RP')
            return ClauseA
        elif _token == 'LB':
            LB = self._scan('LB')
            ClauseA = self.ClauseA(rule, tokens, _context)
            RB = self._scan('RB')
            return parsetree.Option(rule, ClauseA)
        elif _token == 'STMT':
            STMT = self._scan('STMT')
            return parsetree.Eval(rule, STMT[2:-2])
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match ClauseD')

    def OptParam(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'OptParam', [])
        _token = self._peek()
        if _token == 'ATTR':
            ATTR = self._scan('ATTR')
            return ATTR[2:-2]
        elif _token not in ['"ignore"', '"token"', '"option"', '"parser"', 'COLON']:
            return ''
        else:
            raise yappsrt.SyntaxError(_token[0], 'Could not match OptParam')

    def Str(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'Str', [])
        STR = self._scan('STR')
        return eval(STR,{},{})


def parse(rule, text):
    P = ParserDescription(ParserDescriptionScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps
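For reference, this removed module was driven through its parse() wrapper; a minimal sketch in Python 2 (like the code above), with a grammar file name of our choosing:

    import yapps_grammar

    # 'Parser' is the start rule defined above; parse() returns a
    # parsetree.Generator on success and reports errors via yappsrt.
    text = open('calc.g').read()   # hypothetical grammar file
    g = yapps_grammar.parse('Parser', text)
    if g:
        print 'parsed grammar:', g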
304
yappsrt.py
@@ -1,304 +0,0 @@
#
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Run time libraries needed to run parsers generated by Yapps.

This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.

"""

# TODO: it should be possible to embed yappsrt into the generated
# grammar to make a standalone module.

import sys, re

class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    def __init__(self, charpos=-1, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.charpos = charpos
        self.msg = msg
        self.context = context

    def __str__(self):
        if self.charpos < 0: return 'SyntaxError'
        else: return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg)

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    pass

class Scanner:
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes. The token(i) method is used to retrieve the
    i-th token. It takes a restrict set that limits the set of tokens
    it is allowed to return. In context sensitive mode, this restrict
    set guides the scanner. In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    """

    def __init__(self, patterns, ignore, input):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : [terminal,...]
          input : string

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.
        """
        self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...]
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.first_line_number = 1

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the previous position (one token back) in the input text."""
        if self.pos == 0: return 0
        if i is None: i = -1
        return self.tokens[i][0]

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n') # may be -1, but that's okay in this case
        return len(s) - (i+1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def token(self, i, restrict=None):
        """Get the i'th token in the input.

        If i is one past the end, then scan for another token.

        Args:

        restrict : [token, ...] or None; if restrict is None, then any
        token is allowed. You may call token(i) more than once.
        However, the restrict set may never be larger than what was
        passed in on the first call to token(i).

        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted. This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        for t in self.tokens[-10:]:
            output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))
        return output

    def scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions"""
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos+best_match, best_pat,
                         self.input[self.pos:self.pos+best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match

class Parser:
    """Base class for Yapps-generated parsers.

    """

    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        tok = self._scanner.token(self._pos, types)
        return tok[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token"""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            raise SyntaxError(tok[0], 'Trying to find '+type+' :'+ ' ,'.join(self._scanner.restrictions[self._pos]))
        self._pos = 1 + self._pos
        return tok[3]

class Context:
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule. The
    contexts can be used for debugging.

    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        Args:
          parent: Context object or None
          scanner: Scanner object
          pos: integer (scanner token position)
          rule: string (name of the rule)
          args: tuple listing parameters to the rule

        """
        self.parent = parent
        self.scanner = scanner
        self.tokenpos = tokenpos
        self.rule = rule
        self.args = args

    def __str__(self):
        output = ''
        if self.parent: output = str(self.parent) + ' > '
        output += self.rule
        return output

def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p',
    along with a second line with a single caret (^) at position p"""

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Now try printing part of the line
    text = text[max(p-80, 0):p+80]
    p = p - max(p-80, 0)

    # Strip to the left
    i = text[:p].rfind('\n')
    j = text[:p].rfind('\r')
    if i < 0 or (0 <= j < i): i = j
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i): i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print >>sys.stderr, '> ',text
    print >>sys.stderr, '> ',' '*p + '^'

def print_error(input, err, scanner):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    print >>sys.stderr, '%d:%d: %s' % (line_number, column_number, err.msg)

    context = err.context
    if not context:
        print_line_with_pointer(input, err.charpos)

    while context:
        # TODO: add line number
        print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent

def wrap_error_reporter(parser, rule):
    try:
        return getattr(parser, rule)()
    except SyntaxError, e:
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        print >>sys.stderr, 'Could not complete parsing; stopped around here:'
        print >>sys.stderr, parser._scanner
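To make the runtime's calling convention concrete: a generated parser couples a Scanner subclass with a Parser subclass and hands errors to wrap_error_reporter. A minimal sketch using the generated classes from yapps_grammar.py above (the inline grammar text is made up):

    import yappsrt
    from yapps_grammar import ParserDescription, ParserDescriptionScanner

    source = 'parser Tiny: rule start: "a"'   # hypothetical grammar text
    scanner = ParserDescriptionScanner(source)
    parser = ParserDescription(scanner)
    # wrap_error_reporter invokes the named start rule and pretty-prints
    # any SyntaxError/NoMoreTokens via print_error and the Context chain.
    result = yappsrt.wrap_error_reporter(parser, 'Parser')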