don't bundle yapps2

Jeff Epler 2018-02-02 17:52:18 -06:00 committed by Sebastian Kuzminsky
parent c651ec81cd
commit fc6eb7f20c
8 changed files with 4 additions and 1486 deletions

View file

@@ -29,6 +29,7 @@ Build-Depends: debhelper (>= 6),
 procps,
 psmisc,
 desktop-file-utils,
+yapps2,
 Standards-Version: 3.9.2
 Vcs-Browser: https://github.com/LinuxCNC/linuxcnc
 Vcs-Git: git://github.com/linuxcnc/linuxcnc.git

View file

@@ -1 +0,0 @@
# empty

View file

@@ -1,41 +0,0 @@
This is yapps2, Yet Another Python Parser System,
written by Amit J. Patel <amitp@theory.stanford.edu>.
It was packaged for Debian by Matthias Urlichs <smurf@debian.org>
on Fri, 14 May 2004.
The original source can be found at:
http://theory.stanford.edu/~amitp/yapps/yapps2.1.1.zip
The original copyright notice states:
# Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of Yapps 2 can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
The LICENSE file states:
<http://www.opensource.org/licenses/mit-license.php>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@@ -1,211 +0,0 @@
# grammar.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 grammar can be distributed under the
# terms of the MIT open source license, either found in the LICENSE
# file included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Parser for Yapps grammars.
This file defines the grammar of Yapps grammars. Naturally, it is
implemented in Yapps. The grammar.py module needed by Yapps is built
by running Yapps on yapps_grammar.g. (Holy circularity, Batman!)
"""
import sys, re
from yapps import parsetree
######################################################################
def cleanup_choice(rule, lst):
    if len(lst) == 0: return parsetree.Sequence(rule, [])
    if len(lst) == 1: return lst[0]
    return parsetree.Choice(rule, *tuple(lst))

def cleanup_sequence(rule, lst):
    if len(lst) == 1: return lst[0]
    return parsetree.Sequence(rule, *tuple(lst))

def resolve_name(rule, tokens, id, args):
    if id in [x[0] for x in tokens]:
        # It's a token
        if args:
            print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
        return parsetree.Terminal(rule, id)
    else:
        # It's a name, so assume it's a nonterminal
        return parsetree.NonTerminal(rule, id, args)
# Begin -- grammar generated by Yapps
import sys, re
from yapps import runtime
class ParserDescriptionScanner(runtime.Scanner):
    patterns = [
        ('"rule"', re.compile('rule')),
        ('"ignore"', re.compile('ignore')),
        ('"token"', re.compile('token')),
        ('"option"', re.compile('option')),
        ('":"', re.compile(':')),
        ('"parser"', re.compile('parser')),
        ('[ \t\r\n]+', re.compile('[ \t\r\n]+')),
        ('#.*?\r?\n', re.compile('#.*?\r?\n')),
        ('EOF', re.compile('$')),
        ('ATTR', re.compile('<<.+?>>')),
        ('STMT', re.compile('{{.+?}}')),
        ('ID', re.compile('[a-zA-Z_][a-zA-Z_0-9]*')),
        ('STR', re.compile('[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"')),
        ('LP', re.compile('\\(')),
        ('RP', re.compile('\\)')),
        ('LB', re.compile('\\[')),
        ('RB', re.compile('\\]')),
        ('OR', re.compile('[|]')),
        ('STAR', re.compile('[*]')),
        ('PLUS', re.compile('[+]')),
        ('QUEST', re.compile('[?]')),
        ('COLON', re.compile(':')),
    ]
    def __init__(self, str,*args,**kw):
        runtime.Scanner.__init__(self,None,{'[ \t\r\n]+':None,'#.*?\r?\n':None,},str,*args,**kw)

class ParserDescription(runtime.Parser):
    Context = runtime.Context
    def Parser(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Parser', [])
        self._scan('"parser"', context=_context)
        ID = self._scan('ID', context=_context)
        self._scan('":"', context=_context)
        Options = self.Options(_context)
        Tokens = self.Tokens(_context)
        Rules = self.Rules(Tokens, _context)
        EOF = self._scan('EOF', context=_context)
        return parsetree.Generator(ID,Options,Tokens,Rules)

    def Options(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Options', [])
        opt = {}
        while self._peek('"option"', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == '"option"':
            self._scan('"option"', context=_context)
            self._scan('":"', context=_context)
            Str = self.Str(_context)
            opt[Str] = 1
        return opt

    def Tokens(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Tokens', [])
        tok = []
        while self._peek('"token"', '"ignore"', 'EOF', '"rule"', context=_context) in ['"token"', '"ignore"']:
            _token = self._peek('"token"', '"ignore"', context=_context)
            if _token == '"token"':
                self._scan('"token"', context=_context)
                ID = self._scan('ID', context=_context)
                self._scan('":"', context=_context)
                Str = self.Str(_context)
                tok.append( (ID,Str) )
            else: # == '"ignore"'
                self._scan('"ignore"', context=_context)
                self._scan('":"', context=_context)
                Str = self.Str(_context)
                ign = ('#ignore',Str)
                if self._peek('STMT', '"token"', '"ignore"', 'EOF', '"rule"', context=_context) == 'STMT':
                    STMT = self._scan('STMT', context=_context)
                    ign = ign + (STMT[2:-2],)
                tok.append( ign )
        return tok

    def Rules(self, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Rules', [tokens])
        rul = []
        while self._peek('"rule"', 'EOF', context=_context) == '"rule"':
            self._scan('"rule"', context=_context)
            ID = self._scan('ID', context=_context)
            OptParam = self.OptParam(_context)
            self._scan('":"', context=_context)
            ClauseA = self.ClauseA(ID, tokens, _context)
            rul.append( (ID, OptParam, ClauseA) )
        return rul

    def ClauseA(self, rule, tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseA', [rule, tokens])
        ClauseB = self.ClauseB(rule,tokens, _context)
        v = [ClauseB]
        while self._peek('OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'OR':
            OR = self._scan('OR', context=_context)
            ClauseB = self.ClauseB(rule,tokens, _context)
            v.append(ClauseB)
        return cleanup_choice(rule,v)

    def ClauseB(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseB', [rule,tokens])
        v = []
        while self._peek('STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) in ['STR', 'ID', 'LP', 'LB', 'STMT']:
            ClauseC = self.ClauseC(rule,tokens, _context)
            v.append(ClauseC)
        return cleanup_sequence(rule, v)

    def ClauseC(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseC', [rule,tokens])
        ClauseD = self.ClauseD(rule,tokens, _context)
        _token = self._peek('PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context)
        if _token == 'PLUS':
            PLUS = self._scan('PLUS', context=_context)
            return parsetree.Plus(rule, ClauseD)
        elif _token == 'STAR':
            STAR = self._scan('STAR', context=_context)
            return parsetree.Star(rule, ClauseD)
        elif _token == 'QUEST':
            QUEST = self._scan('QUEST', context=_context)
            return parsetree.Option(rule, ClauseD)
        else:
            return ClauseD

    def ClauseD(self, rule,tokens, _parent=None):
        _context = self.Context(_parent, self._scanner, 'ClauseD', [rule,tokens])
        _token = self._peek('STR', 'ID', 'LP', 'LB', 'STMT', context=_context)
        if _token == 'STR':
            STR = self._scan('STR', context=_context)
            t = (STR, eval(STR,{},{}))
            if t not in tokens: tokens.insert( 0, t )
            return parsetree.Terminal(rule, STR)
        elif _token == 'ID':
            ID = self._scan('ID', context=_context)
            OptParam = self.OptParam(_context)
            return resolve_name(rule,tokens, ID, OptParam)
        elif _token == 'LP':
            LP = self._scan('LP', context=_context)
            ClauseA = self.ClauseA(rule,tokens, _context)
            RP = self._scan('RP', context=_context)
            return ClauseA
        elif _token == 'LB':
            LB = self._scan('LB', context=_context)
            ClauseA = self.ClauseA(rule,tokens, _context)
            RB = self._scan('RB', context=_context)
            return parsetree.Option(rule, ClauseA)
        else: # == 'STMT'
            STMT = self._scan('STMT', context=_context)
            return parsetree.Eval(rule, STMT[2:-2])

    def OptParam(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'OptParam', [])
        if self._peek('ATTR', '":"', 'PLUS', 'STAR', 'QUEST', 'STR', 'ID', 'LP', 'LB', 'STMT', 'OR', 'RP', 'RB', '"rule"', 'EOF', context=_context) == 'ATTR':
            ATTR = self._scan('ATTR', context=_context)
            return ATTR[2:-2]
        return ''

    def Str(self, _parent=None):
        _context = self.Context(_parent, self._scanner, 'Str', [])
        STR = self._scan('STR', context=_context)
        return eval(STR,{},{})


def parse(rule, text):
    P = ParserDescription(ParserDescriptionScanner(text))
    return runtime.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps
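
As an aside, not part of the removed file: the parse() entry point above is the bootstrap in miniature. A minimal sketch of driving it by hand, in Python 2 like the module itself, assuming the yapps package is importable; the grammar text is made up for illustration:

# Sketch: feed a tiny Yapps grammar through the bootstrap parser above.
from yapps import grammar

SRC = '''
parser Calc:
    token NUM: '[0-9]+'
    token END: '$'
    rule goal: NUM END {{ return int(NUM) }}
'''

gen = grammar.parse('Parser', SRC)  # a parsetree.Generator, or None on error
if gen is not None:
    gen.generate_output()           # writes the generated Calc parser to stdout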

View file

@@ -1,673 +0,0 @@
# parsetree.py, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Classes used to represent parse trees and generate output.
This module defines the Generator class, which drives the generation
of Python output from a grammar parse tree. It also defines nodes
used to represent the parse tree; they are derived from class Node.
The main logic of Yapps is in this module.
"""
import sys, re
######################################################################
INDENT = ' '*4
class Generator:
    # TODO: many of the methods here should be class methods, not instance methods
    def __init__(self, name, options, tokens, rules):
        self.change_count = 0
        self.name = name
        self.options = options
        self.preparser = ''
        self.postparser = None

        self.tokens = {}    # Map from tokens to regexps
        self.ignore = {}    # List of token names to ignore in parsing, map to statements
        self.terminals = [] # List of token names (to maintain ordering)
        for t in tokens:
            if len(t) == 3:
                n,t,s = t
            else:
                n,t = t
                s = None
            if n == '#ignore':
                n = t
                self.ignore[n] = s
            if n in self.tokens.keys() and self.tokens[n] != t:
                print >>sys.stderr, 'Warning: token %s defined more than once.' % n
            self.tokens[n] = t
            self.terminals.append(n)

        self.rules = {}  # Map from rule names to parser nodes
        self.params = {} # Map from rule names to parameters
        self.goals = []  # List of rule names (to maintain ordering)
        for n,p,r in rules:
            self.params[n] = p
            self.rules[n] = r
            self.goals.append(n)

        self.output = sys.stdout

    def has_option(self, name):
        return self.options.get(name, 0)

    def non_ignored_tokens(self):
        return [x for x in self.terminals if x not in self.ignore]

    def changed(self):
        """Increments the change count.

        >>> t = Generator('', [], [], [])
        >>> old_count = t.change_count
        >>> t.changed()
        >>> assert t.change_count == old_count + 1
        """
        self.change_count = 1+self.change_count

    def set_subtract(self, a, b):
        """Returns the elements of a that are not in b.

        >>> t = Generator('', [], [], [])
        >>> t.set_subtract([], [])
        []
        >>> t.set_subtract([1, 2], [1, 2])
        []
        >>> t.set_subtract([1, 2, 3], [2])
        [1, 3]
        >>> t.set_subtract([1], [2, 3, 4])
        [1]
        """
        result = []
        for x in a:
            if x not in b:
                result.append(x)
        return result

    def subset(self, a, b):
        """True iff all elements of sequence a are inside sequence b

        >>> t = Generator('', [], [], [])
        >>> t.subset([], [1, 2, 3])
        1
        >>> t.subset([1, 2, 3], [])
        0
        >>> t.subset([1], [1, 2, 3])
        1
        >>> t.subset([3, 2, 1], [1, 2, 3])
        1
        >>> t.subset([1, 1, 1], [1, 2, 3])
        1
        >>> t.subset([1, 2, 3], [1, 1, 1])
        0
        """
        for x in a:
            if x not in b:
                return 0
        return 1

    def equal_set(self, a, b):
        """True iff subset(a, b) and subset(b, a)

        >>> t = Generator('', [], [], [])
        >>> a_set = [1, 2, 3]
        >>> t.equal_set(a_set, a_set)
        1
        >>> t.equal_set(a_set, a_set[:])
        1
        >>> t.equal_set([], a_set)
        0
        >>> t.equal_set([1, 2, 3], [3, 2, 1])
        1
        """
        if len(a) != len(b): return 0
        if a == b: return 1
        return self.subset(a, b) and self.subset(b, a)

    def add_to(self, parent, additions):
        "Modify _parent_ to include all elements in _additions_"
        for x in additions:
            if x not in parent:
                parent.append(x)
                self.changed()

    def equate(self, a, b):
        """Extend (a) and (b) so that they contain each others' elements.

        >>> t = Generator('', [], [], [])
        >>> a = [1, 2]
        >>> b = [2, 3]
        >>> t.equate(a, b)
        >>> a
        [1, 2, 3]
        >>> b
        [2, 3, 1]
        """
        self.add_to(a, b)
        self.add_to(b, a)

    def write(self, *args):
        for a in args:
            self.output.write(a)

    def in_test(self, expr, full, set):
        """Generate a test of (expr) being in (set), where (set) is a subset of (full)

        expr is a string (Python expression)
        set is a list of values (which will be converted with repr)
        full is the list of all values expr could possibly evaluate to

        >>> t = Generator('', [], [], [])
        >>> t.in_test('x', [1,2,3,4], [])
        '0'
        >>> t.in_test('x', [1,2,3,4], [1,2,3,4])
        '1'
        >>> t.in_test('x', [1,2,3,4], [1])
        'x == 1'
        >>> t.in_test('a+b', [1,2,3,4], [1,2])
        'a+b in [1, 2]'
        >>> t.in_test('x', [1,2,3,4,5], [1,2,3])
        'x not in [4, 5]'
        >>> t.in_test('x', [1,2,3,4,5], [1,2,3,4])
        'x != 5'
        """
        if not set: return '0'
        if len(set) == 1: return '%s == %s' % (expr, repr(set[0]))
        if full and len(set) > len(full)/2:
            # Reverse the sense of the test.
            not_set = [x for x in full if x not in set]
            return self.not_in_test(expr, full, not_set)
        return '%s in %s' % (expr, repr(set))

    def not_in_test(self, expr, full, set):
        """Like in_test, but the reverse test."""
        if not set: return '1'
        if len(set) == 1: return '%s != %s' % (expr, repr(set[0]))
        return '%s not in %s' % (expr, repr(set))

    def peek_call(self, a):
        """Generate a call to scan for a token in the set 'a'"""
        assert type(a) == type([])
        a_set = (repr(a)[1:-1])
        if self.equal_set(a, self.non_ignored_tokens()): a_set = ''
        if self.has_option('context-insensitive-scanner'): a_set = ''
        if a_set: a_set += ","
        return 'self._peek(%s context=_context)' % a_set

    def peek_test(self, a, b):
        """Generate a call to test whether the next token (which could be any of
        the elements in a) is in the set b."""
        if self.subset(a, b): return '1'
        if self.has_option('context-insensitive-scanner'): a = self.non_ignored_tokens()
        return self.in_test(self.peek_call(a), a, b)

    def not_peek_test(self, a, b):
        """Like peek_test, but the opposite sense."""
        if self.subset(a, b): return '0'
        return self.not_in_test(self.peek_call(a), a, b)

    def calculate(self):
        """The main loop to compute the epsilon, first, follow sets.
        The loop continues until the sets converge.  This works because
        each set can only get larger, so when they stop getting larger,
        we're done."""
        # First we determine whether a rule accepts epsilon (the empty sequence)
        while 1:
            for r in self.goals:
                self.rules[r].setup(self)
            if self.change_count == 0: break
            self.change_count = 0

        # Now we compute the first/follow sets
        while 1:
            for r in self.goals:
                self.rules[r].update(self)
            if self.change_count == 0: break
            self.change_count = 0

    def dump_information(self):
        """Display the grammar in somewhat human-readable form."""
        self.calculate()
        for r in self.goals:
            print '    _____' + '_'*len(r)
            print ('___/Rule '+r+'\\' + '_'*80)[:79]
            queue = [self.rules[r]]
            while queue:
                top = queue[0]
                del queue[0]

                print 'Rule', repr(top), 'of class', top.__class__.__name__
                top.first.sort()
                top.follow.sort()
                eps = []
                if top.accepts_epsilon: eps = ['(null)']
                print '     FIRST:', ', '.join(top.first+eps)
                print '    FOLLOW:', ', '.join(top.follow)
                for x in top.get_children(): queue.append(x)

    def repr_ignore(self):
        out="{"
        for t,s in self.ignore.iteritems():
            if s is None: s=repr(s)
            out += "%s:%s," % (repr(t),s)
        out += "}"
        return out

    def generate_output(self):
        self.calculate()
        self.write(self.preparser)
        self.write("# Begin -- grammar generated by Yapps\n")
        self.write("import sys, re\n")
        self.write("from yapps import runtime\n")
        self.write("\n")
        self.write("class ", self.name, "Scanner(runtime.Scanner):\n")
        self.write("    patterns = [\n")
        for p in self.terminals:
            self.write("        (%s, re.compile(%s)),\n" % (
                repr(p), repr(self.tokens[p])))
        self.write("    ]\n")
        self.write("    def __init__(self, str,*args,**kw):\n")
        self.write("        runtime.Scanner.__init__(self,None,%s,str,*args,**kw)\n" %
                   self.repr_ignore())
        self.write("\n")
        self.write("class ", self.name, "(runtime.Parser):\n")
        self.write(INDENT, "Context = runtime.Context\n")
        for r in self.goals:
            self.write(INDENT, "def ", r, "(self")
            if self.params[r]: self.write(", ", self.params[r])
            self.write(", _parent=None):\n")
            self.write(INDENT+INDENT, "_context = self.Context(_parent, self._scanner, %s, [%s])\n" %
                       (repr(r), self.params.get(r, '')))
            self.rules[r].output(self, INDENT+INDENT)
            self.write("\n")

        self.write("\n")
        self.write("def parse(rule, text):\n")
        self.write("    P = ", self.name, "(", self.name, "Scanner(text))\n")
        self.write("    return runtime.wrap_error_reporter(P, rule)\n")
        self.write("\n")
        if self.postparser is not None:
            self.write("# End -- grammar generated by Yapps\n")
            self.write(self.postparser)
        else:
            self.write("if __name__ == '__main__':\n")
            self.write(INDENT, "from sys import argv, stdin\n")
            self.write(INDENT, "if len(argv) >= 2:\n")
            self.write(INDENT*2, "if len(argv) >= 3:\n")
            self.write(INDENT*3, "f = open(argv[2],'r')\n")
            self.write(INDENT*2, "else:\n")
            self.write(INDENT*3, "f = stdin\n")
            self.write(INDENT*2, "print parse(argv[1], f.read())\n")
            self.write(INDENT, "else: print >>sys.stderr, 'Args: <rule> [<filename>]'\n")
            self.write("# End -- grammar generated by Yapps\n")

######################################################################
class Node:
    """This is the base class for all components of a grammar."""
    def __init__(self, rule):
        self.rule = rule # name of the rule containing this node
        self.first = []
        self.follow = []
        self.accepts_epsilon = 0

    def setup(self, gen):
        # Setup will change accepts_epsilon,
        # sometimes from 0 to 1 but never 1 to 0.
        # It will take a finite number of steps to set things up
        pass

    def used(self, vars):
        "Return two lists: one of vars used, and the other of vars assigned"
        return vars, []

    def get_children(self):
        "Return a list of sub-nodes"
        return []

    def __repr__(self):
        return str(self)

    def update(self, gen):
        if self.accepts_epsilon:
            gen.add_to(self.first, self.follow)

    def output(self, gen, indent):
        "Write out code to _gen_ with _indent_:string indentation"
        gen.write(indent, "assert 0 # Invalid parser node\n")

class Terminal(Node):
    """This class stores terminal nodes, which are tokens."""
    def __init__(self, rule, token):
        Node.__init__(self, rule)
        self.token = token
        self.accepts_epsilon = 0

    def __str__(self):
        return self.token

    def update(self, gen):
        Node.update(self, gen)
        if self.first != [self.token]:
            self.first = [self.token]
            gen.changed()

    def output(self, gen, indent):
        gen.write(indent)
        if re.match('[a-zA-Z_][a-zA-Z_0-9]*$', self.token):
            gen.write(self.token, " = ")
        gen.write("self._scan(%s, context=_context)\n" % repr(self.token))

class Eval(Node):
    """This class stores evaluation nodes, from {{ ... }} clauses."""
    def __init__(self, rule, expr):
        Node.__init__(self, rule)
        self.expr = expr

    def setup(self, gen):
        Node.setup(self, gen)
        if not self.accepts_epsilon:
            self.accepts_epsilon = 1
            gen.changed()

    def __str__(self):
        return '{{ %s }}' % self.expr.strip()

    def output(self, gen, indent):
        gen.write(indent, self.expr.strip(), '\n')

class NonTerminal(Node):
    """This class stores nonterminal nodes, which are rules with arguments."""
    def __init__(self, rule, name, args):
        Node.__init__(self, rule)
        self.name = name
        self.args = args

    def setup(self, gen):
        Node.setup(self, gen)
        try:
            self.target = gen.rules[self.name]
            if self.accepts_epsilon != self.target.accepts_epsilon:
                self.accepts_epsilon = self.target.accepts_epsilon
                gen.changed()
        except KeyError: # Oops, it's nonexistent
            print >>sys.stderr, 'Error: no rule <%s>' % self.name
            self.target = self

    def __str__(self):
        return '%s' % self.name

    def update(self, gen):
        Node.update(self, gen)
        gen.equate(self.first, self.target.first)
        gen.equate(self.follow, self.target.follow)

    def output(self, gen, indent):
        gen.write(indent)
        gen.write(self.name, " = ")
        args = self.args
        if args: args += ', '
        args += '_context'
        gen.write("self.", self.name, "(", args, ")\n")

class Sequence(Node):
    """This class stores a sequence of nodes (A B C ...)"""
    def __init__(self, rule, *children):
        Node.__init__(self, rule)
        self.children = children

    def setup(self, gen):
        Node.setup(self, gen)
        for c in self.children: c.setup(gen)

        if not self.accepts_epsilon:
            # If it's not already accepting epsilon, it might now do so.
            for c in self.children:
                # any non-epsilon means all is non-epsilon
                if not c.accepts_epsilon: break
            else:
                self.accepts_epsilon = 1
                gen.changed()

    def get_children(self):
        return self.children

    def __str__(self):
        return '( %s )' % ' '.join(map(str, self.children))

    def update(self, gen):
        Node.update(self, gen)
        for g in self.children:
            g.update(gen)

        empty = 1
        for g_i in range(len(self.children)):
            g = self.children[g_i]

            if empty: gen.add_to(self.first, g.first)
            if not g.accepts_epsilon: empty = 0

            if g_i == len(self.children)-1:
                next = self.follow
            else:
                next = self.children[1+g_i].first
            gen.add_to(g.follow, next)

        if self.children:
            gen.add_to(self.follow, self.children[-1].follow)

    def output(self, gen, indent):
        if self.children:
            for c in self.children:
                c.output(gen, indent)
        else:
            # Placeholder for empty sequences, just in case
            gen.write(indent, 'pass\n')

class Choice(Node):
    """This class stores a choice between nodes (A | B | C | ...)"""
    def __init__(self, rule, *children):
        Node.__init__(self, rule)
        self.children = children

    def setup(self, gen):
        Node.setup(self, gen)
        for c in self.children: c.setup(gen)

        if not self.accepts_epsilon:
            for c in self.children:
                if c.accepts_epsilon:
                    self.accepts_epsilon = 1
                    gen.changed()

    def get_children(self):
        return self.children

    def __str__(self):
        return '( %s )' % ' | '.join(map(str, self.children))

    def update(self, gen):
        Node.update(self, gen)
        for g in self.children:
            g.update(gen)

        for g in self.children:
            gen.add_to(self.first, g.first)
            gen.add_to(self.follow, g.follow)
        for g in self.children:
            gen.add_to(g.follow, self.follow)
        if self.accepts_epsilon:
            gen.add_to(self.first, self.follow)

    def output(self, gen, indent):
        test = "if"
        gen.write(indent, "_token = ", gen.peek_call(self.first), "\n")
        tokens_seen = []
        tokens_unseen = self.first[:]
        if gen.has_option('context-insensitive-scanner'):
            # Context insensitive scanners can return ANY token,
            # not only the ones in first.
            tokens_unseen = gen.non_ignored_tokens()
        for c in self.children:
            testset = c.first[:]
            removed = []
            for x in testset:
                if x in tokens_seen:
                    testset.remove(x)
                    removed.append(x)
                if x in tokens_unseen: tokens_unseen.remove(x)
            tokens_seen = tokens_seen + testset
            if removed:
                if not testset:
                    print >>sys.stderr, 'Error in rule', self.rule+':'
                else:
                    print >>sys.stderr, 'Warning in rule', self.rule+':'
                print >>sys.stderr, '  *', self
                print >>sys.stderr, '  * These tokens could be matched by more than one clause:'
                print >>sys.stderr, '  *', ' '.join(removed)
            if testset:
                if not tokens_unseen: # context sensitive scanners only!
                    if test == 'if':
                        # if it's the first AND last test, then
                        # we can simply put the code without an if/else
                        c.output(gen, indent)
                    else:
                        gen.write(indent, "else:")
                        t = gen.in_test('', [], testset)
                        if len(t) < 70-len(indent):
                            gen.write(' #', t)
                        gen.write("\n")
                        c.output(gen, indent+INDENT)
                else:
                    gen.write(indent, test, " ",
                              gen.in_test('_token', tokens_unseen, testset),
                              ":\n")
                    c.output(gen, indent+INDENT)
                test = "elif"

        if tokens_unseen:
            gen.write(indent, "else:\n")
            gen.write(indent, INDENT, "raise runtime.SyntaxError(_token[0], ")
            gen.write("'Could not match ", self.rule, "')\n")

class Wrapper(Node):
    """This is a base class for nodes that modify a single child."""
    def __init__(self, rule, child):
        Node.__init__(self, rule)
        self.child = child

    def setup(self, gen):
        Node.setup(self, gen)
        self.child.setup(gen)

    def get_children(self):
        return [self.child]

    def update(self, gen):
        Node.update(self, gen)
        self.child.update(gen)
        gen.add_to(self.first, self.child.first)
        gen.equate(self.follow, self.child.follow)

class Option(Wrapper):
    """This class represents an optional clause of the form [A]"""
    def setup(self, gen):
        Wrapper.setup(self, gen)
        if not self.accepts_epsilon:
            self.accepts_epsilon = 1
            gen.changed()

    def __str__(self):
        return '[ %s ]' % str(self.child)

    def output(self, gen, indent):
        if self.child.accepts_epsilon:
            print >>sys.stderr, 'Warning in rule', self.rule+': contents may be empty.'
        gen.write(indent, "if %s:\n" %
                  gen.peek_test(self.first, self.child.first))
        self.child.output(gen, indent+INDENT)

        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))

class Plus(Wrapper):
    """This class represents a 1-or-more repetition clause of the form A+"""
    def setup(self, gen):
        Wrapper.setup(self, gen)
        if self.accepts_epsilon != self.child.accepts_epsilon:
            self.accepts_epsilon = self.child.accepts_epsilon
            gen.changed()

    def __str__(self):
        return '%s+' % str(self.child)

    def update(self, gen):
        Wrapper.update(self, gen)
        gen.add_to(self.child.follow, self.child.first)

    def output(self, gen, indent):
        if self.child.accepts_epsilon:
            print >>sys.stderr, 'Warning in rule', self.rule+':'
            print >>sys.stderr, '  * The repeated pattern could be empty. The resulting parser may not work properly.'
        gen.write(indent, "while 1:\n")
        self.child.output(gen, indent+INDENT)
        union = self.first[:]
        gen.add_to(union, self.follow)
        gen.write(indent+INDENT, "if %s: break\n" %
                  gen.not_peek_test(union, self.child.first))

        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))

class Star(Wrapper):
    """This class represents a 0-or-more repetition clause of the form A*"""
    def setup(self, gen):
        Wrapper.setup(self, gen)
        if not self.accepts_epsilon:
            self.accepts_epsilon = 1
            gen.changed()

    def __str__(self):
        return '%s*' % str(self.child)

    def update(self, gen):
        Wrapper.update(self, gen)
        gen.add_to(self.child.follow, self.child.first)

    def output(self, gen, indent):
        if self.child.accepts_epsilon:
            print >>sys.stderr, 'Warning in rule', self.rule+':'
            print >>sys.stderr, '  * The repeated pattern could be empty. The resulting parser probably will not work properly.'
        gen.write(indent, "while %s:\n" %
                  gen.peek_test(self.follow, self.child.first))
        self.child.output(gen, indent+INDENT)

        # TODO: need to generate tests like this in lots of rules
        if gen.has_option('context-insensitive-scanner'):
            gen.write(indent, "if %s:\n" %
                      gen.not_peek_test(gen.non_ignored_tokens(), self.follow))
            gen.write(indent+INDENT, "raise runtime.SyntaxError(pos=self._scanner.get_pos(), context=_context, msg='Need one of ' + ', '.join(%s))\n" %
                      repr(self.first))
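
As an aside: Generator.calculate() above finds the epsilon/FIRST/FOLLOW sets by sweeping setup()/update() until change_count stops moving, which terminates because the sets only ever grow. The same fixed-point idea in a self-contained Python sketch, with a made-up two-rule grammar:

# Toy FIRST-set computation, mirroring the convergence loop in calculate().
# Hypothetical grammar: S -> A 'b' ; A -> 'a' | <empty>
productions = {
    'S': [['A', 'b']],
    'A': [['a'], []],
}
first = dict((n, set()) for n in productions)

changed = True
while changed:          # keep sweeping until no set grows
    changed = False
    for name, alts in productions.items():
        for alt in alts:
            for sym in alt:
                add = first[sym] if sym in productions else set([sym])
                if not add <= first[name]:
                    first[name] |= add
                    changed = True
                # advance past sym only if it can derive the empty string
                if not (sym in productions and [] in productions[sym]):
                    break

print first   # {'A': set(['a']), 'S': set(['a', 'b'])}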

View file

@@ -1,442 +0,0 @@
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
"""Run time libraries needed to run parsers generated by Yapps.
This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.
"""
import sys, re
MIN_WINDOW=4096
# File lookup window
class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    def __init__(self, pos=None, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
        self.context = context

    def __str__(self):
        if not self.pos: return 'SyntaxError'
        else: return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    pass

class Token(object):
    """Yapps token.

    This is a container for a scanned token.
    """

    def __init__(self, type,value, pos=None):
        """Initialize a token."""
        self.type = type
        self.value = value
        self.pos = pos

    def __repr__(self):
        output = '<%s: %s' % (self.type, repr(self.value))
        if self.pos:
            output += " @ "
            if self.pos[0]:
                output += "%s:" % self.pos[0]
            if self.pos[1]:
                output += "%d" % self.pos[1]
            if self.pos[2] is not None:
                output += ".%d" % self.pos[2]
        output += ">"
        return output

in_name=0
class Scanner(object):
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(i) method is used to retrieve the
    i-th token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).
    """

    def __init__(self, patterns, ignore, input="",
            file=None,filename=None,stacked=False):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : {terminal:None, ...}
          input : string

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.

        The 'ignore' value is either None or a callable, which is called
        with the scanner and the to-be-ignored match object; this can
        be used for include file or comment handling.
        """

        if not filename:
            global in_name
            filename="<f.%d>" % in_name
            in_name += 1

        self.input = input
        self.ignore = ignore
        self.file = file
        self.filename = filename
        self.pos = 0
        self.del_pos = 0 # skipped
        self.line = 1
        self.del_line = 0 # skipped
        self.col = 0
        self.tokens = []
        self.stack = None
        self.stacked = stacked

        self.last_read_token = None
        self.last_token = None
        self.last_types = None

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def stack_input(self, input="", file=None, filename=None):
        """Temporarily parse from a second file."""

        # Already reading from somewhere else: Go on top of that, please.
        if self.stack:
            # autogenerate a recursion-level-identifying filename
            if not filename:
                filename = 1
            else:
                try:
                    filename += 1
                except TypeError:
                    pass
            # now pass off to the include file
            self.stack.stack_input(input,file,filename)
        else:
            try:
                filename += 0
            except TypeError:
                pass
            else:
                filename = "<str_%d>" % filename

#           self.stack = object.__new__(self.__class__)
#           Scanner.__init__(self.stack,self.patterns,self.ignore,input,file,filename, stacked=True)

            # Note that the pattern+ignore are added by the generated
            # scanner code
            self.stack = self.__class__(input,file,filename, stacked=True)

    def get_pos(self):
        """Return a file/line/char tuple."""
        if self.stack: return self.stack.get_pos()

        return (self.filename, self.line+self.del_line, self.col)

#   def __repr__(self):
#       """Print the last few tokens that have been scanned in"""
#       output = ''
#       for t in self.tokens:
#           output += '%s\n' % (repr(t),)
#       return output

    def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
        """Print the line of 'text' that includes position 'p',
        along with a second line with a single caret (^) at position p"""

        file,line,p = pos
        if file != self.filename:
            if self.stack: return self.stack.print_line_with_pointer(pos,length=length,out=out)
            print >>out, "(%s: not in input buffer)" % file
            return

        text = self.input
        p += length-1 # starts at pos 1

        origline=line
        line -= self.del_line
        spos=0
        if line > 0:
            while 1:
                line = line - 1
                try:
                    cr = text.index("\n",spos)
                except ValueError:
                    if line:
                        text = ""
                    break
                if line == 0:
                    text = text[spos:cr]
                    break
                spos = cr+1
        else:
            print >>out, "(%s:%d not in input buffer)" % (file,origline)
            return

        # Now try printing part of the line
        text = text[max(p-80, 0):p+80]
        p = p - max(p-80, 0)

        # Strip to the left
        i = text[:p].rfind('\n')
        j = text[:p].rfind('\r')
        if i < 0 or (0 <= j < i): i = j
        if 0 <= i < p:
            p = p - i - 1
            text = text[i+1:]

        # Strip to the right
        i = text.find('\n', p)
        j = text.find('\r', p)
        if i < 0 or (0 <= j < i): i = j
        if i >= 0:
            text = text[:i]

        # Now shorten the text
        while len(text) > 70 and p > 60:
            # Cut off 10 chars
            text = "..." + text[10:]
            p = p - 7

        # Now print the string, along with an indicator
        print >>out, '> ',text
        print >>out, '> ',' '*p + '^'

    def grab_input(self):
        """Get more input if possible."""
        if not self.file: return
        if len(self.input) - self.pos >= MIN_WINDOW: return

        data = self.file.read(MIN_WINDOW)
        if data is None or data == "":
            self.file = None

        # Drop bytes from the start, if necessary.
        if self.pos > 2*MIN_WINDOW:
            self.del_pos += MIN_WINDOW
            self.del_line += self.input[:MIN_WINDOW].count("\n")

            self.pos -= MIN_WINDOW
            self.input = self.input[MIN_WINDOW:] + data
        else:
            self.input = self.input + data

    def getchar(self):
        """Return the next character."""
        self.grab_input()

        c = self.input[self.pos]
        self.pos += 1
        return c

    def token(self, restrict, context=None):
        """Scan for another token."""

        while 1:
            if self.stack:
                try:
                    return self.stack.token(restrict, context)
                except StopIteration:
                    self.stack = None

            # Keep looking for a token, ignoring any in self.ignore
            self.grab_input()

            # special handling for end-of-file
            if self.stacked and self.pos==len(self.input):
                raise StopIteration

            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            best_m = None
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and m.end()-m.start() > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = m.end()-m.start()
                    best_m = m

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.get_pos(), msg, context=context)

            ignore = best_pat in self.ignore
            value = self.input[self.pos:self.pos+best_match]
            if not ignore:
                tok=Token(type=best_pat, value=value, pos=self.get_pos())

            self.pos += best_match

            npos = value.rfind("\n")
            if npos > -1:
                self.col = best_match-npos
                self.line += value.count("\n")
            else:
                self.col += best_match

            # If we found something that isn't to be ignored, return it
            if not ignore:
                if len(self.tokens) >= 10:
                    del self.tokens[0]
                self.tokens.append(tok)
                self.last_read_token = tok
                # print repr(tok)
                return tok
            else:
                ignore = self.ignore[best_pat]
                if ignore:
                    ignore(self, best_m)

    def peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        context = kw.get("context",None)
        if self.last_token is None:
            self.last_types = types
            self.last_token = self.token(types,context)
        elif self.last_types:
            for t in types:
                if t not in self.last_types:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.last_token.type

    def scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        context = kw.get("context",None)

        if self.last_token is None:
            tok = self.token([type],context)
        else:
            if self.last_types and type not in self.last_types:
                raise NotImplementedError("Unimplemented: restriction set changed")

            tok = self.last_token
            self.last_token = None
        if tok.type != type:
            if not self.last_types: self.last_types=[]
            raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
        return tok.value

class Parser(object):
    """Base class for Yapps-generated parsers.
    """

    def __init__(self, scanner):
        self._scanner = scanner

    def _stack(self, input="",file=None,filename=None):
        """Temporarily read from someplace else"""
        self._scanner.stack_input(input,file,filename)
        self._tok = None

    def _peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.peek(*types, **kw)

    def _scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        return self._scanner.scan(type, **kw)

class Context(object):
    """Class to represent the parser's call stack.
    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.
    """

    def __init__(self, parent, scanner, rule, args=()):
        """Create a new context.

        Args:
        parent: Context object or None
        scanner: Scanner object
        rule: string (name of the rule)
        args: tuple listing parameters to the rule
        """
        self.parent = parent
        self.scanner = scanner
        self.rule = rule
        self.args = args
        while scanner.stack: scanner = scanner.stack
        self.token = scanner.last_read_token

    def __str__(self):
        output = ''
        if self.parent: output = str(self.parent) + ' > '
        output += self.rule
        return output

def print_error(err, scanner, max_ctx=None):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages."""
    # NOTE: this function assumes 80 columns :-(

    # Figure out the line number
    pos = err.pos
    if not pos:
        pos = scanner.get_pos()

    file_name, line_number, column_number = pos
    print >>sys.stderr, '%s:%d:%d: %s' % (file_name, line_number, column_number, err.msg)

    scanner.print_line_with_pointer(pos)

    context = err.context
    token = None
    while context:
        print >>sys.stderr, 'while parsing %s%s:' % (context.rule, tuple(context.args))
        if context.token:
            token = context.token
        if token:
            scanner.print_line_with_pointer(token.pos, length=len(token.value))
        context = context.parent
        if max_ctx:
            max_ctx = max_ctx-1
            if not max_ctx:
                break

def wrap_error_reporter(parser, rule, *args,**kw):
    try:
        return getattr(parser, rule)(*args,**kw)
    except SyntaxError, e:
        print_error(e, parser._scanner)
    except NoMoreTokens:
        print >>sys.stderr, 'Could not complete parsing; stopped around here:'
        print >>sys.stderr, parser._scanner
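
As an aside: the Scanner/Parser split documented above can be exercised directly. A minimal sketch in Python 2, matching the module; the token names and patterns are invented for the example:

# Sketch: drive runtime.Scanner by hand.
from yapps import runtime

patterns = [
    ('WS',  '[ \t]+'),    # ignored whitespace
    ('NUM', '[0-9]+'),
    ('ID',  '[a-zA-Z_]+'),
]
s = runtime.Scanner(patterns, {'WS': None}, input="x 42")

print s.scan('ID')    # -> x    (WS is skipped via the ignore map)
print s.scan('NUM')   # -> 42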

View file

@@ -90,13 +90,13 @@ endif
 	$(ECHO) Copying python script $(notdir $@)
 	$(Q)(echo '#!$(PYTHON)'; sed '1 { /^#!/d; }' $<) > $@.tmp && chmod +x $@.tmp && mv -f $@.tmp $@
-../bin/yapps ../bin/elbpcom: ../bin/%: hal/utils/%.py
+../bin/elbpcom: ../bin/%: hal/utils/%.py
 	@$(ECHO) Syntax checking python script $(notdir $@)
 	$(Q)$(PYTHON) -c 'import sys; compile(open(sys.argv[1]).read(), sys.argv[1], "exec")' $<
 	$(ECHO) Copying python script $(notdir $@)
 	$(Q)(echo '#!$(PYTHON)'; sed '1 { /^#!/d; }' $<) > $@.tmp && chmod +x $@.tmp && mv -f $@.tmp $@
 TARGETS += ../bin/halcompile ../bin/elbpcom
-objects/%.py: %.g ../bin/yapps
+objects/%.py: %.g
 	@mkdir -p $(dir $@)
-	$(Q)../bin/yapps $< $@
+	$(Q)yapps $< $@
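
To make the new rule concrete (a sketch, not from the commit): grammars are now compiled by whatever yapps the system provides, per the yapps2 build-dependency added above, instead of the bundled ../bin/yapps. Roughly what the rule does, expressed in Python for illustration; the paths are made up:

# Sketch: the effect of the 'objects/%.py: %.g' rule after this commit.
import os, subprocess

src, dst = 'halcompile.g', 'objects/halcompile.py'
if not os.path.isdir(os.path.dirname(dst)):
    os.makedirs(os.path.dirname(dst))       # @mkdir -p $(dir $@)
subprocess.check_call(['yapps', src, dst])  # $(Q)yapps $< $@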

View file

@@ -1,115 +0,0 @@
#!/usr/bin/env python
#
# Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
#
# This version of Yapps 2 can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
import sys, os, re
BASE = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), ".."))
sys.path.insert(0, os.path.join(BASE, "lib", "python"))
from yapps import runtime, parsetree
def generate(inputfilename, outputfilename='', dump=0, **flags):
    """Generate a grammar, given an input filename (X.g)
    and an output filename (defaulting to X.py)."""

    if not outputfilename:
        if inputfilename.endswith('.g'):
            outputfilename = inputfilename[:-2] + '.py'
        else:
            raise Exception('Must specify output filename if input filename is not *.g')

    DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers
    preparser, postparser = None, None # Code before and after the parser desc

    # Read the entire file
    s = open(inputfilename,'r').read()

    # See if there's a separation between the pre-parser and parser
    f = s.find(DIVIDER)
    if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):]

    # See if there's a separation between the parser and post-parser
    f = s.find(DIVIDER)
    if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):]

    # Create the parser and scanner and parse the text
    scanner = grammar.ParserDescriptionScanner(s, filename=inputfilename)
    if preparser: scanner.del_line += preparser.count('\n')

    parser = grammar.ParserDescription(scanner)
    t = runtime.wrap_error_reporter(parser, 'Parser')
    if t is None: return 1 # Failure
    if preparser is not None: t.preparser = preparser
    if postparser is not None: t.postparser = postparser

    # Check the options
    for f in t.options.keys():
        for opt,_,_ in yapps_options:
            if f == opt: break
        else:
            print >>sys.stderr, 'Warning: unrecognized option', f

    # Add command line options to the set
    for f in flags.keys(): t.options[f] = flags[f]

    # Generate the output
    if dump:
        t.dump_information()
    else:
        t.output = open(outputfilename, 'w')
        t.generate_output()
    return 0

if __name__ == '__main__':
    import doctest
    doctest.testmod(sys.modules['__main__'])
    doctest.testmod(parsetree)

    # Someday I will use optparse, but Python 2.3 is too new at the moment.
    yapps_options = [
        ('context-insensitive-scanner',
         'context-insensitive-scanner',
         'Scan all tokens (see docs)'),
    ]

    import getopt
    optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['help', 'dump', 'use-devel-grammar'])
    if not args or len(args) > 2:
        print >>sys.stderr, 'Usage:'
        print >>sys.stderr, '  python', sys.argv[0], '[flags] input.g [output.py]'
        print >>sys.stderr, 'Flags:'
        print >>sys.stderr, ('  --dump' + ' '*40)[:35] + 'Dump out grammar information'
        print >>sys.stderr, ('  --use-devel-grammar' + ' '*40)[:35] + 'Use the devel grammar parser from yapps_grammar.py instead of the stable grammar from grammar.py'
        for flag, _, doc in yapps_options:
            print >>sys.stderr, ('  -f' + flag + ' '*40)[:35] + doc
    else:
        # Read in the options and create a list of flags
        flags = {}
        use_devel_grammar = 0
        for opt in optlist:
            for flag, name, _ in yapps_options:
                if opt == ('-f', flag):
                    flags[name] = 1
                    break
            else:
                if opt == ('--dump', ''):
                    flags['dump'] = 1
                elif opt == ('--use-devel-grammar', ''):
                    use_devel_grammar = 1
                else:
                    print >>sys.stderr, 'Warning: unrecognized option', opt[0], opt[1]

        if use_devel_grammar:
            import yapps_grammar as grammar
        else:
            from yapps import grammar

        sys.exit(generate(*tuple(args), **flags))
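
As an aside: the DIVIDER handling in generate() above is easy to miss. Text before the first '%%' line becomes the preparser and text after the second becomes the postparser, both copied verbatim around the generated parser. A self-contained Python sketch of that split, with made-up input:

# Sketch of the '\n%%\n' split performed by generate().
DIVIDER = '\n%%\n'
s = "PRE\n%%\nparser body\n%%\nPOST"

preparser = postparser = None
f = s.find(DIVIDER)
if f >= 0: preparser, s = s[:f] + '\n\n', s[f + len(DIVIDER):]
f = s.find(DIVIDER)
if f >= 0: s, postparser = s[:f], '\n\n' + s[f + len(DIVIDER):]

print repr(preparser)    # 'PRE\n\n'
print repr(s)            # 'parser body'
print repr(postparser)   # '\n\nPOST'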