Merge pull request #344 from cshorler/python_p21_lexer_and_parser_improvements
Python p21 lexer and parser improvements
commit 7568033499

1 changed file with 166 additions and 90 deletions
@@ -35,8 +35,10 @@
 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import logging

 import ply.lex as lex
 import ply.yacc as yacc
 from ply.lex import LexError

+logger = logging.getLogger(__name__)

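The new module-level logger follows the standard library pattern: the module only calls logging.getLogger(__name__) and attaches no handlers, so diagnostics stay silent until the host application opts in. A minimal sketch of the application side (the module name 'p21' is an assumption, not from the diff):

import logging

# Application-side configuration; the parser module itself never
# configures handlers, it only logs to logging.getLogger(__name__).
logging.basicConfig(level=logging.WARNING)
logging.getLogger('p21').setLevel(logging.DEBUG)  # hypothetical module name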
@@ -64,16 +66,20 @@ base_tokens = ['INTEGER', 'REAL', 'USER_DEFINED_KEYWORD', 'STANDARD_KEYWORD', 'S
 ####################################################################################################
 # Lexer
 ####################################################################################################
 class Lexer(object):
     tokens = list(base_tokens)
-    states = (('compatibility', 'inclusive'),)
+    states = (('slurp', 'exclusive'),)

-    def __init__(self, debug=0, optimize=0, compatibility_mode=False, header_limit=1024):
+    def __init__(self, debug=0, optimize=0, compatibility_mode=False, header_limit=4096):
         self.base_tokens = list(base_tokens)
-        self.entity_mapping = {}
+        self.schema_dict = {}
+        self.active_schema = {}
+        self.input_length = 0
         self.compatibility_mode = compatibility_mode
         self.header_limit = header_limit
         self.lexer = lex.lex(module=self, debug=debug, debuglog=logger, optimize=optimize,
                              errorlog=logger)
+        self.reset()

     def __getattr__(self, name):
         if name == 'lineno':
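The switch from an inclusive 'compatibility' state to an exclusive 'slurp' state is the heart of this hunk: in PLY, an exclusive state suspends every INITIAL rule until lexer.begin() switches back, which lets the lexer discard arbitrary bytes before the exchange structure starts. A self-contained sketch of the idiom (toy tokens, not the Part 21 rules):

import ply.lex as lex

tokens = ('START', 'WORD')
states = (('skip', 'exclusive'),)    # 'exclusive': INITIAL rules are suspended

def t_skip_START(t):
    r'BEGIN;'
    t.lexer.begin('INITIAL')         # marker found: switch to normal scanning

def t_skip_error(t):
    t.lexer.skip(1)                  # discard one byte of leading garbage

t_skip_ignore = ''                   # exclusive states do not inherit t_ignore

def t_WORD(t):
    r'[A-Za-z]+'
    return t

t_ignore = ' \n'

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.begin('skip')                  # start in the garbage-eating state
lexer.input('...junk...BEGIN;hello world')
print([tok.value for tok in lexer])  # ['hello', 'world']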
@@ -84,83 +90,118 @@ class Lexer(object):
             raise AttributeError

     def input(self, s):
-        startidx = s.find('ISO-10303-21;', 0, self.header_limit)
-        if startidx == -1:
-            raise ValueError('ISO-10303-21 header not found')
-        self.lexer.input(s[startidx:])
-        self.lexer.lineno += s[0:startidx].count('\n')
-
-        if self.compatibility_mode:
-            self.lexer.begin('compatibility')
-        else:
-            self.lexer.begin('INITIAL')
+        self.lexer.input(s)
+        self.input_length += len(s)
+
+    def reset(self):
+        self.lexer.lineno = 1
+        self.lexer.begin('slurp')

     def token(self):
         try:
             return next(self.lexer)
         except StopIteration:
             return None

-    def register_entities(self, entities):
+    def activate_schema(self, schema_name):
+        if schema_name in self.schema_dict:
+            self.active_schema = self.schema_dict[schema_name]
+        else:
+            raise ValueError('schema not registered')
+
+    def register_schema(self, schema_name, entities):
+        if schema_name in self.schema_dict:
+            raise ValueError('schema already registered')
+
+        for k in entities:
+            if k in self.base_tokens: raise ValueError('schema cannot override base_tokens')
+
         if isinstance(entities, list):
             entities = dict((k, k) for k in entities)

-        self.entity_mapping.update(entities)
+        self.schema_dict[schema_name] = entities
+
+    def t_slurp_PART21_START(self, t):
+        r'ISO-10303-21;'
+        t.lexer.begin('INITIAL')
+        return t
+
+    def t_slurp_error(self, t):
+        offset = t.value.find('\nISO-10303-21;', 0, self.header_limit)
+        if offset == -1 and self.header_limit < len(t.value):  # not found within header_limit
+            raise LexError("Scanning error. try increasing lexer header_limit parameter",
+                           "{0}...".format(t.value[0:20]))
+        elif offset == -1:  # not found before EOF
+            t.lexer.lexpos = self.input_length
+        else:  # found ISO-10303-21;
+            offset += 1  # also skip the \n
+            t.lexer.lineno += t.value[0:offset].count('\n')
+            t.lexer.skip(offset)

     # Comment (ignored)
-    def t_ANY_COMMENT(self, t):
+    def t_COMMENT(self, t):
         r'/\*(.|\n)*?\*/'
         t.lexer.lineno += t.value.count('\n')

-    def t_ANY_PART21_START(self, t):
-        r'ISO-10303-21;'
-        return t
-
-    def t_ANY_PART21_END(self, t):
+    def t_PART21_END(self, t):
         r'END-ISO-10303-21;'
+        t.lexer.begin('slurp')
         return t

-    def t_ANY_HEADER_SEC(self, t):
+    def t_HEADER_SEC(self, t):
         r'HEADER;'
         return t

-    def t_ANY_ENDSEC(self, t):
+    def t_ENDSEC(self, t):
         r'ENDSEC;'
         return t

     # Keywords
-    def t_compatibility_STANDARD_KEYWORD(self, t):
+    def t_STANDARD_KEYWORD(self, t):
         r'(?:!|)[A-Za-z_][0-9A-Za-z_]*'
-        t.value = t.value.upper()
-        if t.value == 'DATA':
-            t.type = 'DATA_SEC'
-        elif t.value in self.entity_mapping:
-            t.type = self.entity_mapping[t.value]
+        if self.compatibility_mode:
+            t.value = t.value.upper()
+        elif not t.value.isupper():
+            raise LexError('Scanning error. Mixed/lower case keyword detected, please use compatibility_mode=True', t.value)
+
+        if t.value in self.base_tokens:
+            t.type = t.value
+        elif t.value in self.active_schema:
+            t.type = self.active_schema[t.value]
         elif t.value.startswith('!'):
             t.type = 'USER_DEFINED_KEYWORD'
         return t

-    def t_ANY_STANDARD_KEYWORD(self, t):
-        r'(?:!|)[A-Z_][0-9A-Z_]*'
-        if t.value == 'DATA':
-            t.type = 'DATA_SEC'
-        elif t.value in self.entity_mapping:
-            t.type = self.entity_mapping[t.value]
-        elif t.value.startswith('!'):
-            t.type = 'USER_DEFINED_KEYWORD'
-        return t
-
-    def t_ANY_newline(self, t):
+    def t_newline(self, t):
         r'\n+'
         t.lexer.lineno += len(t.value)

     # Simple Data Types
-    t_ANY_REAL = r'[+-]*[0-9][0-9]*\.[0-9]*(?:E[+-]*[0-9][0-9]*)?'
-    t_ANY_INTEGER = r'[+-]*[0-9][0-9]*'
-    t_ANY_STRING = r"'(?:[][!\"*$%&.#+,\-()?/:;<=>@{}|^`~0-9a-zA-Z_\\ ]|'')*'"
-    t_ANY_BINARY = r'"[0-3][0-9A-F]*"'
-    t_ANY_ENTITY_INSTANCE_NAME = r'\#[0-9]+'
-    t_ANY_ENUMERATION = r'\.[A-Z_][A-Z0-9_]*\.'
+    def t_REAL(self, t):
+        r'[+-]*[0-9][0-9]*\.[0-9]*(?:E[+-]*[0-9][0-9]*)?'
+        t.value = float(t.value)
+        return t
+
+    def t_INTEGER(self, t):
+        r'[+-]*[0-9][0-9]*'
+        t.value = int(t.value)
+        return t
+
+    def t_STRING(self, t):
+        r"'(?:[][!\"*$%&.#+,\-()?/:;<=>@{}|^`~0-9a-zA-Z_\\ ]|'')*'"
+        t.value = t.value[1:-1]
+        return t
+
+    def t_BINARY(self, t):
+        r'"[0-3][0-9A-F]*"'
+        try:
+            t.value = int(t.value[2:-1], base=16)
+        except ValueError:
+            t.value = None
+        return t
+
+    t_ENTITY_INSTANCE_NAME = r'\#[0-9]+'
+    t_ENUMERATION = r'\.[A-Z_][A-Z0-9_]*\.'

     # Punctuation
     literals = '()=;,*$'
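The schema registry replaces the old flat entity_mapping: keyword-to-token mappings are now grouped per schema, activated as a unit, and checked against base_tokens so a schema can never shadow a built-in token. A usage sketch, assuming the Lexer class above (the schema and entity names are illustrative):

lexer = Lexer(compatibility_mode=False, header_limit=4096)

# A schema may be a list (each keyword maps to itself) or an explicit
# {keyword: token_type} dict.
lexer.register_schema('DEMO_SCHEMA', ['PRODUCT', 'CARTESIAN_POINT'])

# Base tokens are protected:
try:
    lexer.register_schema('BAD_SCHEMA', ['INTEGER'])
except ValueError as e:
    print(e)                      # 'schema cannot override base_tokens'

# Keywords resolve to entity token types only once their schema is active.
lexer.activate_schema('DEMO_SCHEMA')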
@@ -186,7 +227,7 @@ class P21Header:
 class HeaderEntity:
     def __init__(self, type_name, *params):
         self.type_name = type_name
-        self.params = list(*params) if params else []
+        self.params = list(params) if params else []

 class Section:
     def __init__(self, entities):
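The list(*params) → list(params) change is a genuine bug fix, not a style tweak: *params already arrives as a tuple, and re-splatting it passes each parameter as a separate argument to list(), which only happens to work when there is exactly one iterable parameter. A quick demonstration:

def old_ctor(*params):
    return list(*params) if params else []

def new_ctor(*params):
    return list(params) if params else []

print(new_ctor(1, 2, 3))    # [1, 2, 3]
print(old_ctor([1, 2]))     # [1, 2] -- only worked for one iterable argument
# old_ctor(1, 2, 3)         # TypeError: list expected at most 1 argument, got 3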
@@ -196,17 +237,17 @@ class SimpleEntity:
     def __init__(self, ref, type_name, *params):
         self.ref = ref
         self.type_name = type_name
-        self.params = list(*params) if params else []
+        self.params = list(params) if params else []

 class ComplexEntity:
     def __init__(self, ref, *params):
         self.ref = ref
-        self.params = list(*params) if params else []
+        self.params = list(params) if params else []

 class TypedParameter:
     def __init__(self, type_name, *params):
         self.type_name = type_name
-        self.params = list(*params) if params else None
+        self.params = list(params) if params else None

 ####################################################################################################
 # Parser
@@ -222,11 +263,12 @@ class Parser(object):
         except AttributeError: pass

         self.parser = yacc.yacc(module=self, debug=debug, debuglog=logger, errorlog=logger)
+        self.reset()

     def parse(self, p21_data, **kwargs):
+        #TODO: will probably need to change this function if the lexer is ever to support t_eof
+        self.lexer.reset()
         self.lexer.input(p21_data)
-        self.refs = {}
-        self.in_p21_exchange_structure = False

         if 'debug' in kwargs:
             result = self.parser.parse(lexer=self.lexer, debug=logger,
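parse() hands yacc the wrapper object via lexer=, which works because yacc only requires the lexer to expose a token() method (input() was already called by the wrapper). A self-contained sketch of that duck-typed contract, using a hypothetical token stream rather than the P21 grammar:

import ply.lex as lex
import ply.yacc as yacc

tokens = ('NUM',)

def make_token(type_, value, lineno=1, lexpos=0):
    # Hand-rolled LexToken; yacc only reads .type/.value/.lineno/.lexpos.
    t = lex.LexToken()
    t.type, t.value, t.lineno, t.lexpos = type_, value, lineno, lexpos
    return t

class StubLexer(object):
    """Anything with a token() method satisfies yacc's lexer contract."""
    def __init__(self, toks):
        self.toks = iter(toks)
    def token(self):
        return next(self.toks, None)   # None signals end of input

def p_list(p):
    """list : list NUM
            | NUM"""
    p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

def p_error(p):
    pass

parser = yacc.yacc()
print(parser.parse(lexer=StubLexer([make_token('NUM', n) for n in (1, 2, 3)])))  # [1, 2, 3]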
@@ -235,24 +277,24 @@ class Parser(object):
             result = self.parser.parse(lexer=self.lexer, **kwargs)
         return result

+    def reset(self):
+        self.refs = {}
+        self.is_in_exchange_structure = False
+
     def p_exchange_file(self, p):
-        """exchange_file : p21_start header_section data_section_list p21_end"""
+        """exchange_file : check_p21_start_token header_section data_section_list check_p21_end_token"""
         p[0] = P21File(p[2], p[3])

-    def p_p21_start(self, p):
-        """p21_start : PART21_START"""
-        if self.in_p21_exchange_structure:
-            raise SyntaxError
-        self.in_p21_exchange_structure = True
+    def p_check_start_token(self, p):
+        """check_p21_start_token : PART21_START"""
+        self.is_in_exchange_structure = True
+        p[0] = p[1]

-    def p_p21_end(self, p):
-        """p21_end : PART21_END"""
-        if not self.in_p21_exchange_structure:
-            raise SyntaxError
-        self.in_p21_exchange_structure = False
+    def p_check_end_token(self, p):
+        """check_p21_end_token : PART21_END"""
+        self.is_in_exchange_structure = False
+        p[0] = p[1]

     # TODO: Specialise the first 3 header entities
     def p_header_section(self, p):
         """header_section : HEADER_SEC header_entity header_entity header_entity ENDSEC"""
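Moving refs and the exchange-structure flag into reset() makes a single Parser reusable across many files without rebuilding the LALR tables; parse() already resets the lexer itself, and per-file parser state is cleared explicitly, as the updated tests below do. The intended lifecycle, with hypothetical file names:

parser = Parser()                          # lexer and parser tables built once

for path in ('first.stp', 'second.stp'):   # hypothetical inputs
    parser.reset()                         # clear refs and is_in_exchange_structure
    with open(path, 'r') as f:
        result = parser.parse(f.read())    # a P21File instance on success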
@@ -270,8 +312,8 @@ class Parser(object):
     def p_check_entity_instance_name(self, p):
         """check_entity_instance_name : ENTITY_INSTANCE_NAME"""
         if p[1] in self.refs:
-            logger.error('Line %i, duplicate entity instance name: %s', p.lineno(1), p[1])
-            raise ValueError('Duplicate entity instance name')
+            logger.error('Line: {0}, SyntaxError - Duplicate Entity Instance Name: {1}'.format(p.lineno(1), p[1]))
+            raise SyntaxError
         else:
             self.refs[p[1]] = None
             p[0] = p[1]
@@ -280,6 +322,11 @@ class Parser(object):
         """simple_entity_instance : check_entity_instance_name '=' simple_record ';'"""
         p[0] = SimpleEntity(p[1], *p[3])

+    def p_entity_instance_error(self, p):
+        """simple_entity_instance : error '=' simple_record ';'
+           complex_entity_instance : error '=' subsuper_record ';'"""
+        pass
+
     def p_complex_entity_instance(self, p):
         """complex_entity_instance : check_entity_instance_name '=' subsuper_record ';'"""
         p[0] = ComplexEntity(p[1], p[3])
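These error productions pair with the SyntaxError now raised in p_check_entity_instance_name: raising SyntaxError inside a rule function is PLY's way to force the parser into error-recovery mode, and a production containing the special `error` token then resynchronises, here at the instance's closing ';', so one bad instance no longer aborts the whole data section. A self-contained toy version of the same pattern:

import ply.lex as lex
import ply.yacc as yacc

tokens = ('NAME',)
literals = ';'
t_NAME = r'[a-z]+'
t_ignore = ' '

def t_error(t):
    t.lexer.skip(1)

def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

def p_stmt(p):
    """stmt : name ';'"""
    p[0] = p[1]

def p_stmt_error(p):
    """stmt : error ';'"""           # resynchronise at the next ';'
    p[0] = None

def p_name(p):
    """name : NAME"""
    if p[1] == 'bad':
        raise SyntaxError            # reject the value, enter error recovery
    p[0] = p[1]

def p_error(p):
    pass                             # recovery is handled by the error rule

lexer = lex.lex()
parser = yacc.yacc()
print(parser.parse('ok; bad; fine;', lexer=lexer))   # ['ok', None, 'fine']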
@@ -337,12 +384,12 @@ class Parser(object):
         p[0] = []

     def p_data_start(self, p):
-        """data_start : DATA_SEC '(' parameter_list ')' ';'"""
+        """data_start : DATA '(' parameter_list ')' ';'"""
         pass

     def p_data_start_empty(self, p):
-        """data_start : DATA_SEC '(' ')' ';'
-                      | DATA_SEC ';'"""
+        """data_start : DATA '(' ')' ';'
+                      | DATA ';'"""
         pass

     def p_data_section(self, p):
@@ -351,10 +398,13 @@ class Parser(object):

     def p_entity_instance_list(self, p):
         """entity_instance_list : entity_instance_list entity_instance
-                                | empty"""
+                                | entity_instance"""
         try: p[0] = p[1] + [p[2],]
-        except IndexError: pass # p[2] doesn't exist, p[1] is None
-        except TypeError: p[0] = [p[2],] # p[1] is None, p[2] is valid
+        except IndexError: p[0] = [p[1],]
+
+    def p_entity_instance_list_empty(self, p):
+        """entity_instance_list : empty"""
+        p[0] = []

     def p_entity_instance(self, p):
         """entity_instance : simple_entity_instance
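Splitting the empty case into its own rule removes the None-threading of the old version, where `empty` seeded p[1] with None and the TypeError handler silently repaired the first append. Modelling p[] as a plain Python list (slot 0 is the result, slots 1.. the right-hand-side values) shows the difference:

def old_rule(p):        # entity_instance_list : entity_instance_list entity_instance | empty
    try:
        p[0] = p[1] + [p[2]]
    except IndexError:
        pass            # 'empty' case: result stays None
    except TypeError:
        p[0] = [p[2]]   # first instance lands on the None seeded by 'empty'

def new_list_rule(p):   # entity_instance_list : entity_instance_list entity_instance | entity_instance
    try:
        p[0] = p[1] + [p[2]]
    except IndexError:
        p[0] = [p[1]]   # single-instance base case: always a real list

def new_empty_rule(p):  # entity_instance_list : empty
    p[0] = []

p = [None, None]                 # old grammar reduces 'empty' first...
old_rule(p); print(p[0])         # None -- the seed the TypeError branch must absorb
p = [None, 'inst']               # new grammar starts from a real instance instead
new_list_rule(p); print(p[0])    # ['inst']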
@@ -381,34 +431,60 @@ class Parser(object):
         pass

 def test_debug():
+    import os.path
+
     logging.basicConfig()
     logger.setLevel(logging.DEBUG)

-    s = open('io1-tu-203.stp', 'r').read()
     parser = Parser()
+    parser.reset()

-    try:
-        r = parser.parse(s, debug=1)
-    except SystemExit:
-        pass
-
-    return (parser, r)
+    logger.info("***** parser debug *****")
+    p = os.path.expanduser('~/projects/src/stepcode/data/ap214e3/s1-c5-214/s1-c5-214.stp')
+    with open(p, 'rU') as f:
+        s = f.read()
+    try:
+        parser.parse(s, debug=1)
+    except SystemExit:
+        pass
+
+    logger.info("***** finished *****")

 def test():
+    import os, os.path, itertools, codecs
+
     logging.basicConfig()
-    logger.setLevel(logging.ERROR)
+    logger.setLevel(logging.INFO)

-    s = open('io1-tu-203.stp', 'r').read()
     parser = Parser()
+    compat_list = []

-    try:
-        r = parser.parse(s)
-    except SystemExit:
-        pass
-
-    return (parser, r)
+    def parse_check(p):
+        logger.info("processing {0}".format(p))
+        parser.reset()
+        with open(p, 'rU') as f:
+            iso_wrapper = codecs.EncodedFile(f, 'iso-8859-1')
+            s = iso_wrapper.read()
+            parser.parse(s)
+
+    logger.info("***** standard test *****")
+    for d, _, files in os.walk(os.path.expanduser('~/projects/src/stepcode')):
+        for f in itertools.ifilter(lambda x: x.endswith('.stp'), files):
+            p = os.path.join(d, f)
+            try:
+                parse_check(p)
+            except LexError:
+                logger.exception('Lexer issue, adding {0} to compatibility test list'.format(os.path.basename(p)))
+                compat_list.append(p)
+
+    lexer = Lexer(compatibility_mode=True)
+    parser = Parser(lexer=lexer)
+
+    logger.info("***** compatibility test *****")
+    for p in compat_list:
+        parse_check(p)
+
+    logger.info("***** finished *****")

 if __name__ == '__main__':
     test()
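Taken together, the lexer and parser changes support this kind of end-to-end use. The snippet below is a hedged sketch: the minimal exchange structure is hypothetical, and it assumes the header-entity and record rules not shown in this diff accept generic keyword records:

P21_MIN = """junk before the marker is consumed by the 'slurp' state
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('demo'),'2;1');
FILE_NAME('demo.stp','2014-01-01',(''),(''),'','','');
FILE_SCHEMA(('AUTOMOTIVE_DESIGN'));
ENDSEC;
DATA;
#1=FOO('bar',1.0,#2);
#2=BAZ(.STEEL.,$,*);
ENDSEC;
END-ISO-10303-21;
"""

parser = Parser()                # tables are built once, in __init__
parser.reset()                   # clear per-file state before each parse
result = parser.parse(P21_MIN)   # P21File(header_section, data_section_list)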