Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.0.7" 
  61  __versionTime__ = "17 Dec 2015 04:11" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75   
  76  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  77   
  78  __all__ = [ 
  79  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  80  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  81  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  82  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  83  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  84  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  85  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  86  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  87  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  88  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  89  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  90  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  91  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  92  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  93  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  94  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  95  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  96  ] 
  97   
  98  PY_3 = sys.version.startswith('3') 
  99  if PY_3: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104   
 105      # build list of single arg builtins, that can be used as parse actions 
 106      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 107   
 108  else: 
 109      _MAX_INT = sys.maxint 
 110      range = xrange 
111 112 - def _ustr(obj):
113 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 114 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 115 then < returns the unicode object | encodes it with the default encoding | ... >. 116 """ 117 if isinstance(obj,unicode): 118 return obj 119 120 try: 121 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 122 # it won't break any existing code. 123 return str(obj) 124 125 except UnicodeEncodeError: 126 # Else encode it 127 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 128 xmlcharref = Regex('&#\d+;') 129 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 130 return xmlcharref.transformString(ret)
131 132 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 133 singleArgBuiltins = [] 134 import __builtin__ 135 for fname in "sum len sorted reversed list tuple set any all min max".split(): 136 try: 137 singleArgBuiltins.append(getattr(__builtin__,fname)) 138 except AttributeError: 139 continue 140 141 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the five XML special characters replaced by entities.

    The characters C{&}, C{>}, C{<}, C{"} and C{'} become C{&amp;}, C{&gt;},
    C{&lt;}, C{&quot;} and C{&apos;} respectively.
    """
    # the ampersand has to go first, otherwise the '&' of the entities
    # produced by the later replacements would be escaped again
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in replacements:
        data = data.replace(raw, entity)
    return data
152
class _Constants(object):
    """Empty class whose instances serve as plain attribute containers."""
# Character-set constants used when building grammar expressions.
alphas = "".join((string.ascii_lowercase, string.ascii_uppercase))
nums = string.digits
hexnums = "".join((nums, "ABCDEFabcdef"))
alphanums = "".join((alphas, nums))
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing.

    Besides the stored C{loc}, C{msg}, C{pstr} and C{parserElement}, the
    attributes C{lineno}, C{col}/C{column} and C{line} are computed on access
    from C{loc} and C{pstr}.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Store location, message, parsed string and originating element.

        When C{msg} is omitted, C{pstr} is taken to be the message and the
        parsed string is recorded as empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        """Format the message together with character offset, line and column."""
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        """Same text as C{str()}, produced via C{_ustr}."""
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty C{markerString} leaves the line unmarked; the result is
           stripped of surrounding whitespace either way.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = line_str[:line_column] + markerString + line_str[line_column:]
        return line_str.strip()

    def __dir__(self):
        """List the computed attribute names ahead of the class attributes."""
        return ["lineno", "col", "line"] + dir(type(self))
209
class ParseException(ParseBaseException):
    """Raised when a parse expression fails to match the input.

       The following attributes are available by name:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
218
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when inconsistent parse content
       is found; it stops all parsing immediately."""
223
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        """Copy parsed string, location, message and element from exception C{pe}."""
        pstr, loc, msg, elem = pe.pstr, pe.loc, pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(pstr, loc, msg, elem)
231
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__(self, parseElementList):
        """Remember the sequence of parse elements that make up the recursion trace."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        """Return the class name followed by the stored element trace."""
        # wrap in a 1-tuple so that a trace which is itself a tuple is
        # formatted as one value instead of being unpacked by '%'
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
252
class _ParseResultsWithOffset(object):
    """Pair of a value and an offset, stored as the 2-tuple C{tup}.

    Indexing (and therefore unpacking) is delegated to that tuple.
    """
    def __init__(self, p1, p2):
        self.tup = p1, p2

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        return "%r" % (self.tup,)

    def setOffset(self, i):
        """Replace the offset, keeping the stored value."""
        value = self.tup[0]
        self.tup = (value, i)
262
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})

       Tokens are kept in an internal list; results names map to lists of
       (value, position) pairs in an internal dictionary.
       """
    def __new__(cls, toklist, name=None, asList=True, modal=True):
        """Return C{toklist} unchanged when it already is an instance of C{cls};
           otherwise allocate a new object flagged for first-time initialization."""
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True, isinstance=isinstance):
        """Initialize the token list and, when C{name} is given, record the
           tokens under that results name.

           C{modal=False} marks the name as accumulating (all matches are
           reported instead of only the last).  C{isinstance} is bound as a
           default argument purely as a local-lookup speedup.
        """
        if self.__doinit:
            # only set up the containers once; __new__ may hand back an
            # existing, already-initialized instance
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip empty token values (None, '' or [])
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
                if isinstance(toklist, basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        self[name] = toklist

    def __getitem__(self, i):
        """Integer or slice: index into the token list.  Any other key: look up
           the results name, returning the last value, or all values (as a
           C{ParseResults}) for accumulating names."""
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Store C{v} as a token (integer key) or append it to the values
           recorded for results name C{k}."""
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # nested results keep a weak reference to their container
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        """Delete tokens by integer/slice (adjusting recorded name positions)
           or delete a results name by key."""
        if isinstance(i, (int, slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name, occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        """True if C{k} is a defined results name."""
        return k in self.__tokdict

    def __len__(self): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__(self): return iter( self.__toklist )
    def __reversed__(self): return iter( self.__toklist[::-1] )

    def iterkeys(self):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues(self):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems(self):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys(self):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values(self):
            """Returns all named result values."""
            return list(self.itervalues())

        def items(self):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys(self):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop(self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().

           Raises C{TypeError} for any keyword argument other than C{default}."""
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert(self, index, insStr):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name, occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append(self, item):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend(self, itemseq):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear(self):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__(self, name):
        """Attribute-style access to results names; an undefined name yields
           the empty string instead of raising."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__(self, other):
        """Return a copy of these results extended with C{other}."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        """Append the tokens of C{other} and merge its results names, shifting
           their positions by the current token count."""
        if other.__tokdict:
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])) )
                                for (k, vlist) in otheritems for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        """Support C{sum()} over results: C{0 + results} returns a copy.

           Any other left operand is unsupported; returning C{NotImplemented}
           makes Python raise C{TypeError} rather than silently yielding
           C{None}."""
        if isinstance(other, int) and other == 0:
            return self.copy()
        return NotImplemented

    def __repr__(self):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__(self):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList(self, sep=''):
        """Flatten the tokens into a list of strings, placing C{sep} between
           top-level items when it is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens;
           nested C{ParseResults} become nested lists, other tokens are
           returned as they are."""
        return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]

    def asDict(self):
        """Returns the named parse results as dictionary."""
        if PY_3:
            return dict( self.items() )
        else:
            return dict( self.iteritems() )

    def copy(self):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
                                                        for v in vlist)
        # NOTE(review): the per-level indent literal is reproduced as it
        # appears in this listing; the listing may have collapsed repeated
        # spaces - confirm against the released module.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i, res in enumerate(self.__toklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self, sub):
        """Return the results name under which the object C{sub} is stored
           (identity comparison), or C{None}."""
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so take the
               # first entry through an iterator instead
               next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        # NOTE(review): the per-depth padding literal is reproduced as it
        # appears in this listing; repeated spaces may have been collapsed -
        # confirm against the released module.
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent, (' '*depth), k) )
                if isinstance(v, ParseResults):
                    if v:
                        out.append( v.dump(indent, depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv, ParseResults) for vv in self):
            v = self
            for i, vv in enumerate(v):
                if isinstance(vv, ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' '*(depth)), i, indent, (' '*(depth+1)), vv.dump(indent, depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' '*(depth)), i, indent, (' '*(depth+1)), _ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return picklable state; the weak parent reference is replaced by
           the parent object itself (or C{None})."""
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self, state):
        """Restore the state produced by C{__getstate__}, re-creating the weak
           reference to the parent."""
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        """Class attributes plus the defined results names."""
        return (dir(type(self)) + list(self.keys()))

# Register as a virtual MutableMapping.  The ABC moved to collections.abc and
# the old collections alias is gone from recent Python 3 releases, so prefer
# the new location and fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.  A location that sits on a newline character
   is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind yields -1 when there is no earlier newline, which makes the
    # first character of the string column 1
    previousNewline = strg.rfind("\n", 0, loc)
    return loc - previousNewline
702
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # every newline found before loc starts another line
    newlinesBefore = strg.count("\n", 0, loc)
    return 1 + newlinesBefore
714
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the newline characters themselves.
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
724
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched with its location and (line,col)."""
    pieces = [
        "Match ",
        _ustr(expr),
        " at loc ",
        _ustr(loc),
        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ),
    ]
    print ("".join(pieces))
727
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with its tokens as a list."""
    matched = _ustr(expr)
    tokens = str(toks.asList())
    print ("Matched " + matched + " -> " + tokens)
730
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception raised while trying to match an expression."""
    description = _ustr(exc)
    print ("Exception raised:" + description)
733
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
737 738 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 739 #~ 'decorator to trim function calls to match the arity of the target' 740 #~ def _trim_arity(func, maxargs=3): 741 #~ if func in singleArgBuiltins: 742 #~ return lambda s,l,t: func(t) 743 #~ limit = 0 744 #~ foundArity = False 745 #~ def wrapper(*args): 746 #~ nonlocal limit,foundArity 747 #~ while 1: 748 #~ try: 749 #~ ret = func(*args[limit:]) 750 #~ foundArity = True 751 #~ return ret 752 #~ except TypeError: 753 #~ if limit == maxargs or foundArity: 754 #~ raise 755 #~ limit += 1 756 #~ continue 757 #~ return wrapper 758 759 # this version is Python 2.x-3.x cross-compatible 760 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called as C{fn(s,loc,toks)} even if it accepts
       fewer arguments.

       Builtins listed in C{singleArgBuiltins} are called with the tokens only.
       For anything else the wrapper retries the call after a C{TypeError},
       dropping one more leading argument each time, until a call succeeds;
       the number of dropped arguments is then kept for later calls.  Once a
       call has succeeded, or more than C{maxargs} arguments have been
       dropped, a C{TypeError} is re-raised to the caller.
       """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells shared with the closure
    # (keeps the code usable on Python 2, which has no 'nonlocal')
    skip = [0]
    arityKnown = [False]
    def wrapper(*args):
        while True:
            try:
                result = func(*args[skip[0]:])
            except TypeError:
                if arityKnown[0] or skip[0] > maxargs:
                    raise
                skip[0] += 1
            else:
                arityKnown[0] = True
                return result
    return wrapper
779 -class ParserElement(object):
780 """Abstract base level parser element class.""" 781 DEFAULT_WHITE_CHARS = " \n\t\r" 782 verbose_stacktrace = False 783 784 @staticmethod
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide default whitespace characters
       (C{ParserElement.DEFAULT_WHITE_CHARS}) with C{chars}.
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
789 790 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Record C{cls} on C{ParserElement} as the class to be used for inclusion
    of string literals into a parser.
    """
    setattr(ParserElement, "literalStringClass", cls)
796
def __init__( self, savelist=False ):
    """Set every per-element parsing option to its default value.

    C{savelist} is stored as C{saveAsList}.
    """
    # actions and callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # naming and results
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # grammar analysis and optimization flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
819
def copy( self ):
    """Make a copy of this C{ParserElement}.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element.

       The copy gets its own parse-action and ignore-expression lists; when
       C{copyDefaultWhiteChars} is set, its whitespace characters are reset
       to the current class default."""
    duplicate = copy.copy( self )
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
829
def setName( self, name ):
    """Define name for this expression, for use in debugging.

       Also sets the error message to C{"Expected " + name}, updates the
       message of an existing C{exception} attribute, and returns C{self}."""
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if hasattr(self, "exception"):
        self.exception.msg = self.errmsg
    return self
837
def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original C{ParserElement} object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.

       A trailing C{"*"} on C{name} is removed and has the same effect as
       passing C{listAllMatches=True}.

       You can also set results names using the abbreviated syntax,
       C{expr("name")} in place of C{expr.setResultsName("name")} -
       see L{I{__call__}<__call__>}.
    """
    named = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    named.resultsName = name
    named.modalResults = not listAllMatches
    return named
856
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable.

       Enabling wraps the current C{_parse} method; disabling restores the
       wrapped method if a wrapper is installed.  Returns C{self}.
    """
    if not breakFlag:
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that it can be restored later
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
874
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
       C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Any previously defined parse actions are replaced.  The optional
       keyword argument C{callDuringTry} (default False) is stored on the
       element.  Returns C{self}.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    trimmed = map(_trim_arity, list(fns))
    self.parseAction = list(trimmed)
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
895
def addParseAction( self, *fns, **kwargs ):
    """Append C{fns} to this expression's existing parse actions.
       See L{I{setParseAction}<setParseAction>} for the calling conventions.

       C{callDuringTry} stays set once it has been enabled; passing the
       keyword here can only turn it on.  Returns C{self}.
    """
    extra = [_trim_arity(fn) for fn in fns]
    self.parseAction += extra
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
901
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to expression's list of parse actions. See
       L{I{setParseAction}<setParseAction>} for the supported call signatures.

       Each predicate is wrapped in a parse action that raises
       C{L{ParseException}} when the predicate returns a false value and
       otherwise returns the tokens unchanged.

       Optional keyword arguments:
        - message = custom message for the raised exception
          (default "failed user-defined condition")
        - callDuringTry = stored on the expression; can only be switched on here

       Returns C{self}.
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(predicate):
        # A factory gives every parse action its own 'predicate' binding.
        # Defining 'pa' directly in the loop would make all actions share the
        # loop variable and therefore test only the last condition.
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        # normalize the arity once, not on every invocation
        self.parseAction.append(makeConditionAction(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Register the callable to run when parsing fails at this expression.

       The fail action is called as C{fn(s,loc,expr,err)} where:
        - s    = string being parsed
        - loc  = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err  = the exception thrown
       Its return value is not used.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}.
    """
    self.failAction = fn
    return self
928
929 - def _skipIgnorables( self, instring, loc ):
930 exprsFound = True 931 while exprsFound: 932 exprsFound = False 933 for e in self.ignoreExprs: 934 try: 935 while 1: 936 loc,dummy = e._parse( instring, loc ) 937 exprsFound = True 938 except ParseException: 939 pass 940 return loc
941
def preParse( self, instring, loc ):
    """Return the location at which matching should start: C{loc} moved past
       any ignorable expressions and then past any leading characters found
       in C{self.whiteChars} (the latter only if C{self.skipWhitespace} is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
953
def parseImpl( self, instring, loc, doActions=True ):
    """Default match implementation: consumes nothing and produces no tokens.

       Returns a C{(loc, tokens)} pair with the location unchanged and an
       empty token list.
    """
    noTokens = []
    return loc, noTokens
956
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by C{parseImpl}.
       This default implementation hands C{tokenlist} back unchanged.
    """
    return tokenlist
959 960 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this expression at C{loc} without consulting the packrat cache.

       Steps: optionally pre-parse (skip ignorables/whitespace), call
       C{parseImpl}, pass the tokens through C{postParse}, package them as
       C{L{ParseResults}}, then run the parse actions if C{doActions} or
       C{self.callDuringTry} is set.

       Parameters:
        - instring - the string being parsed
        - loc - location at which to attempt the match
        - doActions - (default=True) - whether to run parse actions
        - callPreParse - (default=True) - whether to call C{preParse} first
          (also requires C{self.callPreparse})

       Returns a C{(loc, ParseResults)} pair, where C{loc} is the location
       after the match.  An C{IndexError} raised by C{parseImpl} from reading
       past the end of the input is reported as a C{L{ParseException}} located
       at the end of the string.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # Fast path: skip the try/except unless an IndexError is possible.
        # The test must use preloc, the position parseImpl actually starts
        # from: skipping whitespace can move it to the end of the input even
        # when loc itself is still inside the string.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the tokens as they are
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1032
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} without running parse actions and return
       the location following the match.

       A C{L{ParseFatalException}} raised during the attempt is reported as
       an ordinary C{L{ParseException}} at C{loc}.
    """
    try:
        result = self._parse( instring, loc, doActions=False )
        return result[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1038 1039 # this method gets repeatedly called during backtracking with the same arguments - 1040 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

       Results are stored in C{ParserElement._exprArgCache} under the key
       C{(self, instring, loc, callPreParse, doActions)}.  Both successful
       C{(loc, tokens)} results and raised parse exceptions are cached; a
       cached exception is raised again on a later lookup.  Cached tokens are
       stored and returned as copies.
    """
    cache = ParserElement._exprArgCache
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in cache:
        cached = cache[ lookup ]
        if isinstance(cached, Exception):
            raise cached
        return (cached[0],cached[1].copy())

    try:
        result = self._parseNoCache( instring, loc, doActions, callPreParse )
        cache[ lookup ] = (result[0],result[1].copy())
        return result
    except ParseBaseException as pe:
        # drop the traceback before the exception is kept in the cache
        pe.__traceback__ = None
        cache[ lookup ] = pe
        raise
# parsing entry point used by all expressions; uncached unless enablePackrat() rebinds it
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
@staticmethod
def resetCache():
    """Empty the shared cache of memoized parse results."""
    cache = ParserElement._exprArgCache
    cache.clear()
# set to True by the first call to enablePackrat()
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calling this method more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1087
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set C{parseAll} to True (equivalent to ending
       the grammar with C{L{StringEnd()}}).

       Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the C{loc} argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling C{parseWithTabs} on your grammar before calling C{parseString}
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full C{(s,loc,toks)} signature, and
          reference the input string using the parse action's C{s} argument
        - explicitly expand the tabs in your input string before calling
          C{parseString}

       Returns the parsed tokens; parse exceptions propagate to the caller.
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endloc, tokens = self._parse( instring, 0 )
        if parseAll:
            # only ignorables/whitespace may remain before the end of the string
            endloc = self.preParse( instring, endloc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, endloc )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return tokens
1132
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Generator that scans the input string for expression matches, yielding
       a C{(tokens, start, end)} triple for each match.  May be called with
       optional C{maxMatches} argument, to clip scanning after 'n' matches are
       found.  If C{overlap} is specified, then overlapping matches will be
       reported.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on
       parsing strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                matchEnd,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; retry one character further on
                loc = preloc+1
                continue

            if matchEnd <= loc:
                # the match consumed nothing beyond loc; step forward
                loc = preloc+1
                continue

            matches += 1
            yield tokens, preloc, matchEnd
            if not overlap:
                loc = matchEnd
            elif preparseFn( instring, loc ) > loc:
                loc = matchEnd
            else:
                loc += 1
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1182
def transformString( self, instring ):
    """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
       be returned from a parse action.  To use C{transformString}, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking C{transformString()} on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  C{transformString()} returns the resulting transformed string.

       Side effect: sets C{self.keepTabs} to True."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks,start,end in self.scanString( instring ):
            # unmatched text between the previous match and this one is copied through
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks,list):
                    pieces += toks
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(nonEmpty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1215
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}, simplifying the access to the tokens found
       to match the given parse expression: the tokens of every match are collected
       into a single C{L{ParseResults}}.  May be called with optional
       C{maxMatches} argument, to clip searching after 'n' matches are found.
    """
    try:
        found = [ toks for toks,start,end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1229
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1239
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1249
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1259
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1269
1270 - def __mul__(self,other):
1271 """Implementation of * operator, allows use of C{expr * 3} in place of 1272 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1273 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1274 may also include C{None} as in: 1275 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1276 to C{expr*n + L{ZeroOrMore}(expr)} 1277 (read as "at least n instances of C{expr}") 1278 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1279 (read as "0 to n instances of C{expr}") 1280 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1281 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1282 1283 Note that C{expr*(None,n)} does not raise an exception if 1284 more than n exprs exist in the input stream; that is, 1285 C{expr*(None,n)} does not enforce a maximum number of expr 1286 occurrences. If this behavior is desired, then write 1287 C{expr*(None,n) + ~expr} 1288 1289 """ 1290 if isinstance(other,int): 1291 minElements, optElements = other,0 1292 elif isinstance(other,tuple): 1293 other = (other + (None, None))[:2] 1294 if other[0] is None: 1295 other = (0, other[1]) 1296 if isinstance(other[0],int) and other[1] is None: 1297 if other[0] == 0: 1298 return ZeroOrMore(self) 1299 if other[0] == 1: 1300 return OneOrMore(self) 1301 else: 1302 return self*other[0] + ZeroOrMore(self) 1303 elif isinstance(other[0],int) and isinstance(other[1],int): 1304 minElements, optElements = other 1305 optElements -= minElements 1306 else: 1307 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1308 else: 1309 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1310 1311 if minElements < 0: 1312 raise ValueError("cannot multiply ParserElement by negative value") 1313 if optElements < 0: 1314 raise ValueError("second tuple value must be greater or equal to first tuple value") 1315 if minElements == optElements == 0: 1316 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1317 1318 if (optElements): 1319 def makeOptionalList(n): 1320 if n>1: 1321 return Optional(self + makeOptionalList(n-1)) 1322 else: 1323 return Optional(self)
1324 if minElements: 1325 if minElements == 1: 1326 ret = self + makeOptionalList(optElements) 1327 else: 1328 ret = And([self]*minElements) + makeOptionalList(optElements) 1329 else: 1330 ret = makeOptionalList(optElements) 1331 else: 1332 if minElements == 1: 1333 ret = self 1334 else: 1335 ret = And([self]*minElements) 1336 return ret 1337
1338 - def __rmul__(self, other):
1339 return self.__mul__(other)
1340
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1350
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1360
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1370
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1380
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1390
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.
       A string operand is first converted with C{ParserElement.literalStringClass};
       any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
       a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1400
def __invert__( self ):
    """Implementation of ~ operator - wraps this expression in a C{L{NotAny}}."""
    negated = NotAny( self )
    return negated
1404
1405 - def __call__(self, name=None):
1406 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 1407 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1408 could be written as:: 1409 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1410 1411 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1412 passed as C{True}. 1413 1414 If C{name} is omitted, same as calling C{L{copy}}. 1415 """ 1416 if name is not None: 1417 return self.setResultsName(name) 1418 else: 1419 return self.copy()
1420
def suppress( self ):
    """Return this C{ParserElement} wrapped in a C{L{Suppress}}, so that its
       tokens are left out of the returned output; useful to keep punctuation
       from cluttering up the results.
    """
    wrapped = Suppress( self )
    return wrapped
1426
def leaveWhitespace( self ):
    """Turn off the skipping of whitespace before matching the characters in
       the C{ParserElement}'s defined pattern.  This is normally only used
       internally by the pyparsing module, but may be needed in some
       whitespace-sensitive grammars.  Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1434
def setWhitespaceChars( self, chars ):
    """Use C{chars} as this element's whitespace characters in place of the
       defaults.  Also re-enables whitespace skipping and clears
       C{copyDefaultWhiteChars}.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1442
def parseWithTabs( self ):
    """Keep C{<TAB>} characters in the input instead of expanding them to
       spaces before parsing (the default behavior).
       Must be called before C{parseString} when the input grammar contains
       elements that match C{<TAB>} characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1449
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A string is converted to a C{L{Suppress}}.  A C{Suppress} is added only
       if it is not already in the list of ignorables; any other expression is
       copied and wrapped in a C{Suppress} before being added.  Returns C{self}.
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1464
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks for the start of a match attempt, a successful
       match, and a failed match.  A callback passed as a false value (such as
       C{None}) is replaced by the corresponding default debug action.
       Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1472
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1481
1482 - def __str__( self ):
1483 return self.name
1484
def __repr__( self ):
    """Return the same text as the string conversion of this expression."""
    text = _ustr(self)
    return text
1487
def streamline( self ):
    """Mark this expression as streamlined and discard the cached string
       representation C{strRepr}.  Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1492
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; this base implementation does nothing."""
    return None
1495
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

       This implementation runs C{checkRecursion} starting from an empty list;
       C{validateTrace} is accepted but not used here."""
    seen = []
    self.checkRecursion( seen )
1499
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Parameters:
        - file_or_filename - an object with a C{read()} method, or the name of
          a file to open in text mode
        - parseAll - (default=False) - flag passed on to C{L{parseString}}

       Returns the result of C{parseString} on the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object, so treat it as a filename; the 'with' block
        # closes the file even when read() raises
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1519
def __eq__(self,other):
    """Equality test.

       Against another C{ParserElement}: true for the same object or when both
       instance dictionaries compare equal.  Against a string: true when this
       expression parses the entire string.  Anything else is compared through
       the superclass."""
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1531
1532 - def __ne__(self,other):
1533 return not (self == other)
1534
1535 - def __hash__(self):
1536 return hash(id(self))
1537
1538 - def __req__(self,other):
1539 return self == other
1540
1541 - def __rne__(self,other):
1542 return not (self == other)
1543
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
       test, the parsed results or where the parse failed. Quick and easy way to
       run a parse expression against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests

       For each test the output printed is the test string followed by either
       the dump of the parsed results or, on a C{L{ParseException}}, a caret
       marking the error location and the exception text.
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself instead of going through str.strip,
        # which rejects unicode strings on Python 2
        tests = [t.strip() for t in tests.splitlines()]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show the offending line, with the caret under the error column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1568
class Token(ParserElement):
    """Abstract base class for atomic matching patterns; a C{ParserElement}
       that is initialized with C{savelist=False}."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )
1574
class Empty(Token):
    """A token that always matches, without consuming any input."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1583
class NoMatch(Token):
    """A token that will never match; every parse attempt raises
       C{L{ParseException}}."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1596
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty in place
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen != 1 and not instring.startswith(self.match,loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.match
_L = Literal
# class used by the operator overloads to turn plain strings into expressions
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to C{Keyword.DEFAULT_KEYWORD_CHARS} (initially all alphanumerics
       + "_" and "$") as set at the time the keyword is created; C{caseless} allows
       case-insensitive matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default when the keyword is created, not when the class
        # is defined, so that setDefaultKeywordChars() applies to keywords
        # created after it has been called.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A keyword matches only when the text matches AND neither the
        # character after it nor the character before it is an identifier
        # character.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        c = super(Keyword,self).copy()
        # NOTE(review): the copy gets the current class-wide default rather
        # than this keyword's own identChars - confirm this is intended
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1682
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the inherited match string is kept in upper case for comparisons
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        endloc = loc+self.matchLen
        if instring[ loc:endloc ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return endloc, self.returnString
1699
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}: matches the keyword ignoring the
       case of letters.  The matched result is the keyword as given to the
       constructor, not the text found in the input.

       C{identChars} defaults to C{Keyword.DEFAULT_KEYWORD_CHARS} as set at the
       time the keyword is created.
    """
    def __init__( self, matchString, identChars=None ):
        # resolve the default at call time so that
        # Keyword.setDefaultKeywordChars() is honored
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        # Same three tests as the caseless branch of Keyword.parseImpl,
        # including the check that the preceding character is not an
        # identifier character.
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1709
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{initChars}/C{bodyChars} strings and should be removed from
       them; useful to define a word of all printables except for one or two
       characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no (or empty) body set: body characters default to the initial set
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no blank in either character set,
        # matching is delegated to an equivalent compiled regular expression.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character scan,
                # but do not swallow KeyboardInterrupt/SystemExit
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; return C{(new_loc, matched_text)} or raise
        C{ParseException}."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        tooShort = loc - start < self.minLen
        # an explicit max was given and the word continues beyond it
        tooLong = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # in keyword mode the word may not touch body characters on either side
        embedded = self.asKeyword and (
            (start>0 and instring[start-1] in bodychars) or
            (loc<instrlen and instring[loc] in bodychars) )

        if tooShort or tooLong or embedded:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1832
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A precompiled regular expression object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        If C{pattern} is already a compiled RE object it is used directly;
        C{flags} is then only recorded, not applied.  Any other type raises
        C{ValueError}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # use the regex source text; str() of a compiled pattern is the
            # object's repr, not the expression
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the regex at C{loc}; return C{(new_loc, ParseResults)} where
        named groups of the match are stored as named results."""
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1896
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) if
           C{quoteChar} or C{endQuoteChar} is empty once whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # characters excluded from the "ordinary body character" class
        if escChar is not None:
            escapedEscChar = _escapeRegexRangeChars(escChar)
        else:
            escapedEscChar = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        else:
            # single-line strings additionally stop at CR/LF
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the next character breaks the end-quote sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; return C{(new_loc, text)}, where
        C{text} has quotes and escapes removed when C{unquoteResults} is set."""
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a plain literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2008
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
       Leading whitespace is not skipped before matching.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters up to the first disallowed one (or C{maxLen});
        return C{(new_loc, matched_text)} or raise C{ParseException}."""
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # fall through to the generated representation, without
            # swallowing KeyboardInterrupt/SystemExit
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2071
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of the configured whitespace characters at C{loc};
        return C{(new_loc, matched_text)} or raise C{ParseException}."""
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        begin = loc
        loc += 1
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
2119
class _PositionToken(Token):
    """Base class for the position-testing tokens (line/string/word
    start and end, column)."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        # named after the concrete class
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
2127
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
        (or a non-space character, or end of input) is reached."""
        if col(loc, instring) == self.col:
            return loc

        total = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while ( loc < total
                and instring[loc].isspace()
                and col(loc, instring) != self.col ):
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column; raise
        C{ParseException} if C{loc} is already past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[loc:target]
2150
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is significant here, so it is removed from the skipped whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Advance C{loc} by one when the inherited pre-parse stops on a newline."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at a line start; otherwise raise
        C{ParseException}."""
        atLineStart = ( loc == 0
                        or loc == self.preParse( instring, 0 )
                        or instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2170
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is significant here, so it is removed from the skipped whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc+1, "\\n")} on a newline, C{(loc+1, [])} at end of
        input; raise C{ParseException} anywhere else."""
        total = len(instring)
        if loc == total:
            return loc+1, []
        if loc < total and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
2188
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at position 0, or at the position
        reached by pre-parsing from 0; otherwise raise C{ParseException}."""
        # a non-zero loc is still acceptable when everything before it is
        # just whitespace and ignorables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2201
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at or past the end of C{instring};
        raise C{ParseException} when input remains.

        Exactly at the end the returned location is C{loc+1}; past the end
        C{loc} is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc > end: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []
2217
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at a word start; otherwise raise
        C{ParseException}.  Position 0 always succeeds."""
        chars = self.wordChars
        # fail when the previous char is a word char or the current one is not
        if loc != 0 and ( instring[loc-1] in chars or instring[loc] not in chars ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2236
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # whitespace is not skipped before testing the position
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at a word end; otherwise raise
        C{ParseException}.  Positions at or past the end of input always succeed."""
        total = len(instring)
        chars = self.wordChars
        insideInput = total > 0 and loc < total
        # fail when the current char is a word char or the previous one is not
        if insideInput and ( instring[loc] in chars or instring[loc-1] not in chars ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2257
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """C{exprs} may be a single string, a generator, a sequence of
        strings and/or expressions, any other iterable, or a single
        expression.  Strings are wrapped with C{ParserElement.literalStringClass}."""
        super(ParseExpression,self).__init__(savelist)
        # collections.Sequence was removed in Python 3.10; prefer the abc module
        try:
            from collections.abc import Sequence
        except ImportError:  # Python 2
            from collections import Sequence

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement.literalStringClass( exprs ) ]
        elif isinstance( exprs, Sequence ):
            # if sequence of strings provided, wrap each with the literal class
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ ParserElement.literalStringClass(expr) for expr in exprs ]
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None     # cached representation is now stale
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies of the contained expressions
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on every contained
        expression; a C{Suppress} that is already registered is not added twice."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        # the default list is only copied, never mutated
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2367
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once it has been passed, a failure of any later
        element is raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, accumulating tokens;
        return C{(new_loc, tokens)}."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the error stop: convert failures to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        if isinstance( other, basestring ):
            # same string-wrapping class as Or.__ixor__ and MatchFirst.__ior__
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # stop at the first element that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2432
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the one that matched the
        longest text.  When nothing matches, raise the failure that got
        furthest into the input, carrying this expression's message."""
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # remember every match, to retry longest to shortest
                candidates.append((endLoc, alt))

        # stable descending sort: ties keep definition order
        candidates.sort(key=lambda pair: pair[0], reverse=True)
        for _, alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        chain = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( chain )
2502
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.  When
        none does, raise the failure that got furthest into the input,
        carrying this expression's message."""
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only reached if no expression matched
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        chain = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( chain )
2558
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are classified on the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse
        them in that order and merge the results.

        Raises C{ParseException} if any required expression never matched.
        """
        if self.initExprGroups:
            # map id(wrapped expr) -> its Optional wrapper
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            wrappedOptionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            emptyMatchers = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = wrappedOptionals + emptyMatchers
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # second pass: parse for real, in the discovered order
        resultlist = []
        for e in matchOrder:
            loc, tokens = e._parse(instring,loc,doActions)
            resultlist.append(tokens)

        finalResults = ParseResults([])
        for r in resultlist:
            # names already present are re-assigned with the old and new values combined
            merged = {}
            for key in r.keys():
                if key in finalResults:
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(r[key])
                    merged[key] = combined
            finalResults += ParseResults(r)
            for key, value in merged.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " & ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        chain = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( chain )
2643
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
       Wraps a single expression (C{self.expr}), which may be C{None}."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            # same string-wrapping class as Or.__ixor__ and MatchFirst.__ior__
            expr = ParserElement.literalStringClass(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit parsing characteristics from the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} when
        there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression, if there is one.  Returns C{self}."""
        self.skipWhitespace = False
        # guard before copying: expr may be None
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression;
        a C{Suppress} that is already registered is not added twice."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
        in C{parseElementList}; otherwise recurse into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        # the default list is only copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2715
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    C{FollowedBy} does *not* advance the parsing position within the input
    string; it only verifies that the specified parse expression matches at
    the current position. C{FollowedBy} always returns a null token list.
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # a successful lookahead never produces tokens
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression at C{loc}; any exception it raises
        propagates. On success the location is returned unchanged together
        with an empty token list."""
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
2729
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.

    C{NotAny} does *not* advance the parsing position within the input
    string; it only verifies that the specified parse expression does *not*
    match at the current position. Also, C{NotAny} does *not* skip over
    leading whitespace. C{NotAny} always returns a null token list. May be
    constructed using the '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly rather than calling leaveWhitespace(), which
        # would also change the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, only when the contained expression
        fails at C{loc}; raise C{ParseException} when it matches."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent, which is the success case
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        """Return the assigned name, or a cached C{"~{expr}"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{%s}" % (_ustr(self.expr),)
            return self.strRepr
        return self.name
2761
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        # matching nothing at all is a valid outcome
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression as many times as it will match.

        Returns the location after the last successful match and the
        accumulated tokens; if even the first attempt fails, the starting
        location and an empty list are returned.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition ends the loop
            pass
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[%s]..." % (_ustr(self.expr),)
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always has
        C{saveAsList} switched on."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2800
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression once (a failure here propagates),
        then keep matching until it no longer does. Returns the final
        location and the accumulated tokens."""
        # the mandatory first match sits outside the try on purpose
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipIgnorables = bool(self.ignoreExprs)
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition ends the loop
            pass
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"{expr}..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{%s}..." % (_ustr(self.expr),)
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always has
        C{saveAsList} switched on."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2835
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __str__(self):
        return ""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__


# unique sentinel used as the "no default supplied" marker for Optional
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    A default return value can also be specified, for use when the optional
    expression is not found.
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the contained expression's match when it succeeds.

        On failure the location is left where it was and the tokens are the
        default value (also stored under the contained expression's results
        name, when it has one), or an empty list when no default was given.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[%s]" % (_ustr(self.expr),)
            return self.strRepr
        return self.name
2876
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    If C{include} is set to true, the matched expression is also parsed (the
    skipped text and matched expression are returned as a 2-element list).
    The C{ignore} argument is used to define grammars (typically quoted
    strings and comments) that might contain false matches. C{failOn} is an
    expression (or string) that aborts the skip when it matches first.
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string is promoted to a Literal; None and expressions pass through
        if isinstance(failOn, basestring):
            failOn = Literal(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target expression
        matches, returning the text that was skipped.

        Raises C{ParseException} if C{failOn} matches before the target
        does, or if the end of the string is passed without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        hitFailOn = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag the exception so the handler below re-raises
                        # it instead of stepping to the next character
                        hitFailOn = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                if self.ignoreExpr is not None:
                    # hop over as many consecutive ignorable matches as there are
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break
                # probe only: parse actions are not run for this attempt
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skipped = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [skipped]
                loc, matched = target._parse(instring, loc, doActions, callPreParse=False)
                if not matched:
                    return loc, [skipped]
                combined = ParseResults(skipped)
                combined += matched
                return loc, [combined]
            except (ParseException, IndexError):
                if hitFailOn:
                    raise
                loc += 1
        raise ParseException(instring, loc, self.errmsg, self)
2939
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install C{other} as the contained expression and copy its
        whitespace, list-saving and ignore settings. Returns C{self}."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """C{fwd <<= expr} behaves exactly like C{fwd << expr}."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; the contained
        expression is not touched. Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once. The flag is set before
        descending, so a grammar that refers back to this element does not
        recurse endlessly. Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the contained expression unless this element is already
        in C{validateTrace}, then run a recursion check from this element."""
        # the shared default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or C{"<ClassName>: ..."}."""
        if hasattr(self, "name"):
            return self.name
        # The contained expression is deliberately not rendered: the earlier
        # implementation that did so was stubbed out for memory and
        # performance reasons, and its unreachable remains are removed here.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Return a copy; when no expression has been assigned yet, the copy
        is a new C{Forward} that wraps this one."""
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3016
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always C{"..."}."""
    def __str__(self):
        placeholder = "..."
        return placeholder
3020
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3026
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated)."""
    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2,
        )

    def postParse(self, instring, loc, tokenlist):
        """Return a new list with C{str.upper} applied to every token."""
        return [str.upper(tok) for tok in tokenlist]
3036
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.

    By default, the matching patterns must also be contiguous in the input
    string; this can be disabled by specifying C{'adjacent=False'} in the
    constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        if adjacent:
            # stop whitespace skipping inside the contained expressions ...
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but turn it back on for the Combine element itself
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression. In adjacent mode the base
        C{ParserElement.ignore} is called directly, bypassing the
        intermediate classes' overrides. Returns C{self}."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with a single concatenated string.

        The result is wrapped in a one-element list when this element has a
        results name and the result carries named values.
        """
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([joined], modal=self.modalResults)

        needsWrapping = self.resultsName and retToks.haskeys()
        if needsWrapping:
            return [retToks]
        return retToks
3069
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning
    tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens inside a one-element list."""
        grouped = [tokenlist]
        return grouped
3078
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.

    Each element can also be referenced using the first token in the
    expression as its key. Useful for tabular report scraping when the first
    column can be used as an item key.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a named entry to C{tokenlist} for every non-empty element.

        The key is the element's first token (integers are converted to a
        stripped string). The value is C{""} for a lone key, the second token
        for a plain key/value pair, and otherwise the remainder of the
        element - unwrapped when it is a single item without named values.
        """
        for offset, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key, int):
                key = _ustr(entry[0]).strip()

            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                keepWhole = len(rest) != 1 or (isinstance(rest, ParseResults) and rest.haskeys())
                value = rest if keepWhole else rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        return [tokenlist] if self.resultsName else tokenlist
3111
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressing, so this element is returned as is."""
        return self
3120
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action the first time; raise C{ParseException} on
        any later call until C{reset()} is used."""
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only marked as called once the action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run one more time."""
        self.called = False
3135
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action, and a "leaving" line afterwards showing either
    the return value or the exception (which is re-raised).
    """
    # Take the display name from the callable the user supplied, before it is
    # replaced by the _trim_arity wrapper. The original read f.func_name at
    # call time, an attribute Python 3 functions do not have.
    try:
        baseName = f.__name__
    except AttributeError:
        # callable objects such as functools.partial carry no __name__
        baseName = f.__class__.__name__
    f = _trim_arity(f)

    def z(*paArgs):
        thisFunc = baseName
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # more than (s, l, t): the first argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret

    try:
        z.__name__ = baseName
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace, and comments, but this can be overridden by passing
    C{combine=True} in the constructor. If C{combine} is set to C{True}, the
    matching tokens are returned as a single token string, with the
    delimiters included; otherwise, the matching tokens are returned as a
    list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if not combine:
        separated = expr + ZeroOrMore(Suppress(delim) + expr)
        return separated.setName(dlName)
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
3174
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    C{intExpr} may supply the expression for the leading count; it is copied
    before being modified. By default a C{Word(nums)} converted to C{int} is
    used.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array as exactly that many exprs
        n = t[0]
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return intExpr + arrayExpr
def _flatten(L):
    """Return a new flat list of the items of C{L}, expanding nested
    C{list}s (and only C{list}s) depth-first, in order."""
    flat = []
    for item in L:
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat
3203
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one in sequence
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # the repeater parses with a copy of expr; reject a different result
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters C{\\ ^ - ]} in C{s} and spell newline
    and tab as the two-character sequences C{\\n} and C{\\t}."""
    # the backslash comes first so the escapes added for the other
    # characters are not themselves escaped again
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
3263
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
       if creating a C{Regex} raises an exception)

    Returns a C{Regex} or a C{L{MatchFirst}} as described above, or a
    C{NoMatch} when C{strs} yields no symbols. An argument of an unsupported
    type produces a C{SyntaxWarning} and is treated as empty.
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; fall back to the old location only where collections.abc
    # does not exist (Python 2).
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates and move any symbol that a shorter, earlier symbol
    # would mask (i.e. is a prefix of) in front of that shorter symbol.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # nothing was changed for position i, so move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: one character class does it
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            # best effort: fall through to MatchFirst, but no longer trap
            # KeyboardInterrupt/SystemExit as the bare except did
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3327
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.
    """
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
3337
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values."""
    def currentLocation(s, loc, t):
        return loc

    def sliceAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def sliceInPlace(s, l, t):
        # replace the list tokens with the raw text, then drop the two
        # bookkeeping names that were added around expr
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    locMarker = Empty().setParseAction(currentLocation)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    matchExpr.setParseAction(sliceAsString if asString else sliceInPlace)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty. The wrapped expression's result is
    replaced by its first token."""
    def firstToken(t):
        return t[0]

    return TokenConverter(expr).setParseAction(firstToken)
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}
    """
    def currentLocation(s, l, t):
        return l

    locator = Empty().setParseAction(currentLocation)
    startMarker = locator("locn_start")
    # the end marker must not skip whitespace before reporting its position
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + expr("value") + endMarker)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks of the bracket-expression grammar parsed by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the hex digits as "everything after the x". The previous
# lstrip(r'\0x') stripped a *set* of characters, so "\x00" was reduced to ""
# and "\X41" to "X41", and int(..., 16) raised on both.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition("x")[2],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
        a single character
        an escaped character with a leading backslash (such as \- or \])
        an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
        an escaped octal character with a leading '\0' (\041, which is a '!' character)
        a range of any of the above, separated by a dash ('a-z', etc.)
        any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

    Returns an empty string when C{s} cannot be parsed or expanded.
    """
    def _expanded(part):
        # a two-item ParseResults is a range; anything else is a single character
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(c) for c in range(first, last + 1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberately best-effort: malformed input yields "" - but, unlike
        # the former bare except, KeyboardInterrupt/SystemExit still propagate
        return ""
3422
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
    column in the input text. The returned parse action raises
    C{ParseException} when the match location is not at column C{n}.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    # An endless iterator over the same one-element list: every call to the
    # returned callable is next() on it, so the result never changes. next()
    # accepts at most one further argument, which is ignored here because
    # the iterator is never exhausted.
    endlessReplacement = itertools.repeat([replStr])
    return functools.partial(next, endlessReplacement)
3440
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
    To use, add this parse action to quoted string using::
        quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last character of the first token
    return quoted[1:-1]
3447
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    return [_ustr(tok).upper() for tok in t]
3451
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    return [_ustr(tok).lower() for tok in t]
3455
def keepOriginalText(s, startLoc, t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
    Helper parse action to preserve original parsed text,
    overriding any nested parse actions."""
    # getTokensEndLoc() inspects the call stack, so it has to be called
    # directly from this function body
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3467
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
    location of the parsed tokens.

    Raises C{ParseFatalException} when no C{_parseNoCache} frame is found
    above the caller.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # release the captured frame records
        del fstack
3483
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name or an expression having a C{name}; C{xml}
    selects the XML form (case-sensitive tag name, double-quoted attribute
    values) over the HTML form. Returns C{(openTag, closeTag)}.
    """
    if not isinstance(tagStr, basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attributes = Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
    else:
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        # HTML: attribute names are lower-cased and the value is optional
        attributes = Dict(ZeroOrMore(Group(
            tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)
        )))
    # "empty" is True when the tag ends with "/", False otherwise
    selfClosing = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + attributes + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. a resname of "ns:tag" gives the results names "startNsTag" / "endNsTag"
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3512
def makeHTMLTags(tagStr):
    """Helper returning the C{(openTag, closeTag)} expression pair for an HTML tag.

       C{tagStr} is handed unchanged to C{_makeTags} with its C{xml} flag switched
       off, and the pair it builds is returned as-is.
    """
    htmlTagPair = _makeTags( tagStr, xml=False )
    return htmlTagPair
3516
def makeXMLTags(tagStr):
    """Helper returning the C{(openTag, closeTag)} expression pair for an XML tag.

       C{tagStr} is handed unchanged to C{_makeTags} with its C{xml} flag switched
       on, and the pair it builds is returned as-is.
    """
    xmlTagPair = _makeTags( tagStr, xml=True )
    return xmlTagPair
3520
def withAttribute(*args,**attrDict):
    """Helper to build a validating parse action for start tags created with
       C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Attach the returned action to a
       start tag to require particular attribute values, so that common tags such
       as C{<TD>} or C{<DIV>} do not produce false matches.

       The required attributes can be given as:
        - keyword arguments, as in C{(align="right")}
        - an explicit dict expanded with C{**}, for names that are Python reserved
          words, as in C{**{"class":"Customer", "align":"right"}}
        - name-value tuples passed positionally, as in
          C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
       When positional tuples are given, any keyword arguments are ignored.
       Attribute names containing a namespace prefix cannot be written as plain
       keyword arguments, so use the dict or tuple form for those.  Names are
       looked up in the parsed tokens exactly as given.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To require only that an attribute is present, whatever its value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required attribute
       is missing or has a different value.
    """
    source = args[:] if args else attrDict.items()
    # snapshot the requirements as a list of 2-tuples when the action is created
    required = [(name,value) for name,value in source]

    def pa(s,l,tokens):
        for name,expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (name, actual, expected))
    return pa

# unique sentinel meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} that matches on a tag's C{class} attribute,
       which is awkward to spell directly because C{class} is a reserved word in Python.

       The attribute checked is C{"class"}, or C{"<namespace>:class"} when a
       non-empty C{namespace} is given; its value must equal C{classname}.
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# Holder for the two associativity markers that infixNotation compares its
# rightLeftAssoc tuple member against; each marker is a distinct object() instance.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for building the grammar of an expression language whose
       operators are arranged in precedence levels.  Operators may be unary,
       binary or ternary, and left- or right-associative; a parse action may be
       attached to each level.

       Parameters:
        - baseExpr - expression for the most basic operand of the expression
          language; an operand may also be a complete nested expression enclosed
          in C{lpar} ... C{rpar}
        - opList - list of tuples, one per operator precedence level, each of
          the form (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator; it may also be
              a string, which will be converted to a Literal; if numTerms is 3,
              opExpr is a tuple of two expressions, for the two operators
              separating the 3 terms
           - numTerms is the number of terms for this operator (must be 1, 2, or 3)
           - rightLeftAssoc says whether the operator is right or left
              associative, using the pyparsing-defined constants
              C{opAssoc.RIGHT} and C{opAssoc.LEFT}
           - parseAction is the parse action to attach to expressions matching
              this level (this tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} for a malformed ternary opExpr, an unknown
       associativity, or a numTerms other than 1, 2 or 3.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        isLeft = rightLeftAssoc == opAssoc.LEFT
        if not isLeft and not (rightLeftAssoc == opAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")
        if arity not in (1, 2, 3):
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        # operand following an operator: the previous level for left-associative
        # operators, this level itself (recursion) for right-associative ones
        if isLeft:
            operand = lastExpr
        else:
            operand = thisExpr

        if arity == 1:
            if isLeft:
                lookahead = lastExpr + opExpr
                grouped = lastExpr + OneOrMore( opExpr )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                grouped = opExpr + thisExpr
        elif arity == 2:
            if opExpr is not None:
                lookahead = lastExpr + opExpr + operand
                grouped = lastExpr + OneOrMore( opExpr + operand )
            else:
                # no operator expression: the operands are simply adjacent
                lookahead = lastExpr + operand
                grouped = lastExpr + OneOrMore( operand )
        else:
            lookahead = lastExpr + opExpr1 + operand + opExpr2 + operand
            grouped = lastExpr + opExpr1 + operand + opExpr2 + operand

        matchExpr = FollowedBy(lookahead) + Group( grouped )
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when no operator of this level is present
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# second name bound to the same function object as infixNotation
operatorPrecedence = infixNotation

# Quoted-string patterns.  Between the delimiters each pattern accepts any mix of:
#  - a character other than the delimiter, newline, carriage return or backslash
#  - a doubled delimiter ("" or '')
#  - a backslash, "x" and one or more hex digits
#  - a backslash followed by any single character
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# alternation of the two patterns above: double-quoted form first, then single-quoted
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a "u" prefix followed by a copy of quotedString, wrapped in Combine
# (_L is defined elsewhere in this file - presumably an alias for Literal; confirm)
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening delimiter for a nested list (default="("); may be a
          string or a pyparsing expression
        - closer - closing delimiter for a nested list (default=")"); may be a
          string or a pyparsing expression
        - content - expression for the items inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters
          that must not be treated as nesting (default=quotedString)

       When no C{content} expression is given, both delimiters must be strings,
       and the nested expression captures all whitespace-delimited content
       between the delimiters as a list of separate values.

       Typical C{ignoreExpr} values are quotedString or a comment expression;
       combine several with an C{L{Or}} or C{L{MatchFirst}}.  Pass C{None} if
       nothing is to be ignored.

       Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
       omitted and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        bothStrings = isinstance(opener,basestring) and isinstance(closer,basestring)
        if not bothStrings:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded by character class
            excluded = opener + closer + whitespace
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                            CharsNotIn(excluded,exact=1))
                                  ).setParseAction(stripToken)
        else:
            # multi-character delimiters need a negative lookahead before each character
            if ignoreExpr is None:
                oneChar = (~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whitespace,exact=1))
            else:
                oneChar = (~ignoreExpr +
                           ~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whitespace,exact=1))
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is None:
        items = ZeroOrMore( ret | content )
    else:
        items = ZeroOrMore( ignoreExpr | ret | content )
    ret <<= Group( Suppress(opener) + items + Suppress(closer) )
    return ret
3697
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Returns a C{Group} expression for the block.  Note that
       C{blockStatementExpr.ignore(...)} is called on the expression object passed
       in, not on a copy.
    """
    # The three parse actions below compare the column at the current location
    # against indentStack.  col() is defined elsewhere in this file - presumably it
    # returns the column number of location l within string s; confirm.
    def checkPeerIndent(s,l,t):
        # nothing to check once the end of the input is reached
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            # deeper than the current level is a fatal error; shallower is a plain mismatch
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            # entering a deeper level: record its column
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # nothing to check once the end of the input is reached
        if l >= len(s): return
        curCol = col(l,s)
        # column must be left of the current level and no deeper than the enclosing one
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends, with tab and space set as the skippable whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width expressions that exist only to run the checks above
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: statements are peers of the current level, no INDENT/UNDENT
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # _bslash is defined elsewhere in this file - presumably the backslash character,
    # which would make this ignore backslash line continuations; confirm
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets built by srange from the hex ranges 0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff
# (alphas8bit) and 0xa1-0xbf plus 0xd7 and 0xf7 (punc8bit)
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions whose tag name is any Word of alphas followed by alphanums, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&", one of the five entity names (kept under the results name "entity"), then ";"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character: gt '>', lt '<', amp '&', nbsp ' ', quot '"'
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# returns the mapped character when t.entity is a key of _htmlEntityMap, otherwise None
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
# "//" followed by any run of characters, where a backslash-newline pair is also accepted
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a // comment, in a single pattern
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one list item: runs of printable characters other than ",", optionally followed by
# spaces/tabs that are not directly followed by a comma or a line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or comma items; a missing item defaults to ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


# Demo run when the module is executed as a script: builds a small
# "select <columns> from <tables>" grammar and passes sample statements to runTests.
if __name__ == "__main__":

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into a single token and run through upcaseTokens
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")