# Source code for gavo.utils.parsetricks

"""
A wrapper around pyparsing.  We need this because of the various
idiosyncrasies pyparsing has had over the years, and also because pyparsing
is not terribly well suited for the multi-grammar situation we have here.

Hence, whenever you use pyparsing in DaCHS: Use parsetricks instead of
pyparsing.

This is particularly important on older systems which may have pyparsings
older than 2.2.  There are rather significant changes from 2.2 to 2.4,
and we fall back to the built-in pyparsing (which is 2.4) if your pyparsing
is too old.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


# Not checked by pyflakes: (effectively) API file with gratuitous imports

import contextlib
import os
import threading

if False:
	from gavo.imp.pyparsing import *
else:
	import pyparsing
	if pyparsing.__version__<"2.4":
		from gavo.imp.pyparsing import *
	else:
		from pyparsing import *


class MatchFirstWithSaneError(pyparsing.MatchFirst):
	"""a MatchFirst that re-raises the error of the alternative that got
	furthest into the input without touching its message.
	"""
	# upstream MatchFirst munges the error messages, which sometimes
	# really is disastrous for us.  We monkeypatch this until
	# https://github.com/pyparsing/pyparsing/issues/464 is somehow
	# resolved

	def parseImpl(self, instring, loc, doActions=True):
		"""tries the alternatives in self.exprs in order and returns the
		result of the first one that parses instring at loc.

		If none matches, the ParseException with the largest loc is raised
		unchanged; an IndexError from an alternative is treated as a failure
		at the end of instring.  Without any recorded failure (e.g., no
		alternatives at all), a generic ParseException is raised.
		"""
		furthestLoc = -1
		furthestError = None

		for alternative in self.exprs:
			try:
				return alternative._parse(instring, loc, doActions)
			except pyparsing.ParseException as err:
				# only a strictly larger location replaces an earlier failure,
				# so on ties the first alternative's error wins
				if err.loc > furthestLoc:
					furthestError, furthestLoc = err, err.loc
			except IndexError:
				# the alternative ran off the end of the input; report that
				# as a failure located at the end of instring
				inputEnd = len(instring)
				if inputEnd > furthestLoc:
					furthestError = pyparsing.ParseException(
						instring, inputEnd, alternative.errmsg, self)
					furthestLoc = inputEnd

		# only got here if no expression matched; raise the exception for
		# the match that made it the furthest
		if furthestError is None:
			raise pyparsing.ParseException(
				instring, loc, "no defined alternatives to match", self)
		raise furthestError
####################### Pyparsing hacks
#
# (1) Building grammars: always do this within the pyparsingWhitechars
# context manager.  Building grammars is thread-safe, but different
# grammars here use different whitespace conventions, so without
# the c.m., you might get those messed up.
#
# (2) Parsing strings: *always* go through pyparseString(grammar,
# string) and fellow functions whenever your code could run from within
# the server (i.e., basically always outside of tests).
# pyparsing is not thread-safe, and thus we'll need to shoehorn some
# locking on top of it; I don't want to change the pyparsing methods
# themselves since they may be called very frequently.

# turn on pyparsing's packrat memoization globally
ParserElement.enablePackrat()

# the one lock guarding both grammar construction and parsing; it is
# re-entrant, so a thread already holding it can acquire it again
_PYPARSE_LOCK = threading.RLock()
@contextlib.contextmanager
def pyparsingWhitechars(whiteChars):
	"""a context manager that serializes pyparsing grammar compilation
	and manages its whitespace chars.

	We need different whitespace definitions in some parts of DaCHS.
	(The default used to be " \\t" for a while, so this is what things
	get reset to).

	Since whitespace apparently can only be set globally for pyparsing,
	we provide this c.m.  Since it is possible that grammars will be
	compiled in threads (e.g., as a side effect of getRD), this is
	protected by a lock.  This, in turn, means that this can
	potentially block for a fairly long time.

	Bottom line: When compiling pyparsing grammars, *always* set
	the whitespace chars explicitly, and do it through this c.m.

	whiteChars is passed on to ParserElement.setDefaultWhitespaceChars
	as is.  On exit (normal or exceptional), the default whitespace is
	reset to " \\t" and the lock is released.
	"""
	# The whitespace is set *inside* the try block and the lock is held
	# through a with statement so that a failure in
	# setDefaultWhitespaceChars itself (e.g., on a bad whiteChars value)
	# neither leaves the lock held forever nor leaves a half-changed
	# global default behind.
	with _PYPARSE_LOCK:
		try:
			ParserElement.setDefaultWhitespaceChars(whiteChars)
			yield
		finally:
			ParserElement.setDefaultWhitespaceChars(" \t")
def pyparseString(grammar, string, **kwargs):
	"""runs grammar.parseString on string while holding the module's
	pyparsing lock and returns the parse result.

	Keyword arguments are handed through to parseString.  After a
	successful parse, pyparsing's parse cache is reset.
	"""
	with _PYPARSE_LOCK:
		parsed = grammar.parseString(string, **kwargs)
		# clear the cache while we still hold the lock; this is only
		# reached when parseString did not raise
		ParserElement.resetCache()
	return parsed
def pyparseTransform(grammar, string, **kwargs):
	"""runs grammar.transformString on string while holding the module's
	pyparsing lock and returns the transformed string.

	Keyword arguments are handed through to transformString.
	"""
	with _PYPARSE_LOCK:
		transformed = grammar.transformString(string, **kwargs)
	return transformed