Source code for gavo.grammars.uniongrammar

"""
A grammar switching between subordinate grammars by file name patterns.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import re

from gavo import base
from gavo.grammars import common
from gavo.rscdef import builtingrammars


class HandlesDeclaration(base.Structure):
    """A declaration of what grammar to use within a UnionGrammar.

    Each handler has a (full, python) regular expression defining what
    file names the grammar is responsible for in the pattern attribute;
    note that the pattern is matched against the full file name using
    search so you can match path parts, but you must take care not
    to overmatch.

    The other child is a normal DaCHS grammar.
    """
    name_ = "handles"

    _pattern = base.UnicodeAttribute("pattern",
        default=base.Undefined,
        description="Python regular expression selecting the sources"
            " this handler is responsible for; it is matched against the"
            " full file name using re.search.",
        copyable=True)
    _grammar = base.MultiStructAttribute("grammar",
        default=None,
        childFactory=builtingrammars.getGrammar,
        childNames=list(builtingrammars.GRAMMAR_REGISTRY.keys()),
        description="Grammar used to handle these kinds of files",
        copyable=True)

    def completeElement(self, ctx):
        """Compile the pattern attribute into self.matcher.

        ctx is handed through unchanged to the superclass implementation,
        which runs first.

        Raises base.StructureError if pattern is not a valid python
        regular expression.
        """
        super().completeElement(ctx)
        try:
            # Compile once here so matching sources later does not have
            # to deal with the pattern string again.
            self.matcher = re.compile(self.pattern)
        except re.error as ex:
            # Chain explicitly so the regex diagnostics remain available
            # as the cause of the structure error.
            raise base.StructureError(
                f"Bad handles pattern: '{self.pattern}' ({ex})") from ex
class UnionGrammar(common.Grammar):
    # The docstring is a raw string because the example patterns contain
    # backslashes; in a plain string literal, \. is an invalid escape
    # sequence that newer python versions warn about.
    r"""A grammar using one of a sequence of grammars to parse its sources.

    (since version 2.7.2)

    Use this if you have differing input formats eventually processible
    by the same row maker (of course, you can make the row maker flexible
    enough to cope with different grammar outputs).

    To do that, use two or more handles definitions, each giving a
    regular expression against the full file name (but matched with
    re.search) and a grammar to use for such files.

    Handles definitions will be tried in sequence; you can hence have
    special cases early and catch-alls later.

    The basic idea is that you write something like::

        <unionGrammar>
            <handles pattern=".*\.txt$">
                <reGrammar...>
            </handles>
            <handles pattern=".*\.csv$">
                <csvGrammar...>
            </handles>
        </unionGrammar>
    """
    name_ = "unionGrammar"

    _handles = base.StructListAttribute("handles",
        childFactory=HandlesDeclaration,
        copyable=True,
        description="Recipe for what grammar to use for what sort of file.")

    def parse(self, sourceToken: str, targetData=None):
        """Parse sourceToken with the grammar of the first matching handler.

        The handles children are tried in the order they were defined;
        the first one whose compiled pattern is found in sourceToken
        (using search) wins, and the result of its grammar's parse method,
        called with sourceToken and targetData, is returned.

        Raises base.DataError if no handler matches sourceToken, or if
        the matching handler has no grammar.
        """
        for handler in self.handles:
            if handler.matcher.search(sourceToken):
                if handler.grammar is None:
                    # grammar defaults to None on handles elements; report
                    # that clearly rather than failing with an
                    # AttributeError on None.
                    raise base.DataError(
                        f"Handler with pattern '{handler.pattern}' matches"
                        f" '{sourceToken}' but has no grammar")
                return handler.grammar.parse(sourceToken, targetData)

        # The loop only ends without returning if nothing matched.
        raise base.DataError(f"No handler grammar for '{sourceToken}'")