Package gavo :: Package grammars :: Module kvgrammar
[frames] | no frames]

Source Code for Module gavo.grammars.kvgrammar

 1  """ 
 2  A grammar parsing key-value pairs from plain text files. 
 3  """ 
 4   
 5  #c Copyright 2008-2019, the GAVO project 
 6  #c 
 7  #c This program is free software, covered by the GNU GPL.  See the 
 8  #c COPYING file in the source distribution. 
 9   
10   
11  import re 
12   
13  from gavo import base 
14  from gavo.grammars.common import Grammar, FileRowIterator, MapKeys, REAttribute 
15   
16   
class KVIterator(FileRowIterator):
    """A row iterator over a file containing key-value pairs.

    Depending on the parent grammar's yieldPairs setting, this either
    yields one {"key":, "value":} row per pair, or a single row holding
    the complete key-value record after the grammar's mapKeys has been
    applied to it.

    The input file is read in one go, decoded if the grammar defines an
    encoding, stripped of comments, and then split into records and pairs
    using the grammar's recSplitter and pairSplitter.
    """
    phase = "(nothing read yet)"

    def _iterRows(self):
        """Yield the rows parsed from self.inputFile.

        Raises base.SourceParseError if the input cannot be decoded with
        the grammar's encoding, or if a non-blank record does not match
        the grammar's pairSplitter.

        self.inputFile is closed when parsing ends, regardless of whether
        that is because the input is exhausted, an error occurred, or
        the consumer abandoned the iterator.
        """
        completeRecord = {}

        try:
            try:
                data = self.inputFile.read()
                self.phase = "(decoding)"
                # Only byte strings need decoding (this is python 2 code,
                # where str is the byte string type).
                if isinstance(data, str) and self.grammar.enc:
                    data = data.decode(self.grammar.enc)
            except UnicodeDecodeError as msg:
                raise base.SourceParseError(unicode(msg),
                    source=self.inputFile.name)

            # Comment removal is still reported as part of the decoding
            # phase by getLocator.
            data = re.sub(self.grammar.commentPattern, "", data)

            for index, rec in enumerate(self.grammar.recSplitter.split(data)):
                self.phase = "record %s %s"%(index, repr(rec))
                if not rec.strip():
                    # blank records (e.g., empty lines) are ignored
                    continue

                # Only the actual parsing is guarded; the yield below must
                # stay outside of the try block, since otherwise
                # GeneratorExit or exceptions thrown in by the consumer
                # would be misreported as malformed input.
                try:
                    key, value = self.grammar.pairSplitter.match(rec).groups()
                except Exception:
                    raise base.ui.logOldExc(
                        base.SourceParseError("Not a key value pair: %s"%(repr(rec)),
                            source=self.inputFile.name))

                if self.grammar.yieldPairs:
                    yield {"key": key.strip(), "value": value.strip()}
                else:
                    completeRecord[key.strip()] = value.strip()
        finally:
            self.inputFile.close()

        self.phase = "(postprocessing)"
        if not self.grammar.yieldPairs:
            yield self.grammar.mapKeys.doMap(completeRecord)

    def getLocator(self):
        """Return a string describing what the iterator is currently doing.

        This is the processing phase or, while records are being parsed,
        the index and repr of the current record.
        """
        return self.phase
60 61
class KeyValueGrammar(Grammar):
    """A grammar to parse key-value pairs from files.

    The default assumes one pair per line, with # comments and = as
    separating character.

    yieldPairs makes the grammar return an empty docdict
    and {"key":, "value":} rowdicts.

    Whitespace around key and value is ignored.
    """
    name_ = "keyValueGrammar"

    _kvSeps = base.UnicodeAttribute(
        "kvSeparators",
        default=":=",
        description="Characters accepted as separators between key and value")
    _pairSeps = base.UnicodeAttribute(
        "pairSeparators",
        default="\n",
        description="Characters accepted as separators between pairs")
    _cmtPat = REAttribute(
        "commentPattern",
        default=re.compile("(?m)#.*"),
        description="A regular expression describing comments.")
    _yieldPairs = base.BooleanAttribute(
        "yieldPairs",
        default=False,
        description="Yield key-value pairs instead of complete records?")
    _mapKeys = base.StructAttribute(
        "mapKeys",
        childFactory=MapKeys,
        default=None,
        description="Mappings to rename the keys coming from"
            " the source files.  Use this, in particular, if the keys are"
            " not valid python identifiers.")

    rowIterator = KVIterator

    def onElementComplete(self):
        """Compile the splitting expressions and provide a default key map.

        recSplitter is a character class built from pairSeparators and
        splits the input into records.  pairSplitter matches a key
        (anything up to the first character from kvSeparators), one
        separator character, and the value (the rest).

        The separator strings are interpolated into the character classes
        as they are, i.e., without any escaping.
        """
        self.recSplitter = re.compile("[%s]"%self.pairSeparators)

        kvChars = self.kvSeparators
        pairExpr = "([^%s]+)[%s](.*)"%(kvChars, kvChars)
        self.pairSplitter = re.compile(pairExpr)

        # without an explicit mapKeys child, fall back to a default MapKeys
        if self.mapKeys is None:
            self.mapKeys = base.makeStruct(MapKeys)

        self._onElementCompleteNext(KeyValueGrammar)
96