Package gavo :: Package grammars :: Module columngrammar
[frames | no frames]

Source Code for Module gavo.grammars.columngrammar

  1  """ 
  2  A grammar that just splits the source into input lines and then 
  3  lets you name character ranges. 
  4  """ 
  5   
  6  #c Copyright 2008-2019, the GAVO project 
  7  #c 
  8  #c This program is free software, covered by the GNU GPL.  See the 
  9  #c COPYING file in the source distribution. 
 10   
 11   
 12  import pyparsing 
 13   
 14  from gavo import base 
 15  from gavo import utils 
 16  from gavo.grammars.common import Grammar, FileRowIterator, FileRowAttributes 
 17   
 18   
class SplitLineIterator(FileRowIterator):
    """A row iterator producing one rowdict per input line by slicing.

    The keys and the slices applied to each line come from the
    colRanges attribute of the parent grammar.  The grammar's
    topIgnoredLines, commentIntroducer, and strip attributes decide
    which lines are skipped and whether the cut-out strings are stripped.
    """

    def __init__(self, grammar, sourceToken, **kwargs):
        """Set up the iterator and skip the grammar's topIgnoredLines.

        NOTE(review): self.inputFile is not assigned here; presumably
        FileRowIterator.__init__ opens it -- confirm against the base class.
        """
        FileRowIterator.__init__(self, grammar, sourceToken, **kwargs)
        # Swallow the header lines the grammar told us to ignore.
        for _ in range(self.grammar.topIgnoredLines):
            self.inputFile.readline()
        # lineNo counts physical lines, so the skipped ones are included.
        self.lineNo = self.grammar.topIgnoredLines

    def _iterRows(self):
        """Yield a rowdict for every non-comment line of the input file.

        Lines starting with the grammar's commentIntroducer (if given)
        are skipped; they still count towards lineNo but not towards recNo.

        The input file is closed when iteration ends for whatever reason
        (exhaustion, an exception, or the generator being closed early).
        The reference to the grammar is only dropped on normal exhaustion.
        """
        try:
            while True:
                self.lineNo += 1
                inputLine = self.inputFile.readline()
                if not inputLine:
                    # readline returns an empty string at end of file only
                    break

                if (self.grammar.commentIntroducer is not base.NotGiven
                        and inputLine.startswith(
                            self.grammar.commentIntroducer)):
                    continue

                yield self._parse(inputLine)
                # NOTE(review): recNo is presumably initialised by a base
                # class; it is only incremented here.
                self.recNo += 1
        finally:
            # Do not leak the file handle when the consumer stops early
            # or an error propagates out of the loop.
            self.inputFile.close()

        self.grammar = None

    def _parse(self, inputLine):
        """Return a dict mapping each colRanges key to its piece of inputLine.

        The pieces are stripped if the grammar's strip attribute is true.

        Raises base.SourceParseError ("Short line") if slicing raises an
        IndexError.
        """
        res = {}
        try:
            # items() rather than iteritems() so this runs on both
            # python 2 and python 3.
            for key, colRange in self.grammar.colRanges.items():
                piece = inputLine[colRange]
                if self.grammar.strip:
                    piece = piece.strip()
                res[key] = piece
        except IndexError:
            # NOTE(review): slicing a string with a slice object never
            # raises IndexError (short lines just give truncated or empty
            # pieces), so this will only fire for non-slice range objects.
            raise base.ui.logOldExc(base.SourceParseError("Short line",
                inputLine, self.getLocator(), self.sourceToken))
        return res

    def getLocator(self):
        """Return a human-readable position ("line <n>") for messages."""
        return "line %d"%self.lineNo
59 60
class ColRangeAttribute(base.UnicodeAttribute):
    """A range of indices.

    Ranges can be specified as either <int1>-<int2>, just <int>
    (which is equivalent to <int>-<int>), or as half-open ranges
    (<int>- or -<int>).  Ranges are, contrary to
    python slices, inclusive on both sides, and start counting
    from one.
    """

    def parse(self, value):
        """Return a python slice object for the range literal value.

        The one-based, inclusive literal is converted to python's
        zero-based, half-open convention; a missing bound in a half-open
        range becomes None.  Slice objects are passed through unchanged.

        Raises base.LiteralParseError if the literal contains something
        that is not an integer or has more than one dash.
        """
        if isinstance(value, slice):
            # nothing to do, this already went through parse
            return value

        try:
            if "-" not in value:
                # single column: <int> is shorthand for <int>-<int>
                column = int(value)
                return slice(column-1, column)

            # more than one dash makes this unpacking raise a ValueError,
            # which is reported as a bad literal below
            lowerLit, upperLit = value.split("-")
            # shift the lower bound to zero-based; the inclusive one-based
            # upper bound already equals the exclusive zero-based one
            lower = int(lowerLit)-1 if lowerLit.strip() else None
            upper = int(upperLit) if upperLit.strip() else None
            return slice(lower, upper)
        except ValueError:
            raise base.ui.logOldExc(
                base.LiteralParseError("colRanges", value,
                    hint="A column range,"
                    " (either int1-int2 or just an int) is expected here."))
91 92
class ColumnGrammar(Grammar, FileRowAttributes):
    """A grammar that builds rowdicts out of character index ranges.

    This works by using the colRanges attribute like <col key="mag">12-16</col>,
    which will take the characters 12 through 16 inclusive from each input
    line to build the input column mag.

    As a shortcut, you can also use the colDefs attribute; it contains
    a string of the form {<key>:<range>}, i.e.,
    a whitespace-separated list of colon-separated items of key and range
    as accepted by cols, e.g.::

        <colDefs>
            a: 3-4
            _u: 7
        </colDefs>
    """
    name_ = "columnGrammar"

    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.",
        copyable=True)
    _cols = base.DictAttribute(
        "colRanges",
        description="Mapping of"
            " source keys to column ranges.",
        itemAttD=ColRangeAttribute("col"),
        copyable=True)
    _colDefs = base.ActionAttribute(
        "colDefs",
        description="Shortcut"
            " way of defining cols",
        methodName="_parseColDefs")
    _commentIntroducer = base.UnicodeAttribute(
        "commentIntroducer",
        default=base.NotGiven,
        description="A character sequence"
            " that, when found at the beginning of a line makes this line"
            " ignored",
        copyable=True)
    _strip = base.BooleanAttribute(
        "strip",
        default=True,
        description="Strip all parsed strings?",
        copyable=True)

    def _getColDefGrammar(self):
        """Return a pyparsing grammar for the content of colDefs.

        Each clause of the grammar yields a pair of column key and range
        literal; the range literal has its inner whitespace removed.
        """
        # NOTE(review): pyparsingWhitechars presumably sets the characters
        # pyparsing skips as whitespace while the grammar is being built --
        # confirm in gavo.utils.
        with utils.pyparsingWhitechars("\n\t\r "):
            intLiteral = pyparsing.Word(pyparsing.nums)
            # whitespace after literals has to be swallowed manually
            blindWhite = pyparsing.Suppress(
                pyparsing.Optional(pyparsing.White()))
            dash = blindWhite + pyparsing.Literal("-") + blindWhite

            # -<int>
            openStart = dash + blindWhite + intLiteral
            # <int>, <int>-, or <int>-<int>
            startGiven = intLiteral + pyparsing.Optional(
                dash + pyparsing.Optional(intLiteral))
            colRange = pyparsing.Combine(openStart | startGiven)
            colRange.setName("Column range")

            # NOTE(review): the [:-1] presumably chops a trailing anchor
            # off the identifier pattern -- confirm in gavo.utils.
            identifier = pyparsing.Regex(utils.identifierPattern.pattern[:-1])
            identifier.setName("Column key")

            # tokens are key, ":", and range (blindWhite is suppressed);
            # only key and range are passed on
            clause = (
                identifier + pyparsing.Literal(":") + blindWhite + colRange
                ).addParseAction(lambda s,p,t: (t[0], t[2]))
            colDefs = pyparsing.ZeroOrMore(clause)+pyparsing.StringEnd()
            # to debug, call setDebug(True) on colRange, identifier, or clause
            return colDefs

    def _parseColDefs(self, ctx):
        """Fill colRanges from the colDefs shortcut literal.

        This is the handler for the colDefs action attribute.  Each range
        literal is parsed by the item attribute of colRanges.

        Raises base.LiteralParseError if colDefs does not match the
        grammar from _getColDefGrammar.
        """
        try:
            parsedClauses = utils.pyparseString(
                self._getColDefGrammar(), self.colDefs)
            for key, rangeLiteral in parsedClauses:
                self.colRanges[key] = self._cols.itemAttD.parse(rangeLiteral)
        except pyparsing.ParseException as ex:
            raise base.LiteralParseError("colDefs", self.colDefs,
                hint="colDefs is a whitespace-separated list of key:range pairs."
                " Your literal doesn't look like this, and here's what the"
                " parser had to complain: %s"%ex)

    rowIterator = SplitLineIterator
162