gavo.grammars.columngrammar

1 """ 2 A grammar that just splits the source into input lines and then 3 lets you name character ranges. 4 """ 5 6 #c Copyright 2008-2019, the GAVO project 7 #c 8 #c This program is free software, covered by the GNU GPL. See the 9 #c COPYING file in the source distribution. 10 11 12 import pyparsing 13 14 from gavo import base 15 from gavo import utils 16 from gavo.grammars.common import Grammar, FileRowIterator, FileRowAttributes 17 18

class SplitLineIterator(FileRowIterator):
    """A row iterator cutting each input line into named character ranges.

    The ranges are taken from the grammar's colRanges.  The grammar's
    topIgnoredLines, commentIntroducer, and strip attributes control which
    lines are skipped and whether the cut-out strings are stripped.
    """

    def __init__(self, grammar, sourceToken, **kwargs):
        FileRowIterator.__init__(self, grammar, sourceToken, **kwargs)
        # Skip the ignored head of the file, but keep counting its lines so
        # getLocator reports positions relative to the start of the file.
        # (self.inputFile is presumably opened by FileRowIterator.__init__.)
        for _ in range(self.grammar.topIgnoredLines):
            self.inputFile.readline()
        self.lineNo = self.grammar.topIgnoredLines

    def _iterRows(self):
        """Yield a rowdict for each input line that is not a comment.

        Once the input is exhausted, the input file is closed and the
        reference to the grammar is dropped.
        """
        while True:
            self.lineNo += 1
            inputLine = self.inputFile.readline()
            if not inputLine:
                # readline returns an empty string at end of file only
                break

            if (self.grammar.commentIntroducer is not base.NotGiven
                    and inputLine.startswith(self.grammar.commentIntroducer)):
                continue

            res = self._parse(inputLine)
            yield res
            self.recNo += 1

        self.inputFile.close()
        self.grammar = None

    def _parse(self, inputLine):
        """Return a dictionary mapping the keys of the grammar's colRanges
        to the corresponding parts of inputLine.

        The parts are stripped if the grammar's strip attribute is true.
        """
        doStrip = self.grammar.strip
        res = {}
        try:
            # items() rather than iteritems(): the latter only exists on
            # python 2, whereas items() works on both python 2 and 3.
            for key, colRange in self.grammar.colRanges.items():
                field = inputLine[colRange]
                res[key] = field.strip() if doStrip else field
        except IndexError:
            # NOTE(review): slicing a string never raises IndexError (ranges
            # beyond the end of the line just give shorter or empty strings),
            # so this presumably only triggers when a colRanges value is a
            # plain index rather than a slice.
            raise base.ui.logOldExc(base.SourceParseError("Short line", inputLine,
                self.getLocator(), self.sourceToken))
        return res

    def getLocator(self):
        """Return a string giving the number of the line last read."""
        return "line %d"%self.lineNo

59 60

class ColRangeAttribute(base.UnicodeAttribute):
    """A range of indices.

    Ranges can be specified as either <int1>-<int2>, just <int>
    (which is equivalent to <int>-<int>), or as half-open ranges
    (<int>- or -<int>).  Ranges are, contrary to
    python slices, inclusive on both sides, and start counting
    from one.
    """

    def parse(self, value):
        """Return the python slice corresponding to the range literal value.

        Values that already are slices are returned unchanged.

        A LiteralParseError is raised if value is not a valid column range;
        that includes column numbers smaller than one.
        """
        if isinstance(value, slice):
            # we're already parsed
            return value

        def toColumn(literal):
            # Columns are counted from one.  Accepting 0 would give a start
            # index of -1, i.e., a slice counted from the end of the line,
            # which silently yields garbage rather than an error.
            col = int(literal)
            if col < 1:
                raise ValueError("Column numbers start at 1")
            return col

        try:
            if "-" in value:
                # more than one dash makes the unpacking raise a ValueError,
                # which is reported like any other malformed literal
                startLit, endLit = value.split("-")
                start, end = None, None
                if startLit.strip():
                    # 1-based inclusive start -> 0-based slice start
                    start = toColumn(startLit)-1
                if endLit.strip():
                    # 1-based inclusive end equals the exclusive slice end
                    end = toColumn(endLit)
                return slice(start, end)
            else:
                col = toColumn(value)
                return slice(col-1, col)
        except ValueError:
            raise base.ui.logOldExc(
                base.LiteralParseError("colRanges", value, hint="A column range,"
                " (either int1-int2 or just an int) is expected here."))

91 92

class ColumnGrammar(Grammar, FileRowAttributes):
    """A grammar that builds rowdicts out of character index ranges.

    This works by using the colRanges attribute like <col key="mag">12-16</col>,
    which will take the characters 12 through 16 inclusive from each input
    line to build the input column mag.

    As a shortcut, you can also use the colDefs attribute; it contains
    a string of the form {<key>:<range>}, i.e.,
    a whitespace-separated list of colon-separated items of key and range
    as accepted by cols, e.g.::

        <colDefs>
            a: 3-4
            _u: 7
        </colDefs>
    """
    name_ = "columnGrammar"

    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.",
        copyable=True)
    _cols = base.DictAttribute(
        "colRanges",
        description="Mapping of source keys to column ranges.",
        itemAttD=ColRangeAttribute("col"),
        copyable=True)
    _colDefs = base.ActionAttribute(
        "colDefs",
        description="Shortcut way of defining cols",
        methodName="_parseColDefs")
    _commentIntroducer = base.UnicodeAttribute(
        "commentIntroducer",
        default=base.NotGiven,
        description="A character sequence that, when found at the"
            " beginning of a line makes this line ignored",
        copyable=True)
    _strip = base.BooleanAttribute(
        "strip",
        default=True,
        description="Strip all parsed strings?",
        copyable=True)

    def _getColDefGrammar(self):
        """Return a pyparsing grammar for colDefs literals.

        Each key:range clause is turned into a (key, range literal) pair.
        """
        with utils.pyparsingWhitechars("\n\t\r "):
            number = pyparsing.Word(pyparsing.nums)
            # whitespace following literals has to be swallowed by hand
            blindWhite = pyparsing.Suppress(
                pyparsing.Optional(pyparsing.White()))
            dash = blindWhite + pyparsing.Literal("-") + blindWhite

            # "-<int>" on the one hand, "<int>", "<int>-", and "<int>-<int>"
            # on the other; Combine joins the pieces into a single token.
            openStart = dash + blindWhite + number
            startGiven = number + pyparsing.Optional(
                dash + pyparsing.Optional(number))
            colRange = pyparsing.Combine(openStart | startGiven)
            colRange.setName("Column range")

            # the last character of the pattern is cut off; presumably that
            # is an end-of-string anchor -- TODO confirm in utils
            colKey = pyparsing.Regex(utils.identifierPattern.pattern[:-1])
            colKey.setName("Column key")

            def makePair(s, p, t):
                # the tokens are key, colon, range; the colon is dropped
                return (t[0], t[2])

            clause = (colKey + pyparsing.Literal(":") + blindWhite + colRange)
            clause = clause.addParseAction(makePair)
            return pyparsing.ZeroOrMore(clause) + pyparsing.StringEnd()

    def _parseColDefs(self, ctx):
        """Fill colRanges from the colDefs shortcut literal.

        This is the handler for the colDefs attribute.  A LiteralParseError
        is raised if colDefs does not match the colDefs grammar.
        """
        try:
            parsedPairs = utils.pyparseString(
                self._getColDefGrammar(), self.colDefs)
            for key, rangeLiteral in parsedPairs:
                self.colRanges[key] = self._cols.itemAttD.parse(rangeLiteral)
        except pyparsing.ParseException as ex:
            raise base.LiteralParseError("colDefs", self.colDefs,
                hint="colDefs is a whitespace-separated list of key:range pairs."
                " Your literal doesn't look like this, and here's what the"
                " parser had to complain: %s"%ex)

    rowIterator = SplitLineIterator

162

Source Code for Module gavo.grammars.columngrammar