gavo.grammars.binarygrammar

1 """ 2 A grammar reading from (fixed-record) binary files. 3 """ 4 5 #c Copyright 2008-2019, the GAVO project 6 #c 7 #c This program is free software, covered by the GNU GPL. See the 8 #c COPYING file in the source distribution. 9 10 11 import re 12 import struct 13 14 import pyparsing 15 16 from gavo import base 17 from gavo import utils 18 from gavo.grammars.common import Grammar, FileRowIterator 19 from gavo.utils import misctricks 20 21

class BinaryRowIterator(FileRowIterator):
    """A row iterator reading from binary files.

    Records are cut out of self.inputFile according to the grammar's
    fieldDefs (a binaryRecordDef) and unpacked into rowdicts with the
    struct module.
    """

    def _iterUnarmoredRecords(self):
        """Yield raw records read back-to-back from the input file.

        Each chunk is at most fieldDefs.recordLength bytes long; iteration
        stops at the first empty read, i.e., at end of file.  A truncated
        final record is yielded as-is and will make struct.unpack fail
        in _iterRows.
        """
        recordLength = self.grammar.fieldDefs.recordLength
        while True:
            data = self.inputFile.read(recordLength)
            # EOF is an empty str on python 2 but empty bytes on python 3;
            # a comparison against "" would never be true for the latter,
            # so test for emptiness instead.
            if not data:
                return
            yield data

    def _iterInRecords(self):
        """Return an iterator over the raw records in the input file.

        The first grammar.skipBytes bytes are read and discarded.  What
        iterator is returned depends on grammar.armor: None means records
        directly follow each other, "fortran" delegates the record
        splitting to misctricks.iterFortranRecs.
        """
        self.inputFile.read(self.grammar.skipBytes)
        armor = self.grammar.armor
        if armor is None:
            return self._iterUnarmoredRecords()
        elif armor=="fortran":
            return misctricks.iterFortranRecs(self.inputFile)
        else:
            # armor is an enumerated attribute, so this should be unreachable.
            assert False, "Unknown armor: %r"%armor

    def _iterRows(self):
        """Yield one rowdict per record, mapping field names to values.

        Values are what struct.unpack returns for the record definition's
        structFormat.  Any exception raised while reading or unpacking is
        turned into a base.SourceParseError carrying the current byte
        position in the input file and the source token.
        """
        fmtStr = self.grammar.fieldDefs.structFormat
        fieldNames = self.grammar.fieldDefs.fieldNames
        try:
            for rawRec in self._iterInRecords():
                yield dict(zip(fieldNames, struct.unpack(fmtStr, rawRec)))
        except Exception as ex:
            raise base.ui.logOldExc(base.SourceParseError(str(ex),
                location="byte %s"%self.inputFile.tell(),
                source=str(self.sourceToken)))

51 52

def _getFieldsGrammar():
    """Return a pyparsing grammar for the content of a binaryRecordDef.

    The grammar matches one or more fields of the form
    <identifier>(<formatCode>) up to the end of the string, where
    formatCode is either <digits>s or one of the single-letter struct
    codes bBhHiIqQfd.  Each field is parsed into a dictionary with the
    keys "identifier" and "formatCode".
    """
    with utils.pyparsingWhitechars(" \n\t\r"):
        # The last character of identifierPattern is dropped here
        # (presumably a trailing $ anchor -- verify against utils) so
        # the identifier can be followed by the opening parenthesis.
        identifier = pyparsing.Regex(utils.identifierPattern.pattern[:-1]
            ).setName("identifier")
        # Raw string: \d is a regex escape, not a python string escape.
        formatCode = pyparsing.Regex(r"\d+s|[bBhHiIqQfd]"
            ).setName("fieldSpec")
        field = ( identifier("identifier")
            + pyparsing.Suppress(pyparsing.Literal("("))
            + formatCode("formatCode")
            + pyparsing.Suppress(pyparsing.Literal(")"))).setParseAction(
            lambda s, p, t: dict(t))
        return pyparsing.OneOrMore(field)+pyparsing.StringEnd()

65 66

class BinaryRecordDef(base.Structure):
    """A definition of a binary record.

    A binary record consists of a number of binary fields, each of which
    is defined by a name and a format code.  The format codes supported
    here are a subset of what python's struct module supports.  The
    widths given below are for big, little, and packed binfmts.
    For native (which is the default), it depends on your platform.

    * <number>s -- <number> characters making up a string
    * b,B -- signed and unsigned byte (8 bit)
    * h,H -- signed and unsigned short (16 bit)
    * i,I -- signed and unsigned int (32 bit)
    * q,Q -- signed and unsigned long (64 bit)
    * f,d -- float and double.

    The content of this element gives the record structure in the format
    <name>(<code>){<whitespace><name>(<code>)} where <name> is a c-style
    identifier.
    """
    name_ = "binaryRecordDef"

    _fieldsGrammar = _getFieldsGrammar()

    _binfmt = base.EnumeratedUnicodeAttribute("binfmt",
        default="native",
        validValues=["big", "little", "native", "packed"],
        description="Binary format of the input data; big and little stand"
            " for msb first and lsb first, and"
            " packed is like native except no alignment takes place.")

    _fields = base.DataContent(description="The enumeration of"
        " the record fields.")

    # Prefixes for struct format strings selecting byte order and alignment.
    _binfmtToStructCode = {
        "native": "",
        "packed": "=",
        "big": ">",
        "little": "<"}

    def completeElement(self, ctx):
        """Parse the element content and derive the struct parameters.

        This sets three attributes on the instance:

        * structFormat -- the format string for the struct module (the
          binfmt prefix followed by the fields' format codes),
        * recordLength -- the size in bytes of a record in that format,
        * fieldNames -- a tuple of the field names in record order.

        If the content does not match the fields grammar, a
        base.LiteralParseError is raised.
        """
        try:
            parsedFields = utils.pyparseString(self._fieldsGrammar, self.content_)
        except pyparsing.ParseBaseException as ex:
            # Collapse whitespace runs so the literal is readable in the
            # error message.
            raise base.ui.logOldExc(base.LiteralParseError("binaryRecordDef",
                re.sub(r"\s+", " ", self.content_),
                pos=str(ex.loc), hint="The parser said: '%s'"%str(ex)))
        # XXX TODO: Position should probably be position during XML parse.
        # Fix when we have source positions on parsed elements.
        self.structFormat = (self._binfmtToStructCode[self.binfmt]+
            str("".join(f["formatCode"] for f in parsedFields)))
        self.recordLength = struct.calcsize(self.structFormat)
        self.fieldNames = tuple(f["identifier"] for f in parsedFields)
        self._completeElementNext(BinaryRecordDef, ctx)

121 122

class BinaryGrammar(Grammar):
    """A grammar that builds rowdicts from binary data.

    The input is expected to consist of fixed-length records.  How a
    record is split into fields is specified in a binaryRecordDef
    element.
    """
    name_ = "binaryGrammar"
    rowIterator = BinaryRowIterator

    _til = base.IntAttribute(
        "skipBytes",
        default=0,
        description="Number of bytes to skip before parsing records.")

    _fdefs = base.StructAttribute(
        "fieldDefs",
        description="Definition of the record.",
        childFactory=BinaryRecordDef)

    _armoring = base.EnumeratedUnicodeAttribute(
        "armor",
        default=None,
        validValues=["fortran"],
        description="Record armoring; by default it's None meaning the data was"
            " dumped to the file sequentially. Set it to fortran for fortran"
            " unformatted files (4 byte length before and after the payload).")

148

Source Code for Module gavo.grammars.binarygrammar