Package gavo :: Package grammars :: Module binarygrammar
[frames | no frames]

Source Code for Module gavo.grammars.binarygrammar

  1  """ 
  2  A grammar reading from (fixed-record) binary files. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  import re 
 12  import struct 
 13   
 14  import pyparsing 
 15   
 16  from gavo import base 
 17  from gavo import utils 
 18  from gavo.grammars.common import Grammar, FileRowIterator 
 19  from gavo.utils import misctricks 
 20   
 21   
class BinaryRowIterator(FileRowIterator):
    """A row iterator reading from binary files.

    Raw records are taken from self.inputFile and unpacked with the
    struct format and field names found on self.grammar.fieldDefs.
    """

    def _iterUnarmoredRecords(self):
        """Yield consecutive chunks of fieldDefs.recordLength bytes.

        Iteration ends as soon as a read returns no data.  A short final
        chunk is still yielded; unpacking it is left to the caller.
        """
        recordLength = self.grammar.fieldDefs.recordLength
        while True:
            data = self.inputFile.read(recordLength)
            # EOF is signalled by an empty result.  That is "" for text
            # or python 2 files but b"" for python 3 binary files, so
            # test for emptiness instead of comparing against "".
            if not data:
                return
            yield data

    def _iterInRecords(self):
        """Return an iterator over the raw records of the input file.

        The first grammar.skipBytes bytes are read and discarded.  The
        records then come from _iterUnarmoredRecords when grammar.armor
        is None and from misctricks.iterFortranRecs when it is
        "fortran".
        """
        self.inputFile.read(self.grammar.skipBytes)
        armor = self.grammar.armor
        if armor is None:
            return self._iterUnarmoredRecords()
        elif armor == "fortran":
            return misctricks.iterFortranRecs(self.inputFile)
        else:
            # An explicit raise rather than "assert False": asserts are
            # removed under python -O, which would make this method
            # quietly return None.
            raise AssertionError("Unsupported armor: %r" % (armor,))

    def _iterRows(self):
        """Yield one dict per record, mapping field names to values.

        Any exception raised while fetching or unpacking records is
        re-raised as a base.SourceParseError carrying the current byte
        position of the input file and the source token.
        """
        fmtStr = self.grammar.fieldDefs.structFormat
        fieldNames = self.grammar.fieldDefs.fieldNames
        try:
            for rawRec in self._iterInRecords():
                yield dict(zip(fieldNames, struct.unpack(fmtStr, rawRec)))
        except Exception as ex:
            raise base.ui.logOldExc(base.SourceParseError(str(ex),
                location="byte %s"%self.inputFile.tell(),
                source=str(self.sourceToken)))
51 52
def _getFieldsGrammar():
    """Return a pyparsing grammar for the content of a binaryRecordDef.

    The grammar accepts one or more fields written as
    identifier(formatCode), followed by the end of the string.  Each
    field's parse action turns its named results into a dict with the
    keys "identifier" and "formatCode".
    """
    with utils.pyparsingWhitechars(" \n\t\r"):
        # NOTE(review): the [:-1] presumably strips a trailing anchor
        # from utils.identifierPattern -- confirm against its definition.
        identifier = pyparsing.Regex(utils.identifierPattern.pattern[:-1]
            ).setName("identifier")
        # Raw string: "\d" in a plain literal is an invalid escape
        # sequence that newer python versions warn about.
        formatCode = pyparsing.Regex(r"\d+s|[bBhHiIqQfd]"
            ).setName("fieldSpec")
        field = ( identifier("identifier")
            + pyparsing.Suppress(pyparsing.Literal("("))
            + formatCode("formatCode")
            + pyparsing.Suppress(pyparsing.Literal(")"))).setParseAction(
            lambda s, p, t: dict(t))
        return pyparsing.OneOrMore(field)+pyparsing.StringEnd()
65 66
class BinaryRecordDef(base.Structure):
    """A definition of a binary record.

    A binary record consists of a number of binary fields, each of which
    is defined by a name and a format code.  The format codes supported
    here are a subset of what python's struct module supports.  The
    widths given below are for big, little, and packed binfmts.
    For native (which is the default), it depends on your platform.

    * <number>s -- <number> characters making up a string
    * b,B -- signed and unsigned byte (8 bit)
    * h,H -- signed and unsigned short (16 bit)
    * i,I -- signed and unsigned int (32 bit)
    * q,Q -- signed and unsigned long (64 bit)
    * f,d -- float and double.

    The content of this element gives the record structure in the format
    <name>(<code>){<whitespace><name>(<code>)} where <name> is a c-style
    identifier.
    """
    name_ = "binaryRecordDef"

    _fieldsGrammar = _getFieldsGrammar()

    _binfmt = base.EnumeratedUnicodeAttribute("binfmt",
        default="native",
        validValues=["big", "little", "native", "packed"],
        description="Binary format of the input data; big and little stand"
            " for msb first and lsb first, and"
            " packed is like native except no alignment takes place.")

    _fields = base.DataContent(description="The enumeration of"
        " the record fields.")

    # Prefixes for the struct format string, keyed by binfmt value.
    _binfmtToStructCode = {
        "native": "",
        "packed": "=",
        "big": ">",
        "little": "<"}

    def completeElement(self, ctx):
        """Parse the element content and derive the record layout.

        Sets three attributes: structFormat (the struct format string,
        i.e. the prefix for binfmt followed by the concatenated format
        codes), recordLength (struct.calcsize of that format), and
        fieldNames (a tuple of the field identifiers in declaration
        order).

        If the content does not match the fields grammar, a
        base.LiteralParseError is raised.
        """
        try:
            parsedFields = utils.pyparseString(self._fieldsGrammar, self.content_)
        except pyparsing.ParseBaseException as ex:
            # Whitespace runs are collapsed so that the offending literal
            # is reported on a single line.  Raw string: "\s" in a plain
            # literal is an invalid escape sequence.
            raise base.ui.logOldExc(base.LiteralParseError("binaryRecordDef",
                re.sub(r"\s+", " ", self.content_),
                pos=str(ex.loc), hint="The parser said: '%s'"%str(ex)))
        # XXX TODO: Position should probably be position during XML parse.
        # Fix when we have source positions on parsed elements.
        self.structFormat = (self._binfmtToStructCode[self.binfmt]+
            str("".join(f["formatCode"] for f in parsedFields)))
        self.recordLength = struct.calcsize(self.structFormat)
        self.fieldNames = tuple(f["identifier"] for f in parsedFields)
        self._completeElementNext(BinaryRecordDef, ctx)
121 122
class BinaryGrammar(Grammar):
    """A grammar that builds rowdicts from binary data.

    The input is expected to consist of fixed-length records.  How a
    record breaks down into fields is specified with a binaryRecordDef
    element.
    """
    name_ = "binaryGrammar"
    rowIterator = BinaryRowIterator

    _til = base.IntAttribute(
        "skipBytes",
        default=0,
        description="Number of bytes to skip before parsing records.",
    )

    _fdefs = base.StructAttribute(
        "fieldDefs",
        description="Definition of the record.",
        childFactory=BinaryRecordDef,
    )

    _armoring = base.EnumeratedUnicodeAttribute(
        "armor",
        default=None,
        validValues=["fortran"],
        description=(
            "Record armoring; by default it's None meaning the data was"
            " dumped to the file sequentially. Set it to fortran for"
            " fortran unformatted files (4 byte length before and after the"
            " payload)."
        ),
    )
148