Package gavo :: Package votable :: Module common
[frames | no frames]

Source Code for Module gavo.votable.common

  1  """ 
  2  Common definitions for the GAVO VOTable modules. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  import codecs 
 12   
 13  from gavo import utils 
 14   
 15   
 16  NaN = float("NaN") 
 17   
class VOTableError(utils.Error):
    """The root of the VOTable exception hierarchy.

    The other VOTable-specific exceptions in this module derive from this
    class.
    """
21
class BadVOTableLiteral(VOTableError):
    """Raised when a literal found in a VOTable cannot be interpreted.

    Instances keep the offending type name in type, the literal itself in
    literal, and, if one was passed in, the exception that triggered the
    failure in originalException.
    """

    def __init__(self, type, literal, hint=None, originalException=None):
        message = "Invalid literal for %s: '%s'"%(type, repr(literal))
        VOTableError.__init__(self, message, hint=hint)
        self.type = type
        self.literal = literal
        self.originalException = originalException

    def __str__(self):
        details = (self.type, repr(self.literal))
        return "Invalid literal for %s: %s"%details
34
class BadVOTableData(VOTableError):
    """Raised when a value that is to be written into a VOTable is
    unsuitable.

    The name of the field concerned is kept in fieldName; val holds the
    repr of the offending value (not the value itself).
    """

    def __init__(self, msg, val, fieldName, hint=None):
        VOTableError.__init__(self, msg, hint=hint)
        self.fieldName = fieldName
        # only the repr is stored
        self.val = repr(val)

    def __getstate__(self):
        # self.msg is presumably set by the base class constructor
        state = {}
        for attName in ("msg", "val", "fieldName"):
            state[attName] = getattr(self, attName)
        return state

    def __str__(self):
        parts = (self.fieldName, self.val, self.msg)
        return "Field '%s', value %s: %s"%parts
48
class VOTableParseError(VOTableError):
    """Raised when the structure of a document is badly broken.

    The message passed in already has line and position baked in.  It would
    be nicer to have them as separate attributes, but the expat library
    mashes them up.  When you have no positional information, use
    iterparse.getParseError, which is the canonical way of obtaining these.
    """
57 58
def qmreplace(exc):
    """a simplistic codec error handler emitting question marks.

    It works like python's built-in "replace" handler, but where that
    would yield ufffd, this always yields a plain question mark.  ufffd
    is fine in a unicode environment, but for VOTable chars it is just
    a nuisance.

    The return value is the pair (replacement, resume position) that the
    codecs machinery expects; processing resumes one position after the
    start of the failing range.
    """
    resumeAt = exc.start+1
    return u'?', resumeAt


# make the handler available as errors="qmreplace"
codecs.register_error("qmreplace", qmreplace)
def validateTDComplex(val):
    """raises a ValueError if val is not made up of exactly two
    whitespace-separated float literals.

    Nothing is returned; the function is only good for its exception.
    """
    # the unpacking enforces that there are exactly two components
    realPart, imagPart = [float(item) for item in val.split()]
75 76
def validateVOTInt(val):
    """raise an error if val is not a legal int for VOTables.

    Actually, this is for tabledata, and after the relaxed 1.3 rules, we allow
    the empty string ("NULL"), too.

    Legal values are decimal integer literals and hexadecimal literals
    introduced by 0x (or 0X).  Anything else results in a ValueError;
    for legal values, nothing is returned.
    """
    if val=="":
        return

    # Only try the hex interpretation when there actually is a hex prefix.
    # Blindly parsing val[2:] as hex would let through junk like "12abc",
    # where the tail happens to consist of hex digits.
    if val[:2] in ("0x", "0X"):
        int(val[2:], 16)
    else:
        int(val)
89 90
def indentList(lines, indent):
    """returns a new list with indent prepended to every element of lines.
    """
    indented = []
    for line in lines:
        indented.append(indent+line)
    return indented
95 96
def getLoopifier(field):
    """returns a function wrapping code such that it is applied to each
    element of an array.

    The *XtypeEncoderCode functions below use this.  At this point, only
    1D arrays of xtyped things are supported, which in VOTable terms are
    2D arrays.

    What is returned is a callable taking a list of source lines (the code
    dealing with a single elementary xtyped thing) and returning a list of
    source lines, or None if the array is too complex to handle.  For
    fields that are not multidimensional, the callable hands back its
    argument unchanged.
    """
    # All xtyped things are 1D arrays so far, so anything that is not
    # multidimensional needs no loop at all.
    if not field.isMultiDim():
        return lambda x: x

    if field.arraysize.count("x")!=1:
        # just forget it; if there are native objects in the value, it's
        # fail, but since the decoders operate the same way, roundtrip
        # will work.
        return None

    # 1-d array of xtyped thing; handle it
    def wrapInLoop(code):
        prologue = [
            "seq, arr = val, []",
            "for val in seq:"]
        epilogue = [
            " arr.append(val)",
            "val = arr"]
        return prologue+indentList(code, " ")+epilogue

    return wrapInLoop
def getXtypeEncoderCode(field):
    """returns source lines converting the internal representation of an
    xtyped value into what gets serialised in VOTables.

    The generated code expects the value in a local variable val and
    leaves the converted value there.  For fields without an xtype or with
    an xtype unknown here, an empty list is returned.

    This is currently only called for char and float arrays, as no
    xtypes are defined for other types.  If that changes, you'll have
    to change the *_enc modules.

    1D arrays of xtyped things are handled, more deeply nested structures
    are not; for those, an empty list is returned, too, and they are left
    alone (which will only work under very special conditions and yield
    ugly error messages otherwise).
    """
    wrap = getLoopifier(field)
    if wrap is None:
        return []

    # The table is built on each call so callers always get fresh lists.
    encoders = (
        # adql:TIMESTAMP is legacy, delete ~ 2020
        (("adql:TIMESTAMP", "timestamp"), [
            "if isinstance(val, datetime.datetime):",
            " val = utils.formatISODT(val)"]),
        (("dachs:DATE",), [
            "if isinstance(val, datetime.date):",
            " val = val.isoformat()"]),
        (("adql:POINT", "adql:REGION"), [
            "if isinstance(val, pgsphere.PgSAdapter):",
            " val = val.asSTCS('UNKNOWNFrame')"]),
        (("point", "circle", "polygon", "moc", "x:box"), [
            "if isinstance(val, pgsphere.PgSAdapter):",
            " val = val.asDALI()"]),
    )

    for xtypes, code in encoders:
        if field.xtype in xtypes:
            return wrap(code)

    return []
173 174
def getXtypeDecoderCode(field):
    """returns source lines converting generic VOTable arrays into the
    special internal representation belonging to field's xtype.

    The result is a list of lines; it is empty if field has no xtype or
    the xtype is not known here.  The generated code sees the unpacked
    array as val and is expected to set val to the special representation.
    Falsy vals are turned into None by the generated code.

    1D arrays of xtyped things are handled, more deeply nested structures
    are not; for those, an empty list is returned, too, and they are left
    alone (which is ok for round-tripping but probably will fail when
    DaCHS components want to process stuff).
    """
    if not field.xtype:
        return []

    wrap = getLoopifier(field)
    if wrap is None:
        return []

    # pairs of xtype and the statement producing the internal value
    converters = (
        ("adql:POINT", " val = stc.parseSimpleSTCS(val)"),
        ("adql:REGION", " val = stc.simpleSTCSToPolygon(val)"),
        ("point", " val = pgsphere.SPoint.fromDALI(val)"),
        ("circle", " val = pgsphere.SCircle.fromDALI(val)"),
        ("polygon", " val = pgsphere.SPoly.fromDALI(val)"),
        ("moc", " val = pgsphere.SMoc.fromDALI(val)"),
        ("x:box", " val = pgsphere.SBox.fromDALI(val)"),
        ("adql:TIMESTAMP", " val = parseDefaultDatetime(val)"),
        ("timestamp", " val = parseDefaultDatetime(val)"),
        # GAVO-specific extension for consistency in our type systems
        ("dachs:DATE", " val = parseDefaultDate(val)"),
    )

    for xtype, conversion in converters:
        if field.xtype==xtype:
            return wrap([
                "if not val:",
                " val = None",
                "else:",
                conversion])

    # unknown xtype; ignore it and process stuff as usual
    return []
244 245
class NULLFlags(object):
    """an interface to the BINARY2 NULL flags.

    Construct it with the number of fields, then use serialize or
    serializeFromRow to obtain the flag bytes for a row, and deserialize or
    getFromFile to turn flag bytes back into a list of booleans.

    The flags are packed most significant bit first, one bit per field;
    the last byte is padded with zero bits.
    """
    # bit masks in the order in which fields are packed into a byte
    masks = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]

    def __init__(self, nFields):
        self.nFields = nFields
        # Floor division: this must be an integer (it is passed to
        # file.read) regardless of whether true division is in effect.
        self.nBytes = (self.nFields+7)//8

    def serialize(self, nullMap):
        """returns null bytes for nullMap, which is a sequence of booleans
        with Trues where the field is NULL.

        The result is a byte string of length nBytes.

        It is an error to pass in nullMaps with lengths!=nFields.
        """
        assert len(nullMap)==self.nFields
        packed, curBits, val = bytearray(), 0, 0
        for isNull in nullMap:
            val <<= 1
            if isNull:
                val |= 1
            curBits += 1
            if curBits==8:
                packed.append(val)
                curBits, val = 0, 0

        if curBits:
            # left-align the bits of an incomplete last byte
            packed.append(val<<(8-curBits))
        # going through bytearray yields a byte string on python 2 and 3
        return bytes(packed)

    def serializeFromRow(self, row):
        """returns null bytes for a row, which is a sequence of values.
        Everything that's None is flagged as NULL.
        """
        return self.serialize([v is None for v in row])

    def deserialize(self, bytes):
        """returns a sequence of booleans giving for each element in a row
        if there's a NULL there.

        bytes is a byte string as returned by serialize.  At most nFields
        flags are returned, even if bytes contains more data than necessary.
        If bytes is too short, the result will be shorter than nFields.
        """
        if isinstance(bytes, type(u"")):
            # tolerate text strings holding one character per flag byte
            bytes = bytes.encode("latin-1")

        nulls = []
        # bytearray iteration yields integers on both python 2 and 3
        for byte in bytearray(bytes):
            for mask in self.masks:
                if len(nulls)==self.nFields:
                    # all fields covered; the rest is padding or surplus
                    return nulls
                nulls.append(bool(mask&byte))
        return nulls

    def getFromFile(self, file):
        """returns a sequence of booleans giving for each element in a row
        if there's a NULL there.

        This reads nBytes bytes from file, which thus should be opened in
        binary mode and positioned at the start of the flags.
        """
        return self.deserialize(file.read(self.nBytes))
307 308
def isMultiDim(arraysize):
    """returns True if arraysize, a VOTable arraysize attribute value,
    describes an array with more than one dimension.

    Such arraysizes are recognised by their containing an "x"; a None
    arraysize is never multidimensional.
    """
    if arraysize is None:
        return False
    return "x" in arraysize
313
def hasVarLength(arraysize):
    """returns True if the VOTable arraysize denotes a variable-length array.

    This is, of course, False for None arraysizes (and for empty ones).
    The result always is a proper boolean.
    """
    if not arraysize:
        # don't leak None or "" to callers expecting a boolean
        return False
    return arraysize.endswith("*")
320 321
def getLength(arraysize):
    """returns the number of elements expected for an array described with
    the VOTable attribute arraysize.

    A 1-element array isn't told apart from a scalar here.  Both return 1.
    For variable-length arrays, this returns None.

    Bad arraysize specs will give ValueErrors (perhaps not always with the
    most helpful messages).

    >>> getLength(None)
    1
    >>> getLength("*")
    >>> getLength("5")
    5
    >>> getLength("5x*")
    >>> getLength("5x6*")
    >>> getLength("7x5x6")
    210
    >>> getLength("7*x5x6")
    Traceback (most recent call last):
    ValueError: invalid literal for int() with base 10: '7*'
    """
    if arraysize is None:
        return 1
    if arraysize.endswith("*"):
        return None

    # arraysize is not None here, so this is what isMultiDim would check
    if "x" in arraysize:
        # multiply up the dimensions; done in a plain loop since reduce
        # is not a builtin on all python versions
        nElements = 1
        for dim in arraysize.split("x"):
            nElements *= int(dim)
        return nElements

    try:
        return int(arraysize)
    except ValueError:
        # fall through to exception at function exit
        pass
    raise ValueError("Invalid arraysize specification: %s"%arraysize)
358 359
def getShape(datatype, arraysize):
    """returns a numpy-compatible shape for a VOTable arraysize.

    None is returned for scalars (arraysize None), for variable-length
    1D arrays, and for 1D char arrays, which count as scalar strings.
    Otherwise, the result is a tuple of integers.  For 2+D arrays with
    a variable last dimension, that dimension is currently replaced by 1.
    Which doesn't sound smart.
    """
    # "*": What should we really return here?
    if arraysize is None or arraysize=="*":
        return None

    # special case: 1d char arrays are just scalar strings
    if datatype=="char" and "x" not in arraysize:
        return None

    dims = arraysize.replace("*", "").split("x")
    if len(dims)>1 and dims[-1]=="":
        # variable last dimension
        dims[-1] = "1"
    return tuple([int(dim) for dim in dims])
384 385
def _test():
    """runs the doctests contained in this module.
    """
    import doctest
    import common
    doctest.testmod(common)


if __name__=="__main__":
    _test()