1 """
2 Common definitions for the GAVO VOTable modules.
3 """
4
5
6
7
8
9
10
11 import codecs
12
13 from gavo import utils
14
15
16 NaN = float("NaN")
17
19 """The base class of VOTable-related errors.
20 """
21
23 """Raised when a literal in a VOTable is invalid.
24 """
25 - def __init__(self, type, literal, hint=None, originalException=None):
26 VOTableError.__init__(self,
27 "Invalid literal for %s: '%s'"%(type, repr(literal)),
28 hint=hint)
29 self.type, self.literal = type, literal
30 self.originalException = originalException
31
33 return "Invalid literal for %s: %s"%(self.type, repr(self.literal))
34
36 """Raised when something is wrong with a value being inserted into
37 a VOTable.
38 """
39 - def __init__(self, msg, val, fieldName, hint=None):
42
44 return {"msg": self.msg, "val": self.val, "fieldName": self.fieldName}
45
47 return "Field '%s', value %s: %s"%(self.fieldName, self.val, self.msg)
48
50 """Raised when something is grossly wrong with the document structure.
51
52 Note that the message passed already contains line and position. I'd
53 like to have them in separate attributes, but the expat library mashes
54 them up. iterparse.getParseError is the canonical way of obtaining these
55 when you have no positional information.
56 """
57
58
def qmreplace(exc):
    """a dumb handler for decoder errors.

    This is like python's "replace" handler except that we'll always return
    question marks rather than ufffd.  The latter makes sense in a unicode
    environment, but we need this for VOTable chars, and there that's just
    a nuisance.

    exc is the UnicodeError instance handed in by the codec machinery.  The
    function returns a pair of the replacement string and the position at
    which processing resumes; that position is one past the start of the
    offending input, so a single item is replaced per invocation.
    """
    return u'?', exc.start+1
68
69
70 codecs.register_error("qmreplace", qmreplace)
71
72
def validateTDComplex(val):
    """raises a ValueError if val is not a pair of float literals.

    val must consist of exactly two whitespace-separated items, each of
    which float() accepts.  Nothing is returned for valid input.
    """
    # Unpacking into exactly two names is what enforces the "two components"
    # rule; the values themselves are not needed.
    _real, _imag = map(float, val.split())
75
76
def validateVOTInt(val):
    """raise an error if val is not a legal int for VOTables.

    Actually, this is for tabledata, and after the relaxed 1.3 rules, we allow
    the empty string ("NULL"), too.

    Legal values are the empty string, anything int() accepts as a decimal
    literal, and hexadecimal literals introduced by 0x (or 0X).  Illegal
    values raise a ValueError; nothing is returned otherwise.
    """
    if val=="":
        return

    # Only interpret the tail as hex when there actually is a hex prefix;
    # blindly trying int(val[2:], 16) would accept junk like "zzff" or
    # "12ab".
    if val[:2] in ("0x", "0X"):
        int(val[2:], 16)
    else:
        int(val)
89
90
def indentList(lines, indent):
    """prepends indent to all elements in lines.

    lines is a sequence of strings, indent the string to put in front of
    each of them.  A new list is returned; lines itself is not changed.
    """
    return [indent+line for line in lines]
95
96
def getLoopifier(field):
    """returns a function to map code over arrays.

    This is used by the xtype encoder and decoder code generators, and for
    now only deals with 1D arrays of xtyped things, which right now means
    2D arrays of votable arrays.

    This will return a callable accepting a list of lines (the xtype
    decoder for an elementary thing), or None if the array is too complex.

    field needs to have an isMultiDim method and an arraysize attribute.
    """
    if not field.isMultiDim():
        # nothing to loop over: the elementary code is used unchanged.
        return lambda code: code

    if field.arraysize.count("x")!=1:
        # more than two dimensions; callers get None and need to cope.
        return None

    def loopify(code):
        # The generated code runs the elementary code (which works on a
        # variable val) once per item of the original val and collects
        # the results into a new list that ends up in val again.
        return [
            "seq, arr = val, []",
            "for val in seq:",
            ]+indentList(code, " ")+[
            " arr.append(val)",
            "val = arr"]

    return loopify
127
128
def getXtypeEncoderCode(field):
    """returns code that turns special internal representations for
    xtyped fields to what's serialised in VOTables.

    For None or unknown xtypes, this will return an empty list.  Otherwise,
    it expects the value in a local variable val and will leave the
    transformed value there.

    This is currently only called for char and float arrays, as no
    xtypes are defined for other types.  If that changes, you'll have
    to change the *_enc modules.

    This will handle 1D arrays of xtyped things but nothing more deeply
    nested.  More deeply nested structures will be left alone (which will
    only work under very special conditions and yield ugly error messages
    otherwise).
    """
    loopify = getLoopifier(field)
    if loopify is None:
        # array too deeply nested: emit no conversion code at all.
        return []

    xtype = field.xtype

    if xtype in ("adql:TIMESTAMP", "timestamp"):
        code = [
            "if isinstance(val, datetime.datetime):",
            " val = utils.formatISODT(val)"]

    elif xtype=="dachs:DATE":
        code = [
            "if isinstance(val, datetime.date):",
            " val = val.isoformat()"]

    elif xtype in ("adql:POINT", "adql:REGION"):
        code = [
            "if isinstance(val, pgsphere.PgSAdapter):",
            " val = val.asSTCS('UNKNOWNFrame')"]

    elif xtype in ("point", "circle", "polygon", "moc", "x:box"):
        code = [
            "if isinstance(val, pgsphere.PgSAdapter):",
            " val = val.asDALI()"]

    else:
        return []

    return loopify(code)
173
174
def getXtypeDecoderCode(field):
    """returns code that turns generic VOTable arrays into special internal
    representations based on xtype.

    This returns a list of lines or an empty list if no known xtype
    is found.  The code is executed with the unpacked array seen as val,
    and it should set val to the special representation.  Empty (falsy)
    vals are turned into None by the generated code.

    This will handle 1D arrays of xtyped things but nothing more deeply
    nested.  More deeply nested structures will be left alone (which is
    ok for round-tripping but probably will fail when DaCHS components
    want to process stuff).
    """
    # xtype -> the line doing the actual conversion; it is indented by one
    # blank because it goes into the else branch of the generated code.
    conversions = {
        "adql:POINT": " val = stc.parseSimpleSTCS(val)",
        "adql:REGION": " val = stc.simpleSTCSToPolygon(val)",
        "point": " val = pgsphere.SPoint.fromDALI(val)",
        "circle": " val = pgsphere.SCircle.fromDALI(val)",
        "polygon": " val = pgsphere.SPoly.fromDALI(val)",
        "moc": " val = pgsphere.SMoc.fromDALI(val)",
        "x:box": " val = pgsphere.SBox.fromDALI(val)",
        "adql:TIMESTAMP": " val = parseDefaultDatetime(val)",
        "timestamp": " val = parseDefaultDatetime(val)",
        "dachs:DATE": " val = parseDefaultDate(val)",
    }

    if not field.xtype:
        return []

    loopify = getLoopifier(field)
    if loopify is None:
        # array too deeply nested: leave the value alone.
        return []

    conversion = conversions.get(field.xtype)
    if conversion is None:
        # unknown xtype
        return []

    return loopify([
        "if not val:",
        " val = None",
        "else:",
        conversion])
244
245
class NULLFlags(object):
    """an interface to the BINARY2 NULL flags.

    Construct it with the number of fields, then use serialize or
    serializeFromRow to produce the flag bytes for a row, and deserialize
    or getFromFile to turn flag bytes back into a sequence of booleans.

    Flags are packed with the first field in the most significant bit of
    the first byte; unused bits of the last byte are zero.
    """
    masks = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]

    def __init__(self, nFields):
        self.nFields = nFields
        # floor division: nBytes is used as a read size and must be an int
        # regardless of the division semantics in force.
        self.nBytes = (self.nFields+7)//8

    def serialize(self, nullMap):
        """returns null bytes for nullMap, which is a sequence of booleans
        with Trues where the field is NULL.

        It is an error to pass in nullMaps with lengths!=nFields.
        """
        assert len(nullMap)==self.nFields
        packed, curBits, val = bytearray(), 0, 0
        for isNull in nullMap:
            val <<= 1
            if isNull:
                val += 1
            curBits += 1
            if curBits==8:
                packed.append(val)
                curBits, val = 0, 0

        if curBits:
            # left-align the flags in the last, partially filled byte
            val <<= (8-curBits)
            packed.append(val)
        return bytes(packed)

    def serializeFromRow(self, row):
        """returns null bytes for a row, which is a sequence of values.
        Everything that's None is flagged as NULL.
        """
        return self.serialize([v is None for v in row])

    def deserialize(self, bytes):
        """returns a sequence of booleans giving for each element in a row
        if there's a NULL there.

        bytes is the sequence of flag bytes; at most nFields flags are
        returned (fewer if bytes is too short).
        """
        nulls = []
        for char in bytes:
            # iterating a byte string yields ints or 1-char strings,
            # depending on the python version; accept both.
            byte = char if isinstance(char, int) else ord(char)
            for mask in self.masks:
                nulls.append(bool(mask&byte))
                if len(nulls)==self.nFields:
                    # the rest is padding (or excess input)
                    return nulls
        return nulls

    def getFromFile(self, file):
        """returns a sequence of booleans giving for each element in a row
        if there's a NULL there.

        This reads nBytes bytes from file.
        """
        return self.deserialize(file.read(self.nBytes))
307
308
def isMultiDim(arraysize):
    """returns True if the VOTable arraysize denotes a >1D-array.

    arraysize is the literal of the VOTable attribute or None (for
    scalars); anything containing an "x" counts as multidimensional.
    """
    return arraysize is not None and "x" in arraysize
313
def hasVarLength(arraysize):
    """returns True if the VOTable arraysize denotes a variable-length array.

    This is, of course, False for None (and empty) arraysizes.
    """
    # bool() makes sure we really return False rather than None or ""
    # for missing arraysizes.
    return bool(arraysize) and arraysize.endswith("*")
320
321
def getLength(arraysize):
    """returns the number of elements expected for an array described with
    the VOTable attribute arraysize.

    A 1-element array isn't told apart from a scalar here.  Both return 1.
    For variable-length arrays, this returns None.

    Bad arraysize specs will give ValueErrors (perhaps not always with the
    most helpful messages).

    >>> getLength(None)
    1
    >>> getLength("*")
    >>> getLength("5")
    5
    >>> getLength("5x*")
    >>> getLength("5x6*")
    >>> getLength("7x5x6")
    210
    >>> getLength("7*x5x6")
    Traceback (most recent call last):
    ValueError: invalid literal for int() with base 10: '7*'
    """
    if arraysize is None:
        return 1

    if arraysize.endswith("*"):
        return None

    if "x" in arraysize:
        # multidimensional: the total length is the product of all
        # dimensions; int() errors for bad dimensions propagate as they
        # are.
        length = 1
        for dim in arraysize.split("x"):
            length *= int(dim)
        return length

    try:
        return int(arraysize)
    except ValueError:
        # fall through to the error naming the entire spec
        pass
    raise ValueError("Invalid arraysize specification: %s"%arraysize)
358
359
def getShape(datatype, arraysize):
    """returns a numpy-compatible shape for a VOTable arraysize.

    For variable length 1D arrays, this returns None; for 2+D arrays, the
    last dimension is currently replaced by 1.  Which doesn't sound smart.

    None is also returned for scalars (arraysize None) and for char
    arrays that are not multidimensional.  Otherwise, the result is a
    tuple of ints; any asterisks in arraysize are ignored.  Malformed
    arraysizes yield ValueErrors from int().
    """
    if arraysize is None:
        return None

    if datatype=="char" and "x" not in arraysize:
        # NOTE(review): presumably 1D char arrays are treated as plain
        # strings and hence have no shape -- confirm with the callers.
        return None

    if arraysize=="*":
        return None

    dims = arraysize.replace("*", "")
    if "x" not in dims:
        return (int(dims),)

    if dims.endswith("x"):
        # a variable last dimension ("5x*") left a dangling x behind
        dims = dims+'1'
    return tuple(int(d) for d in dims.split("x"))
384
385
389
390
391 if __name__=="__main__":
392 _test()
393