1 """
2 Code to bind the adql library to the data center software.
3 """


import sys

from gavo import adql
from gavo import base
from gavo import rsc
from gavo import rscdef
from gavo import svcs
from gavo import utils


class TDContext(object):
    """An object keeping track of the generation of a table definition
    for ADQL output.
    """
    def __init__(self):
        self.existingNames = set()

    def getName(self, desiredName):
        while desiredName in self.existingNames:
            desiredName = desiredName+"_"
        self.existingNames.add(desiredName)
        return desiredName
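
# A quick illustration of getName's dedup logic: if three output columns
# all want the name "ra", they become "ra", "ra_", and "ra__":
#
#   ctx = TDContext()
#   [ctx.getName("ra") for _ in range(3)]  # -> ['ra', 'ra_', 'ra__']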


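# Values standing in for NULL in column types that have no in-band NULL
# in (binary) VOTable serialization; _makeColumnFromFieldInfo below
# installs them as nullLiterals when a column doesn't already declare one.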
_artificialNULLs = {
    "bytea": "255",
    "smallint": "-32768",
    "integer": "-2147483648",
    "bigint": "-9223372036854775808",
}


def _makeColumnFromFieldInfo(ctx, colName, fi):
    """constructs a rscdef.Column from a field info pair as left by the
    ADQL machinery.

    The strategy: If there's only one userData, we copy the Column
    contained in there, update the unit and the ucd, plus a warning
    if the Column has been tainted.

    If there is not exactly one userData, we create a new Column, use
    the data provided by fi, and make up a description consisting of
    the source descriptions.  Add a taint warning if necessary.

    Since we cannot assign sensible verbLevels and assume the user wants
    to see what they selected, all fields get verbLevel 1.

    Types are a serious problem, handled by typesystems.
    """
    if len(fi.userData)==1:
        res = svcs.OutputField.fromColumn(fi.userData[0])
        if hasattr(fi.userData[0], "originalName"):
            colName = fi.userData[0].originalName
    else:
        res = base.makeStruct(svcs.OutputField, name=colName)

    res.name = ctx.getName(colName)
    res.ucd = fi.ucd
    res.unit = fi.unit
    res.type = fi.type
    res.stc = fi.stc

    if len(fi.userData)>1:
        res.description = ("This field has traces of: %s"%("; ".join([
            f.description for f in fi.userData if f.description])))

    if fi.tainted:
        res.description = (res.description+" -- *TAINTED*: the value"
            " was operated on in a way that unit and ucd may be severely wrong")

    # The ADQL machinery may have attached an xtype; such values need
    # munging on output.
    if "xtype" in fi.properties:
        res.xtype = fi.properties["xtype"]
        res.needMunging = True

    # dates and timestamps are both serialized as timestamps
    if res.type=="date" or res.type=="timestamp":
        res.xtype = "timestamp"

    # Integral columns need a null literal: binary VOTables have no
    # in-band NULL for them, and we cannot rule out NULLs in the result.
    # Install an artificial one unless a safe value is already declared
    # (a declared nullLiteral is no longer trustworthy on tainted columns).
    if (res.type in _artificialNULLs
            and (
                not (res.values and res.values.nullLiteral)
                or fi.tainted)):
        nullLiteral = _artificialNULLs[res.type]
        if res.values:
            res.feedObject("values", res.values.change(nullLiteral=nullLiteral))
        else:
            res.feedObject("values", base.makeStruct(rscdef.Values,
                nullLiteral=nullLiteral))

    res.verbLevel = 1
    res.finishElement()
    return res


130 """returns a sequence of Column instances describing the output of the
131 parsed and annotated ADQL query parsedTree.
132 """
133 ctx = TDContext()
134 columns = [_makeColumnFromFieldInfo(ctx, *fi)
135 for fi in parsedTree.fieldInfos.seq]
136
137
138
139 fromNames = [t.qName
140 for t in parsedTree.fromClause.getAllTables()
141 if hasattr(t, "qName")]
142
143 if len(fromNames)==1:
144 try:
145 srcTable = base.caches.getMTH(None).getTableDefForTable(fromNames[0])
146
147
148 resTable = srcTable.change(columns=columns, groups=[], primary=())
149 resTable.copyMetaFrom(srcTable)
150 resTable.id = srcTable.id
151 return resTable
152 except base.NotFoundError:
153
154
155 pass
156
157 resTable = base.makeStruct(rscdef.TableDef, columns=columns,
158 id=parsedTree.suggestAName())
159
160 return resTable
161


def _getADQLName(col):
    """returns the name a column is known as within the ADQL query.

    This can be different from the actual column name for uploaded
    tables, where we have to rename columns called oid, tableoid,...

    On the SQL side, our internal name is being used.
    """
    return getattr(col, "originalName", col.name)
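
# Hypothetical example: a column uploaded as "oid" clashes with a postgres
# system column and gets renamed internally; its originalName stays "oid",
# so _getADQLName returns what the query author wrote, while col.name is
# what the generated SQL uses.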


# (the original name of this helper is lost; the definition line below
# is a reconstruction)
def adqlTableRefToDaCHS(tableName):
    """returns a DaCHS-internal table name suitable for dc.tablemeta for
    an ADQL TableName node.

    DaCHS does not support catalog parts, so those error out immediately.
    Nor do we support delimited table identifiers: anything delimited that
    does not consist exclusively of lower case letters must therefore fail
    immediately.  When a delimited identifier is all lower case, people
    engaged in gratuitous quoting; just unquote it and move on.
    """
    if isinstance(tableName, basestring):
        return tableName

    surfaceForm = adql.flatten(tableName)
    if tableName.cat:
        raise base.NotFoundError(surfaceForm, "table", "published tables",
            hint="DaCHS services have no tables with catalog parts.")

    if isinstance(tableName.schema, utils.QuotedName):
        if not tableName.schema.isRegularLower():
            raise base.NotFoundError(surfaceForm, "table", "published tables",
                hint="You probably should not quote the table schema")
        schema = tableName.schema.name+"."
    else:
        if tableName.schema:
            schema = tableName.schema+"."
        else:
            schema = ""

    if isinstance(tableName.name, utils.QuotedName):
        if not tableName.name.isRegularLower():
            raise base.NotFoundError(surfaceForm, "table", "published tables",
                hint="You probably should not quote the table name")
        else:
            name = tableName.name.name
    else:
        name = tableName.name

    return schema+name
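
# Some illustrative mappings under the rules above (names made up):
#   myschema.mytable      -> "myschema.mytable"
#   "myschema"."mytable"  -> "myschema.mytable"  (gratuitous quoting undone)
#   cat.myschema.mytable  -> NotFoundError (no catalog support)
#   "MyTable"             -> NotFoundError (delimited, not all lower case)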


class DaCHSFieldInfoGetter(adql.FieldInfoGetter):
    # Supplies the ADQL annotator with field metadata for database
    # tables and uploads.  (base class is a reconstruction; the method
    # bodies are not included in this copy)
    def __init__(self, accessProfile=None, tdsForUploads=[]):
        pass


304 """instruments the ADQL tree for the user row set limit maxrec
305 and the system row set limit hard limit.
306
307 maxrec is a match limit from the protocol level, as opposed to the
308 setLimit from the ADQL TOP clause. The rules of interaction between
309 the two are documented inline below (it's messy).
310
311 This returns the overflow set limit. If exactly this many rows
312 are returned from he query, and overflow indicator should be set.
313 """
314 tree.overflowLimit = None
315
316
317 if hardLimit is None:
318 hardLimit = base.getConfig("async", "hardMAXREC")
319
320 if maxrec is None:
321 maxrec = base.getConfig("async", "defaultMAXREC")
322
323 maxrec = min(maxrec, hardLimit)
324
325 if not tree.setLimit:
326
327
328 tree.setLimit = maxrec
329 return maxrec
330
331 elif maxrec>tree.setLimit:
332
333
334 return maxrec
335
336 elif maxrec==tree.setLimit:
337
338
339 return maxrec+1
340
341 else:
342
343
344
345 tree.setLimit = maxrec+1
346 return maxrec+1
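
# Example of the rules above (a sketch; 2000 standing in for defaultMAXREC):
#   no TOP, maxrec=2000   -> TOP 2000 added, overflow at 2000 rows
#   TOP 100, maxrec=2000  -> query untouched, can never overflow
#   TOP 2000, maxrec=2000 -> query untouched, 2000 rows is no overflow
#   TOP 5000, maxrec=2000 -> rewritten to TOP 2001, overflow at 2001 rows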


def morphADQL(query, metaProfile=None, tdsForUploads=[],
        maxrec=None, hardLimit=None):
    """returns a postgres query and an (empty) result table for the
    ADQL in query.

    For an explanation of maxrec and hardLimit, and of the additional
    table.tableDef.overflowLimit attribute on the returned table (always
    an integer), see _updateMatchLimits above.
    """
    ctx, t = adql.parseAnnotating(query,
        DaCHSFieldInfoGetter(metaProfile, tdsForUploads))

    table = rsc.TableForDef(_getTableDescForOutput(t))
    table.tableDef.overflowLimit = _updateMatchLimits(t, maxrec, hardLimit)
    if hardLimit and int(t.setLimit)>hardLimit:
        table.addMeta("_warning", "This service has a hard row limit"
            " of %s. Your row limit was decreased to this value."%hardLimit)
        t.setLimit = str(hardLimit)

    morphStatus, morphedTree = adql.morphPG(t)
    for warning in morphStatus.warnings:
        table.addMeta("_warning", warning)

    # Escape % so the dbapi doesn't mistake them for parameter markers.
    query = adql.flatten(morphedTree).replace("%", "%%")

    _addTableMeta(query, t, table)

    return query, table
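
# A minimal usage sketch (assuming a configured DaCHS instance and a
# published table ivoa.obscore):
#   pgQuery, table = morphADQL("SELECT TOP 10 * FROM ivoa.obscore")
# pgQuery is then ready for execution against postgres; table is empty,
# but its tableDef already describes the result columns and overflowLimit.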


def query(querier, query, timeout=15, metaProfile=None, tdsForUploads=[],
        externalLimit=None, hardLimit=None):
    """returns a result table for query (a string containing ADQL).

    This will set timeouts and other things for the connection in
    question.  You should have one allocated especially for this query.
    """
    query, table = morphADQL(query, metaProfile, tdsForUploads, externalLimit,
        hardLimit=hardLimit)
    addTuple = table.addTuple
    oldTimeout = querier.getTimeout()
    querier.setTimeout(timeout)

    # Discourage sequential scans for this query; with the TOP clauses
    # added above, the postgres planner tends to pick them badly.
    querier.configureConnection([("enable_seqscan", False)])

    for row in querier.query(query):
        addTuple(row)
    querier.setTimeout(oldTimeout)

    if len(table)==table.tableDef.overflowLimit:
        table.addMeta("_warning", "Query result probably incomplete due"
            " to the match limit kicking in.  Queries not providing a TOP"
            " clause will be furnished with an automatic TOP %s by the"
            " machinery, so adding a TOP clause with a higher number may"
            " help."%base.getConfig("adql", "webDefaultLimit"))
    return table


def mapADQLErrors(excType, excValue, excTb):
    if isinstance(excValue, (adql.ParseException, adql.ParseSyntaxException)):
        raise base.ui.logOldExc(
            base.ValidationError("Could not parse your query: %s"%
                unicode(excValue), "query"))
    elif isinstance(excValue, adql.ColumnNotFound):
        raise base.ui.logOldExc(base.ValidationError("No such field known: %s"%
            unicode(excValue), "query"))
    elif isinstance(excValue, adql.AmbiguousColumn):
        raise base.ui.logOldExc(base.ValidationError("%s needs to be qualified."%
            unicode(excValue), "query"))
    elif isinstance(excValue, adql.Error):
        raise base.ui.logOldExc(base.ValidationError(unicode(excValue), "query"))
    else:
        svcs.mapDBErrors(excType, excValue, excTb)


class ADQLCore(svcs.Core, base.RestrictionMixin):
    """A core taking an ADQL query from its query argument and returning the
    result of that query in a standard table.

    Since the columns returned depend on the query, the outputTable of an
    ADQL core must not be defined.
    """
    name_ = "adqlCore"

    def run(self, service, inputTable, queryMeta):
        inRow = inputTable.getParamDict()
        queryString = inRow["query"]
        base.ui.notifyInfo("Incoming ADQL query: %s"%queryString)
        try:
            with base.AdhocQuerier(base.getUntrustedConn) as querier:
                res = query(querier, queryString,
                    timeout=queryMeta["timeout"], hardLimit=100000,
                    externalLimit=queryMeta["dbLimit"])
            # The match limits have already been applied in query above;
            # keep downstream processing from cutting the result further.
            res.noPostprocess = True
            queryMeta["Matched"] = len(res.rows)
            return res
        except:
            mapADQLErrors(*sys.exc_info())


import re

# (helper reconstructed: the original definition line is missing; the
# name and the default pattern are assumptions)
def _getRegionId(regionSpec, pat=re.compile("[A-Za-z_]+")):
    mat = pat.match(regionSpec)
    if mat:
        return mat.group()