Package gavo :: Package adql :: Module morphpg
[frames] | no frames]

Source Code for Module gavo.adql.morphpg

  1  """ 
  2  Morphing ADQL into queries that postgres/pgSphere can understand. 
  3   
  4  Basically, Postgres supports most of the stuff out of the box, and it's 
  5  just a matter of syntax. 
  6   
  7  We morph most of the geometry stuff to pgsphere; while some of it would 
  8  work with plain postgres in a plane approximation, it's certainly not 
  9  worth the effort. 
 10   
 11  There's also code to replace certain CONTAINS calls with q3c function 
 12  calls. 
 13  """ 
 14   
 15  #c Copyright 2008-2019, the GAVO project 
 16  #c 
 17  #c This program is free software, covered by the GNU GPL.  See the 
 18  #c COPYING file in the source distribution. 
 19   
 20   
 21  from gavo.adql import common 
 22  from gavo.adql import morphhelpers 
 23  from gavo.adql import nodes 
 24  from gavo.adql.nodes import flatten 
 25  from gavo.stc import tapstc 
 26   
 27   
class PostgresMorphError(common.MorphError):
    """A MorphError raised by the postgres morphers in this module.

    Within this module, it is raised when a CENTROID of an unsupported
    geometry is requested.
    """
30 31 32 33 ######## Begin q3c specials 34 # q3c morphing must happen before pgsphere morphs all the mess to 35 # become spoints and stuff (at least the way we built things so far). 36 # Hence, this is written as a fairly freaky early morpher. 37
def _flatAndMorph(node):
    """returns node as a string after applying the standard morphs to it.

    Strings are handed back unchanged; anything else is run through
    morphPG and the second item of its result is flattened.

    This is needed for the arguments of q3c calls, since there may be
    ADQL specifics in there.
    """
    if not isinstance(node, basestring):
        return nodes.flatten(morphPG(node)[1])
    return node
46 47
def _booleanizeContainsQ3C(node, operator, operand):
    """turns ADQL CONTAINS calls into q3c expressions if appropriate.

    This will only work if the arguments have been morphed into pgsphere
    geometries already.  It will leave alone anything it doesn't understand
    (by returning None or handing a None expression on), hopefully for
    pgsphere to pick it up.

    operator and operand are passed on to morphhelpers.addNotToBooleanized
    together with the q3c expression generated here.
    """
    # recover the pre-pgsphere-morph objects where the morphed ones
    # kept a reference to them
    args = [getattr(arg, "original", arg) for arg in node.args]

    # leave morphing to someone else if we don't have exactly two
    # arguments...
    if len(args)!=2:
        return None
    p, shape = args

    # ...or if the arguments do not look like geometries (someone else
    # gets to blow up then)...
    if not (hasattr(p, "cooSys") and hasattr(shape, "cooSys")):
        return None

    # ...or if we'd need a system transformation; q3c cannot do this.
    if tapstc.getPGSphereTrafo(p.cooSys, shape.cooSys) is not None:
        return None

    # NOTE(review): nothing here verifies that p actually is a point;
    # confirm that shape-in-shape CONTAINS cannot end up in the branches
    # below.
    expr = None

    if shape.type=="circle":
        # without split coordinates there's nothing q3c can do
        if p.x is None:
            return None

        # The pg planner works much smoother if you have constants first.
        if p.x.type=='columnReference':
            joinArgs = (shape.x, shape.y, p.x, p.y, shape.radius)
        else:
            joinArgs = (p.x, p.y, shape.x, shape.y, shape.radius)
        expr = "q3c_join(%s, %s, %s, %s, %s)"%tuple(
            [_flatAndMorph(joinArg) for joinArg in joinArgs])

    elif shape.type=="polygon":
        # if there's spoint columns in the array, there's no coos but
        # points, and q3c can't do it.
        if shape.coos is not None:
            pointX, pointY = _flatAndMorph(p.x), _flatAndMorph(p.y)
            vertices = ",".join([
                "%s,%s"%(_flatAndMorph(x), _flatAndMorph(y))
                for x, y in shape.coos])
            expr = "q3c_poly_query(%s, %s, ARRAY[%s])"%(
                pointX, pointY, vertices)

    return morphhelpers.addNotToBooleanized(expr, operator, operand)


morphhelpers.registerBooleanizer("CONTAINS", _booleanizeContainsQ3C)
def _booleanizeCROSSMATCH(node, operator, operand):
    """turns a CROSSMATCH call into a q3c_join call.

    The function node is renamed in place, morphed and flattened; operator
    and operand are passed on to morphhelpers.addNotToBooleanized.
    """
    node.funName = "q3c_join"
    joinExpr = _flatAndMorph(node)
    return morphhelpers.addNotToBooleanized(joinExpr, operator, operand)


morphhelpers.registerBooleanizer("CROSSMATCH", _booleanizeCROSSMATCH)
def _distanceToQ3C(node, state):
    """morphs a DISTANCE call with split coordinate arguments to q3c.

    If the node on top of state.nodeStack is a comparisonPredicate using
    one of <, <=, >, >=, the comparison as a whole is replaced by a
    (possibly negated) q3c_join call: the replacement is stored in the
    parent's OVERRIDE_RESULT attribute and node itself is returned.

    In all other cases, a plain q3c_dist call is returned as a string.
    """
    # this is called by _distanceToPG below (which seems preferable
    # to a complex mechanism letting us decouple pgs and q3c morphing)
    # _distanceToPG only calls us if node has split arguments, so we just
    # assume that.
    # Experimentally, comparisons with q3c_dist are much less likely to
    # be index-optimised than q3c_join, so we take a bit of pains to
    # generate a q3c_join.  This includes rather crazy stack manipulations
    # to tell the comparison to disappear.

    parent = state.nodeStack[-1]
    if (parent.type!="comparisonPredicate"
            or parent.opr not in ['<', '<=', '>', '>=']):
        # no supported comparison; let's punt to q3c_dist
        return "q3c_dist(%s, %s, %s, %s)"%tuple(
            flatten(a) for a in node.args)

    # normalise the comparison such that the distance is the first operand;
    # when swapping the operands, the operator has to be mirrored, too.
    selfOp = getattr(node, "original", node)
    if parent.op1 is not selfOp:
        parent.op1, parent.op2 = node, parent.op1
        parent.opr = {'<': '>', '<=': '>=', '>': '<', '>=': '<='}[parent.opr]
    # NOTE(review): if node has an original attribute that is a different
    # object and parent.op1 is that original, no swap happens and this
    # assertion fires -- confirm that cannot occur.
    assert parent.op1 is node

    # "distance larger than x" becomes a negated join; note that the
    # strict and non-strict operators are not told apart here.
    negation = ""
    if parent.opr in ['>', '>=']:
        negation = "NOT "

    if (getattr(node.args[0], "type", None)=="columnReference"
            and getattr(node.args[2], "type", None)!="columnReference"):
        # help the planner a bit by moving constants first
        # (this swaps the first two arguments with the rest)
        node.args = node.args[2:]+node.args[:2]

    # negation, the arguments of the distance call, and the other operand
    # of the comparison, which serves as the last q3c_join argument.
    fillers = (negation,) + tuple(
        flatten(a) for a in node.args)+(flatten(parent.op2),)

    # presumably evaluated when the parent comparison is serialised --
    # TODO confirm in morphhelpers
    parent.OVERRIDE_RESULT = "%s q3c_join(%s, %s, %s, %s, %s)"%fillers
    return node
148 149 ######### End q3c specials 150 151 152 153 ######### Begin morphing to pgSphere 154 155
class PgSphereCode(object):
    """A node holding an already serialized pgsphere expression together
    with a coordinate system id for cases in which we must conform.

    Pass the optional original (the node the expression was generated
    from) so code like the q3c booleanizer can still work on the
    un-morphed object if necessary.
    """
    type = "pgsphere literal"

    def __init__(self, cooSys, content, original=None):
        self.cooSys = cooSys
        self.content = content
        self.original = original

    def flatten(self):
        """returns the serialized pgsphere expression."""
        return self.content

    def iterAttributes(self):
        """returns an iterator over nothing; there are no child attributes."""
        # the unreachable yield makes this a generator function
        return
        yield
176 177
def _morphCircle(node, state):
    """returns pgsphere code for an scircle built from a circle node.

    Circles come either with split center coordinates (node.x, node.y) or
    with a center expression (node.center); angles are converted to
    radians in the generated SQL.
    """
    if node.center is not None:
        code = "scircle(%s, RADIANS(%s))"%(
            _flatAndMorph(node.center), _flatAndMorph(node.radius))
    else:
        code = "scircle(spoint(RADIANS(%s), RADIANS(%s)), RADIANS(%s))"%(
            _flatAndMorph(node.x),
            _flatAndMorph(node.y),
            _flatAndMorph(node.radius))
    return PgSphereCode(node.cooSys, code, original=node)
190 191
def _morphPoint(node, state):
    """returns pgsphere code for an spoint built from a point node."""
    firstCoo, secondCoo = flatten(node.x), flatten(node.y)
    code = "spoint(RADIANS(%s), RADIANS(%s))"%(firstCoo, secondCoo)
    return PgSphereCode(node.cooSys, code, original=node)
197 198
def _makePoly(cooSys, points, node):
    """returns pgsphere code aggregating points into an spoly.

    points is a sequence of SQL expressions; they are numbered in a
    VALUES list and fed to spoly ordered by that number.  This is a
    helper for _morphPolygon and _morphBox.
    """
    numberedPoints = ", ".join(
        '(%d, %s)'%(index, point) for index, point in enumerate(points))
    code = ("(SELECT spoly(q.p) FROM (VALUES %s ORDER BY column1)"
        " as q(ind,p))")%numberedPoints
    return PgSphereCode(cooSys, code, original=node)
205 206
def _morphPolygon(node, state):
    """returns pgsphere code for a polygon node.

    The vertices are taken from node.coos (coordinate pairs, turned into
    spoints here) or else from node.points (point-valued expressions).
    """
    if node.coos is not None:
        points = []
        for coo in node.coos:
            points.append('spoint(RADIANS(%s), RADIANS(%s))'%(
                _flatAndMorph(coo[0]), _flatAndMorph(coo[1])))
    elif node.points is not None:
        points = []
        for point in node.points:
            points.append(_flatAndMorph(point))
    else:
        # a polygon must have either coordinates or points
        assert False

    return _makePoly(node.cooSys, points, node)
218 219
def _morphBox(node, state):
    """returns pgsphere code for a box node, expressed as a polygon.

    The four corners are computed in SQL as center plus or minus half the
    extent, with all quantities converted to radians.
    """
    x, width, y, height = [
        "RADIANS(%s)"%_flatAndMorph(value)
        for value in (node.x, node.width, node.y, node.height)]

    # signs of the half-extents for the four corners, in vertex order
    cornerSigns = [("-", "-"), ("-", "+"), ("+", "+"), ("+", "-")]
    points = [
        "spoint(%s%s%s/2, %s%s%s/2)"%(x, signX, width, y, signY, height)
        for signX, signY in cornerSigns]
    return _makePoly(node.cooSys, points, node)
229 230
231 -def _getSystem(node):
232 return getattr(node, "cooSys", None)
233 234
235 -def _transformSystems(pgLiteral, fromSystem, toSystem):
236 # a helper to _booleanizeGeoPredsPGS 237 if fromSystem!=toSystem: 238 trafo = tapstc.getPGSphereTrafo(fromSystem, toSystem) 239 if trafo is not None: 240 pgLiteral = "(%s)%s"%(pgLiteral, trafo) 241 return pgLiteral
242 243
def _booleanizeGeoPredsPGS(node, operator, operand):
    """morphs CONTAINS and INTERSECTS to pgsphere expressions when
    they are arguments to a suitable comparison.

    node is the function node of the geometry predicate; operator and
    operand are passed on to morphhelpers.addNotToBooleanized together
    with the pgsphere expression generated here.

    For function names other than CONTAINS and INTERSECTS, None is
    returned.

    If exactly one argument is a compound region (a tapstc.GeomExpr),
    that region's asLogic method is used to build the expression from a
    template in which %s stands for the region's parts.

    Raises NotImplementedError if both arguments are compound regions.
    """
    if node.funName=="CONTAINS":
        geoOp = "@"
    elif node.funName=="INTERSECTS":
        geoOp = "&&"
    else:
        return None

    arg1, arg2 = node.args[0], node.args[1]
    sys1, sys2 = _getSystem(arg1), _getSystem(arg2)

    # NOTE(review): in the two compound-region branches, the plain argument
    # is transformed with the from/to systems in the opposite order of what
    # the plain branch suggests.  This is kept as found; confirm against
    # tapstc.getPGSphereTrafo whether that is intended.
    if isinstance(arg1, tapstc.GeomExpr):
        if isinstance(arg2, tapstc.GeomExpr):
            raise NotImplementedError("Cannot have compound regions in both"
                " arguments of a geometry predicate")
        arg2Str = _transformSystems(flatten(arg2), sys1, sys2)
        expr = arg1.asLogic("(%%s %s (%s))"%(geoOp, arg2Str))

    elif isinstance(arg2, tapstc.GeomExpr):
        arg1Str = _transformSystems(flatten(arg1), sys2, sys1)
        # the compound region is the *second* argument here, so that is
        # what has to expand the template (the first argument is known
        # not to be a GeomExpr in this branch).
        expr = arg2.asLogic("((%s) %s (%%s))"%(arg1Str, geoOp))

    else:  # both arguments plain
        arg1Str = _transformSystems(flatten(arg1), sys1, sys2)
        arg2Str = flatten(arg2)
        expr = "((%s) %s (%s))"%(arg1Str, geoOp, arg2Str)

    return morphhelpers.addNotToBooleanized(expr, operator, operand)


morphhelpers.registerBooleanizer("CONTAINS", _booleanizeGeoPredsPGS)
morphhelpers.registerBooleanizer("INTERSECTS", _booleanizeGeoPredsPGS)
def _computePointFunction(node, state):
    """morphs COORD1, COORD2, and COORDSYS calls.

    COORD1 and COORD2 become pgsphere long and lat calls converted to
    degrees.  COORDSYS becomes a string literal taken from the argument's
    field info if there is one, else from its cooSys attribute (falling
    back to UNKNOWN).  Any other function is returned unchanged.
    """
    accessor = {"COORD1": "long", "COORD2": "lat"}.get(node.funName)
    if accessor is not None:
        return "DEGREES(%s(%s))"%(accessor, flatten(node.args[0]))

    if node.funName!="COORDSYS":
        return node

    arg = node.args[0]
    if arg.fieldInfo:
        cSys = tapstc.getTAPSTC(arg.fieldInfo.stc)
    else:
        cSys = getattr(arg, "cooSys", "UNKNOWN")
    return "'%s'"%cSys
291 292
def _distanceToPG(node, state):
    """morphs a DISTANCE call.

    With point arguments, this generates pgsphere's distance operator
    (converted to degrees); calls with split coordinates are handed over
    to _distanceToQ3C.
    """
    if not node.pointArguments:
        return _distanceToQ3C(node, state)
    flatArgs = tuple([flatten(arg) for arg in node.args])
    return "DEGREES((%s) <-> (%s))"%flatArgs
298 299
def _centroidToPG(node, state):
    """morphs a CENTROID call to pgsphere's @@ operator.

    Raises a PostgresMorphError if the argument is recognisably a polygon
    or a box, either from the un-morphed node's type or from the type in
    the argument's field info.
    """
    # pgsphere right now can only do centroids of points and circles.  Try
    # to come up with a good error message otherwise.

    def _fail():
        raise PostgresMorphError("Can only compute centroids of circles and points"
            " yet.  Complain to make us implement other geometries faster.")

    arg = node.args[0]
    if hasattr(arg, "original"):
        # inspect the node the pgsphere code was generated from
        arg = arg.original
        if arg.type=="polygon" or arg.type=="box":
            _fail()

    if getattr(arg, "fieldInfo", None):
        fi = arg.fieldInfo
        if fi.type=="spoly" or fi.type=="sbox":
            _fail()

    # the operand is the (morphed) argument as passed in, not the original
    return "@@(%s)"%(flatten(node.args[0]))
def _areaToPGSphere(node, state):
    """returns the flattened node scaled from square radians to square
    degrees.

    pgsphere returns rad**2, ADQL wants deg**2.
    """
    # (180/pi)**2, written out as it is to appear in the SQL
    sqDegPerSqRad = "3282.806350011744"
    return "%s*%s"%(sqDegPerSqRad, flatten(node))
325 326
327 -def _regionToPG(node, state):
328 # Too obscure right now. 329 raise NotImplementedError("The REGION string you supplied is not" 330 " supported on this server")
331 332
def _stcsRegionToPGSphere(node, state):
    """morphs an STC-S region node.

    STCSRegions embed something returned by tapstc's parser.  If that is
    a tapstc.GeomExpr, it is returned as is, leaving it to a contains or
    intersects handler to rewrite the entire expression.  Anything else is
    serialized through its asPgSphere method and wrapped as PgSphereCode.
    """
    parsed = node.tapstcObj
    if not isinstance(parsed, tapstc.GeomExpr):
        return PgSphereCode(node.cooSys, parsed.asPgSphere())
    return parsed
# node type -> function morphing nodes of that type for pgsphere/q3c
_geometricMorphers = {
    "circle": _morphCircle,
    "point": _morphPoint,
    "box": _morphBox,
    "polygon": _morphPolygon,
    "pointFunction": _computePointFunction,
    "distanceFunction": _distanceToPG,
    "centroid": _centroidToPG,
    "region": _regionToPG,
    "stcsRegion": _stcsRegionToPGSphere,
    "area": _areaToPGSphere,
}


########## End morphing to pgSphere


# ADQL function name -> name to use in the generated SQL
_renamedFunctions = {
    "LOG": "LN",
    "LOG10": "LOG",
    "TRUNCATE": "TRUNC",
}

# ADQL bitwise function name -> callable building the operator
# expression from the function's arguments
_BITWISE_MORPHERS = {
    "BITWISE_NOT": lambda arg: "~(%s)"%flatten(arg),
    "BITWISE_AND": lambda left, right: "(%s)&(%s)"%(
        flatten(left), flatten(right)),
    "BITWISE_OR": lambda left, right: "(%s)|(%s)"%(
        flatten(left), flatten(right)),
    "BITWISE_XOR": lambda left, right: "(%s)#(%s)"%(
        flatten(left), flatten(right)),
}
def _adqlFunctionToPG(node, state):
    """morphs ADQL numeric functions that differ between ADQL and postgres.

    Functions in _renamedFunctions are renamed in place.  RAND, two-argument
    TRUNC/ROUND, SQUARE, and the bitwise functions are replaced by
    expressions (strings or, for TRUNC/ROUND, a new Term node).  Everything
    else is returned as the (possibly renamed) node.
    """
    try:
        node.funName = _renamedFunctions[node.funName]
    except KeyError:
        pass
    funName = node.funName

    if funName=='RAND':
        # ADQL lets RAND set a seed.  We should probably execute a separate
        # query with a crafted call to setseed for that.  There's no way to
        # do that right now, and it is not forced at this point since the
        # semantics in the ADQL spec are dubious anyway.  So, with or
        # without a seed argument, this is what you get:
        return "random()"

    if funName in ('TRUNC', 'ROUND'):
        # ADQL has two-arg TRUNCATE/ROUND -- these become expressions
        if len(node.args)==2:
            val, prec = flatten(node.args[0]), flatten(node.args[1])
            scaledCall = node.change(args=['(%s)*10^(%s)'%(val, prec)])
            newTerm = nodes.Term(children=[
                scaledCall,
                "/",
                "10^(%s)"%prec])
            newTerm.addFieldInfo(None)
            return newTerm
        return node

    if funName=='SQUARE':
        # ADQL SQUARE becomes a PG expression; we downgrade to a string.
        return "(%s)^2"%flatten(node.args[0])

    if funName in _BITWISE_MORPHERS:
        return _BITWISE_MORPHERS[funName](*node.args)

    return node
413 414
def _morphTimestamp(node, state):
    """returns a cast of the single argument of node to TIMESTAMP."""
    args = node.args
    assert len(args)==1
    timestampSource = flatten(args[0])
    return "(%s)::TIMESTAMP"%timestampSource
# node type -> morphing function for non-geometric functions
_miscMorphers = dict(
    numericValueFunction=_adqlFunctionToPG,
    timestampFunction=_morphTimestamp,
)
def morphMiscFunctions(tree):
    """returns the result of applying the morphers in _miscMorphers to tree.

    This replaces ADQL functions with (almost) equivalent expressions
    understood by the database.

    This is a function mostly for unit tests; morphPG does these
    transformations as well.
    """
    morphed = morphhelpers.morphTreeWithMorphers(tree, _miscMorphers)
    return morphed
433 434
class _PGSC(nodes.SelectNoParens):
    """A modified selectNoParens that fixes the syntactic differences
    between ADQL and postgres.
    """

    def flatten(self):
        """returns the select clause serialized with LIMIT at the end."""
        # (keyword, attribute name) pairs in serialization order
        clauses = (
            ("SELECT", None),
            ("", "setQuantifier"),
            ("", "selectList"),
            ("", "fromClause"),
            ("", "whereClause"),
            ("", "groupby"),
            ("", "having"),
            ("", "orderBy"),
            ("LIMIT", "setLimit"),
        )
        return nodes.flattenKWs(self, *clauses)
450 451
class _PGQS(nodes.ADQLNode):
    """A wrapper for a postgres query specification.

    The only function here is to make sure there's just one LIMIT part
    (followed by an OFFSET part) at the very end (except, of course, in
    deeper subqueries).

    Nuking operand setLimits is already performed by _fixSetLimit.
    """
    type = "postgres query specification"
    _a_original = None
    _a_setLimit = None
    _a_offset = None

    def flatten(self):
        """returns the wrapped query followed by LIMIT and OFFSET."""
        # (keyword, attribute name) pairs in serialization order
        parts = (
            ("", "original"),
            ("LIMIT", "setLimit"),
            ("OFFSET", "offset"),
        )
        return nodes.flattenKWs(self, *parts)
470 471
def _insertPGSC(node, state):
    """returns a _PGSC clone of node, i.e., a select clause that
    serializes to postgres syntax.
    """
    pgSelect = _PGSC.cloneFrom(node)
    return pgSelect
476 477
def _expandStars(node, state):
    """tries to replace all expressions with * in a select list.

    This is forced because that seems easier than figuring out how
    to apply the sequencing rules from sql1992, 7.5, to joins with more
    than two operands.

    The node on top of state.nodeStack provides the fields; if it has
    no field infos, node is returned unchanged.
    """
    parent = state.nodeStack[-1]

    # only work if annotation has taken place (else it's probably a test
    # run anyway)
    if not parent.fieldInfos:
        return node

    if node.allFieldsQuery:
        return nodes.SelectList(selectFields=parent.getSelectFields())

    # replace each qualified star by the fields of the table it refers to
    expanded = []
    for col in node.selectFields:
        if isinstance(col, nodes.QualifiedStar):
            expanded.extend(
                parent.fromClause.getFieldsForTable(col.sourceTable))
        else:
            expanded.append(col)
    return node.change(selectFields=tuple(expanded))
502 503
def _forceAlias(node, state):
    """forces anonymous expressions to have an alias.

    We need this as we expand stars here, and with these we need some
    way to refer to the items.

    Plain column references and columns already having an alias are left
    alone; everything else gets node.name as alias.
    """
    # node.expr may be a string if it has been morphed.  Strings are no
    # column references, so, silly as it may be, they are unconditionally
    # aliased by this condition, too (unless there already is an alias).
    needsAlias = (node.alias is None
        and not isinstance(node.expr, nodes.ColumnReference))
    if needsAlias:
        node.alias = node.name
    return node
520 521
def _fixSetLimit(node, state):
    """wraps node into a _PGQS carrying the only limit and offset.

    postgres only wants a global limit on set expressions, so the limits
    of all select clauses of node are removed, as is node's offset.
    """
    for selectClause in node.getSelectClauses():
        selectClause.setLimit = None

    offset, node.offset = node.offset, None

    limit = node.setLimit and str(node.setLimit)
    return _PGQS(original=node, setLimit=limit, offset=offset)
# node type -> function fixing syntactic differences to postgres
_syntaxMorphers = {
    "selectNoParens": _insertPGSC,
    "comparisonPredicate": morphhelpers.booleanizeComparisons,
    "selectList": _expandStars,
    "derivedColumn": _forceAlias,
    "selectQuery": _fixSetLimit,
}

# Warning: if ever there are two Morphers for the same type, this will
# break, and we'll need to allow lists of Morphers (and need to think
# about their sequence...)
_allMorphers = dict(_geometricMorphers)
_allMorphers.update(_miscMorphers)
_allMorphers.update(_syntaxMorphers)


_pgMorpher = morphhelpers.Morpher(_allMorphers)

morphPG = _pgMorpher.morph