Source code for gavo.adql.ufunctions

"""
"User" defined functions, i.e., ADQL functions defined only on this
system.

See the userFunction docstring on how to use these.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


from gavo import base
from gavo import stc
from gavo import utils
from gavo.adql import common
from gavo.adql import fieldinfo
from gavo.adql import morphhelpers
from gavo.adql import nodes
from gavo.base import sqlsupport

# Refuse to be imported before sqlsupport has filled in extensionFunctions:
# userFunction consults that collection (for its depends argument) while
# the decorators in this module are being executed at import time.
if sqlsupport.GAVOConnection.extensionFunctions is None:
	raise ImportError("Early import of adql.ufunctions:"
		"  sqlsupport not initialised yet")

# Maps upper-cased ADQL function names (including aliases) to the decorated
# python functions; written by userFunction, read by UserFunction._getFunc.
UFUNC_REGISTRY = {}


def userFunction(name, signature, doc, returntype="double precision",
		unit="", ucd="", polymorphism=(), additionalNames=(), depends=None):
	"""a decorator adding some metadata to python functions to make
	them suitable as ADQL user defined functions.

	name is the name the function will be visible under in ADQL; signature is a
	signature not including the name of the form
	'(parName1 type1, parName2 type2) -> resulttype'; doc is preformatted
	ASCII documentation.  The indentation of the second line will be removed
	from all lines.

	returntype is the SQL return type, which defaults to double precision.
	While ADQL 2.0 appears to say that UDFs must be numeric, in practice
	nobody cares; so, return whatever you see fit.

	unit and ucd are optional for when you actually have a good guess what's
	coming back from your ufunc.  They can also be callable; in that
	case, they'll be passed the (annotated) arguments, and whatever they
	return will be the unit/ucd.

	polymorphism, if given, must be a sequence of pairs of (signature,
	documentation) for alternative signatures of the UDF.

	additionalNames, if given, is a sequence of names the function can also
	be used as.  This is intended for when you started with gavo_somefunc
	and then want to move on to ivo_somefunc without functional change
	and want to keep legacy queries working.

	depends, if non-None, is a name of a postgres function that this UDF needs.
	This is for when the UDF should disappear when an extension is missing.
	It is checked against sqlsupport.GAVOConnection.extensionFunctions
	when the decorator is applied; if the name is not in there, the function
	is returned with its metadata but is not entered into UFUNC_REGISTRY.

	The python function receives a list of arguments; this will in general
	be ADQL expression trees.  It must return either

	* a string that will go literally into the eventual serialised SQL string
	  (so take care to quote; in general, you will use nodes.flatten(arg) to
	  flatten individual args);
	* or they may return None, in which case the expression tree remains
	  unchanged.  This is for when the actual implementation is in the database.
	* or they may raise nodes.ReplaceNode(r), where r is a nodes.ADQLNode
	  instance, which then replaces the user defined function in the parse
	  tree and will be annotated as usual.

	If you receive bad arguments or something else goes awry, raise a
	UfuncError.
	"""
	def deco(f):
		f.adqlUDF_name = name
		f.adqlUDF_signature = f.adqlUDF_name+signature.strip()
		f.adqlUDF_doc = utils.fixIndentation(doc, "", 1).strip()
		f.adqlUDF_returntype = returntype
		f.adqlUDF_unit = unit
		f.adqlUDF_ucd = ucd
		# the alternative docs are stored as passed in (no indentation fixing)
		f.adqlUDF_additionalSignatures = [
			{"signature": f.adqlUDF_name+altSignature.strip(), "doc": altDoc}
			for altSignature, altDoc in polymorphism]

		if (depends
				and depends not in sqlsupport.GAVOConnection.extensionFunctions):
			# don't register if the function would be broken because of
			# missing dependencies.
			return f

		UFUNC_REGISTRY[f.adqlUDF_name.upper()] = f
		for alias in additionalNames:
			UFUNC_REGISTRY[alias.upper()] = f
		return f

	return deco
def _makeTwoArgBooleanizer(funcName, booleanExpr):
	"""makes and registers a 2-argument booleanizer for funcName.

	That's where 1=some_pred(x) should become some [not] op1 opr op2
	expression.

	booleanExpr is the postgres expression the function should be booleanized
	to.  Refer to the two arguments as %(1)s and %(2)s

	The booleanizer raises a UfuncError if the node it is applied to
	does not have exactly two arguments.
	"""
	def _booleanizeThis(node, operator, operand):
		if len(node.args)!=2:
			raise common.UfuncError("%s takes exactly two arguments"%funcName)
		return morphhelpers.addNotToBooleanized(
			booleanExpr%{
				'1': nodes.flatten(node.args[0]),
				'2': nodes.flatten(node.args[1])},
			operator, operand)

	morphhelpers.registerBooleanizer(funcName.upper(), _booleanizeThis)


def _makeOneArgBooleanizer(funcName):
	"""makes and registers a one-argument booleanizer for funcName.

	That's for when the morphed ADQL would just be [not] expr, and expr
	is what the udf code has returned.
	"""
	morphhelpers.registerBooleanizer(funcName.upper(),
		morphhelpers.addNotToBooleanized)


@userFunction("gavo_match",
	"(pattern TEXT, string TEXT) -> INTEGER",
	"""
	gavo_match returns 1 if the POSIX regular expression pattern
	matches anything in string, 0 otherwise.
	""",
	"integer")
def _match(args):
	"""returns a CASE expression applying postgres' ~ operator.

	Note that the SQL has the string first and the pattern second, i.e.,
	the ADQL arguments are swapped.
	"""
	if len(args)!=2:
		raise common.UfuncError("gavo_match takes exactly two arguments")
	return "(CASE WHEN %s ~ %s THEN 1 ELSE 0 END)"%(
		nodes.flatten(args[1]), nodes.flatten(args[0]))


@userFunction("ivo_hasword",
	"(haystack TEXT, needle TEXT) -> INTEGER",
	"""
	ivo_hasword returns 1 if needle shows up in haystack, 0 otherwise.  This
	is for "google-like"-searches in text-like fields.  In word, you can
	actually employ a fairly complex query language; see
	https://www.postgresql.org/docs/current/textsearch.html
	for details.
	""",
	"integer")
def _hasword(args):
	"""validates the argument count and leaves the tree alone; the
	booleanizer registered below produces the SQL for comparisons.
	"""
	if len(args)!=2:
		raise common.UfuncError("ivo_hasword takes exactly two arguments")
	return None

_makeTwoArgBooleanizer("ivo_hasword",
	"(to_tsvector('english', %(1)s) @@ plainto_tsquery('english', %(2)s))")


@userFunction("ivo_nocasematch",
	"(value TEXT, pattern TEXT) -> INTEGER",
	"""
	ivo_nocasematch returns 1 if pattern matches value, 0 otherwise.
	pattern is defined as for the SQL LIKE operator, but the
	match is performed case-insensitively.  This function in effect
	provides a surrogate for the ILIKE SQL operator that is missing from
	ADQL.

	On this site, this is actually implemented using python's and SQL's
	LOWER, so for everything except ASCII, your mileage will vary.
	""",
	"integer")
def _nocasematch(args):
	"""lowercases the pattern in place (args is modified) and leaves the
	tree otherwise unchanged.

	Literal patterns are lowercased in python, anything else is wrapped
	into an SQL LOWER call.
	"""
	if len(args)!=2:
		raise common.UfuncError("ivo_nocasematch takes exactly two arguments")
	if args[1].type=='characterStringLiteral':
		args[1].value = args[1].value.lower()
	else:
		args[1] = "LOWER(%s)"%nodes.flatten(args[1])
	return None

_makeTwoArgBooleanizer("ivo_nocasematch", "(LOWER(%(1)s) like %(2)s)")


@userFunction("ivo_hashlist_has",
	"(hashlist TEXT, item TEXT) -> INTEGER",
	"""
	The function takes two strings; the first is a list of words not
	containing the hash sign (#), concatenated by hash signs, the second is
	a word not containing the hash sign.  It returns 1 if, compared
	case-insensitively, the second argument is in the list of words coded in
	the first argument.  The behaviour in case the second
	argument contains a hash sign is unspecified.
	""",
	"integer")
def _hashlist_has(args):
	"""validates the argument count and leaves the tree alone; the
	booleanizer registered below produces the SQL for comparisons.
	"""
	if len(args)!=2:
		raise common.UfuncError("ivo_hashlist_has takes exactly two arguments")
	return None

_makeTwoArgBooleanizer("ivo_hashlist_has",
	"lower(%(2)s) = ANY(string_to_array(lower(%(1)s), '#'))")


@userFunction("ivo_interval_overlaps",
	"(l1 NUMERIC, h1 NUMERIC, l2 NUMERIC, h2 NUMERIC) -> INTEGER",
	"""
	The function returns 1 if the interval [l1...h1] overlaps with
	the interval [l2...h2].  For the purposes of this function,
	the case l1=h2 or l2=h1 is treated as overlap.  The function
	returns 0 for non-overlapping intervals.
	""")
def _interval_overlaps(args):
	"""returns a node ANDing four comparisons: the two intervals reach
	each other, and each interval has its limits in order.

	The condition is built from nodes rather than as an SQL string.
	"""
	if len(args)!=4:
		raise common.UfuncError(
			"ivo_interval_overlaps takes exactly four arguments")
	l1, h1, l2, h2 = args
	return nodes.TransparentNode(children=[
		nodes.Comparison(op1=h1, opr='>=', op2=l2),
		"AND",
		nodes.Comparison(op1=h2, opr='>=', op2=l1),
		"AND",
		nodes.Comparison(op1=l1, opr='<=', op2=h1),
		"AND",
		nodes.Comparison(op1=l2, opr='<=', op2=h2)])

_makeOneArgBooleanizer("ivo_interval_overlaps")


@userFunction("ivo_interval_has",
	"(val NUMERIC, iv INTERVAL) -> INTEGER",
	"""
	The function returns 1 if the interval iv contains val, 0 otherwise.
	The lower limit is always included in iv, behaviour on the upper
	limit is column-specific.
	""")
def _interval_has(args):
	"""validates the argument count and leaves the tree alone; the
	booleanizer registered below produces the SQL for comparisons.
	"""
	if len(args)!=2:
		raise common.UfuncError(
			"ivo_interval_has takes exactly two arguments")
	return None

_makeTwoArgBooleanizer("ivo_interval_has", "((%(1)s) <@ (%(2)s))")


@userFunction("ivo_to_mjd",
	"(d TIMESTAMP) -> DOUBLE PRECISION",
	"""
	The function converts a postgres timestamp to modified julian date.
	This is naive; no corrections for timezones, let alone time
	scales or the like are done; you can thus not expect this to be
	good to second-precision unless you are careful in the construction
	of the timestamp.
	""",
	additionalNames=["gavo_to_mjd"])
def _to_mjd(args):
	"""returns a call to the database function ts_to_mjd.
	"""
	if len(args)!=1:
		raise common.UfuncError("gavo_to_mjd takes exactly one timestamp argument")
	return "ts_to_mjd(%s)"%nodes.flatten(args[0])


@userFunction("ivo_to_jd",
	"(d TIMESTAMP) -> DOUBLE PRECISION",
	"""
	The function converts a postgres timestamp to julian date.
	This is naive; no corrections for timezones, let alone time
	scales or the like are done; you can thus not expect this to be
	good to second-precision unless you are careful in the construction
	of the timestamp.
	""",
	additionalNames=["gavo_to_jd"])
def _to_jd(args):
	"""returns a call to the database function ts_to_jd.
	"""
	if len(args)!=1:
		raise common.UfuncError("gavo_to_jd takes exactly one timestamp argument")
	return "ts_to_jd(%s)"%nodes.flatten(args[0])
class HistogramNode(nodes.FunctionNode):
	"""a function node annotated as an integer array with a
	stat.histogram UCD (ivo_histogram replaces itself with one of these).
	"""
	def addFieldInfo(self, context):
		# if the histogrammed expression has a UCD, append it to ours.
		baseUCD = self.args[0].fieldInfo.ucd
		histogramUCD = (
			f"stat.histogram;{baseUCD}" if baseUCD else "stat.histogram")
		userData = nodes.collectUserData(self._getInfoChildren())
		self.fieldInfo = fieldinfo.FieldInfo(
			"integer[]", "", histogramUCD, *userData)
@userFunction("ivo_histogram",
	"(val REAL, lower REAL, upper REAL, nbins INTEGER) -> INTEGER[]",
	"""
	The aggregate function returns a histogram of val with nbins+2 elements.
	Assuming 0-based arrays, result[0] contains the number of underflows (i.e.,
	val<lower), result[nbins+1] the number of overflows.  Elements 1..nbins
	are the counts in nbins bins of width (upper-lower)/nbins.  Clients
	will have to convert back to physical units using some external
	communication, there currently is no (meta-) data as lower and upper in
	the TAP response.
	""",
	returntype="integer[]",
	additionalNames=["gavo_histogram"])
def _ivo_histogram(args):
	"""replaces the UDF with a HistogramNode calling GAVO_HISTOGRAM.
	"""
	if len(args)!=4:
		raise common.UfuncError(
			"ivo_histogram takes exactly four arguments (the column to aggregate,"
			" a lower and upper limit of values to tabulate, and the number"
			" of bins desired).")

	raise nodes.ReplaceNode(
		HistogramNode(funName="GAVO_HISTOGRAM", args=args))


@userFunction("gavo_ipix",
	"(long REAL, lat REAL) -> BIGINT",
	"""
	gavo_ipix returns the q3c ipix for a long/lat pair (it simply wraps
	the q3c_ang2ipix function).

	This is probably only relevant when you play tricks with indices or
	PPMXL ids.
	""",
	returntype="bigint",
	ucd="pos",
	depends="q3c_ang2ipix")
def _gavo_ipix(args):
	"""returns a call to q3c_ang2ipix on the two flattened arguments.
	"""
	if len(args)!=2:
		raise common.UfuncError(
			"gavo_ipix takes exactly two arguments.")

	lon, lat = [nodes.flatten(a) for a in args]
	return "q3c_ang2ipix(%s, %s)"%(lon, lat)


class _TransformNode(nodes.FunctionNode):
	"""a node representing a gavo_transform call.

	This has a proper node mainly because of type inference.
	"""
	name = "gavo_transform"
	# the transformation as returned by stc.getPGSphereTrafo; it is appended
	# verbatim to the parenthesised argument in flatten
	_a_trans = None
	# name of the target reference system (a key for ucdMap)
	_a_toSys = None

	# UCDs for results by target frame; anything else gets "pos".
	ucdMap = {
		'ICRS': "pos.eq",
		'FK5': "pos.eq",
		'FK4': "pos.eq",
		'GALACTIC': "pos.galactic",
	}

	def flatten(self):
		return "(%s)%s"%(nodes.flatten(self.args[0]), self.trans)

	def addFieldInfo(self, context):
		if hasattr(self.args[0], "fieldInfo"):
			# NOTE(review): this shares the argument's fieldInfo object, so the
			# UCD assignment below also changes the argument's annotation --
			# confirm that is intended.
			self.fieldInfo = self.args[0].fieldInfo
		else:
			# go for an STC-S string
			self.fieldInfo = fieldinfo.FieldInfo("text", "", "", tainted=True)
			self.fieldInfo.properties["xtype"] = "adql:REGION"

		# We discard the existing UCD, as that may change depending on the
		# the frame.  We should probably look at the type, too, and not
		# call circles, polygons, or MOCs positions.  Let's see when we want
		# that.
		self.fieldInfo.ucd = self.ucdMap.get(self.toSys, "pos")


@userFunction("ivo_transform",
	"(from_sys TEXT, to_sys TEXT, geo GEOMETRY) -> GEOMETRY",
	"""
	The function transforms ADQL geometries between various reference systems.
	geo can be a POINT, a CIRCLE, or a POLYGON, and the function will return a
	geometry of the same type.  In the current implementation, from_sys and
	to_sys must be literal strings (i.e., they cannot be computed through
	expressions or be taken from database columns).

	All transforms are just simple rotations, which is only a rough
	approximation to the actual relationships between reference systems
	(in particular between FK4 and ICRS-based ones).  Note that, in particular,
	the epoch is not changed (i.e., no proper motions are applied).

	We currently support the following reference frames: ICRS, FK5 (which
	is treated as ICRS), FK4 (for B1950. without epoch-dependent corrections),
	GALACTIC.  Reference frame names are case-sensitive.
	""",
	returntype="GEOMETRY",
	additionalNames=["gavo_transform"])
def _gavo_transform(args):
	"""returns the geometry argument itself when stc.getPGSphereTrafo
	yields None, and replaces the UDF with a _TransformNode otherwise.
	"""
	if len(args)!=3:
		raise common.UfuncError(
			"gavo_transform takes exactly three arguments")

	fromSys = nodes.getStringLiteral(args[0], "source reference system")
	toSys = nodes.getStringLiteral(args[1], "target reference system")

	try:
		trans = stc.getPGSphereTrafo(fromSys, toSys)
	except stc.STCValueError as msg:
		raise common.UfuncError(
			"Cannot compute transformation between %s and %s: %s"%(
				fromSys, toSys, msg))

	if trans is None:
		return args[2]
	else:
		raise nodes.ReplaceNode(_TransformNode(
			funName="gavo_transform",
			args=(args[2],),
			trans=trans,
			toSys=toSys))


@userFunction("ivo_normal_random",
	"(mu REAL, sigma REAL) -> REAL",
	"""The function returns a random number drawn from a normal distribution
	with mean mu and width sigma.

	Implementation note: Right now, the Gaussian is approximated by
	summing up and scaling ten calls to random.  This, hence, is not
	very precise or fast.  It might work for some use cases, and we
	will provide a better implementation if this proves inadequate.
	""",
	returntype="real",
	additionalNames=["gavo_normal_random"])
def _gavo_normal_random(args):
	"""returns an SQL expression summing ten random() calls, subtracting
	5, scaling with sigma and adding mu.
	"""
	if len(args)!=2:
		raise common.UfuncError(
			"gavo_normal_random takes mu, sigma arguments.")
	return ("(((random()+random()+random()+random()+random()"
		"+random()+random()+random()+random()+random()-5)*(%s)"
		")+(%s))")%(nodes.flatten(args[1]), nodes.flatten(args[0]))


@userFunction("gavo_mocunion",
	"(moc1 MOC, moc2 MOC) -> MOC",
	"""returns the union of two MOCs.
	""",
	returntype="smoc",
	depends="smoc_union")
def _gavo_mocunion(args):
	"""returns the two flattened arguments joined with the | operator.
	"""
	if len(args)!=2:
		raise common.UfuncError(
			"gavo_mocunion only has two arguments.")
	return "%s | %s"%(
		nodes.flatten(args[0]), nodes.flatten(args[1]))


@userFunction("gavo_mocintersect",
	"(moc1 MOC, moc2 MOC) -> MOC",
	"""returns the intersection of two MOCs.
	""",
	returntype="smoc",
	depends="smoc_union")
def _gavo_mocintersect(args):
	"""returns the two flattened arguments joined with the & operator.
	"""
	if len(args)!=2:
		raise common.UfuncError(
			"gavo_mocintersect only has two arguments.")
	return "%s & %s"%(
		nodes.flatten(args[0]), nodes.flatten(args[1]))


@userFunction("ivo_string_agg",
	"(expression TEXT, delimiter TEXT) -> TEXT",
	"""
	An aggregate function returning all values of
	expression within a GROUP concatenated with delimiter
	""",
	"text")
def _string_agg(args):
	"""returns a call to the SQL string_agg aggregate.
	"""
	if len(args)!=2:
		raise common.UfuncError("ivo_string_agg takes exactly two arguments")
	return "string_agg(%s, %s)"%(
		nodes.flatten(args[0]), nodes.flatten(args[1]))


@userFunction("gavo_apply_pm",
	"(ra DOUBLE PRECISION, dec DOUBLE PRECISION,"
	" pmra DOUBLE PRECISION, pmde DOUBLE PRECISION,"
	" epdist DOUBLE PRECISION) -> POINT",
	"""Returns a POINT (in the UNDEFINED reference frame) for the position
	an object at ra/dec with proper motion pmra/pmde has after epdist years.

	positions must be in degrees, PMs in should be in julian years (i.e., proper
	motions are expected in degrees/year).  pmra is assumed to contain
	cos(delta).

	This function goes through the tangential plane.  Since it does not have
	information on distance and radial velocity, it cannot reconstruct
	the true space motion, and hence its results will degrade over time.

	This function should not be used in new queries; use ivo_epoch_prop
	instead.
	""",
	returntype="spoint",
	additionalNames=["ivo_apply_pm"],
	depends="epoch_prop")
def _gavo_apply_pm(args):
	"""renames the calling node to IVO_APPLY_PM and leaves the tree alone
	for five or six arguments.

	Note that the renaming happens before the argument count is checked.
	"""
	# we need to make sure we eventually flatten to ivo_apply_pm, as
	# that's what is defined in //adql.  For that, we have to manipulate
	# our node, which UserFunction below somewhat stupidly does not allow.
	# We simply steal its self:
	utils.stealVar("self").funName = "IVO_APPLY_PM"

	if len(args)==5:
		# there's no indexing or anything to gain here; drop through to an
		# implementation in the database.
		#
		# This is the legacy ("gavo") implementation that we probably
		# want to drop.
		return None

	elif len(args)==6:
		# This is the ESAC-compliant ("ivo") implementation that we probably
		# want to keep.
		return None

	else:
		raise common.UfuncError(
			"gavo_apply_pm requires exactly ra, dec, pmra, pmdec, epdist.")


# epoch_prop will bomb out at the postgres level unless you have a very
# new pgsphere as of 2022 (actually, it's just a PR at this point).
@userFunction("ivo_epoch_prop",
	"(ra DOUBLE PRECISION, dec DOUBLE PRECISION, parallax DOUBLE PRECISION,"
	" pmra DOUBLE PRECISION, pmdec DOUBLE PRECISION,"
	" radial_velocity DOUBLE PRECISION,"
	" ref_epoch DOUBLE PRECISION, out_epoch DOUBLE PRECISION)"
	" -> DOUBLE PRECISION[6]",
	"""Returns a 6-vector of (ra, dec, parallax, pmra, pmdec, rv)
	at out_epoch for these quantities at ref_epoch.

	Essentially, it will apply the proper motion under the assumption of
	linear motion.  Despite the name of the positional parameters, this is
	not restricted to equatorial systems, as long as positions and proper
	motions are expressed in the same reference frames.

	Units on input and output are degrees for ra and dec, mas for parallax,
	mas/yr for pmra and pmdec, and km/s for the radial velocity.

	ref_epoch and out_epoch are given in Julian years.

	parallax, pmra, pmdec, and radial_velocity may be NULL and will enter
	the computations as 0 then, except in the case of parallax, which
	will be some small value.  When abs(parallax) is smaller or equal
	to that small value, parallax and radial velocity will be NULL on
	output.

	In daily use, you probably want to use the ivo_epoch_prop_pos functions.
	""",
	returntype="double precision[6]",
	depends="epoch_prop")
def _epoch_prop(args):
	"""returns a subquery calling the database function epoch_prop.

	The generated SQL converts between the units of the ADQL interface and
	the radians epoch_prop is fed with; 3.6e6 is the number of mas in a degree.
	"""
	if len(args)!=8:
		raise common.UfuncError("ivo_epoch_prop gets ra, dec [deg], parallax"
			" [mas], pmra, pmdec [mas/yr], radial_velocity [km/s], ref_epoch,"
			" out_epoch [yr] arguments")

	return ("(SELECT array[DEGREES(t[1]), DEGREES(t[2]), t[3],"
		" DEGREES(t[4]*3.6e6), DEGREES(t[5]*3.6e6), t[6]] FROM"
		" epoch_prop(spoint(RADIANS({ra}), RADIANS({dec})),"
		" {parallax}, RADIANS(({pmra})/3.6e6), RADIANS(({pmdec})/3.6e6),"
		" {radial_velocity}, ({out_epoch})-({ref_epoch})) as t)".format(
			ra=nodes.flatten(args[0]),
			dec=nodes.flatten(args[1]),
			parallax=nodes.flatten(args[2]),
			pmra=nodes.flatten(args[3]),
			pmdec=nodes.flatten(args[4]),
			radial_velocity=nodes.flatten(args[5]),
			ref_epoch=nodes.flatten(args[6]),
			out_epoch=nodes.flatten(args[7])))


@userFunction("ivo_epoch_prop_pos",
	"(ra DOUBLE PRECISION, dec DOUBLE PRECISION, parallax DOUBLE PRECISION,"
	" pmra DOUBLE PRECISION, pmdec DOUBLE PRECISION,"
	" radial_velocity DOUBLE PRECISION,"
	" ref_epoch DOUBLE PRECISION, out_epoch DOUBLE PRECISION)"
	" -> POINT",
	"""Returns a POINT giving the position at out_epoch for an object
	with the six parameters at ref_epoch.

	Essentially, it will apply the proper motion under the assumption of
	linear motion.  Despite the name of the positional parameters, this is
	not restricted to equatorial systems, as long as positions and proper
	motions are expressed in the same reference frames.

	Units on input are degrees for ra and dec, mas for parallax,
	mas/yr for pmra and pmdec, and km/s for the radial velocity.
	ref_epoch and out_epoch are given in Julian years.

	parallax, pmra, pmdec, and radial_velocity may be NULL and will enter
	the computations as 0 then, except in the case of parallax, which
	will be some small value.
	""",
	returntype="spoint",
	polymorphism=[
		("(ra DOUBLE PRECISION, dec DOUBLE PRECISION,"
		" pmra DOUBLE PRECISION, pmdec DOUBLE PRECISION,"
		" ref_epoch DOUBLE PRECISION, out_epoch DOUBLE PRECISION)"
		" -> POINT",
		"""A variant of ivo_epoch_prop_pos that behave as if parallax
		and radial_velocity were both passed as NULL.""")],
	ucd="pos",
	depends="epoch_prop")
def _epoch_prop_pos(args):
	"""returns a subquery making an spoint from the first two elements
	of epoch_prop's result.

	With six arguments, parallax and radial_velocity are passed to the
	database as literal NULLs.
	"""
	if len(args)==6:
		labels = "ra dec pmra pmdec ref_epoch out_epoch".split()
	elif len(args)==8:
		labels = ("ra dec parallax pmra pmdec radial_velocity"
			" ref_epoch out_epoch").split()
	else:
		raise common.UfuncError("ivo_epoch_prop_pos gets ra, dec [deg], parallax"
			" [mas], pmra, pmdec [mas/yr], radial_velocity [km/s], ref_epoch,"
			" out_epoch [yr] arguments, where you may leave out parallax and"
			" radial_velocity.")

	# labels and args have the same length by construction
	kws = {label: nodes.flatten(arg) for label, arg in zip(labels, args)}
	if len(args)==6:
		kws["parallax"] = kws["radial_velocity"] = "NULL"

	return ("(SELECT spoint(t[1], t[2]) FROM"
		" epoch_prop(spoint(RADIANS({ra}), RADIANS({dec})),"
		" {parallax}, RADIANS(({pmra})/3.6e6), RADIANS(({pmdec})/3.6e6),"
		" {radial_velocity}, ({out_epoch})-({ref_epoch})) as t)"
		.format(**kws))


# the healpix functions require a fairly recent pgsphere (available
# as of Debian bullseye)
@userFunction("ivo_healpix_index",
	"(order INTEGER, ra DOUBLE PRECISION, dec DOUBLE PRECISION) -> BIGINT",
	"""Returns the index of the (nest) healpix with order containing the
	spherical point (ra, dec).

	An alternative, 2-argument form

	ivo_healpix_index(order INTEGER, p POINT) -> BIGINT

	is also available.
	""",
	returntype="bigint",
	depends="healpix_nest")
def _ivo_healpix_index(args):
	"""returns a healpix_nest call; for three arguments, an spoint is
	constructed from ra and dec (converted to radians) first.
	"""
	if len(args)==2:
		return "healpix_nest(%s, %s)"%(
			nodes.flatten(args[0]), nodes.flatten(args[1]))
	elif len(args)==3:
		return "healpix_nest(%s, spoint(RADIANS(%s), RADIANS(%s)))"%(
			nodes.flatten(args[0]),
			nodes.flatten(args[1]),
			nodes.flatten(args[2]))
	else:
		raise common.UfuncError("ivo_healpix_index takes either (order, ra, dec)"
			" or (order, point) arguments")


@userFunction("ivo_healpix_center",
	"(hpxOrder INTEGER, hpxIndex BIGINT) -> POINT",
	"""returns a POINT corresponding to the center of the healpix with
	the given index at the given order.
	""",
	returntype="spoint",
	depends="healpix_nest")
def _ivo_healpix_center(args):
	"""returns a center_of_healpix_nest call on the two flattened arguments.
	"""
	if len(args)!=2:
		raise common.UfuncError("ivo_healpix_center only takes (order, index)"
			" arguments")
	return "center_of_healpix_nest(%s, %s)"%(
		nodes.flatten(args[0]), nodes.flatten(args[1]))


@userFunction("gavo_getauthority",
	"(ivoid TEXT) -> TEXT",
	"""returns the authority part of an ivoid (or, more generally a URI).
	So, ivo://org.gavo.dc/foo/bar#baz becomes org.gavo.dc.

	The behaviour for anything that's not a full URI is undefined.
	""",
	returntype="text")
def _gavo_getauthority(args):
	"""validates the argument count and leaves the tree unchanged.
	"""
	if len(args)!=1:
		raise common.UfuncError("gavo_getauthority only takes an ivoid"
			" argument.")
	return None


@userFunction("gavo_vocmatch",
	"(vocname TEXT, term TEXT, matchagainst TEXT) -> INTEGER",
	"""returns 1 if matchagainst is term or narrower in the IVOA vocabulary
	vocname, 0 otherwise.

	This is intended for semantic querying.  For instance,
	gavo_vocmatch('datalink/core', 'calibration', semantics) would be 1
	if semantics is any of calibration, bias, dark, or flat.

	For RDF-flavoured vocabularies (strict trees), term will expand to the
	entire branch rooted in term.  For SKOS-flavoured vocabularies (where
	narrower is not transitive), only directly narrower terms will be included.

	Both the term and the vocabulary name must be string literals (i.e.,
	constants).  matchagainst can be any string-valued expression.
	""",
	returntype="integer")
def _gavo_vocmatch(args):
	"""replaces the UDF with a two-argument GAVO_VOCMATCH function node
	having matchagainst and a parenthesised list of term and its
	narrower terms as arguments.

	The booleanizer registered below turns comparisons against that
	into an SQL IN expression.
	"""
	if len(args)!=3:
		raise common.UfuncError("gavo_vocmatch takes three"
			" arguments.")

	# imported locally (presumably to avoid a circular import -- TODO confirm)
	from gavo.protocols import vocabularies
	voc = vocabularies.get_vocabulary(
		nodes.getStringLiteral(args[0], "vocabulary name"))
	term = nodes.getStringLiteral(args[1], "term")
	if term not in voc["terms"]:
		raise common.UfuncError("'%s' is not a term in the vocabulary %s"%(
			term, voc["uri"]))

	expanded = [args[1]] + [
		nodes.CharacterStringLiteral(value=narrower)
		for narrower in voc["terms"][term]["narrower"]]
	args[1] = "(%s)"%(", ".join(nodes.flatten(n) for n in expanded))

	raise nodes.ReplaceNode(nodes.FunctionNode(
		funName='GAVO_VOCMATCH',
		args=(args[2], args[1])))

_makeTwoArgBooleanizer("gavo_vocmatch", "%(1)s IN %(2)s")


class _SpecconvNode(nodes.FieldInfoedNode):
	"""A node left by gavo_specconv.

	It is constructed with the target unit and the expression to convert.
	During annotation, it will mutate its children so they are in the
	target unit.  We have this complication because at UDF creation time,
	the columns are not annotated and hence we may not know the unit to
	convert from.

	If forceUnit is given (three-argument form), we could do the
	transformation immediately, but we still use the code in addFieldInfo
	to avoid too many code paths.  The price is the stupid exprReplaced
	hack.
	"""
	_a_targetUnit = None
	_a_forceUnit = None
	_a_expr = None

	# set to True by addFieldInfo once expr has been wrapped into the
	# conversion expression; flatten refuses to work before that.
	exprReplaced = False

	def _getFromUnit(self, context):
		"""returns the unit we transform from.

		This needs the context because it might produce warnings.

		A UfuncError is raised if neither the annotation of expr nor
		forceUnit yield a unit.
		"""
		# expr may lack a fieldInfo, and the unit may be empty; both
		# count as "unit unknown".
		fromUnit = getattr(
			getattr(self.expr, "fieldInfo", None), "unit", None) or None

		if self.forceUnit:
			# only warn when we actually change a known unit
			if fromUnit and fromUnit!=self.forceUnit:
				context.warnings.append("Warning in specconv: overriding"
					f" unit {fromUnit} with {self.forceUnit}")
			fromUnit = self.forceUnit

		if not fromUnit:
			raise common.UfuncError("specconv: Cannot infer unit of first"
				" argument.  Perhaps use the three-argument specconv?")
		return fromUnit

	def _replaceExpr(self, sentinelName, newExpr):
		"""replaces self.expr with newExpr, where a (fake) column reference
		named sentinelName is replaced by the previous expression.

		Only the first sentinel found (depth-first) is replaced; if there is
		none, a UfuncError is raised.
		"""
		def replaceTarget(children):
			for ind, child in enumerate(children):
				if getattr(child, "name", None)==sentinelName:
					children[ind] = self.expr
					return True
				if hasattr(child, "children"):
					if replaceTarget(child.children):
						return True
			return False

		if replaceTarget(newExpr.children):
			self.expr = newExpr
		else:
			raise common.UfuncError("specconv: Internal Error: No sentinel found.")

	def addFieldInfo(self, context):
		try:
			nodes.FieldInfoedNode.addFieldInfo(self, context)

			fromUnit = self._getFromUnit(context)
			# build the conversion as ADQL source with a placeholder, parse
			# that, and then put our expression in the placeholder's position.
			exprLit = base.getSpecExpr(fromUnit, self.targetUnit
				).format("REPLACE_ME")
			exprNode = nodes.getTreeBuildingGrammar()[0][
				"userDefinedFunctionParam"
				].parseString(exprLit, parseAll=True)[0]
			self._replaceExpr("REPLACE_ME", exprNode)
			self.exprReplaced = True

			self.fieldInfo = self.fieldInfo.change(
				unit=self.targetUnit, type="double precision")
			# TODO: We should probably fix the description to indicate
			# it's some sort of different thing.
		except base.IncompatibleUnits as msg:
			raise common.UfuncError(str(msg))

	def flatten(self):
		if not self.exprReplaced:
			raise ValueError("You cannot use ivo_specconv here.  Complain"
				" to the operators.")
		return self.expr.flatten()


@userFunction("gavo_specconv",
	"(expr DOUBLE PRECISION, dest_unit TEXT) -> DOUBLE PRECISION",
	"""returns the spectral value expr converted to dest_unit.

	expr has to be in either energy, wavelength, or frequency, and dest_unit
	must be a VOUnit giving another spectral unit (e.g., MHz, keV, nm, or
	Angstrom).  This is intended to let users express spectral constraints
	in their preferred unit independently of the choice of unit in the
	database.  Examples::

		gavo_specconv(obscore.em_min, 'keV') > 300
		gavo_specconv(obscore.em_max, 'MHz') > 30
		gavo_specconv(spectral_start, 'Angstrom') > 4000

	There is a variant of gavo_specconv accepting expr's unit in a third
	argument.
	""",
	polymorphism=[(
		"(expr NUMERIC, expr_unit TEXT, dest_unit TEXT) -> NUMERIC",
		"""returns expr assumed to be in expr_unit expressed in dest_unit.

		This is a variant of the two-argument gavo_specconv for when
		the unit of expr is not known to the ADQL translator, either because
		it is a literal or because it does not look like
		a spectral unit.  Examples::

			gavo_specconv(656, 'nm', 'J') BETWEEN spectral_start AND spectral_end
			gavo_specconv(arccos(phi)*incidence, 'Hz', 'eV')

		Clearly, overriding known units is likely to yield bad results;
		the translator therefore warns if an existing unit is overridden
		with a different unit.""")],
	additionalNames=["ivo_specconv"])
def _gavo_specconv(args):
	"""replaces the UDF with a _SpecconvNode; the actual conversion
	is only worked out when that node is annotated.
	"""
	try:
		if len(args)==2:
			newNode = _SpecconvNode(expr=args[0],
				targetUnit=nodes.getStringLiteral(args[1]))
		elif len(args)==3:
			newNode = _SpecconvNode(expr=args[0],
				targetUnit=nodes.getStringLiteral(args[2]),
				forceUnit=nodes.getStringLiteral(args[1]))
		else:
			raise common.UfuncError("gavo_specconv takes two or three arguments")
	except base.IncompatibleUnits as msg:
		raise common.UfuncError(str(msg))

	raise nodes.ReplaceNode(newNode)
class UserFunction(nodes.FunctionNode):
	"""A node processing user defined functions.

	See the userFunction docstring for how ADQL user defined functions
	are defined.
	"""
	type = "userDefinedFunction"

	def _getFunc(self):
		"""returns the python function registered for our funName.

		A UfuncError is raised if there is no such function (or funName
		is not a string).
		"""
		try:
			return UFUNC_REGISTRY[self.funName.upper()]
		except (KeyError, AttributeError):
			raise common.UfuncError("No such function: %s"%self.funName)

	def _polish(self):
		"""calls the python function on our arguments and keeps its result
		in processedExpression.
		"""
		if self.args:
			# the python functions may assign into their argument list
			self.args = list(self.args)
		self.processedExpression = self._getFunc()(self.args)

	def flatten(self):
		"""returns the flattened result of the python function, or the
		default function node serialisation if that returned None.
		"""
		if self.processedExpression is None:
			return nodes.FunctionNode.flatten(self)
		else:
			return nodes.flatten(self.processedExpression)

	def addFieldInfo(self, context):
		"""annotates the node with return type, unit, and UCD from the
		function's metadata.

		Callable units or UCDs are passed our arguments to compute the
		actual values.
		"""
		ufunc = self._getFunc()

		unit = (ufunc.adqlUDF_unit(self.args)
			if callable(ufunc.adqlUDF_unit)
			else ufunc.adqlUDF_unit)
		ucd = (ufunc.adqlUDF_ucd(self.args)
			if callable(ufunc.adqlUDF_ucd)
			else ufunc.adqlUDF_ucd)

		self.fieldInfo = fieldinfo.FieldInfo(
			ufunc.adqlUDF_returntype, unit, ucd)
# Register UserFunction with the nodes module (presumably so the tree
# builder uses it for nodes of its type, "userDefinedFunction" -- confirm
# against nodes.registerNode).
nodes.registerNode(UserFunction)