1 """
2 "User" defined functions, i.e., ADQL functions defined only on this
3 system.
4
5 See the userFunction docstring on how to use these.
6 """
7
8
9
10
11
12
13
14 from gavo import stc
15 from gavo import utils
16 from gavo.adql import common
17 from gavo.adql import fieldinfo
18 from gavo.adql import morphhelpers
19 from gavo.adql import nodes
20 from gavo.adql import tree
21
22
# Registry of ADQL user defined functions.  The decorator produced by
# userFunction stores each decorated callable here, keyed by its
# upper-cased ADQL name.
UFUNC_REGISTRY = {}
24
25
def userFunction(name, signature, doc, returntype="double precision",
        unit="", ucd=""):
    """returns a decorator that equips a python function with the metadata
    needed to serve as an ADQL user defined function and registers it.

    Arguments:

    * name -- the name under which the function is visible in ADQL.
    * signature -- the signature without the function name, written as
      '(parName1 type1, parName2 type2) -> resulttype'.
    * doc -- preformatted ASCII documentation.  The indentation of its
      second line is removed from all lines.
    * returntype -- the SQL return type; it defaults to double precision.
      ADQL 2.0 seems to require numeric UDFs, but in practice nobody
      cares, so return whatever fits.
    * unit, ucd -- optional metadata for when you have a good guess at
      what your ufunc returns.  Either may be a callable; it is then
      passed the (annotated) arguments, and its result is used as
      unit/ucd.

    The decorated function receives an array of arguments, in general
    ADQL expression trees.  It must do one of the following:

    * return a string, which goes literally into the serialised SQL
      (so take care to quote; typically, use nodes.flatten(arg) on the
      individual arguments);
    * return None, which leaves the expression tree unchanged; use this
      when the actual implementation lives in the database;
    * return a nodes.ADQLNode instance, which replaces the user defined
      function in the parse tree and is annotated as usual.

    On bad arguments or other trouble, raise a UfuncError.
    """
    def deco(f):
        # collect all metadata first, then attach it to the function object
        metadata = {
            "adqlUDF_name": name,
            "adqlUDF_signature": name+signature.strip(),
            "adqlUDF_doc": utils.fixIndentation(doc, "", 1).strip(),
            "adqlUDF_returntype": returntype,
            "adqlUDF_unit": unit,
            "adqlUDF_ucd": ucd,
        }
        for attName, value in metadata.items():
            setattr(f, attName, value)
        # the registry is keyed by the upper-cased ADQL name
        UFUNC_REGISTRY[name.upper()] = f
        return f

    return deco
71
def _makeBooleanizer(funcName, booleanExpr):
    """makes and registers a booleanizer for funcName.

    booleanExpr is the postgres expression the function should be booleanized
    to.  Refer to the two arguments as %(1)s and %(2)s.

    The booleanizer is registered with morphhelpers under the upper-cased
    funcName.  It raises a UfuncError unless the node it is applied to has
    exactly two arguments.
    """
    def _booleanizeThis(node, operator, operand):
        if len(node.args)!=2:
            raise common.UfuncError("%s takes exactly two arguments"%funcName)
        # fill the flattened arguments into the template and let
        # morphhelpers combine the result with operator and operand
        return morphhelpers.addNotToBooleanized(
            booleanExpr%{
                '1': nodes.flatten(node.args[0]),
                '2': nodes.flatten(node.args[1])},
            operator, operand)

    morphhelpers.registerBooleanizer(funcName.upper(), _booleanizeThis)
89
90
91
92 @userFunction("gavo_match",
93 "(pattern TEXT, string TEXT) -> INTEGER",
94 """
95 gavo_match returns 1 if the POSIX regular expression pattern
96 matches anything in string, 0 otherwise.
97 """,
98 "integer")
104
105
@userFunction("ivo_hasword",
    "(haystack TEXT, needle TEXT) -> INTEGER",
    """
    ivo_hasword returns 1 if needle shows up in haystack, 0 otherwise.  This
    is for "google-like"-searches in text-like fields.  In needle, you can
    actually employ a fairly complex query language; see
    http://www.postgresql.org/docs/8.3/static/textsearch.html
    for details.
    """,
    "integer")
def _ivo_hasword(args):
    """checks the argument count of an ivo_hasword call.

    args is the list of argument expressions.  A UfuncError is raised
    unless there are exactly two of them.  None is returned, which, per the
    userFunction contract, leaves the expression tree unchanged.
    """
    if len(args)!=2:
        raise common.UfuncError("ivo_hasword takes exactly two arguments")
    return None
120
121 _makeBooleanizer("ivo_hasword",
122 "(to_tsvector('english', %(1)s) @@ plainto_tsquery('english', %(2)s))")
123
124
@userFunction("ivo_nocasematch",
    "(value TEXT, pattern TEXT) -> INTEGER",
    """
    ivo_nocasematch returns 1 if pattern matches value, 0 otherwise.
    pattern is defined as for the SQL LIKE operator, but the
    match is performed case-insensitively.  This function in effect
    provides a surrogate for the ILIKE SQL operator that is missing from
    ADQL.

    On this site, this is actually implemented using python's and SQL's
    LOWER, so for everything except ASCII, your mileage will vary.
    """,
    "integer")
def _ivo_nocasematch(args):
    """lower-cases the pattern argument of an ivo_nocasematch call in place.

    args is the list of argument expressions; exactly two are required,
    else a UfuncError is raised.  The list is modified: a string literal
    pattern has its value lower-cased in python, any other pattern
    expression is replaced by a string wrapping it in SQL LOWER().
    None is returned, which, per the userFunction contract, leaves the
    (now modified) expression tree in place.
    """
    if len(args)!=2:
        raise common.UfuncError("ivo_nocasematch takes exactly two arguments")
    if args[1].type=='characterStringLiteral':
        # literals can be folded right here
        args[1].value = args[1].value.lower()
    else:
        # anything else has to be folded by the database
        args[1] = "LOWER(%s)"%nodes.flatten(args[1])
    return None
146
147 _makeBooleanizer("ivo_nocasematch", "(LOWER(%(1)s) like %(2)s)")
148
149
@userFunction("ivo_hashlist_has",
    "(hashlist TEXT, item TEXT) -> INTEGER",
    """
    The function takes two strings; the first is a list of words not
    containing the hash sign (#), concatenated by hash signs, the second is
    a word not containing the hash sign.  It returns 1 if, compared
    case-insensitively, the second argument is in the list of words coded in
    the first argument.  The behaviour in case the second
    argument contains a hash sign is unspecified.
    """,
    "integer")
def _ivo_hashlist_has(args):
    """checks the argument count of an ivo_hashlist_has call.

    args is the list of argument expressions.  A UfuncError is raised
    unless there are exactly two of them.  None is returned, which, per the
    userFunction contract, leaves the expression tree unchanged.
    """
    if len(args)!=2:
        # the message must name the function as users know it
        raise common.UfuncError("ivo_hashlist_has takes exactly two arguments")
    return None
165
166 _makeBooleanizer("ivo_hashlist_has",
167 "lower(%(2)s) = ANY(string_to_array(%(1)s, '#'))")
168
@userFunction("ivo_interval_overlaps",
    "(l1 NUMERIC, h1 NUMERIC, l2 NUMERIC, h2 NUMERIC) -> INTEGER",
    """
    The function returns 1 if the interval [l1...h1] overlaps with
    the interval [l2...h2].  For the purposes of this function,
    the case l1=h2 or l2=h1 is treated as overlap.  The function
    returns 0 for non-overlapping intervals.
    """,
    returntype="integer")
def _ivo_interval_overlaps(args):
    """returns an SQL expression testing two intervals for overlap.

    args is the list of the four argument expressions l1, h1, l2, h2;
    a UfuncError is raised for any other argument count.  The resulting
    boolean expression is cast to INTEGER in SQL, which is why the
    function is declared with an integer return type.
    """
    if len(args)!=4:
        raise common.UfuncError(
            "ivo_interval_overlaps takes exactly four arguments")
    l1, h1, l2, h2 = [nodes.flatten(a) for a in args]
    # overlap: each upper limit reaches the other lower limit, and both
    # intervals are properly ordered
    return "((%s)>=(%s) AND (%s)>=(%s) AND (%s)<=(%s) AND (%s)<=(%s))::INTEGER"%(
        h1, l2, h2, l1,
        l1, h1, l2, h2)
185
186
@userFunction("ivo_interval_has",
    "(val NUMERIC, iv INTERVAL) -> INTEGER",
    """
    The function returns 1 if the interval iv contains val, 0 otherwise.
    The lower limit is always included in iv, behaviour on the upper
    limit is column-specific.
    """,
    returntype="integer")
def _ivo_interval_has(args):
    """checks the argument count of an ivo_interval_has call.

    args is the list of argument expressions.  A UfuncError is raised
    unless there are exactly two of them.  None is returned, which, per the
    userFunction contract, leaves the expression tree unchanged.
    """
    if len(args)!=2:
        raise common.UfuncError(
            "ivo_interval_has takes exactly two arguments")
    return None
199
200 _makeBooleanizer("ivo_interval_has", "((%(1)s) <@ (%(2)s))")
201
202
@userFunction("gavo_to_mjd",
    "(d TIMESTAMP) -> DOUBLE PRECISION",
    """
    The function converts a postgres timestamp to modified julian date.
    This is naive; no corrections for timezones, let alone time
    scales or the like are done; you can thus not expect this to be
    good to second-precision unless you are careful in the construction
    of the timestamp.
    """)
def _gavo_to_mjd(args):
    """returns SQL calling ts_to_mjd on the single argument.

    args is the list of argument expressions; a UfuncError is raised
    unless it has exactly one element.
    """
    if len(args)!=1:
        raise common.UfuncError("gavo_to_mjd takes exactly one timestamp argument")
    return "ts_to_mjd(%s)"%nodes.flatten(args[0])
216
217
@userFunction("gavo_to_jd",
    "(d TIMESTAMP) -> DOUBLE PRECISION",
    """
    The function converts a postgres timestamp to julian date.
    This is naive; no corrections for timezones, let alone time
    scales or the like are done; you can thus not expect this to be
    good to second-precision unless you are careful in the construction
    of the timestamp.
    """)
def _gavo_to_jd(args):
    """returns SQL calling ts_to_jd on the single argument.

    args is the list of argument expressions; a UfuncError is raised
    unless it has exactly one element.
    """
    if len(args)!=1:
        raise common.UfuncError("gavo_to_jd takes exactly one timestamp argument")
    return "ts_to_jd(%s)"%nodes.flatten(args[0])
231
def _get_histogram_ucd(args):
    """returns the UCD for a histogram over the first argument.

    args is the list of (annotated) argument expressions; the first one
    must have a fieldInfo attribute with a ucd.  If that ucd is non-empty,
    it is appended to stat.histogram, separated by a semicolon; otherwise,
    plain stat.histogram is returned.
    """
    baseUCD = args[0].fieldInfo.ucd
    if baseUCD:
        return "stat.histogram;"+baseUCD
    return "stat.histogram"
239
240
@userFunction("gavo_histogram",
    "(val REAL, lower REAL, upper REAL, nbins INTEGER) -> INTEGER[]",
    """
    The aggregate function returns a histogram of val with nbins+2 elements.
    Assuming 0-based arrays, result[0] contains the number of underflows (i.e.,
    val<lower), result[nbins+1] the number of overflows.  Elements 1..nbins
    are the counts in nbins bins of width (upper-lower)/nbins.  Clients
    will have to convert back to physical units using some external
    communication, there currently is no (meta-) data as lower and upper in
    the TAP response.
    """,
    returntype="integer[]",
    ucd=_get_histogram_ucd)
def _gavo_histogram(args):
    """checks the argument count of a gavo_histogram call.

    args is the list of argument expressions.  A UfuncError is raised
    unless there are exactly four of them.  None is returned, which, per
    the userFunction contract, leaves the expression tree unchanged.
    """
    if len(args)!=4:
        raise common.UfuncError(
            "gavo_histogram takes exactly four arguments (the column to aggregate,"
            " a lower and upper limit of values to tabulate, and the number"
            " of bins desired).")
    return None
261
262
263 @userFunction("gavo_ipix",
264 "(long REAL, lat REAL) -> BIGINT",
265 """
266 gavo_ipix returns the q3c ipix for a long/lat pair (it simply wraps
267 the 13c_ang2ipix function).
268
269 This is probably only relevant when you play tricks with indices or
270 PPMXL ids.
271 """,
272 returntype="bigint",
273 ucd="pos")
281
282
283 @userFunction("gavo_transform",
284 "(from_sys TEXT, to_sys TEXT, geo GEOMETRY) -> GEOMETRY",
285 """
286 The function transforms ADQL geometries between various reference systems.
287 geo can be a POINT, a CIRCLE, or a POLYGON, and the function will return a
288 geometry of the same type. In the current implementation, from_sys and
289 to_sys must be literal strings (i.e., they cannot be computed through
290 expressions or be taken from database columns).
291
292 All transforms are just simple rotations, which is only a rough
293 approximation to the actual relationships between reference systems
294 (in particular between FK4 and ICRS-based ones). Not that, in particular,
295 the epoch is not changed (i.e., no proper motions are applied).
296
297 We currently support the following reference frames: ICRS, FK5 (which
298 is trested as ICRS), FK4 (for B1950. without epoch-dependent corrections),
299 GALACTIC. Reference frame names are case-sensitive.
300 """,
301 returntype="GEOMETRY")
324
325
326 @userFunction("ivo_string_agg",
327 "(expression TEXT, delimiter TEXT) -> TEXT",
328 """
329 An aggregate function returning all values of
330 expression within a GROUP contcatenated with delimiter
331 """,
332 "text")
338
339
@userFunction("ivo_apply_pm",
    "(ra DOUBLE PRECISION, dec DOUBLE PRECISION, pmra DOUBLE PRECISION,"
    " pmde DOUBLE PRECISION, epdist DOUBLE PRECISION) -> POINT",
    """Returns a POINT (in the UNDEFINED reference frame) for the position
    an object at ra/dec with proper motion pmra/pmde has after epdist years.

    positions must be in degrees, PMs should be per julian year (i.e., proper
    motions are expected in degrees/year).  pmra is assumed to contain
    cos(delta).

    NOTE: This currently is a crappy approximation that does *not* go
    through the tangential plane.  If you use it, let the operators know
    so we replace it with something real.
    """,
    returntype="spoint")
def _ivo_apply_pm(args):
    """builds a POINT node for the proper-motion-corrected position.

    args is the list of the five argument expressions ra, dec, pmra,
    pmdec, epdist; a UfuncError is raised for any other argument count.

    The function never returns normally: the replacement node is handed
    on by raising nodes.ReplaceNode.
    """
    if len(args)!=5:
        raise common.UfuncError(
            "ivo_apply_pm requires exactly ra, dec, pmra, pmdec, epdist.")

    ra, dec, pmra, pmdec, epdist = args
    # NOTE(review): the user documentation says UNDEFINED, the node is built
    # with 'UNKNOWN' -- confirm which one the POINT node expects.
    movedPos = nodes.Point(
        cooSys = 'UNKNOWN',
        # x = ra + epdist*(pmra/COS(RADIANS(dec)))
        x=nodes.NumericValueExpression(children=[
            ra, '+',
            nodes.Term(children=[
                epdist, '*', nodes.Term(children=[
                    pmra, '/', nodes.NumericValueFunction(
                        funName="COS",
                        args=[nodes.NumericValueFunction(
                            funName="RADIANS",
                            args=[dec])])])])]),
        # y = dec + pmdec*epdist
        y=nodes.NumericValueExpression(children=[
            dec, '+', nodes.Term(children=[
                pmdec, '*', epdist])]))
    # presumably caught by the caller to substitute the UDF node with
    # movedPos -- verify against the code handling ReplaceNode
    raise nodes.ReplaceNode(movedPos)
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
@userFunction("ivo_healpix_index",
    "(order INTEGER, ra DOUBLE PRECISION, dec DOUBLE PRECISION) -> BIGINT",
    """Returns the index of the (nest) healpix with order containing the
    spherical point (ra, dec).

    An alternative, 2-argument form

    ivo_healpix_index(order INTEGER, p POINT) -> BIGINT

    is also available.
    """,
    returntype="bigint")
def _ivo_healpix_index(args):
    """returns SQL calling healpix_nest for the given arguments.

    args is the list of argument expressions.  With two arguments, they
    are passed on as given; with three, the second and third are wrapped
    into an spoint after conversion to radians.  Any other argument count
    raises a UfuncError.
    """
    if len(args)==2:
        return "healpix_nest(%s, %s)"%(
            nodes.flatten(args[0]), nodes.flatten(args[1]))
    elif len(args)==3:
        return "healpix_nest(%s, spoint(RADIANS(%s), RADIANS(%s)))"%(
            nodes.flatten(args[0]), nodes.flatten(args[1]), nodes.flatten(args[2]))
    else:
        # argument order in the message follows the declared signatures
        raise common.UfuncError("ivo_healpix_index takes either (order, ra, dec)"
            " or (order, point) arguments")
422
423
@userFunction("ivo_healpix_center",
    "(hpxOrder INTEGER, hpxIndex BIGINT) -> POINT",
    """returns a POINT corresponding to the center of the healpix with
    the given index at the given order.
    """,
    returntype="spoint")
def _ivo_healpix_center(args):
    """returns SQL calling center_of_healpix_nest on the two arguments.

    args is the list of argument expressions, order first, index second;
    a UfuncError is raised unless there are exactly two of them.
    """
    if len(args)!=2:
        # argument order in the message follows the declared signature
        raise common.UfuncError("ivo_healpix_center only takes (order, index)"
            " arguments")
    return "center_of_healpix_nest(%s, %s)"%(
        nodes.flatten(args[0]), nodes.flatten(args[1]))
436
439 """A node processing user defined functions.
440
441 See the userFunction docstring for how ADQL user defined functions
442 are defined.
443 """
444 type = "userDefinedFunction"
445
451
453 if self.args:
454 self.args = list(self.args)
455 self.processedExpression = self._getFunc()(self.args)
456
458 if self.processedExpression is None:
459 return nodes.FunctionNode.flatten(self)
460 else:
461 return self.processedExpression
462
464 ufunc = self._getFunc()
465
466 unit = (ufunc.adqlUDF_unit(self.args) if callable(ufunc.adqlUDF_unit)
467 else ufunc.adqlUDF_unit)
468 ucd = (ufunc.adqlUDF_ucd(self.args) if callable(ufunc.adqlUDF_ucd)
469 else ufunc.adqlUDF_ucd)
470
471 self.fieldInfo = fieldinfo.FieldInfo(
472 ufunc.adqlUDF_returntype,
473 unit,
474 ucd)
475
476
477 tree.registerNode(UserFunction)
478