1 """
2 Code to support PQL syntax (as found in various DAL protocols).
3
4 PQL range-list syntax is
5
6 valSep ::= ","
7 rangeSep ::= "/"
8 qualSep ::= ";"
9 step ::= somethingMagicallyDefined
10 range ::= [literal] rangeSep literal | literal rangeSep
11 steppedRange ::= range [rangeSep step]
12 qualification ::= qualSep somethingMagicallyDefined
13 listItem ::= steppedRange | literal
14 rangeList ::= listItem {valSep listItem} [qualification]
15
16 This defines a regular language, and we're going to slaughter it using
17 REs and ad hoccing.
18
19 Since the actually allowed grammar depends on the type of the parameter
20 (e.g., steps make no sense for strings, and have a special grammar for
21 dates), parsing is done by the specific PQLPar types (fromLiteral). See
22 the PQLPar docstring for further info.
23 """
24
25
26
27
28
29
30
31 import datetime
32 import re
33 import urllib
34
35 from gavo import base
36 from gavo import stc
37 from gavo import utils
38 from gavo.base import literals
39 from gavo.base import sqlmunge
40 from gavo.base import typesystems
41 from gavo.utils import DEG, pgsphere
42
43
# Splits a PQL expression at its (at most one) semicolon: group 1 is the
# range list, group 2 the optional qualifier including the leading ";".
QUALIFIER_RE = re.compile("([^;]*)(;[^;]*)?$")
# Matches one comma-terminated list item; group 1 is the item without
# the comma.  Whatever follows the last match is the final list item.
LIST_RE = re.compile("([^,]*),")
# Splits a list item into up to three slash-separated parts; groups 2 and 3
# (when present) still contain their leading "/".  The pattern does not
# match if there are more than two slashes.
RANGE_RE = re.compile("([^/]*)(/[^/]*)?(/[^/]*)?$")
50 raise ValueError("Step/stride specification not allowed here.")
51
54 if not val or not val[valInd:]:
55 return None
56 else:
57 return vp(urllib.unquote(val[valInd:]))
58
61 """a representation of a PQL range.
62
63 PQLRanges have a value attribute that is non-None when there is
64 only a single value.
65
66 For ranges, there is start, stop and step, all of which may be
67 None.
68
69 The attributes contain whatever the parent's valParser (or stepParser)
70 functions return.
71 """
def __init__(self, value=None, start=None, stop=None, step=None):
    """sets up either a single value or a (possibly half-open) range.

    A ValueError is raised when a step is given although start or stop
    is missing, and when value, start, and stop are all None.
    """
    self.value = value
    self.start = start
    self.stop = stop
    self.step = step

    hasOpenEnd = start is None or stop is None
    if step is not None and hasOpenEnd:
        raise ValueError("Open intervals cannot have steps")

    if value is None and start is None and stop is None:
        raise ValueError("Doubly open intervals are not allowed")
81
88
90 return "%s(%s, %s, %s, %s)"%(self.__class__.__name__,
91 repr(self.value),
92 repr(self.start),
93 repr(self.stop),
94 repr(self.step))
95
97 if self.value is not None:
98 return urllib.quote(str(self.value))
99 else:
100 def e(v):
101 if v is None:
102 return ""
103 else:
104 return urllib.quote(str(v))
105 return "/".join(e(v) for v in (self.start, self.stop, self.step))
106
@classmethod
def fromLiteral(cls, literal, destName, valParser, stepParser):
    """creates a PQLRange from a PQL range literal.

    literal is the text of a single list item, destName is the name
    used in the LiteralParseErrors raised here.  valParser is applied to
    the value, start, and stop parts, stepParser to the step part; a
    ValueError raised while parsing or constructing the range is
    re-raised as a LiteralParseError carrying the message as hint.

    An empty literal yields a single-value range holding "".
    """
    if literal=="":
        return cls(value="")

    mat = RANGE_RE.match(literal)
    if not mat:
        # the third component is the step, not a second stop
        raise base.LiteralParseError(destName, literal,
            hint="PQL ranges roughly have the form [start][/stop[/step]]."
            " Literal slashes need to be escaped (as %2f).")
    start, stop, step = mat.groups()

    try:
        if stop is None and step is None:
            # no slash at all: this is a plain value
            return cls(value=_parsePQLValue(start, vp=valParser))
        # stop and step still carry their leading slash, hence the
        # offset of 1 passed to _parsePQLValue
        return cls(
            start=_parsePQLValue(start, vp=valParser),
            stop=_parsePQLValue(stop, 1, vp=valParser),
            step=_parsePQLValue(step, 1, vp=stepParser))
    except ValueError as ex:
        raise base.LiteralParseError("range within %s"%destName, literal,
            hint=str(ex))
134
136 """returns a set containing all values matching the PQL condition if
137 they form a discrete set or raises a ValueError if not.
138 """
139 if self.value is not None:
140 return set([self.value])
141 elif (self.step is not None \
142 and self.start is not None
143 and self.stop is not None):
144 if (self.stop-self.start)/abs(self.step)+1e-10>2000:
145 raise ValueError("Too many steps; will not check discretely")
146 res, val = set(), self.start
147 while val<=self.stop:
148 res.add(val)
149 val = val+self.step
150 return res
151 raise ValueError("No set representation for non-stepped or open ranges.")
152
def getSQL(self, colName, sqlPars, cmpExpr=None):
    """returns an SQL boolean expression for representing this constraint.

    The values needed are entered into sqlPars through base.getSQLKey,
    using colName as the base for the key names.

    cmpExpr, if given, is the expression the values are compared
    against (e.g., LOWER(colName)); when it is None, colName itself is
    used.
    """
    lhs = colName if cmpExpr is None else cmpExpr

    def addPar(val):
        return base.getSQLKey(colName, val, sqlPars)

    # a single value is a plain equality test
    if self.value is not None:
        return "%s = %%(%s)s"%(lhs, addPar(self.value))

    # if the range can be enumerated, test for membership; getValuesAsSet
    # raises a ValueError when it cannot
    try:
        return "%s IN %%(%s)s"%(lhs, addPar(self.getValuesAsSet()))
    except ValueError:
        pass

    # what is left are half-open or closed, unenumerated ranges
    hasStart = self.start is not None
    hasStop = self.stop is not None
    if hasStop and not hasStart:
        return "%s <= %%(%s)s"%(lhs, addPar(self.stop))
    if hasStart and not hasStop:
        return "%s >= %%(%s)s"%(lhs, addPar(self.start))

    assert hasStart and hasStop
    return "%s BETWEEN %%(%s)s AND %%(%s)s "%(lhs,
        addPar(self.start),
        addPar(self.stop))
187
189 """returns an SQL boolean expression for representing this constraint
190 against an upper, lower interval in the DB table.
191
192 This will silently discard any step specification.
193 """
194
195 if self.value is not None:
196 return "%%(%s)s BETWEEN %s AND %s"%(
197 base.getSQLKey("val", self.value, sqlPars),
198 lowerColName, upperColName)
199 else:
200 constraints = []
201 if self.stop is not None:
202 constraints.append("%%(%s)s>%s"%(
203 base.getSQLKey("val", self.stop, sqlPars),
204 lowerColName))
205 if self.start is not None:
206 constraints.append("%%(%s)s<%s"%(
207 base.getSQLKey("val", self.start, sqlPars),
208 upperColName))
209 return "(%s)"%" AND ".join(constraints)
210
212 """returns True if value is covered by this interval.
213
214 value must be type-true, i.e. in whatever type value, start, and stop
215 have.
216 """
217
218 if self.value is not None:
219 return value==self.value
220
221
222 try:
223 return value in self.getValuesAsSet()
224 except ValueError:
225 pass
226
227
228 covers = True
229 if self.start is not None:
230 covers &= self.start<=value
231 if self.stop is not None:
232 covers &= self.stop>=value
233 return covers
234
237 """a stand-in for PQLRange when no ranges are to be supported
238
239 It seems this is intended for string-typed values. We try to
240 be compatible with PQLRange in the relevant API aspects.
241 """
245
252
255
@classmethod
def fromLiteral(cls, literal, destName, valParser, stepParser):
    """creates an instance of cls holding the single value in literal.

    The signature is the same as PQLRange.fromLiteral's; destName and
    stepParser are accepted but not used here.  An empty literal is
    stored as "" without consulting valParser.
    """
    if literal=="":
        parsed = ""
    else:
        parsed = _parsePQLValue(literal, vp=valParser)
    return cls(value=parsed)
265
268 """a representation for PQL expressions.
269
270 PQLPar objects have an attribute qualifier (None or a string),
271 and an attribute ranges, a list of PQLRange objects.
272
As a client, you will usually construct PQLPar objects using the
274 fromLiteral class method; it takes a PQL literal and a name to be
275 used for LiteralParseErrors it may raise.
276
277 The plain PQLPar parses string ranges and does not allow steps.
278
279 Inheriting classes must override the valParser and stepParser attributes.
280 Both take a string and have to return a typed value or raise a
281 ValueError if the string does not contain a proper literal.
282 The default for valParser is str, the default for stepParser
283 a function that always raises a ValueError.
284
285 PQLPars usually support a covers(value) method that you can
286 pass a value having the required type; it will return whether or
287 not value would be picked up by the condition formulated in PQL.
288 Some advanced PQLPars do not support this method and will
289 raise a ValueError if called.
290
291 Since "PQL" is totally crazy, not even the range parser is constant.
292 It seems string ranges were never meant to be supported, and therefore
293 we support RangeClass. PQLRange allows the "/" syntax and is supposed
294 to work for most things but strings. PQLNoRange is just always a simple
295 value.
296
297 Note: valParser and stepParser must not be *methods* of the
298 class but plain functions; since they are function-like class attributes,
299 you will usually have to wrap them in staticmethods
300 """
301 nullvalue = None
302 valParser = str
303 stepParser = staticmethod(_raiseNoSteps)
304 rangeClass = PQLRange
305
def __init__(self, ranges, qualifier=None, destName=None):
    """stores the ranges, the optional qualifier, and destName on the
    new instance.
    """
    self.ranges, self.qualifier, self.destName = (
        ranges, qualifier, destName)
310
312 return (isinstance(other, PQLPar)
313 and self.qualifier==other.qualifier
314 and self.ranges==other.ranges)
315
317 res = ",".join(str(r) for r in self.ranges)
318 if self.qualifier:
319 res = res+";"+urllib.quote(self.qualifier)
320 return res
321
323 return "%s(%s)"%(self.__class__.__name__,
324 repr(str(self)))
325
326 @staticmethod
328
329
330 if val is None:
331 return None
332
333 if val==cls.nullvalue:
334 return None
335
336 mat = QUALIFIER_RE.match(val)
337 if not mat:
338 raise base.LiteralParseError(destName, val, hint="Not more than one"
339 " semicolon is allowed in PQL expressions")
340 qualifier = _parsePQLValue(mat.group(2), 1)
341
342 ranges = []
343 listLiteral = mat.group(1)
344
345 rangeMat = re.match("", listLiteral)
346 for rangeMat in LIST_RE.finditer(listLiteral):
347 try:
348 ranges.append(
349 cls.rangeClass.fromLiteral(rangeMat.group(1), destName,
350 cls.valParser, cls.stepParser))
351 except base.LiteralParseError as ex:
352 ex.pos = rangeMat.start()
353 raise
354 ranges.append(
355 cls.rangeClass.fromLiteral(listLiteral[rangeMat.end():], destName,
356 cls.valParser, cls.stepParser))
357 return cls(ranges, qualifier, destName)
358
359 @classmethod
361 """returns a parsed representation of a literal in PQL range-list syntax.
362
363 val is a string containing the PQL expression, destName is a name to
364 be used for the LiteralParseErrors the function raises when there are
365 syntax errors in val.
366 """
367 return cls._parsePQLString(cls, val, destName)
368
370 """returns a set of all values mentioned within the PQL expression.
371
372 This raises a ValueError if this is not possible (e.g., due to
373 non-stepped intervals).
374 """
375 res = set()
376 for r in self.ranges:
377 res.update(r.getValuesAsSet())
378 return res
379
def getSQL(self, colName, sqlPars, cmpExpr=None):
    """returns an SQL condition expressing this PQL constraint for colName.

    The parameters necessary are added to sqlPars.

    cmpExpr is handed through to the getSQL method of the individual
    ranges and defaults to colName; it is there for subclasses of
    PQLPar rather than for user code.
    """
    lhs = cmpExpr
    if lhs is None:
        lhs = colName

    # a lone range knows best how to express itself
    if len(self.ranges)==1:
        onlyRange = self.ranges[0]
        return onlyRange.getSQL(colName, sqlPars, cmpExpr=lhs)

    try:
        # all ranges enumerable: a single membership test will do
        return "%s IN %%(%s)s"%(lhs,
            base.getSQLKey(colName, self.getValuesAsSet(), sqlPars))
    except ValueError:
        # otherwise, or together whatever the individual ranges produce
        clauses = [r.getSQL(colName, sqlPars, cmpExpr=lhs)
            for r in self.ranges]
        return "(%s)"%" OR ".join(clauses)
401
403 """returns true if value is within the ranges specified by the PQL
404 expression.
405
406 value must be type-true, i.e., you are responsible for converting it
407 into the type the range are in.
408 """
409 for r in self.ranges:
410 if r.covers(value):
411 return True
412 return False
413
423
426 """a PQL parameter containing a date.
427
428 steps in ranges are allowed.
429
430 There's an additional complication here: in the database, dates can be
431 represented in various forms. To save the day, getSQL takes an
432 additional optional parameter and transfroms the input values as
433 appropriate before passing them to the database.
434 """
435 nullvalue = ""
436 valParser = staticmethod(literals.parseDefaultDatetime)
437
438 @staticmethod
440 return datetime.timedelta(days=float(val))
441
def getSQL(self, colName, sqlPars, convert=None):
    """returns an SQL condition expressing the PQL constraint for colName.

    In addition to the usual parameters, this accepts an additional
    argument convert saying how datetimes are represented in the
    database.  Possible values are None (timestamps, the default), mjd,
    jd, and jy; anything else results in a KeyError.
    """
    converters = {
        None: utils.identity,
        "mjd": stc.dateTimeToMJD,
        "jd": stc.dateTimeToJdn,
        "jy": stc.dateTimeToJYear,}
    converter = converters[convert]

    # remember what was in sqlPars before so only the values added
    # by the generic getSQL are converted
    knownKeys = set(sqlPars.keys())
    res = PQLPar.getSQL(self, colName, sqlPars)

    if converter:
        for key in sqlPars:
            if key in knownKeys or sqlPars[key] is None:
                continue
            sqlPars[key] = converter(sqlPars[key])
    return res
466
469 """a PQL position parameter, as for SSA.
470
471 Cones and intervals or real lists do not mix; we support STC-S
472 identifiers as qualifiers.
473
474 The literals here are basically two-float lists.
475 """
476 valParser = float
477 nullvalue = ""
478
479 @classmethod
481
482
483 if val is not None:
484 val = val.upper().replace("%2C", ",")
485 return cls._parsePQLString(cls, val, destName)
486
487 - def getSQL(self, colName, sqlPars):
489
def getConeSQL(self, colName, sqlPars, coneSize):
    """returns an SQL condition matching colName against cones around
    the positions given in this parameter.

    The ranges are taken pairwise as the two coordinates of a cone
    center; the resulting scircle containment tests are ored together.
    coneSize is multiplied with DEG before being entered into sqlPars
    (presumably it is given in degrees -- confirm against callers).

    A base.ValidationError is raised for qualifiers other than ICRS,
    for an odd number of list items, and for items that are ranges
    rather than single values.
    """
    frame = self.qualifier
    if frame and frame!='ICRS':
        raise base.ValidationError("Cannot match against coordinates"
            " given in %s frame"%frame, self.destName)

    # the size key is registered before any further validation
    sizeName = base.getSQLKey("size", coneSize*DEG, sqlPars)
    if len(self.ranges)%2:
        raise base.ValidationError("PQL position values must be lists of"
            " length divisible by 2.", self.destName)

    parts = []
    for first, second in zip(self.ranges[0::2], self.ranges[1::2]):
        for r in (first, second):
            if r.value is None:
                raise base.ValidationError(
                    "Ranges are not allowed as cone centers",
                    self.destName)
        posKey = base.getSQLKey("pos",
            pgsphere.SPoint.fromDegrees(first.value, second.value),
            sqlPars)
        parts.append("%s <@ scircle(%%(%s)s, %%(%s)s)"%(colName,
            posKey, sizeName))
    return "(%s)"%" OR ".join(parts)
514
516 raise ValueError("%s do not support PQL covers yet. Complain."
517 " This is fairly easy to fix."%self.__class__.__name__)
518
521 """a PQL float parameter.
522
523 This has a special getSQLForInterval method for cases like SSA's
524 BAND.
525 """
526 valParser = float
527 nullvalue = ""
528
530 """returns an SQL phrase against an interval in a table.
531 """
532 if len(self.ranges)==1:
533 return self.ranges[0].getSQLForInterval(
534 lowerColName, upperColName, sqlPars)
535 else:
536 return "(%s)"%" OR ".join(
537 r.getSQLForInterval(lowerColName, upperColName, sqlPars)
538 for r in self.ranges)
539
542 """a PQL string parameter that's compared with case folding.
543
544 Don't count on case folding to work outside of ASCII.
545 """
546 valParser = staticmethod(lambda val: val and val.lower())
547
def getSQL(self, colName, sqlPars, cmpExpr=None):
    """returns the condition of PQLPar.getSQL with the column wrapped in
    LOWER().

    Any cmpExpr passed in is ignored.
    """
    loweredColumn = "LOWER(%s)"%colName
    return PQLPar.getSQL(self, colName, sqlPars, loweredColumn)
552
557
560 """a PQL shell pattern parameter.
561
562 These are posix shell patterns, where no PQL metacharacters are evaluated
563 at all.
564 """
565 _reOperator = "~"
566
567 @classmethod
569 if val is None:
570 return None
571 val = getREForShPat(val)
572 return cls([cls.rangeClass(val)])
573
def getSQL(self, colName, sqlPars):
    """returns an RE-based query equivalent to the input shell pattern.

    colName is matched against the regular expression held in the
    first range using the class's _reOperator; the expression itself is
    entered into sqlPars.
    """
    # compare against the column the caller asked for rather than a
    # hard-coded ssa_targname
    patternKey = base.getSQLKey(colName, self.ranges[0].value, sqlPars)
    return "%s %s %%(%s)s"%(colName, self._reOperator, patternKey)
579
581 raise ValueError("%s do not support PQL covers yet. Complain."
582 " This is easy to fix."%self.__class__.__name__)
583
586 """a shell-pattern matching parameter, ignoring case.
587 """
588 _reOperator = "~*"
589
592 """a PQL normal string parameter.
593
594 "normal" means that range expressions are not supported.
595 """
596 rangeClass = PQLNoRange
597
598
class PQLTextParIR(PQLPar):
    """a PQL string parameter matching "google-like", "Information Retrieval".

    The input values and the database column are compared as document
    vectors using the database's full text search.  Correspondingly,
    ranges are disallowed.
    """
    nullvalue = ""
    rangeClass = PQLNoRange

    def getSQL(self, colName, sqlPars):
        """returns a condition that is true if colName matches any of the
        values given as an english text search query.

        Each value is entered into sqlPars under a key of its own.
        """
        keys = [base.getSQLKey(colName, doc, sqlPars)
            for doc in self.getValuesAsSet()]

        clauses = []
        for keyName in keys:
            clauses.append(
                "to_tsvector('english', %s) @@ plainto_tsquery('english', %%(%s)s)"%(
                    colName,
                    keyName))
        return "(%s)"%" OR ".join(clauses)

    def covers(self, value):
        """always raises a ValueError, as text search matching is not
        supported here.
        """
        className = self.__class__.__name__
        raise ValueError("%s do not support PQL covers."%className)
622
626 """a pyparsing handler for transforming shell character enumerations to
627 pcre character enumerations.
628
629 (this is a helper for _getShPatGrammar)
630 """
631 seq = "".join(t)
632
633
634 negate = seq.startswith("!")
635 if negate:
636 seq = seq[1:]
637 seq = seq.replace("]", "\\]"
638 ).replace("\\", "\\\\"
639 ).replace("-", "\\-")
640
641 if negate:
642 return "[^%s]"%seq
643 else:
644 return "[%s]"%seq
645
649 """returns a grammar to translate posix shell patterns to posix regular
650 expressions.
651
652 This is different from fnmatch.translate in that it handles escaping
653 correctly.
654 """
655 from pyparsing import (
656 Literal, Regex, CharsNotIn, ZeroOrMore, QuotedString)
657
658 with utils.pyparsingWhitechars(""):
659 enumChars = QuotedString(quoteChar="[", endQuoteChar="]", escChar="\\"
660 ).addParseAction(_mungeEnumSequence)
661 noEnum = Literal("[").addParseAction(lambda s, p, t: "\\[")
662 star = Literal("*").addParseAction(lambda s, p, t: ".*")
663 questionmark = Literal("?").addParseAction(lambda s, p, t: ".")
664 escSeq = Regex(r"\\(.)").addParseAction(lambda s, p, t: re.escape(t[0][1]))
665 normalStuff = CharsNotIn(r"*?[\\").addParseAction(lambda s, p, t:
666 re.escape("".join(t)))
667 shPat = ZeroOrMore(escSeq | enumChars | noEnum
668 | star | questionmark | normalStuff)
669 return shPat
670
673 r"""returns a POSIX RE for a POSIX shell pattern.
674
675 >>> getREForShPat(r"ZU?\*[!A-Z]*")
676 'ZU.\\*[^A\\-Z].*'
677 >>> getREForShPat("no[*")
678 'no\\[.*'
679 """
680 return "".join(utils.pyparseString(_getShPatGrammar(), shPat, parseAll=True))
681
693 return factory
694
695
# Make the PQL parameter classes known to sqlmunge under the type names
# that simpleMap in the PQL type converter maps SQL types to.  What
# the registered factories do is defined by _makeFactory.
sqlmunge.registerSQLFactory("pql-int", _makeFactory(PQLIntPar))
sqlmunge.registerSQLFactory("pql-float", _makeFactory(PQLFloatPar))
sqlmunge.registerSQLFactory("pql-string", _makeFactory(PQLPar))
sqlmunge.registerSQLFactory("pql-date", _makeFactory(PQLDatePar))
703 typeSystem = "pqlexpr"
704 simpleMap = {
705 "smallint": "pql-int",
706 "integer": "pql-int",
707 "int": "pql-int",
708 "bigint": "pql-int",
709 "real": "pql-float",
710 "float": "pql-float",
711 "double precision": "pql-float",
712 "double": "pql-float",
713 "text": "pql-string",
714 "char": "pql-string",
715 "date": "pql-date",
716 "timestamp": "pql-date",
717 "pql-date": "pql-date",
718 "pql-float": "pql-float",
719 "pql-string": "pql-string",
720 }
721
723 if sqlType=="char":
724 return "pql-string"
725 if sqlType=="varchar":
726 return "pql-string"
727
728
729 getPQLTypeFor = ToPQLTypeConverter().convert
745
746
747
748 import sys
749 from gavo import rscdef
750 rscdef.addProcDefObject("pql", sys.modules[__name__])
754 import pql, doctest
755 doctest.testmod(pql)
756
757 if __name__=="__main__":
758 _test()
759