1 """
2 Classes and methods to support vizier-type specifications on fields.
3 """
4
5
6
7
8
9
10
11 from __future__ import print_function
12
13 import datetime
14 import re
15
16 from pyparsing import (Word, Literal, Optional, Forward,
17 ZeroOrMore, Suppress, ParseException, StringEnd, Regex,
18 OneOrMore, CharsNotIn)
19
20 from gavo import base
21 from gavo import stc
22 from gavo import utils
23 from gavo.base import literals
24 from gavo.base import sqlmunge
25 from gavo.base import typesystems
26
27
29 """is a parse node, consisting of an operator and children.
30
31 The parse trees returned by the various parse functions are built from
32 these.
33
34 This is an abstract class; concrete derivations need to define
35 a set _standardOperators containing the normal binary operators
36 for their types and a dictionary _sqlEmitters containing functions
37 returning SQL fragments, or override asSQL.
38 """
40 self.children = children
41 self.operator = operator
42
44 return "(%s %s)"%(self.operator, " ".join([str(c) for c in self.children]))
45
47 return "(%r %r)"%(self.operator, " ".join([str(c) for c in self.children]))
48
50 """wraps base.getSQLKey and can be overridden in case operand
51 mangling is necessary.
52
53 Do not call getSQLKey directly from within ParseNodes.
54 """
55 return base.getSQLKey(name, item, sqlPars)
56
58 """inserts children[index] into sqlPars with a unique key and returns
59 the key.
60
61 children[index] must be atomic (i.e., no ParseNode).
62 """
63 item = self.children[index]
64 if item is None:
65 return None
66 assert not isinstance(item, ParseNode)
67 if field.scaling:
68 item *= field.scaling
69 return self._getSQLKey(field.name, item, sqlPars)
70
def asSQL(self, field, sqlPars):
    """returns an SQL fragment expressing this node's condition on field.

    For operators in self._standardOperators, the first child is registered
    in sqlPars through _insertChild and the result is a plain
    "<column> <operator> %(<key>)s" comparison.  All other operators
    are handed to the function stored for them in self._sqlEmitters, which
    is called with (self, field, sqlPars).

    An operator found in neither container raises a KeyError.
    """
    if self.operator in self._standardOperators:
        key = self._insertChild(0, field, sqlPars)
        return "%s %s %%(%s)s"%(field.name, self.operator, key)
    return self._sqlEmitters[self.operator](self, field, sqlPars)
77
78
80 """A node containing numeric operands (floats or dates).
81 """
85
87 operand = self.children[0].asSQL(field, sqlPars)
88 if operand:
89 return "%s (%s)"%(self.operator, operand)
90
92 return "%s IN (%s)"%(field.name, ", ".join([
93 "%%(%s)s"%self._insertChild(i, field, sqlPars)
94 for i in range(len(self.children))]))
95
96 _standardOperators = set(["=", ">=", ">", "<=", "<"])
97 _sqlEmitters = {
98 '..': lambda self, field, sqlPars: "%s BETWEEN %%(%s)s AND %%(%s)s"%(
99 field.name, self._insertChild(0, field, sqlPars),
100 self._insertChild(1, field, sqlPars)),
101 'AND': _emitBinop,
102 'OR': _emitBinop,
103 'NOT': _emitUnop,
104 ',': _emitEnum,
105 }
106
107
109 """A node containing date operands (datetime objects, as a rule).
110
111 As an extension to VizieR, we interpret floats as well, where
112 1000 .. 3000 is a julian year, 10000 ... 100000 is an MJD and
113 2000000 .. 4000000 is a JD.
114 """
116
117
118
119 if arg.hour==arg.minute==arg.second==0:
120 return arg.replace(hour=23,
121 minute=59, second=59)
122 return arg
123
127
129 operand = self.children[0].asSQL(field, sqlPars)
130 if operand:
131 return "%s (%s)"%(self.operator, operand)
132
134 return "%s BETWEEN %%(%s)s AND %%(%s)s"%(
135 field.name,
136 self._insertChild(0, field, sqlPars),
137 self._insertChild(1, field, sqlPars))
138
143
145
146 return "%s %s %%(%s)s"%(field.name, self.operator,
147 self._insertChild(0, field, sqlPars))
148
150 if self.children[0].hour==self.children[0].minute==0:
151 return "%s BETWEEN %%(%s)s AND %%(%s)s"%(
152 field.name,
153 self._getSQLKey(field.name, self.children[0], sqlPars),
154 self._getSQLKey(field.name,
155 self._expandDate(self.children[0]), sqlPars))
156
157 else:
158 return self._emitSimple(field, sqlPars)
159
161
162 self.children[0] = self._expandDate(self.children[0])
163 return "%s %s %%(%s)s"%(field.name, self.operator,
164 self._insertChild(0, field, sqlPars))
165
def asSQL(self, field, sqlPars):
    """returns an SQL fragment expressing this node's condition on field.

    Every operator, including the plain comparison operators, is
    dispatched through self._sqlEmitters; the emitter is called with
    (self, field, sqlPars).  An operator without an emitter raises a
    KeyError.
    """
    emitter = self._sqlEmitters[self.operator]
    return emitter(self, field, sqlPars)
168
# Maps each operator to the function producing its SQL; asSQL looks
# operators up here and calls the result with (self, field, sqlPars).
# "<=" used to be listed twice with the same emitter; the redundant
# entry is gone, the resulting mapping is unchanged.
_sqlEmitters = {
    '..': _emitRange,
    'AND': _emitBinop,
    'OR': _emitBinop,
    'NOT': _emitUnop,
    ',': _emitEnum,
    "=": _emitEqual,
    ">=": _emitSimple,
    "<=": _emitExpanded,
    "<": _emitSimple,
    ">": _emitExpanded,
}
182
183
190
191
193 """A node containing string operands.
194 """
def asSQL(self, field, sqlPars):
    """returns the SQL (or regular expression) fragment for this node.

    Two operators are answered right here without touching sqlPars:

    * "[" yields its first child wrapped in square brackets,
    * operators in self._nullOperators yield the fragment stored for them.

    Everything else is left to the inherited asSQL.
    """
    if self.operator=="[":
        return "[%s]"%self.children[0]
    if self.operator in self._nullOperators:
        return self._nullOperators[self.operator]
    return super(StringNode, self).asSQL(field, sqlPars)
202
203 _metaEscapes = {
204 "|": r"\|",
205 "*": r"\*",
206 "+": r"\+",
207 "(": r"\(",
208 ")": r"\)",
209 "[": r"\[",
210 "%": r"\%",
211 "_": r"\_",
212 "\\\\": "\\\\",
213 }
214 _escapeRE = re.compile("[%s]"%"".join(_metaEscapes.keys()))
215
216
217 _metaEscapes.update({"\\": "\\\\"})
218
220 """returns aString with SQL RE metacharacters escaped.
221 """
222 return self._escapeRE.sub(lambda mat: self._metaEscapes[mat.group()],
223 aString)
224
226 parts = []
227 for child in self.children:
228 if isinstance(child, basestring):
229 parts.append(self._escapeSpecials(child))
230 else:
231 parts.append(child.asSQL(field, sqlPars))
232 return "^%s$"%("".join(parts))
233
234 _patOps = {
235 "~": "~*",
236 "=": "~",
237 "!~": "!~*",
238 "!": "!~",
239 "=~": "~*",
240 }
242 pattern = self._makePattern(field, sqlPars)
243 return "%s %s %%(%s)s"%(field.name, self._patOps[self.operator],
244 self._getSQLKey(field.name, pattern, sqlPars))
245
247 query = "%s IN (%s)"%(field.name, ", ".join([
248 "%%(%s)s"%self._insertChild(i, field, sqlPars)
249 for i in range(len(self.children))]))
250 if self.operator=="!=,":
251 query = "NOT (%s)"%query
252 return query
253
254 _translatedOps = {
255 "==": "=",
256 }
258 return "%s = %%(%s)s"%(field.name,
259 self._insertChild(0, field, sqlPars))
260
261 _nullOperators = {"*": ".*", "?": "."}
262 _standardOperators = set(["<", ">", "<=", ">=", "!="])
263 _sqlEmitters = {
264 "~": _emitPatOp,
265 "=": _emitPatOp,
266 "!~": _emitPatOp,
267 "!": _emitPatOp,
268 "=~": _emitPatOp,
269 "=,": _emitEnum,
270 "=|": _emitEnum,
271 "!=,": _emitEnum,
272 "==": _emitTranslatedOp,
273 }
274
275
277 def _(s, loc, toks):
278 return nodeClass(toks, op)
279 return _
280
281
283 def _makeNotNode(s, loc, toks):
284 if len(toks)==1:
285 return toks[0]
286 elif len(toks)==2:
287 return nodeClass(toks[1:], "NOT")
288 else:
289 raise Exception("Busted by not")
290 return _makeNotNode
291
292
294 return NumericNode([toks[0]-toks[1], toks[0]+toks[1]], "..")
295
296
298 """returns a +/- node for dates, i.e., toks[1] is a float in days.
299 """
300 days = datetime.timedelta(days=toks[1])
301 return DateNode([toks[0]-days, toks[0]+days], "..")
302
304 """returns a +/- node for MJDs, i.e., toks[1] is a float in days, and
305 an MJDNode must be returned.
306 """
307 days = datetime.timedelta(days=toks[1])
308 return MJDNode([toks[0]-days, toks[0]+days], "..")
309
310
311
313 def _(s, loc, toks):
314 if len(toks)==1:
315 return toks[0]
316 else:
317 return nodeClass(toks, op)
318 return _
319
320
322 def _makeSimpleExprNode(s, loc, toks):
323 if len(toks)==1:
324 return nodeClass(toks[0:], "=")
325 else:
326 return nodeClass(toks[1:], toks[0])
327 return _makeSimpleExprNode
328
329
332 """returns the root element of a grammar parsing numeric vizier-like
333 expressions.
334
335 This is used for both dates and floats, use baseLiteral to match the
336 operand terminal. The trouble with dates is that the +/- operator
337 has a simple float as the second operand, and that's why you can
338 pass in an errorLiteral and a pmBuilder.
339 """
340 if errorLiteral is None:
341 errorLiteral = baseLiteral
342
343 with utils.pyparsingWhitechars(" \t"):
344 preOp = Literal("=") | Literal(">=") | Literal(">"
345 ) | Literal("<=") | Literal("<")
346 rangeOp = Literal("..")
347 pmOp = Literal("+/-") | Literal("\xb1".decode("iso-8859-1"))
348 orOp = Literal("|")
349 andOp = Literal("&")
350 notOp = Literal("!")
351 commaOp = Literal(",")
352
353 preopExpr = Optional(preOp) + baseLiteral
354 rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral
355 valList = baseLiteral + OneOrMore( Suppress(commaOp) + baseLiteral)
356 pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral
357 simpleExpr = rangeExpr | pmExpr | valList | preopExpr
358
359 expr = Forward()
360
361 notExpr = Optional(notOp) + simpleExpr
362 andExpr = notExpr + ZeroOrMore( Suppress(andOp) + notExpr )
363 orExpr = andExpr + ZeroOrMore( Suppress(orOp) + expr)
364 expr << orExpr
365 exprInString = expr + StringEnd()
366
367 rangeExpr.setName("rangeEx")
368 rangeOp.setName("rangeOp")
369 notExpr.setName("notEx")
370 andExpr.setName("andEx")
371 andOp.setName("&")
372 orExpr.setName("orEx")
373 expr.setName("expr")
374 simpleExpr.setName("simpleEx")
375
376 preopExpr.addParseAction(_simpleExprFactory(nodeClass))
377 rangeExpr.addParseAction(_getNodeFactory("..", nodeClass))
378 pmExpr.addParseAction(pmBuilder)
379 valList.addParseAction(_getNodeFactory(",", nodeClass))
380 notExpr.addParseAction(_makeNotNodeFactory(nodeClass))
381 andExpr.addParseAction(_getBinopFactory("AND", nodeClass))
382 orExpr.addParseAction(_getBinopFactory("OR", nodeClass))
383
384 return exprInString
385
386
388
389
def parseFloat(s, loc, tok):
    """returns the number in tok[0], as an int if the literal is a valid
    int literal and as a float otherwise.

    This is a parse action; s and loc are not used.  A literal that is
    not a valid float either raises a ValueError.
    """
    try:
        return int(tok[0])
    except ValueError:
        return float(tok[0])
394
395
def parseDateTime(s, loc, tok):
    """returns a datetime from a date/time spec.

    This can be an ISO string or a julian year, JD, or MJD by
    heuristics (see DateNode for details).

    This is a parse action; s and loc are not used, and tok must contain
    exactly one token.  Tokens that parse as floats are interpreted by
    magnitude:

    * 1000 .. 3000 -- julian year
    * 1e4 .. 1e5 -- MJD
    * 2e6 .. 4e6 -- JD

    and the result is rounded to seconds; floats outside of these ranges
    raise a base.ParseException.  Tokens that are not floats are passed
    to literals.parseDefaultDatetime.
    """
    assert len(tok)==1
    try:
        floatVal = float(tok[0])
        if 1000<=floatVal<=3000:
            return utils.roundToSeconds(stc.jYearToDateTime(floatVal))
        elif 10000<=floatVal<=100000:
            return utils.roundToSeconds(stc.mjdToDateTime(floatVal))
        elif 2e6<=floatVal<=4e6:
            return utils.roundToSeconds(stc.jdnToDateTime(floatVal))
        else:
            # the message used to announce 1000..2000 for years although
            # everything up to 3000 is accepted above.
            raise base.ParseException("Floats as dates must be "
                " credible years (1000..3000), MJDs (1e4 .. 1e5), or"
                " JDs (2e6 .. 4e6)")
    except ValueError:
        # not a float at all, so it should be an ISO-like literal
        return literals.parseDefaultDatetime(tok[0])
417
418
419 floatLiteral = Regex(utils.floatRE).addParseAction(parseFloat)
420
421 _DATE_REGEX = r"\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\d)?|(\d+(\.\d*)?)"
422 _DATE_LITERAL_DT = Regex(_DATE_REGEX).addParseAction(parseDateTime)
423
424
427 """returns a parse tree for vizier-like expressions over floats.
428 """
429 return utils.pyparseString(baseSymbol, str)[0]
430
431
434 """returns a parse tree for vizier-like expressions over ISO dates.
435
436 Note that the semantic validity of the date (like, month<13) is not
437 checked by the grammar.
438 """
439 return utils.pyparseString(baseSymbol, str)[0]
440
441
444 """returns a parse tree for vizier-like expression of ISO dates with
445 parsed values in MJD.
446 """
447 return utils.pyparseString(baseSymbol, str)[0]
448
449
452
453
455 """returns a grammar for parsing vizier-like string expressions.
456 """
457
458 with utils.pyparsingWhitechars(" \t"):
459 simpleOperator = Literal("==") | Literal("!=") | Literal(">=") |\
460 Literal(">") | Literal("<=") | Literal("<") | Literal("=~") |\
461 Literal("=,")
462 simpleOperand = Regex(r"[^\s].*|")
463
464 White = Word(" \t")
465 simpleExpr = simpleOperator + Optional( White ) + simpleOperand
466
467 commaOperand = Regex("[^,]+")
468 barOperand = Regex("[^|]+")
469 commaEnum = Literal("=,") + commaOperand + ZeroOrMore(
470 Suppress(",") + commaOperand)
471 exclusionEnum = Literal("!=,") + commaOperand + ZeroOrMore(
472 Suppress(",") + commaOperand)
473 barEnum = Literal("=|") + barOperand + ZeroOrMore(
474 Suppress("|") + barOperand)
475 enumExpr = exclusionEnum | commaEnum | barEnum
476
477 patLiterals = CharsNotIn("[*?")
478 wildStar = Literal("*")
479 wildQmark = Literal("?")
480 setElems = CharsNotIn("]")
481 setSpec = Suppress("[") + setElems + Suppress("]")
482 pattern = OneOrMore(setSpec | wildStar | wildQmark | patLiterals)
483
484 patternOperator = Literal("~") | Literal("=") | Literal("!~") |\
485 Literal("!")
486 patternExpr = patternOperator + Optional( White ) + pattern
487 nakedExpr = Regex("[^=!~|><]") + Optional( simpleOperand )
488
489 stringExpr = enumExpr | simpleExpr | patternExpr | nakedExpr
490
491 doc = stringExpr + StringEnd()
492
493 stringExpr.setName("StringExpr")
494 enumExpr.setName("EnumExpr")
495 simpleOperand.setName("Operand")
496 simpleOperator.setName("Operator")
497 nakedExpr.setName("SingleOperand")
498
499 debug = False
500 stringExpr.setDebug(debug)
501 enumExpr.setDebug(debug)
502 patLiterals.setDebug(debug)
503 simpleOperand.setDebug(debug)
504 simpleOperator.setDebug(debug)
505 nakedExpr.setDebug(debug)
506
507 simpleExpr.addParseAction(_makeOpNode)
508 patternExpr.addParseAction(_makeOpNode)
509 enumExpr.addParseAction(_makeOpNode)
510 makeDefaultExpr = _getNodeFactory("==", StringNode)
511 nakedExpr.addParseAction(lambda s,p,toks: makeDefaultExpr(s,p,
512 ["".join(toks)]))
513 wildStar.addParseAction(_makeOpNode)
514 wildQmark.addParseAction(_makeOpNode)
515 setElems.addParseAction(_getNodeFactory("[", StringNode))
516
517 return doc
518
519
522
523
525 def factory(field, val, sqlPars):
526 try:
527 return parser(val).asSQL(field, sqlPars)
528 except ParseException:
529 raise base.ui.logOldExc(utils.ValidationError(
530 "Invalid input for type %s (see help for valid type literals)"%
531 field.type, field.name))
532 return factory
533
534
535 sqlmunge.registerSQLFactory("vexpr-float",
536 _makeFactory(parseNumericExpr))
537 sqlmunge.registerSQLFactory("vexpr-date",
538 _makeFactory(parseDateExpr))
539 sqlmunge.registerSQLFactory("vexpr-mjd",
540 _makeFactory(parseDateExprToMJD))
541 sqlmunge.registerSQLFactory("vexpr-string",
542 _makeFactory(parseStringExpr))
543
544
546 typeSystem = "vizierexpr"
547 simpleMap = {
548 "smallint": "vexpr-float",
549 "integer": "vexpr-float",
550 "int": "vexpr-float",
551 "bigint": "vexpr-float",
552 "real": "vexpr-float",
553 "float": "vexpr-float",
554 "double precision": "vexpr-float",
555 "double": "vexpr-float",
556 "text": "vexpr-string",
557 "unicode": "vexpr-string",
558 "char": "vexpr-string",
559 "date": "vexpr-date",
560 "timestamp": "vexpr-date",
561 "vexpr-date": "vexpr-date",
562 "vexpr-float": "vexpr-float",
563 "vexpr-string": "vexpr-string",
564 }
565
567 if sqlType=="char":
568 return "vexpr-string"
569 if sqlType=="varchar":
570 return "vexpr-string"
571
572 getVexprFor = ToVexprConverter().convert
573
574
586
587
607
608
610 """returns a placeholder (suggested input) for inputKey, where values
611 is the original values element.
612
613 This will currently be None unless we do a numeric input.
614 """
615 if not values:
616 return
617
618 if inputKey.type=="vexpr-float":
619 scaling = inputKey.scaling or 1
620 return format_placeholder(values.min, values.max,
621 lambda val: "%s"%(val/scaling))
622
623 elif inputKey.type=="vexpr-mjd":
624
625 return format_placeholder(values.min, values.max,
626 lambda val: "%s"%utils.formatISODT(stc.mjdToDateTime(val)))
627
628 elif inputKey.type=="vexpr-date":
629
630 return format_placeholder(values.min, values.max,
631 lambda val: "%s"%utils.formatISODT(val))
632
633
634
635
672
673
677
678
679 if __name__=="__main__":
680 print(repr(parseDateExpr("")))
681