1  """ 
  2  Classes and methods to support vizier-type specifications on fields. 
  3  """ 
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11  from __future__ import print_function 
 12   
 13  import datetime 
 14  import re 
 15   
 16  from pyparsing import (Word, Literal, Optional, Forward, 
 17          ZeroOrMore, Suppress, ParseException, StringEnd, Regex, 
 18          OneOrMore, CharsNotIn) 
 19   
 20  from gavo import base 
 21  from gavo import stc 
 22  from gavo import utils 
 23  from gavo.base import literals 
 24  from gavo.base import sqlmunge 
 25  from gavo.base import typesystems 
 26   
 27   
 29          """is a parse node, consisting of an operator and children. 
 30   
 31          The parse trees returned by the various parse functions are built from 
 32          these. 
 33   
 34          This is an abstract class; concrete derivations need to define 
 35          a set _standardOperators containing the normal binary operators 
 36          for their types and a dictionary _sqlEmitters containing functions 
 37          returning SQL fragments, or override asSQL. 
 38          """ 
 40                  self.children = children 
 41                  self.operator = operator 
  42           
 44                  return "(%s %s)"%(self.operator, " ".join([str(c) for c in self.children])) 
  45   
 47                  return "(%r %r)"%(self.operator, " ".join([str(c) for c in self.children])) 
  48   
 50                  """wraps base.getSQLKey and can be overridden in case operand  
 51                  mangling is necessary. 
 52   
 53                  Do not call getSQLKey directly from within ParseNodes. 
 54                  """ 
 55                  return base.getSQLKey(name, item, sqlPars) 
  56   
 58                  """inserts children[index] into sqlPars with a unique key and returns 
 59                  the key. 
 60   
 61                  children[index] must be atomic (i.e., no ParseNode). 
 62                  """ 
 63                  item = self.children[index] 
 64                  if item is None: 
 65                          return None 
 66                  assert not isinstance(item, ParseNode) 
 67                  if field.scaling: 
 68                          item *= field.scaling 
 69                  return self._getSQLKey(field.name, item, sqlPars) 
  70   
 71 -        def asSQL(self, field, sqlPars): 
  72                  if self.operator in self._standardOperators: 
 73                          return "%s %s %%(%s)s"%(field.name, self.operator,  
 74                                  self._insertChild(0, field, sqlPars)) 
 75                  else: 
 76                          return self._sqlEmitters[self.operator](self, field, sqlPars) 
   77   
 78   
 80          """A node containing numeric operands (floats or dates). 
 81          """ 
 85                   
 87                  operand = self.children[0].asSQL(field, sqlPars) 
 88                  if operand: 
 89                          return "%s (%s)"%(self.operator, operand) 
  90   
 92                  return "%s IN (%s)"%(field.name, ", ".join([ 
 93                                          "%%(%s)s"%self._insertChild(i, field, sqlPars)  
 94                                  for i in range(len(self.children))])) 
  95   
 96          _standardOperators = set(["=", ">=", ">", "<=", "<"]) 
 97          _sqlEmitters = { 
 98                  '..': lambda self, field, sqlPars: "%s BETWEEN %%(%s)s AND %%(%s)s"%( 
 99                          field.name, self._insertChild(0, field, sqlPars),  
100                          self._insertChild(1, field, sqlPars)), 
101                  'AND': _emitBinop, 
102                  'OR': _emitBinop, 
103                  'NOT': _emitUnop, 
104                  ',': _emitEnum, 
105          } 
 106   
107   
109          """A node containing date operands (datetime objects, as a rule). 
110   
111          As an extension to VizieR, we interpret floats as well, where  
112          1000 .. 3000 is a julian year, 10000 ... 100000 is an MJD and 
113          2000000 .. 4000000 is a JD. 
114          """ 
116                   
117                   
118                   
119                  if arg.hour==arg.minute==arg.second==0: 
120                          return arg.replace(hour=23,  
121                                  minute=59, second=59) 
122                  return arg 
 123   
127   
129                  operand = self.children[0].asSQL(field, sqlPars) 
130                  if operand: 
131                          return "%s (%s)"%(self.operator, operand) 
 132   
134                  return "%s BETWEEN %%(%s)s AND %%(%s)s"%( 
135                          field.name,  
136                          self._insertChild(0, field, sqlPars),  
137                          self._insertChild(1, field, sqlPars)) 
 138   
143   
145                   
146                  return "%s %s %%(%s)s"%(field.name, self.operator,  
147                          self._insertChild(0, field, sqlPars)) 
 148   
150                  if self.children[0].hour==self.children[0].minute==0: 
151                          return "%s BETWEEN %%(%s)s AND %%(%s)s"%( 
152                                  field.name,  
153                                  self._getSQLKey(field.name, self.children[0], sqlPars), 
154                                  self._getSQLKey(field.name,  
155                                          self._expandDate(self.children[0]), sqlPars)) 
156   
157                  else: 
158                          return self._emitSimple(field, sqlPars) 
 159   
161                   
162                  self.children[0] = self._expandDate(self.children[0]) 
163                  return "%s %s %%(%s)s"%(field.name, self.operator,  
164                          self._insertChild(0, field, sqlPars)) 
 165   
166 -        def asSQL(self, field, sqlPars): 
 167                  return self._sqlEmitters[self.operator](self, field, sqlPars) 
 168   
169          _sqlEmitters = { 
170                  '..': _emitRange, 
171                  'AND': _emitBinop, 
172                  'OR': _emitBinop, 
173                  'NOT': _emitUnop, 
174                  ',': _emitEnum, 
175                  "=": _emitEqual, 
176                  ">=": _emitSimple, 
177                  "<=": _emitExpanded, 
178                  "<": _emitSimple, 
179                  ">": _emitExpanded, 
180                  "<=": _emitExpanded, 
181          } 
 182   
183   
190   
191   
193          """A node containing string operands. 
194          """ 
195 -        def asSQL(self, field, sqlPars): 
 196                  if self.operator=="[": 
197                          return "[%s]"%self.children[0] 
198                  if self.operator in self._nullOperators: 
199                          return self._nullOperators[self.operator] 
200                  else: 
201                          return super(StringNode, self).asSQL(field, sqlPars) 
 202   
203          _metaEscapes = { 
204                  "|": r"\|", 
205                  "*": r"\*", 
206                  "+": r"\+", 
207                  "(": r"\(", 
208                  ")": r"\)", 
209                  "[": r"\[", 
210                  "%": r"\%", 
211                  "_": r"\_", 
212                  "\\\\": "\\\\", 
213          } 
214          _escapeRE = re.compile("[%s]"%"".join(_metaEscapes.keys())) 
215           
216           
217          _metaEscapes.update({"\\": "\\\\"}) 
218   
220                  """returns aString with SQL RE metacharacters escaped. 
221                  """ 
222                  return self._escapeRE.sub(lambda mat: self._metaEscapes[mat.group()], 
223                          aString) 
 224   
226                  parts = [] 
227                  for child in self.children: 
228                          if isinstance(child, basestring): 
229                                  parts.append(self._escapeSpecials(child)) 
230                          else: 
231                                  parts.append(child.asSQL(field, sqlPars)) 
232                  return "^%s$"%("".join(parts)) 
 233   
234          _patOps = { 
235                  "~": "~*", 
236                  "=": "~", 
237                  "!~": "!~*", 
238                  "!": "!~", 
239                  "=~": "~*", 
240          } 
242                  pattern = self._makePattern(field, sqlPars) 
243                  return "%s %s %%(%s)s"%(field.name, self._patOps[self.operator], 
244                          self._getSQLKey(field.name, pattern, sqlPars)) 
 245   
247                  query = "%s IN (%s)"%(field.name, ", ".join([ 
248                                          "%%(%s)s"%self._insertChild(i, field, sqlPars)  
249                                  for i in range(len(self.children))])) 
250                  if self.operator=="!=,": 
251                          query = "NOT (%s)"%query 
252                  return query 
 253   
254          _translatedOps = { 
255                  "==": "=", 
256          } 
258                  return "%s = %%(%s)s"%(field.name,  
259                          self._insertChild(0, field, sqlPars)) 
 260   
261          _nullOperators = {"*": ".*", "?": "."} 
262          _standardOperators = set(["<", ">", "<=", ">=", "!="]) 
263          _sqlEmitters = { 
264                  "~": _emitPatOp, 
265                  "=": _emitPatOp, 
266                  "!~": _emitPatOp, 
267                  "!": _emitPatOp, 
268                  "=~": _emitPatOp,   
269                  "=,": _emitEnum, 
270                  "=|": _emitEnum, 
271                  "!=,": _emitEnum, 
272                  "==": _emitTranslatedOp, 
273                  } 
 274   
275   
def _getNodeFactory(op, nodeClass):
        """returns a parse action turning all matched tokens into the
        children of a nodeClass instance with the operator op.

        The returned function has the (s, loc, toks) signature of
        pyparsing parse actions; s and loc are ignored.
        """
        def _(s, loc, toks):
                return nodeClass(toks, op)
        return _
280   
281   
def _makeNotNodeFactory(nodeClass):
        """returns a parse action for expressions with an optional leading
        not operator.

        With a single token (no not operator matched), that token is
        passed through unchanged.  With two tokens, the first one is taken
        to be the operator and the second is wrapped into a nodeClass
        instance with the operator "NOT".  Any other token count raises
        an Exception.
        """
        def _makeNotNode(s, loc, toks):
                if len(toks)==1:
                        return toks[0]
                elif len(toks)==2:
                        return nodeClass(toks[1:], "NOT")
                else:
                        raise Exception("Busted by not")
        return _makeNotNode
291   
292   
294          return NumericNode([toks[0]-toks[1], toks[0]+toks[1]], "..") 
 295   
296   
298          """returns a +/- node for dates, i.e., toks[1] is a float in days. 
299          """ 
300          days = datetime.timedelta(days=toks[1]) 
301          return DateNode([toks[0]-days, toks[0]+days], "..") 
 302   
304          """returns a +/- node for MJDs, i.e., toks[1] is a float in days, and  
305          an MJDNode must be returned. 
306          """ 
307          days = datetime.timedelta(days=toks[1]) 
308          return MJDNode([toks[0]-days, toks[0]+days], "..") 
 309   
310   
311   
def _getBinopFactory(op, nodeClass):
        """returns a parse action for chains of operands joined by a binary
        operator.

        If only one token was matched, there is nothing to join, and the
        token is returned as is.  Otherwise, all tokens become the children
        of a nodeClass instance with the operator op.
        """
        def _(s, loc, toks):
                if len(toks)==1:
                        return toks[0]
                else:
                        return nodeClass(toks, op)
        return _
319   
320   
def _simpleExprFactory(nodeClass):
        """returns a parse action for expressions consisting of an optional
        prefix operator and an operand.

        A lone token is taken as the operand of an implied "=".  Otherwise,
        the first token is the operator and the remaining tokens are the
        children of the resulting nodeClass instance.
        """
        def _makeSimpleExprNode(s, loc, toks):
                if len(toks)==1:
                        return nodeClass(toks[0:], "=")
                else:
                        return nodeClass(toks[1:], toks[0])
        return _makeSimpleExprNode
328   
329   
332          """returns the root element of a grammar parsing numeric vizier-like  
333          expressions. 
334   
335          This is used for both dates and floats, use baseLiteral to match the 
336          operand terminal.  The trouble with dates is that the +/- operator 
337          has a simple float as the second operand, and that's why you can 
 338          pass in an errorLiteral and a pmBuilder. 
339          """ 
340          if errorLiteral is None: 
341                  errorLiteral = baseLiteral 
342   
343          with utils.pyparsingWhitechars(" \t"): 
344                  preOp = Literal("=") |  Literal(">=") | Literal(">" 
345                          ) | Literal("<=") | Literal("<") 
346                  rangeOp = Literal("..") 
347                  pmOp = Literal("+/-") | Literal("\xb1".decode("iso-8859-1")) 
348                  orOp = Literal("|") 
349                  andOp = Literal("&") 
350                  notOp = Literal("!") 
351                  commaOp = Literal(",") 
352   
353                  preopExpr = Optional(preOp) + baseLiteral 
354                  rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral 
355                  valList = baseLiteral + OneOrMore( Suppress(commaOp) + baseLiteral) 
356                  pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral 
357                  simpleExpr = rangeExpr | pmExpr | valList | preopExpr 
358   
359                  expr = Forward() 
360   
361                  notExpr = Optional(notOp) +  simpleExpr 
362                  andExpr = notExpr + ZeroOrMore( Suppress(andOp) + notExpr ) 
363                  orExpr = andExpr + ZeroOrMore( Suppress(orOp) + expr) 
364                  expr << orExpr 
365                  exprInString = expr + StringEnd() 
366   
367                  rangeExpr.setName("rangeEx") 
368                  rangeOp.setName("rangeOp") 
369                  notExpr.setName("notEx") 
370                  andExpr.setName("andEx") 
371                  andOp.setName("&") 
372                  orExpr.setName("orEx") 
373                  expr.setName("expr") 
374                  simpleExpr.setName("simpleEx") 
375   
376                  preopExpr.addParseAction(_simpleExprFactory(nodeClass)) 
377                  rangeExpr.addParseAction(_getNodeFactory("..", nodeClass)) 
378                  pmExpr.addParseAction(pmBuilder) 
379                  valList.addParseAction(_getNodeFactory(",", nodeClass)) 
380                  notExpr.addParseAction(_makeNotNodeFactory(nodeClass)) 
381                  andExpr.addParseAction(_getBinopFactory("AND", nodeClass)) 
382                  orExpr.addParseAction(_getBinopFactory("OR", nodeClass)) 
383   
384                  return exprInString 
 385   
386   
def parseFloat(s, pos, tok):
        """returns the number given by the literal tok[0] (parse action).

        Literals that int() accepts are returned as ints, everything else
        goes through float(); a ValueError from float() is not caught.
        """
        try:
                return int(tok[0])
        except ValueError:
                return float(tok[0])
 394   
395   
def parseDateTime(s, pos, tok):
        """returns a datetime from a date/time spec.

        This can be an ISO string or a julian year, JD, or MJD by
        heuristics (see DateNode for details): 1000..3000 is interpreted
        as a julian year, 1e4..1e5 as an MJD, and 2e6..4e6 as a JD.  Floats
        outside of these ranges raise a base.ParseException.

        Anything raising a ValueError in the float branch is handed on to
        literals.parseDefaultDatetime.
        """
        assert len(tok)==1
        try:
                floatVal = float(tok[0])
                if 1000<=floatVal<=3000:
                        return utils.roundToSeconds(stc.jYearToDateTime(floatVal))
                elif 10000<=floatVal<=100000:
                        return utils.roundToSeconds(stc.mjdToDateTime(floatVal))
                elif 2e6<=floatVal<=4e6:
                        return utils.roundToSeconds(stc.jdnToDateTime(floatVal))
                else:
                        # the ranges given here must match the conditions above
                        raise base.ParseException("Floats as dates must be"
                                " credible years (1000..3000), MJDs (1e4 .. 1e5), or"
                                " JDs (2e6 .. 4e6)")
        except ValueError:
                return literals.parseDefaultDatetime(tok[0])
 417   
418   
# Terminal for numbers; matches are converted by parseFloat.
floatLiteral = Regex(utils.floatRE)
floatLiteral.addParseAction(parseFloat)

# Terminal for dates: an ISO date with an optional time part, or a plain
# (possibly fractional) number; matches are converted by parseDateTime.
_DATE_REGEX = r"\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\d)?|(\d+(\.\d*)?)"
_DATE_LITERAL_DT = Regex(_DATE_REGEX)
_DATE_LITERAL_DT.addParseAction(parseDateTime)
423   
424   
427          """returns a parse tree for vizier-like expressions over floats. 
428          """ 
429          return utils.pyparseString(baseSymbol, str)[0] 
 430   
431   
434          """returns a parse tree for vizier-like expressions over ISO dates. 
435   
436          Note that the semantic validity of the date (like, month<13) is not 
437          checked by the grammar. 
438          """ 
439          return utils.pyparseString(baseSymbol, str)[0] 
 440   
441   
444          """returns a parse tree for vizier-like expression of ISO dates with 
445          parsed values in MJD. 
446          """ 
447          return utils.pyparseString(baseSymbol, str)[0] 
 448   
449   
452   
453   
455          """returns a grammar for parsing vizier-like string expressions. 
456          """ 
457   
458          with utils.pyparsingWhitechars(" \t"): 
459                  simpleOperator = Literal("==") | Literal("!=") | Literal(">=") |\ 
460                          Literal(">") | Literal("<=") | Literal("<") | Literal("=~") |\ 
461                          Literal("=,") 
462                  simpleOperand = Regex(r"[^\s].*|") 
463                   
464                  White = Word(" \t") 
465                  simpleExpr = simpleOperator + Optional( White ) + simpleOperand 
466   
467                  commaOperand = Regex("[^,]+") 
468                  barOperand = Regex("[^|]+") 
469                  commaEnum = Literal("=,") + commaOperand + ZeroOrMore( 
470                          Suppress(",") + commaOperand) 
471                  exclusionEnum = Literal("!=,") + commaOperand + ZeroOrMore( 
472                          Suppress(",") + commaOperand) 
473                  barEnum = Literal("=|") + barOperand + ZeroOrMore( 
474                          Suppress("|") + barOperand) 
475                  enumExpr = exclusionEnum | commaEnum | barEnum 
476   
477                  patLiterals = CharsNotIn("[*?") 
478                  wildStar = Literal("*") 
479                  wildQmark = Literal("?") 
480                  setElems = CharsNotIn("]") 
481                  setSpec = Suppress("[") + setElems + Suppress("]") 
482                  pattern = OneOrMore(setSpec | wildStar | wildQmark | patLiterals) 
483   
484                  patternOperator = Literal("~") | Literal("=") | Literal("!~") |\ 
485                          Literal("!") 
486                  patternExpr = patternOperator + Optional( White ) + pattern 
487                  nakedExpr = Regex("[^=!~|><]") + Optional( simpleOperand ) 
488   
489                  stringExpr = enumExpr | simpleExpr | patternExpr | nakedExpr 
490                   
491                  doc = stringExpr + StringEnd() 
492   
493                  stringExpr.setName("StringExpr") 
494                  enumExpr.setName("EnumExpr") 
495                  simpleOperand.setName("Operand") 
496                  simpleOperator.setName("Operator") 
497                  nakedExpr.setName("SingleOperand") 
498   
499                  debug = False 
500                  stringExpr.setDebug(debug) 
501                  enumExpr.setDebug(debug) 
502                  patLiterals.setDebug(debug) 
503                  simpleOperand.setDebug(debug) 
504                  simpleOperator.setDebug(debug) 
505                  nakedExpr.setDebug(debug) 
506   
507                  simpleExpr.addParseAction(_makeOpNode) 
508                  patternExpr.addParseAction(_makeOpNode) 
509                  enumExpr.addParseAction(_makeOpNode) 
510                  makeDefaultExpr = _getNodeFactory("==", StringNode) 
511                  nakedExpr.addParseAction(lambda s,p,toks: makeDefaultExpr(s,p, 
512                          ["".join(toks)])) 
513                  wildStar.addParseAction(_makeOpNode) 
514                  wildQmark.addParseAction(_makeOpNode) 
515                  setElems.addParseAction(_getNodeFactory("[", StringNode)) 
516   
517                  return doc 
 518   
519   
522   
523   
def _makeFactory(parser):
        """returns an SQL factory based on parser.

        The factory takes (field, val, sqlPars), parses val using parser
        and returns whatever the asSQL method of the resulting parse tree
        returns for field and sqlPars.

        A ParseException raised on the way is turned into a
        utils.ValidationError for field.name (after passing through
        base.ui.logOldExc).
        """
        def factory(field, val, sqlPars):
                try:
                        return parser(val).asSQL(field, sqlPars)
                except ParseException:
                        raise base.ui.logOldExc(utils.ValidationError(
                                "Invalid input for type %s (see help for valid type literals)"%
                                        field.type, field.name))
        return factory
533   
534   
# Register an SQL factory for each of the vizier expression types, each
# one built from the parser for the respective kind of expression.
for _vexprType, _exprParser in [
                ("vexpr-float", parseNumericExpr),
                ("vexpr-date", parseDateExpr),
                ("vexpr-mjd", parseDateExprToMJD),
                ("vexpr-string", parseStringExpr)]:
        sqlmunge.registerSQLFactory(_vexprType, _makeFactory(_exprParser))
del _vexprType, _exprParser
543   
544   
546          typeSystem = "vizierexpr" 
547          simpleMap = { 
548                  "smallint": "vexpr-float", 
549                  "integer": "vexpr-float", 
550                  "int": "vexpr-float", 
551                  "bigint": "vexpr-float", 
552                  "real": "vexpr-float", 
553                  "float": "vexpr-float", 
554                  "double precision": "vexpr-float", 
555                  "double": "vexpr-float", 
556                  "text": "vexpr-string", 
557                  "unicode": "vexpr-string", 
558                  "char": "vexpr-string", 
559                  "date": "vexpr-date", 
560                  "timestamp": "vexpr-date", 
561                  "vexpr-date": "vexpr-date", 
562                  "vexpr-float": "vexpr-float", 
563                  "vexpr-string": "vexpr-string", 
564          } 
565   
567                  if sqlType=="char": 
568                          return "vexpr-string" 
569                  if sqlType=="varchar": 
570                          return "vexpr-string" 
  571   
572  getVexprFor = ToVexprConverter().convert 
573   
574   
586   
587   
607   
608   
610          """returns a placeholder (suggested input) for inputKey, where values 
611          is the original values element. 
612   
613          This will currently be None unless we do a numeric input. 
614          """ 
615          if not values: 
616                  return 
617   
618          if inputKey.type=="vexpr-float": 
619                  scaling = inputKey.scaling or 1 
620                  return format_placeholder(values.min, values.max, 
621                          lambda val: "%s"%(val/scaling)) 
622   
623          elif inputKey.type=="vexpr-mjd": 
624                   
625                  return format_placeholder(values.min, values.max, 
626                          lambda val: "%s"%utils.formatISODT(stc.mjdToDateTime(val))) 
627   
628          elif inputKey.type=="vexpr-date": 
629                   
630                  return format_placeholder(values.min, values.max, 
631                          lambda val: "%s"%utils.formatISODT(val)) 
 632   
633           
634   
635   
672   
673   
677   
678   
if __name__=="__main__":
        # Ad-hoc check when run as a script: parse an empty date
        # expression and show the representation of the result.
        _parsed = parseDateExpr("")
        print(repr(_parsed))
681