Source code for gavo.svcs.vizierexprs

"""
Classes and methods to support vizier-type specifications on fields.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import datetime
import re


from gavo import base
from gavo import stc
from gavo import utils
from gavo.base import literals
from gavo.base import sqlmunge
from gavo.base import typesystems
from gavo.utils.parsetricks import (Word, Literal, Optional, Forward,
	ZeroOrMore, Suppress, ParseException, StringEnd, Regex,
	OneOrMore, CharsNotIn, pyparsingWhitechars)


[docs]class ParseNode(object): """is a parse node, consisting of an operator and children. The parse trees returned by the various parse functions are built from these. This is an abstract class. """ def __init__(self, children, operator): self.children = children self.operator = operator def __str__(self): return "(%s %s)"%(self.operator, " ".join([str(c) for c in self.children])) def __repr__(self): return "(%r %r)"%(self.operator, " ".join([str(c) for c in self.children]))
[docs] def getSQLKey(self, name, item, sqlPars): """wraps base.getSQLKey and can be overridden in case operand mangling is necessary. Do not call base.getSQLKey directly from within ParseNodes. """ return base.getSQLKey(name, item, sqlPars)
[docs] def insertChild(self, index, field, sqlPars): """inserts children[index] into sqlPars with a unique key and returns the key. children[index] must be atomic (i.e., no ParseNode). """ item = self.children[index] if item is None: return None assert not isinstance(item, ParseNode) if getattr(field, "scaling", None): item *= field.scaling return self.getSQLKey(field.name, item, sqlPars)
[docs]class NumericNode(ParseNode): """A node containing numeric operands (floats or dates). """
[docs]class DateNode(ParseNode): """A node containing date operands (datetime objects, as a rule). As an extension to VizieR, we interpret floats as well, where 1000 .. 3000 is a julian year, 10000 ... 100000 is an MJD and 2000000 .. 4000000 is a JD. """
[docs]class MJDNode(DateNode):
[docs] def getSQLKey(self, name, item, sqlPars): if (isinstance(item, datetime.datetime) or isinstance(item, datetime.date)): item = stc.dateTimeToMJD(item) return base.getSQLKey(name, item, sqlPars)
[docs]class StringNode(ParseNode): """A node containing string operands. """
def _getNodeFactory(op, nodeClass): def _(s, loc, toks): return nodeClass(toks, op) return _ def _makeNotNodeFactory(nodeClass): def _makeNotNode(s, loc, toks): if len(toks)==1: return toks[0] elif len(toks)==2: return nodeClass(toks[1:], "NOT") else: # Can't happen :-) raise Exception("Busted by not") return _makeNotNode
[docs]def makePmNode(s, loc, toks): return NumericNode([toks[0]-toks[1], toks[0]+toks[1]], "..")
[docs]def makeDatePmNode(s, loc, toks): """returns a +/- node for dates, i.e., toks[1] is a float in days. """ days = datetime.timedelta(days=toks[1]) return DateNode([toks[0]-days, toks[0]+days], "..")
[docs]def makeMJDPmNode(s, loc, toks): """returns a +/- node for MJDs, i.e., toks[1] is a float in days, and an MJDNode must be returned. """ days = datetime.timedelta(days=toks[1]) return MJDNode([toks[0]-days, toks[0]+days], "..")
def _getBinopFactory(op, nodeClass): def _(s, loc, toks): if len(toks)==1: return toks[0] else: return nodeClass(toks, op) return _ def _simpleExprFactory(nodeClass): def _makeSimpleExprNode(s, loc, toks): if len(toks)==1: return nodeClass(toks[0:], "=") else: return nodeClass(toks[1:], toks[0]) return _makeSimpleExprNode
[docs]def getComplexGrammar(baseLiteral, pmBuilder, errorLiteral=None, nodeClass=NumericNode): """returns the root element of a grammar parsing numeric vizier-like expressions. This is used for both dates and floats, use baseLiteral to match the operand terminal. The trouble with dates is that the +/- operator has a simple float as the second operand, and that's why you can pass in an errorLiteral and and pmBuilder. """ if errorLiteral is None: errorLiteral = baseLiteral with pyparsingWhitechars(" \t"): preOp = Literal("=") | Literal(">=") | Literal(">" ) | Literal("<=") | Literal("<") rangeOp = Literal("..") pmOp = Literal("+/-") | Literal("\xb1") orOp = Literal("|") andOp = Literal("&") notOp = Literal("!") commaOp = Literal(",") preopExpr = Optional(preOp) + baseLiteral rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral valList = baseLiteral + OneOrMore( Suppress(commaOp) + baseLiteral) pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral simpleExpr = rangeExpr | pmExpr | valList | preopExpr expr = Forward() notExpr = Optional(notOp) + simpleExpr andExpr = notExpr + ZeroOrMore( Suppress(andOp) + notExpr ) orExpr = andExpr + ZeroOrMore( Suppress(orOp) + expr) expr << orExpr exprInString = expr + StringEnd() rangeExpr.setName("rangeEx") rangeOp.setName("rangeOp") notExpr.setName("notEx") andExpr.setName("andEx") andOp.setName("&") orExpr.setName("orEx") expr.setName("expr") simpleExpr.setName("simpleEx") preopExpr.addParseAction(_simpleExprFactory(nodeClass)) rangeExpr.addParseAction(_getNodeFactory("..", nodeClass)) pmExpr.addParseAction(pmBuilder) valList.addParseAction(_getNodeFactory(",", nodeClass)) notExpr.addParseAction(_makeNotNodeFactory(nodeClass)) andExpr.addParseAction(_getBinopFactory("AND", nodeClass)) orExpr.addParseAction(_getBinopFactory("OR", nodeClass)) return exprInString
[docs]def parseFloat(s, pos, tok): # If something looks like an int, return it as an int. # Otherwise, postgres won't use int-indices try: return int(tok[0]) except ValueError: return float(tok[0])
[docs]def parseDateTime(s, pos, tok): """returns a datetime from a date/time spec. This can be an ISO string or a julian year, JD, or MJD by heuristics (see DateNode for details). """ assert len(tok)==1 try: floatVal = float(tok[0]) if 1000<=floatVal<=3000: return utils.roundToSeconds(stc.jYearToDateTime(floatVal)) elif 10000<=floatVal<=100000: return utils.roundToSeconds(stc.mjdToDateTime(floatVal)) elif 2e6<=floatVal<=4e6: return utils.roundToSeconds(stc.jdnToDateTime(floatVal)) else: raise base.ParseException("Floats as dates must be " " credible years (1000..2000), MJDs (1e4 .. 1e5), or" " JDs (2e6 .. 4e6)") except ValueError: return literals.parseDefaultDatetime(tok[0])
floatLiteral = Regex(utils.floatRE).addParseAction(parseFloat) _DATE_REGEX = r"\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\d(\.\d*)?)?|(\d+(\.\d*)?)" _DATE_LITERAL_DT = Regex(_DATE_REGEX).addParseAction(parseDateTime)
[docs]def parseNumericExpr(str, baseSymbol=getComplexGrammar(floatLiteral, makePmNode)): """returns a parse tree for vizier-like expressions over floats. """ return utils.pyparseString(baseSymbol, str)[0]
[docs]def parseDateExpr(str, baseSymbol=getComplexGrammar(_DATE_LITERAL_DT, makeDatePmNode, floatLiteral, DateNode)): """returns a parse tree for vizier-like expressions over ISO dates. Note that the semantic validity of the date (like, month<13) is not checked by the grammar. """ return utils.pyparseString(baseSymbol, str)[0]
[docs]def parseDateExprToMJD(str, baseSymbol=getComplexGrammar(_DATE_LITERAL_DT, makeMJDPmNode, floatLiteral, MJDNode)): """returns a parse tree for vizier-like expression of ISO dates with parsed values in MJD. """ return utils.pyparseString(baseSymbol, str)[0]
def _makeOpNode(s, loc, toks): return StringNode(toks[1:], toks[0])
[docs]def getStringGrammar(): """returns a grammar for parsing vizier-like string expressions. """ # XXX TODO: should we cut at =| (which is currently parsed as = |)? with pyparsingWhitechars(" \t"): simpleOperator = Literal("==") | Literal("!=") | Literal(">=") |\ Literal(">") | Literal("<=") | Literal("<") | Literal("=~") |\ Literal("=,") simpleOperand = Regex(r"[^\s].*|") # XXX probably a bug in pyparsing: White shouldn't be necessary here White = Word(" \t") simpleExpr = simpleOperator + Optional( White ) + simpleOperand commaOperand = Regex("[^,]+") barOperand = Regex("[^|]+") commaEnum = Literal("=,") + commaOperand + ZeroOrMore( Suppress(",") + commaOperand) exclusionEnum = Literal("!=,") + commaOperand + ZeroOrMore( Suppress(",") + commaOperand) barEnum = Literal("=|") + barOperand + ZeroOrMore( Suppress("|") + barOperand) enumExpr = exclusionEnum | commaEnum | barEnum patLiterals = CharsNotIn("[*?") wildStar = Literal("*") wildQmark = Literal("?") setElems = CharsNotIn("]") setSpec = Suppress("[") + setElems + Suppress("]") pattern = OneOrMore(setSpec | wildStar | wildQmark | patLiterals) patternOperator = Literal("~") | Literal("=") | Literal("!~") |\ Literal("!") patternExpr = patternOperator + Optional( White ) + pattern nakedExpr = Regex("[^=!~|><]") + Optional( simpleOperand ) stringExpr = enumExpr | simpleExpr | patternExpr | nakedExpr doc = stringExpr + StringEnd() stringExpr.setName("StringExpr") enumExpr.setName("EnumExpr") simpleOperand.setName("Operand") simpleOperator.setName("Operator") nakedExpr.setName("SingleOperand") debug = False stringExpr.setDebug(debug) enumExpr.setDebug(debug) patLiterals.setDebug(debug) simpleOperand.setDebug(debug) simpleOperator.setDebug(debug) nakedExpr.setDebug(debug) simpleExpr.addParseAction(_makeOpNode) patternExpr.addParseAction(_makeOpNode) enumExpr.addParseAction(_makeOpNode) makeDefaultExpr = _getNodeFactory("==", StringNode) nakedExpr.addParseAction(lambda s,p,toks: makeDefaultExpr(s,p, ["".join(toks)])) wildStar.addParseAction(_makeOpNode) wildQmark.addParseAction(_makeOpNode) setElems.addParseAction(_getNodeFactory("[", StringNode)) return doc
[docs]def parseStringExpr(str, baseSymbol=getStringGrammar()): return utils.pyparseString(baseSymbol, str)[0]
class _Flattener: """An abstract base class for things turning our parse trees into SQL. Concrete derivations need to define a set _standardOperators containing the normal binary operators for their types and a dictionary _sqlEmitters containing functions returning SQL fragments; of course, they can also completey override getSQLFor. """ def getSQLFor(self, node, field, sqlPars): """returns SQL for a parse node node over an InputKey field. sqlPars is the dict that is later passed to conn.execute and will be modified here. """ if node.operator in self._standardOperators: return "{} {} %({})s".format( field.name, node.operator, node.insertChild(0, field, sqlPars)) else: return self._sqlEmitters[node.operator]( self, node, field, sqlPars)
[docs]class NumericFlattener(_Flattener): """A _Flattener for float- and int-valued columns. """ def _emitBinop(self, node, field, sqlPars): return base.joinOperatorExpr(node.operator, [self.getSQLFor(c, field, sqlPars) for c in node.children]) def _emitUnop(self, node, field, sqlPars): operand = self.getSQLFor(node.children[0], field, sqlPars) if operand: return "{} ({})".format(node.operator, operand) def _emitEnum(self, node, field, sqlPars): return "{} IN ({})".format( field.name, ", ".join("%({})s".format(node.insertChild(i, field, sqlPars)) for i in range(len(node.children)))) _standardOperators = set(["=", ">=", ">", "<=", "<"]) _sqlEmitters = { '..': lambda self, node, field, sqlPars: "{} BETWEEN %({})s AND %({})s".format( field.name, node.insertChild(0, field, sqlPars), node.insertChild(1, field, sqlPars)), 'AND': _emitBinop, 'OR': _emitBinop, 'NOT': _emitUnop, ',': _emitEnum, }
[docs]class NumericIntervalFlattener(NumericFlattener): """A _Flattener for matching against min/max pairs of columns. Here, field is this (min_column, max_column) pair. This currently isn't used anywhere in DaCHS itself. You can use it in custom phraseMakers, though; cf. :samplerd:`obsform/q` """ def _emitEnum(self, node, field, sqlPars): return base.joinOperatorExpr("OR", [self._emitEquality(node, field, sqlPars, useChild=childInd) for childInd, _ in enumerate(node.children)]) def _emitInterval(self, node, field, sqlPars): # interval condition: overlaps return ("{h1}>=%({l2})s AND %({h2})s>={l1}" " AND {l1}<={h1} AND %({l2})s<=%({h2})s").format( l1=field[0].name, h1=field[1].name, l2=node.insertChild(0, field[0], sqlPars), h2=node.insertChild(1, field[1], sqlPars)) def _emitEquality(self, node, field, sqlPars, useChild=0): # equality is interpreted as "contains" here return "%({})s BETWEEN {} AND {}".format( node.insertChild(useChild, field[0], sqlPars), field[0].name, field[1].name) def _emitLarger(self, node, field, sqlPars): # larger/larger equal is, perhaps a bit funkily, larger than # upper/lower bound. if "=" in node.operator: limitField = field[1] else: limitField = field[0] return "{} > %({})s".format( limitField.name, node.insertChild(0, limitField, sqlPars)) def _emitSmaller(self, node, field, sqlPars): # smaller/smaller equal is, perhaps a bit funkily, smaller than # lower/upper bound. if "=" in node.operator: limitField = field[0] else: limitField = field[1] return "{} < %({})s".format( limitField.name, node.insertChild(0, limitField, sqlPars)) _standardOperators = set() _sqlEmitters = { '..': _emitInterval, '=': _emitEquality, '>=': _emitLarger, '>': _emitLarger, '<': _emitSmaller, '<=': _emitSmaller, 'AND': NumericFlattener._emitBinop, 'OR': NumericFlattener._emitBinop, 'NOT': NumericFlattener._emitUnop, ',': _emitEnum, }
[docs]class DateFlattener(_Flattener): """A _Flattener producing SQL for data-like columns. This is supposed to work for timestamp-valued and MJD-valued columns alike. The difference is handled on the level of nodes (DateNode vs. MJDNode). """ def _expandDate(self, arg): # returns the last second of arg if it looks like a day (i.e., hms=0) # this is to fix postgres' behaviour when comparing timestamps and # dates (e.g., 1990-01-01 < 1990-01-01T00:00:01). if arg.hour==arg.minute==arg.second==0: return arg.replace(hour=23, minute=59, second=59) return arg def _emitBinop(self, node, field, sqlPars): return base.joinOperatorExpr(node.operator, [self.getSQLFor(c, field, sqlPars) for c in node.children]) def _emitUnop(self, node, field, sqlPars): operand = self.getSQLFor(node.children[0], field, sqlPars) if operand: return "%s (%s)"%(node.operator, operand) def _emitRange(self, node, field, sqlPars): return "{} BETWEEN %({})s AND %({})s".format( field.name, node.insertChild(0, field, sqlPars), node.insertChild(1, field, sqlPars)) def _emitEnum(self, node, field, sqlPars): return base.joinOperatorExpr("OR", [self.getSQLFor(node.__class__([child], "="), field, sqlPars) for child in node.children]) def _emitSimple(self, node, field, sqlPars): # return a simple comparison with the date itself return "{} {} %({})s".format(field.name, node.operator, node.insertChild(0, field, sqlPars)) def _emitEqual(self, node, field, sqlPars): if node.children[0].hour==node.children[0].minute==0: # expand this to a condition covering the whole day return "{} BETWEEN %({})s AND %({})s".format( field.name, node.getSQLKey(field.name, node.children[0], sqlPars), node.getSQLKey(field.name, self._expandDate(node.children[0]), sqlPars)) else: return self._emitSimple(node, field, sqlPars) def _emitExpanded(self, node, field, sqlPars): # return a simple comparison with date's midnight node.children[0] = self._expandDate(node.children[0]) return "{} {} %({})s".format( field.name, node.operator, node.insertChild(0, field, sqlPars)) _standardOperators = set() _sqlEmitters = { '..': _emitRange, 'AND': _emitBinop, 'OR': _emitBinop, 'NOT': _emitUnop, ',': _emitEnum, "=": _emitEqual, ">=": _emitSimple, "<=": _emitExpanded, "<": _emitSimple, ">": _emitExpanded, "<=": _emitExpanded, }
[docs]class StringFlattener(_Flattener): """A _Flattener turning String-like Vizier expressions into SQL conditions against string-valued columns. """ _metaEscapes = { "|": r"\|", "*": r"\*", "+": r"\+", "(": r"\(", ")": r"\)", "[": r"\[", "%": r"\%", "_": r"\_", "\\\\": "\\\\", } _escapeRE = re.compile("[%s]"%"".join(list(_metaEscapes.keys()))) # The backslash in _metaEscapes is escaped to make _escapeRE work, # but of course I need to replace the unescaped version. _metaEscapes.update({"\\": "\\\\"}) def _escapeSpecials(self, aString): """returns aString with SQL RE metacharacters escaped. """ return self._escapeRE.sub( lambda mat: self._metaEscapes[mat.group()], aString) def _makePattern(self, node, field, sqlPars): parts = [] for child in node.children: if isinstance(child, str): parts.append(self._escapeSpecials(child)) else: parts.append(self.getSQLFor(child, field, sqlPars)) return "^%s$"%("".join(parts)) _patOps = { "~": "~*", "=": "~", "!~": "!~*", "!": "!~", "=~": "~*", } def _emitPatOp(self, node, field, sqlPars): pattern = self._makePattern(node, field, sqlPars) return "{} {} %({})s".format( field.name, self._patOps[node.operator], node.getSQLKey(field.name, pattern, sqlPars)) def _emitEnum(self, node, field, sqlPars): query = "{} IN ({})".format( field.name, ", ".join( "%({})s".format(node.insertChild(i, field, sqlPars)) for i in range(len(node.children)))) if node.operator=="!=,": query = "NOT ({})".format(query) return query _translatedOps = { "==": "=", } def _emitTranslatedOp(self, node, field, sqlPars): return "{} = %({})s".format(field.name, node.insertChild(0, field, sqlPars)) _nullOperators = {"*": ".*", "?": "."} _standardOperators = set(["<", ">", "<=", ">=", "!="]) _sqlEmitters = { "~": _emitPatOp, "=": _emitPatOp, "!~": _emitPatOp, "!": _emitPatOp, "=~": _emitPatOp, # this happens to work because of pattern escaping "=,": _emitEnum, "=|": _emitEnum, "!=,": _emitEnum, "==": _emitTranslatedOp, }
[docs] def getSQLFor(self, node, field, sqlPars): if node.operator=="[": return "[%s]"%node.children[0] if node.operator in self._nullOperators: return self._nullOperators[node.operator] else: return _Flattener.getSQLFor(self, node, field, sqlPars)
def _makeFactory(parser, flattener): def factory(field, val, sqlPars): try: tree = parser(val) return flattener.getSQLFor(tree, field, sqlPars) except ParseException: raise base.ui.logOldExc(utils.ValidationError( "Invalid input for type %s (see help for valid type literals)"% field.type, field.name)) return factory _dateFlattener = DateFlattener() sqlmunge.registerSQLFactory("vexpr-float", _makeFactory(parseNumericExpr, NumericFlattener())) sqlmunge.registerSQLFactory("vexpr-date", _makeFactory(parseDateExpr, _dateFlattener)) sqlmunge.registerSQLFactory("vexpr-mjd", _makeFactory(parseDateExprToMJD, _dateFlattener)) sqlmunge.registerSQLFactory("vexpr-string", _makeFactory(parseStringExpr, StringFlattener()))
[docs]class ToVexprConverter(typesystems.FromSQLConverter): typeSystem = "vizierexpr" simpleMap = { "smallint": "vexpr-float", "integer": "vexpr-float", "int": "vexpr-float", "bigint": "vexpr-float", "real": "vexpr-float", "float": "vexpr-float", "double precision": "vexpr-float", "double": "vexpr-float", "text": "vexpr-string", "unicode": "vexpr-string", "char": "vexpr-string", "date": "vexpr-date", "timestamp": "vexpr-date", "vexpr-date": "vexpr-date", "vexpr-float": "vexpr-float", "vexpr-string": "vexpr-string", }
[docs] def mapComplex(self, sqlType, length): if sqlType=="char": return "vexpr-string" if sqlType=="varchar": return "vexpr-string"
getVexprFor = ToVexprConverter().convert
[docs]def makeConeSearchFor(inputKey): """returns an //scs#makeSpointCD condDesc tailored for inputKey. """ from gavo.svcs import standardcores return base.parseFromString(standardcores.CondDesc, """ <FEED source="//scs#makeSpointCD" tablehead=%s matchColumn=%s/> """%( utils.escapeAttrVal(inputKey.tablehead), utils.escapeAttrVal(inputKey.name)))
[docs]def format_placeholder(min_val, max_val, formatter): """returns a placeholder string for a possibly open interval. ``*_val`` may be None, formatter has to accept a non-None value and return the proper representation for the placeholder string. If no placeholder can be generated, this function returns None. """ if min_val is None: if max_val is None: return None else: return "< %s"%formatter(max_val) else: if max_val is None: return "> %s"%formatter(min_val) else: return "%s .. %s"%( formatter(min_val), formatter(max_val))
[docs]def getPlaceholderFor(inputKey, values): """returns a placeholder (suggested input) for inputKey, where values is the original values element. This will currently be None unless we do a numeric input. """ if not values: return if inputKey.type=="vexpr-float": scaling = inputKey.scaling or 1 return format_placeholder(values.min_typed, values.max_typed, lambda val: "%s"%(val/scaling)) elif inputKey.type=="vexpr-mjd": # date with underlying MJD column return format_placeholder(values.min_typed, values.max_typed, lambda val: "%s"%utils.formatISODT(stc.mjdToDateTime(val))) elif inputKey.type=="vexpr-date": # date with underlying timestamp column return format_placeholder(values.min_typed, values.max_typed, lambda val: "%s"%utils.formatISODT(val))
# fall through to None if no placeholder can be make
[docs]def adaptInputKey(inputKey): """returns ik changed to generate SQL for Vizier-like expressions. This is used for buildFrom on CondDescs and renderers having parameterStyle form. """ res, oldValues = inputKey, inputKey.values # manually check for things that need to change the whole condDesc. if inputKey.type=='spoint': raise base.Replace(makeConeSearchFor(inputKey)) elif inputKey.xtype=="mjd": res = inputKey.change(type="vexpr-mjd", unit="", parent_=None) elif inputKey.isEnumerated(): res = inputKey else: try: changes = { "type": getVexprFor(inputKey.type), "values": None, "parent_": None} if inputKey.displayHint.get("displayUnit") and \ inputKey.displayHint["displayUnit"]!=inputKey.unit: changes["inputUnit"] = inputKey.displayHint["displayUnit"] res = inputKey.change(**changes) except base.ConversionError: # No vexpr type, leave things pass res.setProperty("placeholder", getPlaceholderFor(res, oldValues)) return res
if __name__=="__main__": # pragma: no cover import doctest doctest.testmod()