Source code for gavo.rscdef.column

"""
Description of columns (and I/O fields).
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


from gavo import adql
from gavo import base
from gavo import dm
from gavo import utils
from gavo.base import typesystems
from gavo.utils import codetricks
from gavo.votable import paramval

__docformat__ = "restructuredtext en"


# A set of database type names that need explicit null values when
# they are serialized into VOTables.  We don't check array types
# here at all, since that's another can of worms entirely.
EXPLICIT_NULL_TYPES = {
	"smallint",
	"integer",
	"bigint",
	"char",
	"boolean",
	"bytea",
}


class TypeNameAttribute(base.AtomicAttribute):
	"""An attribute with values constrained to types we understand.

	Literals are accepted if typesystems.sqltypeToVOTable can deal with
	them; the value itself remains the unchanged type name.
	"""
	@property
	def typeDesc_(self):
		knownTypes = ", ".join(typesystems.ToPythonConverter.simpleMap)
		return ("a type name; the internal type system is similar to SQL's"
			" with some restrictions and extensions. The known atomic types"
			" include: %s"%knownTypes)

	def parse(self, value):
		"""returns value if it is a type name the type system can convert.

		Anything sqltypeToVOTable rejects with a base.Error yields a
		LiteralParseError (the original exception is handed to the UI's
		logOldExc).
		"""
		try:
			typesystems.sqltypeToVOTable(value)
		except base.Error:
			parseError = base.LiteralParseError(self.name_, value,
				hint="A supported SQL type was expected here. If in doubt,"
				" check base/typeconversions.py, in particular ToPythonCodeConverter.")
			raise base.ui.logOldExc(parseError)
		return value

	def unparse(self, value):
		"""returns value; type names are their own literals.
		"""
		return value
class ParamNameAttribute(base.UnicodeAttribute):
	"""An attribute containing a param or column name.

	These, in DaCHS, have to match identifierPat (essentially, like
	python identifiers).

	For compatibility with columns, they do allow the quoted/ prefix,
	but the only place this is really supported is in query generation.
	"""
	@property
	def typeDesc_(self):
		template = ("A name for a table or service parameter. These have to match"
			" ``%s``.")
		return template%utils.identifierPattern.pattern

	def parse(self, value):
		"""returns the name for the literal value.

		Literals starting with quoted/ become utils.QuotedName instances
		for whatever follows the prefix.  Everything else must match
		utils.identifierPattern and is returned unchanged; a StructureError
		is raised if it does not match.
		"""
		quotedPrefix = "quoted/"
		if value.startswith(quotedPrefix):
			return utils.QuotedName(value[len(quotedPrefix):])

		if utils.identifierPattern.match(value):
			return value

		raise base.StructureError(
			f"'{value}' is not a valid column identifier")
class ColumnNameAttribute(ParamNameAttribute):
	"""An attribute containing a name suitable for SQL table names.

	Column names are special in that you can prefix them with "quoted/"
	and then get a delimited identifier.  This is something you probably
	shouldn't use.

	Using ADQL/SQL reserved words (without quoting) here yields a warning.
	"""
	@property
	def typeDesc_(self):
		return ("a column name within an SQL table. These have to match the"
			" SQL regular_identifier production."
			" In a desperate pinch, you can generate delimited identifiers"
			" (that can contain anything) by prefixing the name with 'quoted/'")

	def parse(self, value):
		"""returns a column name for the literal value.

		quoted/ literals become utils.QuotedName instances without further
		checks.  Everything else is validated by ParamNameAttribute.parse;
		if the result, uppercased, is in adql.ALL_RESERVED_WORDS, a warning
		is sent to the UI, but the name is still returned.
		"""
		# delimited identifiers must be handled here, as QuotedNames must
		# not go through the reserved word check below.
		if value.startswith("quoted/"):
			return utils.QuotedName(value[7:])

		value = ParamNameAttribute.parse(self, value)
		isReserved = value.upper() in adql.ALL_RESERVED_WORDS
		if isReserved:
			base.ui.notifyWarning("Column name '%s' coincides with a"
				" ADQL/SQL reserved name. This may lead to serious trouble"
				" later. Please consider changing the name, or prepend quoted/"
				" to make it a delimited identifier."%value)
		return value

	def unparse(self, value):
		"""returns a literal for value, restoring the quoted/ prefix for
		QuotedNames.
		"""
		if not isinstance(value, utils.QuotedName):
			return value
		return "quoted/"+value.name
class _AttBox(object):
	"""A helper for TableManagedAttribute.

	TableManagedAttributes wrap their values into an _AttBox before
	sending them off in an event.  A receiver can thus distinguish
	values originating from another TableManagedAttribute (which is ok)
	from values coming from an XML parser (which is forbidden).

	The wrapped value is available in the payload attribute.
	"""
	def __init__(self, payload):
		self.payload = payload
class TableManagedAttribute(base.AttributeDef):
	"""An attribute not settable from XML for holding information managed
	by the parent table.

	That's stc and stcUtype here, currently.

	Do not use this in new code.  This should go when the stc element
	can safely be replaced by gavo-dm-based stuff (here: DmRoles).
	"""
	typeDesc_ = "non-settable internally used value"

	def feed(self, ctx, instance, value):
		"""sets the attribute on instance from an event value.

		Only values boxed in an _AttBox are accepted; anything else raises
		a StructureError.
		"""
		if not isinstance(value, _AttBox):
			# do not let people set that stuff directly
			raise base.StructureError("Cannot set %s attributes from XML"%self.name_)

		# synthetic event during object copying, accept
		self.feedObject(instance, value.payload)

	def feedObject(self, instance, value):
		"""stores value on instance under this attribute's name.
		"""
		setattr(instance, self.name_, value)

	def iterEvents(self, instance):
		"""yields a single value event with the boxed value unless the
		attribute still has its default.
		"""
		current = getattr(instance, self.name_)
		if current!=self.default_:
			yield ("value", self.name_, _AttBox(current))

	def getCopy(self, instance, newParent, ctx):
		"""returns the value held by instance itself.
		"""
		# these never get copied; the values are potentially shared
		# between many objects, so they must not be changed anyway.
		return getattr(instance, self.name_)
class OldRoles(object):
	"""A sentinel class for Table to signal non-adapted DM roles on a column
	or param.

	Wrapping an OldRoles instance again does not nest; the new instance
	just takes over the wrapped instance's oldRoles.  An OldRoles is
	true exactly if its oldRoles are.
	"""
	def __init__(self, oldRoles):
		# unwrap so we never end up with OldRoles within OldRoles
		self.oldRoles = (oldRoles.oldRoles
			if isinstance(oldRoles, OldRoles)
			else oldRoles)

	def __bool__(self):
		return bool(self.oldRoles)
class DMRolesAttribute(base.AttributeDef):
	"""An attribute managing DM roles.

	It is not set directly from XML but filled when a table parses
	DM annotation.

	When copying around columns between tables, this is used to build
	the new annotation; the value is an OldRoles instance rather than
	a list as usual until the new parent table has started constructing
	its own DM annotations.
	"""
	typeDesc_ = "read-only list of roles played by this column in DMs"

	def __init__(self, name, description="Undocumented"):
		base.AttributeDef.__init__(self,
			name,
			default=base.Computed,
			description=description,
			copyable=True)

	@property
	def default_(self):
		# a fresh list on each access, so instances never share a default
		return []

	def feedObject(self, instance, value):
		"""stores value on instance under this attribute's name.
		"""
		setattr(instance, self.name_, value)

	def iterEvents(self, instance):
		"""yields nothing, as these are entirely externally managed.
		"""
		yield from ()

	def getCopy(self, instance, newParent, ctx):
		"""returns an OldRoles wrapper for non-empty roles on instance,
		a new empty list otherwise.
		"""
		# The wrapper prevents accidental changes and lets the new parent
		# table figure out that the roles haven't been updated
		previous = getattr(instance, self.name_)
		if not previous:
			return []
		return OldRoles(previous)
class RoEmptyDict(dict):
	"""is a read-only standin for a dict.

	It's hashable, though, since it's always empty...

	This is used here for a default for displayHint.

	All operations that would add items (item assignment, setdefault,
	update or ``|=`` with actual content) raise a TypeError.  Operations
	that cannot add anything (including update with nothing to add) are
	left alone, as they cannot change an empty dict.
	"""
	def __setitem__(self, what, where):
		raise TypeError("RoEmptyDicts are immutable")

	def setdefault(self, key, default=None):
		# key can never be present, so this would always add something
		raise TypeError("RoEmptyDicts are immutable")

	def update(self, *args, **kwargs):
		# dict.update does not go through __setitem__, so it needs its
		# own guard.  Empty updates remain the no-ops they always were.
		if dict(*args, **kwargs):
			raise TypeError("RoEmptyDicts are immutable")

	def __ior__(self, other):
		# dict's in-place union does not go through update, either.
		self.update(other)
		return self

	def __hash__(self):
		# dict sets __hash__ to None; as all instances are empty and hence
		# equal, a constant is a proper hash.
		return 0


_roEmptyDict = RoEmptyDict()
class DisplayHintAttribute(base.AtomicAttribute):
	"""is a display hint.

	Display hint literals are comma-separated key=value sequences.
	Keys are up to the application and evaluated by htmltable, votable, etc.

	The parsed values are simply dictionaries mapping strings to strings, i.e.,
	value validation cannot be performed here (yet -- do we want this?
	A central repository of display hints would be kinda useful...)
	"""
	typeDesc_ = "Display hint"

	def __init__(self, name, description, **kwargs):
		base.AtomicAttribute.__init__(self,
			name,
			default=_roEmptyDict,
			description=description,
			**kwargs)

	def parse(self, value):
		"""returns a dictionary for a display hint literal.

		Whitespace-only literals yield the shared read-only empty dict.
		Literals in which some comma-separated item does not split into
		exactly two parts at "=" raise a LiteralParseError.
		"""
		if not value.strip():
			return _roEmptyDict

		try:
			return {key: val
				for key, val in (item.split("=") for item in value.split(","))}
		except (ValueError, TypeError):
			raise base.ui.logOldExc(base.LiteralParseError(self.name_, value,
				hint="DisplayHints have a format like tag=value{,tag=value}"))

	def unparse(self, value):
		"""returns a display hint literal for the dictionary value.
		"""
		return ",".join("%s=%s"%(key, val) for key, val in value.items())
class Option(base.Structure):
	"""A value for enumerated columns.

	For presentation purposes, an option can have a title, defaulting to
	the option's value.
	"""
	name_ = "option"

	_title = base.UnicodeAttribute("title",
		default=base.NotGiven,
		description="A Label for presentation purposes; defaults to val.",
		copyable=True)
	_val = base.DataContent(copyable=True,
		description="The value of"
		" the option; this is what is used in, e.g., queries and the like.")

	def __repr__(self):
		# may occur in user messages from formal, so we use title.
		return self.title

	def __str__(self):
		return repr(self)

	def completeElement(self, ctx):
		"""fills in the title from the stringified content if no title
		was given.
		"""
		if self.title is base.NotGiven:
			self.title = str(self.content_)
		super().completeElement(ctx)
def makeOptions(*args):
	"""returns a list of Option instances, one per argument, with the
	argument as the option's content.
	"""
	options = []
	for optionValue in args:
		options.append(base.makeStruct(Option, content_=optionValue))
	return options
class Values(base.Structure):
	"""Information on a column's values, in particular its domain.

	This is quite like the values element in a VOTable.  In particular,
	to accommodate VOTable usage, we require nullLiteral to be a valid literal
	for the parent's type.

	Note that DaCHS does not validate for constraints from values on
	table import.  This is mainly because before ``dachs limits`` has run,
	values may not represent the new dataset in semiautomatic values.

	With HTTP parameters, values validation does take place (but again,
	that's mostly not too helpful because there are query languages
	sitting in between most of the time).

	Hence, the main utility of values is metadata declaration, both
	in the form renderer (where they become placeholders) and in datalink
	(where they are communicated as VOTable values).
	"""
	name_ = "values"

	_min = base.UnicodeAttribute("min",
		default=None,
		description="Minimum acceptable"
		" value as a datatype literal",
		copyable=True)
	_max = base.UnicodeAttribute("max",
		default=None,
		description="Maximum acceptable"
		" value as a datatype literal",
		copyable=True)
	_options = base.StructListAttribute("options",
		childFactory=Option,
		description="List of acceptable values (if set)",
		copyable=True)
	_default = base.UnicodeAttribute("default",
		default=None,
		description="A default"
		" value (currently only used for options).",
		copyable=True)
	_nullLiteral = base.UnicodeAttribute("nullLiteral",
		default=None,
		description=
		"An appropriate value representing a NULL for this column in VOTables"
		" and similar places. You usually should only set it for integer"
		" types and chars. Note that rowmakers make no use of this nullLiteral,"
		" i.e., you can and should choose null values independently of"
		" your source.  Again, for reals, floats and (mostly) text you probably"
		" do not want to do this.",
		copyable=True)
	_multiOk = base.BooleanAttribute("multiOk", False, "Deprecated, use"
		" multiplicity=multiple on input keys instead.", copyable=True)
	_fromDB = base.ActionAttribute("fromdb", "_evaluateFromDB",
		description=
		"A query fragment returning just one column to fill options from (will"
		" add to options if some are given). Do not write SELECT or anything,"
		" just the column name and the where clause. Do not do this for"
		" large tables even if there are reasonably few values, because"
		" there is no good way to speed up this kind of query using indices.")
	_caseless = base.BooleanAttribute("caseless",
		description="When validating, ignore the case of string values."
		" For non-string types, behaviour is undefined (i.e., DaCHS is"
		" going to spit on you).",
		default=False,
		copyable=True)
	_percentile03 = base.UnicodeAttribute("percentile03",
		default=None,
		description="Value at the 3rd percentile of the distribution of this"
		" column.",
		copyable=True)
	_percentile97 = base.UnicodeAttribute("percentile97",
		default=None,
		description="Value at the 97th percentile of the distribution of this"
		" column.",
		copyable=True)
	_median = base.UnicodeAttribute("median",
		default=None,
		description="Median of the distribution of this column.",
		copyable=True)
	_fillFactor = base.UnicodeAttribute("fillFactor",
		default=None,
		description="Ratio of non-NULL values to the number of rows in the"
		" embedding table.",
		copyable=True)
	_original = base.OriginalAttribute()

	# the set of python values of our options (case-folded for caseless
	# values); None means no restrictions.  Set in onParentComplete.
	validValues = None

	@classmethod
	def fromOptions(cls, labels):
		"""returns Values with the elements of labels as valid options.
		"""
		return base.makeStruct(cls,
			options=[base.makeStruct(Option, content_=l) for l in labels])

	@property
	def min_typed(self):
		"""min as a python value for the parent's (scalarified) type; this
		is computed on first access and cached.
		"""
		if not hasattr(self, "_min_type_cache"):
			self._min_type_cache = self.makePythonVal(
				self.min, typesystems.scalarify(self.parent.type))
		return self._min_type_cache

	@property
	def max_typed(self):
		"""max as a python value for the parent's (scalarified) type; this
		is computed on first access and cached.
		"""
		if not hasattr(self, "_max_type_cache"):
			self._max_type_cache = self.makePythonVal(
				self.max, typesystems.scalarify(self.parent.type))
		return self._max_type_cache

	def makePythonVal(self, literal, sqltype):
		"""returns a python value for literal, converted with the
		typesystems converter for sqltype; None stays None.
		"""
		if literal is None:
			return None
		return typesystems.sqltypeToPython(sqltype)(literal)

	def _evaluateFromDB(self, ctx):
		"""adds options from the result of running the fromdb fragment as
		a SELECT DISTINCT query.

		Nothing happens if ctx has a false doQueries attribute.  Database
		errors only lead to a warning.
		"""
		if not getattr(ctx, "doQueries", True):
			return
		try:
			with base.getTableConn() as conn:
				for row in conn.query(self.parent.parent.expand(
						"SELECT DISTINCT %s"%(self.fromdb))):
					if row[0] is None:
						# a NULL here would be about the same as required=False;
						# dealing with it properly in constraints is hard.  Let's
						# see how well we get away with ignoring it.
						continue
					self._options.feedObject(self,
						base.makeStruct(Option, content_=row[0]))
		except base.DBError:
			# Table probably doesn't exist yet, ignore.
			base.ui.notifyWarning("Values fromdb '%s' failed, ignoring"%self.fromdb)

	def onParentComplete(self):
		"""converts options, the nullLiteral, and the default from string
		literals to python values.

		nullLiteral is only checked for parseability (a LiteralParseError
		is raised if it is not a valid literal); it remains a string.
		"""
		dataField = self.parent

		if self.options:
			dbt = dataField.type
			for opt in self.options:
				opt.content_ = self.makePythonVal(opt.content_, dbt)
			self.validValues = set(o.content_ for o in self.options)
			if self.caseless:
				self.validValues = set(o and o.lower() for o in self.validValues)

		if self.nullLiteral:
			try:
				self.makePythonVal(self.nullLiteral, dataField.type)
			except ValueError:
				raise base.LiteralParseError("nullLiteral", self.nullLiteral,
					hint="If you want to *parse* whatever you gave into a NULL,"
					" use the parseWithNull function in a rowmaker. The null"
					" literal gives what value will be used for null values"
					" when serializing to VOTables and the like.")

		if self.default and isinstance(self.default, str):
			votType, arraysize, xtype = dataField._getVOTableType()
			self.default = paramval.getVOTParser(votType, arraysize, xtype)(
				self.default)

	def setFromStatRow(self, statRow, ctx):
		"""fills statistics data from one of our statistics rows.

		This will not overwrite values already given to let people manually
		give statistics.
		"""
		for srcKey, destKey in [
				("min_value", "min"),
				("max_value", "max"),
				("percentile03", "percentile03"),
				("percentile97", "percentile97"),
				("median", "median"),
				("fill_factor", "fillFactor"),]:
			if srcKey in statRow:
				if (getattr(self, destKey, None) is None
						and statRow[srcKey] is not None):
					self.feed(destKey, str(statRow[srcKey]), ctx)

	def setOptionsFromDict(self, dist, ctx):
		"""sets Options from the keys of the dict dist.

		This is primarily for injection of values from the DB into the RD.
		It will hence not change anything if there are already options defined
		(which are presumably manually set).
		"""
		if self.options:
			return

		self.options = makeOptions(*dist.keys())

	def validateOptions(self, value):
		"""returns false if value isn't either in options or doesn't consist of
		items in options.

		Various null values always validate here; non-null checking is done
		by the column on its required attribute.

		For caseless values, scalars and the items of lists, tuples and
		sets are lowercased before they are compared.
		"""
		if value=="None":
			assert False, "Literal 'None' passed as a value to validateOptions"

		if self.validValues is None:
			return True

		if isinstance(value, (list, tuple, set)):
			for val in value:
				if not val:
					# null-like items always validate
					continue
				if self.caseless:
					# fold the items, not the container (which has no lower
					# and used to make this fail for multiple caseless values)
					val = val.lower()
				if val not in self.validValues:
					return False
			return True

		if self.caseless and value:
			value = value.lower()
		return value in self.validValues or value is None
class ColumnBase(base.Structure, base.MetaMixin):
	"""A base class for columns, parameters, output fields, etc.

	Actually, right now there's far too much cruft in here that
	should go into Column proper or still somewhere else.  Hence:
	XXX TODO: Refactor.

	See also Column for a docstring that still applies to all we've in
	here.
	"""
	_name = ParamNameAttribute("name",
		default=base.Undefined,
		description="Name of the param",
		copyable=True,
		before="type")
	_type = TypeNameAttribute("type",
		default="real",
		description=
		"datatype for the column (SQL-like type system)",
		copyable=True,
		before="unit")
	_unit = base.UnicodeAttribute("unit",
		default="",
		description=
		"Unit of the values. Use VOUnits syntax and use single quotes when"
		" you use custom units (you should avoid that).",
		copyable=True,
		before="ucd",
		strip=True)
	_ucd = base.UnicodeAttribute("ucd",
		default="",
		description=
		"UCD of the column",
		copyable=True,
		before="description")
	_description = base.NWUnicodeAttribute("description",
		default="",
		copyable=True,
		description="A short (one-line) description of the values in this column.")
	_tablehead = base.UnicodeAttribute("tablehead",
		default=None,
		description="Terse phrase to put into table headers for this"
		" column",
		copyable=True)
	_utype = base.UnicodeAttribute("utype",
		default=None,
		description=
		"utype for this column",
		copyable=True)
	_required = base.BooleanAttribute("required",
		default=False,
		description="Record becomes invalid when this column is NULL",
		copyable=True)
	_displayHint = DisplayHintAttribute("displayHint",
		description="Suggested presentation; the format is "
		" <kw>=<value>{,<kw>=<value>}, where what is interpreted depends"
		" on the output format.  See, e.g., documentation on HTML renderers"
		" and the formatter child of outputFields.",
		copyable=True)
	_verbLevel = base.IntAttribute("verbLevel",
		default=20,
		description="Minimal verbosity level at which to include this column",
		copyable=True)
	_values = base.StructAttribute("values",
		default=None,
		childFactory=Values,
		description="Specification of legal values",
		copyable=True)
	_fixup = base.UnicodeAttribute("fixup",
		description=
		"A python expression the value of which will replace this column's"
		" value on database reads. Write a ___ to access the original"
		' value. You can use macros for the embedding table.'
		' This is for, e.g., simple URL generation'
		' (fixup="\'\\internallink{/this/svc}\'+___").'
		' It will *only* kick in when tuples are deserialized from the'
		" database, i.e., *not* for values taken from tables in memory.",
		default=None,
		copyable=True)
	_note = base.UnicodeAttribute("note",
		description="Reference to a note meta"
		" on this table explaining more about this column",
		default=None,
		copyable=True)
	_xtype = base.UnicodeAttribute("xtype",
		description="VOTable xtype giving"
		" the serialization form; you usually do *not* want to set this,"
		" as the xtypes actually used are computed from database type."
		" DaCHS xtypes are only used for a few unsavoury, hopefully temporary,"
		" hacks",
		default=None,
		copyable=True)
	_stc = TableManagedAttribute("stc",
		description="Internally used"
		" STC information for this column (do not assign to unless instructed"
		" to do so)",
		default=None,
		copyable=True)
	_stcUtype = TableManagedAttribute("stcUtype",
		description="Internally used"
		" STC information for this column (do not assign to)",
		default=None,
		copyable=True)
	_dmRoles = DMRolesAttribute("dmRoles",
		description="Roles played by this column; cannot be assigned to.")
	_properties = base.PropertyAttribute(copyable=True)
	_original = base.OriginalAttribute()

	# set from the parse context in completeElement; in restricted mode,
	# validate rejects fixup.
	restrictedMode = False

	def __repr__(self):
		return "<Column %s>"%repr(self.name)

	def setParent(self, parent):
		# overridden to turn off automatic meta parenting from MetaMixin.
		# This does not upcall on purpose.
		self.parent = parent

	def setMetaParent(self, parent):
		# columns should *not* take part in meta inheritance.  The reason is
		# that there are usually many columns to a table, and there's no
		# way I can see that any piece of metadata should be repeated in
		# all of them.  On the other hand, for votlinks (to name an example),
		# meta inheritance would have disastrous consequences.
		# So, we bend the rules a bit.
		raise base.StructureError("Columns may not have meta parents.",
			hint="The rationale for this is explained in rscdef/column.py,"
			" look for setMetaParent.")

	def onParentComplete(self):
		# we need to resolve note on construction since columns are routinely
		# copied to other tables and meta info does not necessarily follow.
		if isinstance(self.note, str):
			try:
				self.note = self.parent.getNote(self.note)
			except base.NotFoundError: # non-existing notes silently ignored
				self.note = None

	def completeElement(self, ctx):
		"""computes key (the name to use in row dictionaries) and picks up
		restrictedMode from ctx.
		"""
		self.restrictedMode = getattr(ctx, "restricted", False)
		if isinstance(self.name, utils.QuotedName):
			# No '()' allowed in key for that breaks the %()s syntax (sigh!).
			# Work around with the following quick hack that would break
			# if people carefully chose proper names.  Anyone using delim.
			# ids in SQL deserves a good spanking anyway.
			# Either kind of parenthesis is a problem on its own, so we
			# replace both regardless of which one is present.
			self.key = self.name.name.replace(')', "__").replace('(', "__")
		else:
			self.key = self.name
		super().completeElement(ctx)

	def isEnumerated(self):
		"""returns something true if this column has values with options.
		"""
		return self.values and self.values.options

	def validate(self):
		"""raises a RestrictedElement for fixups in restricted mode and
		warns about the deprecated adql:TIMESTAMP xtype.
		"""
		super().validate()
		if self.restrictedMode and self.fixup:
			raise base.RestrictedElement("fixup")
		if self.xtype=="adql:TIMESTAMP":
			base.ui.notifyWarning("At %s: Do not use adql:TIMESTAMP xtype any more."
				" Simply drop xtype for timestamp and date-typed columns."%(
					self.getSourcePosition()))

	def validateValue(self, value):
		"""raises a ValidationError if value does not match the constraints
		given here.

		None is only rejected if the column is required.  The constraints
		from values (options or, failing that, min and max) are not
		checked for Column instances.
		"""
		if value is None:
			if self.required:
				raise base.ValidationError(
					"Field %s is empty but non-optional"%self.name, self.name)
			return

		# Only validate these if we're not a database column
		if not isinstance(self, Column):
			vals = self.values
			if vals:
				if vals.options:
					if value and not vals.validateOptions(value):
						raise base.ValidationError("Value %s not consistent with"
							" legal values %s"%(value, vals.options), self.name)
				else:
					if vals.min and value<vals.min_typed:
						raise base.ValidationError("%s too small (must be at least %s)"%(
							value, vals.min), self.name)
					if vals.max and value>vals.max_typed:
						raise base.ValidationError("%s too large (must be less than %s)"%(
							value, vals.max), self.name)

	def isIndexed(self):
		"""returns a set of index kinds if this column is indexed, False
		if it isn't or None if we don't know because we don't have a proper
		parent.

		In other words: it's a true value if the thing is indexed, a
		false one if it isn't or if we cannot tell.  All further refinements
		are probably only there for more informed ADQL query morphing.
		"""
		if self.parent and hasattr(self.parent, "indexedColumns"):
			# parent is something like a TableDef
			return self.parent.indexedColumns.get(self.name, False)

	def isPrimary(self):
		"""returns a guess as to whether this column is a primary key of the
		embedding table.

		This may return True, False, or None (unknown).
		"""
		if self.parent and hasattr(self.parent, "primary"):
			# parent is something like a TableDef
			if self.name in self.parent.primary:
				return True
			else:
				return False

	def isScalar(self):
		"""returns true if we consider the array's values as scalar.

		A single string is scalar for us.
		"""
		votType, arraysize, _ = self._getVOTableType()
		if votType=="char" or votType=="unicodeChar":
			# a missing arraysize cannot contain an x, and we must not
			# run the in check on None.
			return arraysize is None or "x" not in arraysize
		else:
			return arraysize is None

	_indexedCleartext = {
		True: "indexed",
		False: "notIndexed",
		None: "unknown",
	}

	def asInfoDict(self):
		"""returns a dictionary of certain, "user-interesting" properties
		of the data field, in a dict of strings.
		"""
		# isIndexed returns None if we cannot tell; keep that apart from
		# the false values meaning "not indexed" so the unknown label can
		# actually be produced.
		indexed = self.isIndexed()
		indexKey = None if indexed is None else bool(indexed)

		return {
			"name": str(self.name),
			"type": self.type,
			"description": self.description or "N/A",
			"tablehead": self.getLabel(),
			"unit": self.unit or "N/A",
			"ucd": self.ucd or "N/A",
			"verbLevel": self.verbLevel,
			"indexState": self._indexedCleartext[indexKey],
			"note": self.note,
		}

	def getDDL(self):
		"""returns an SQL fragment describing this column ready for
		inclusion in a DDL statement.
		"""
		pgType = base.sqltypeToPG(self.type)

		# The "str" does magic for delimited identifiers, so it's important.
		items = [str(self.name), pgType]
		if self.required:
			items.append("NOT NULL")
		return " ".join(items)

	def getDisplayHintAsString(self):
		"""returns the display hint as a literal as produced by the
		displayHint attribute's unparse.
		"""
		return self._displayHint.unparse(self.displayHint)

	def getLabel(self):
		"""returns a short label for this column.

		The label is either the tablehead or, missing it, the capitalized
		column key.
		"""
		if self.tablehead is not None:
			return self.tablehead
		return str(self.key).capitalize()

	def _getVOTableType(self):
		"""returns the VOTable type, arraysize and xtype for this
		column-like thing.
		"""
		votType, arraysize, xtype = base.sqltypeToVOTable(self.type)

		# dates get a DaCHS-specific xtype regardless of what the type
		# conversion suggested.
		if self.type=="date":
			xtype = "dachs:DATE"
		return votType, arraysize, xtype
class Column(ColumnBase):
	"""A database column.

	Columns contain almost all metadata to describe a column in a database
	table or a VOTable (the exceptions are for column properties that may
	span several columns, most notably indices).

	Note that the type system adopted by the DC software is a subset
	of postgres' type system.  Thus when defining types, you have to
	specify basically SQL types.  Types for other type systems (like
	VOTable, XSD, or the software-internal representation in python values)
	are inferred from them.

	Columns can have delimited identifiers as names.  Don't do this, it's
	no end of trouble.  For this reason, however, you should not use name
	but rather key to programmatically obtain field's values from rows.

	Properties evaluated:

	- std -- set to 1 to tell the tap schema importer to have the column's
	  std column in TAP_SCHEMA 1 (it's 0 otherwise).
	- statisticsTarget -- an integer to be set as this column's
	  statistics-gathering target.  Set this to something between 100 and
	  10000 on postgres if you have large tables and columns with strongly
	  non-uniform distributions.  Set to -1 to revert to the system default.
	  gavo imp -m will apply changes here; you'll manually have to run
	  ``analyze <tablename>`` after that.
	- statistics -- set this to "no" to keep DaCHS from using this column
	  in dachs limits.  Set this to "enumerate" to make DaCHS collect
	  the discrete values allowed (currently only supported for strings).
	- targetType -- for a column containing a URL, the media type of the
	  resource pointed at.  This is for producing extra annotation for
	  Aladin and friends as per
	  http://mail.ivoa.net/pipermail/dal/2018-May/008017.html
	- targetTitle -- if you give targetType, use this to set the link
	  title (defaults to "Link").
	"""
	name_ = "column"

	_name = ColumnNameAttribute("name",
		default=base.Undefined,
		description="Name of the column",
		copyable=True,
		before="type")
	_hidden = base.BooleanAttribute("hidden",
		default=False,
		description="Hide the column from most of the user interface"
		" (specifically, you can't use it in TAP queries or results,"
		" and it won't be in TAP_SCHEMA). You typically want this for"
		" internal, administrative columns.",
		copyable=True)
	_ignored = base.DataContent(
		description="Columns admit data"
		" content but ignore it. This is exclusively a convenience"
		" for building columns from params and should not be used for"
		" anything else.",
		copyable=False)

	def validate(self):
		"""warns about nullable columns of EXPLICIT_NULL_TYPES that do not
		define a nullLiteral, on top of what ColumnBase validates.
		"""
		super().validate()

		# Now check if we can serialize the column safely in VOTables.
		# I only want to hear about this when the column may end up in
		# a VOTable;
		if self.type not in EXPLICIT_NULL_TYPES:
			return
		if self.required or (self.values and self.values.nullLiteral):
			return

		try:
			pos = codetricks.stealVar("context").pos
			base.ui.notifyWarning("Somewhere near %s: "
				" Column %s may be null but has no explicit"
				" null value."%(pos, self.name))
		except (ValueError, AttributeError):
			# This is stealVar's ValueError, we have no context in stack (or
			# it's a context var not from our parsing code).
			# Seems we're not parsing from a file, so the user probably
			# can't help it anyway.  Don't complain.
			pass

	def getAnnotation(self, roleName, container, instance):
		"""returns a dm.ColumnAnnotation for this column playing roleName.

		This raises a ReportableError unless container is this column's
		parent.
		"""
		if self.parent!=container:
			raise base.ReportableError("You cannot use columns from"
				" other tables in your DM annotations directly.",
				hint="If you really need something like this, you need to"
				" define a datatype corresponding to what's in the other table"
				" and reference a corresponding dm declaration.")
		return dm.ColumnAnnotation(roleName, self, instance)

	def updateFromContext(self, tableId, columnName, ctx):
		"""pulls the injected column metadata from the context into
		our values.

		This is being called from table's completeElement -- only then
		is everything we depend on reasonably complete, and table knows better
		if we should even bother.
		"""
		statRow = ctx.getInjected(
			f"colstats:{tableId}:{columnName}", None)
		discDist = ctx.getInjected(
			f"discrete-strings:{tableId}:{columnName}", None)

		if not (statRow or discDist):
			return

		# we only make a values child if there is something to put into it
		if not self.values:
			self.values = base.makeStruct(Values)

		if statRow:
			self.values.setFromStatRow(statRow, ctx)
		if discDist:
			self.values.setOptionsFromDict(discDist, ctx)

	def getStatistics(self):
		"""returns a dictionary with COLSTATS (or later VS.column) keys
		containing any statistic information in this column's values.

		Values are blindly stringified right now; we may have to switch to
		using VOTable tabledata serialisation as we allow column statistics
		on items that have non-trivial serialisations.
		"""
		if not self.values:
			return {}

		attPairs = [
			("min", "minValue"),
			("max", "maxValue"),
			("median", "median"),
			("percentile03", "percentile03"),
			("percentile97", "percentile97"),
			("fillFactor", "fillFactor"),]

		stats = {}
		for attName, colstatsName in attPairs:
			found = getattr(self.values, attName)
			if found is None or found=="":
				continue
			stats[colstatsName] = found
		return stats
class ParamBase(ColumnBase):
	"""A basic parameter.

	This is the base for both Param and InputKey.
	"""
	_value = base.DataContent(
		description="The value of parameter."
		" It is parsed according to the param's type using the default"
		" parser for the type VOTable tabledata.",
		default=base.NotGiven,
		copyable=True,
		expand=True)

	# the parsed (python) value; Undefined until somebody asks for or
	# sets a value.
	_valueCache = base.Undefined
	# the string literal; NotGiven until set or computed from _valueCache.
	__contentStore = base.NotGiven

	nullLiteral = ""

	# types for which literals are passed through without parsing or
	# serialisation
	unprocessedTypes = set(["raw", "file"])

	def __repr__(self):
		try:
			contentSer = repr(self.content_)
		except Exception:
			# whatever goes wrong in serialising the content must not
			# break repr; things like KeyboardInterrupt must still propagate.
			contentSer = "<not serialisable>"
		return f"<{self.__class__.__name__}, {self.name}={contentSer}>"

	def __set_content(self, val):
		self.__contentStore = val

	def __get_content(self):
		# lazily serialise the typed value if only that has been set.
		if (self.__contentStore is base.NotGiven
				and self._valueCache is not base.Undefined):
			self.__contentStore = self._unparse(self._valueCache)
		return self.__contentStore

	content_ = property(__get_content, __set_content)

	def expand(self, value):
		"""hands up macro expansion requests to a parent, if there is one
		and it can handle expansions.
		"""
		if hasattr(self.parent, "expand"):
			return self.parent.expand(value)
		return value

	def completeElement(self, ctx):
		"""makes sure there is a values child, creating an empty one if
		necessary.
		"""
		if not self.values:
			self.values = base.makeStruct(Values, parent_=self)
		super().completeElement(ctx)

	def onElementComplete(self):
		"""sets the value from content or, if no content was given, from
		the default in values (if there is one).
		"""
		super().onElementComplete()
		if self.content_ is base.NotGiven:
			if self.values.default is not None:
				self.set(self.values.default)
		else:
			self.set(self.content_)

	@property
	def value(self):
		"""returns a typed value for the parameter.

		Unset items give None here.
		"""
		if self._valueCache is base.Undefined:
			if self.content_ is base.NotGiven:
				self._valueCache = None
			else:
				self._valueCache = self._parse(self.content_)
		return self._valueCache

	def getStringValue(self):
		"""returns a string serialisation of the value.

		This is what would reproduce the value if embedded in an XML
		serialisation of the param.
		"""
		if self.type in self.unprocessedTypes:
			return "(Unrepresentable %s)"%self.type
		return self.content_

	def set(self, val):
		"""sets this parameter's value.

		val can be a python value, or string literal.  In the second
		case, this string literal will be preserved in string serializations
		of this param.

		If val is an invalid value for this item, a ValidationError is
		raised and the item's value will be Undefined.
		"""
		if isinstance(val, str):
			self.content_ = val
		else:
			self.content_ = base.NotGiven
		self._valueCache = self._parse(val)

	def _parse(self, literal, atom=False):
		"""parses literal using the default value parser for this param's
		type.

		If literal is not a string, it will be returned unchanged.

		The method also makes sure literal matches any constraints
		set by a values child and raises a ValidationError if not.

		With atom=True, the arraysize is ignored when choosing the parser.
		"""
		if self.type in self.unprocessedTypes:
			return literal

		elif not isinstance(literal, str):
			return literal

		elif literal=="__NULL__" or literal=="":
			value = None

		elif ((self.type=="text" or self.type=="unicode")
				and literal=="__EMPTY__"):
			value = ""

		else:
			if literal==self.values.nullLiteral:
				value = None
			else:
				try:
					type, arraysize, xtype = self._getVOTableType()
					if atom:
						arraysize = None

					if type not in ["char", "unicodeChar"]:
						literal = literal.strip()

					if self.xtype=="interval":
						# prefer interval datatype over anything user-specified,
						# and do special magic on xtype collision
						if self.type=="timestamp":
							type = "timestamp-interval"
						else:
							xtype = xtype or self.xtype
					else:
						xtype = self.xtype or xtype

					value = paramval.getVOTParser(type, arraysize, xtype)(literal)
					# make NaNs NULL here for consistent VOTable practice
					if value!=value:
						value = None
				except ValueError:
					raise base.ValidationError("%s is not a valid literal for %s"%(
						repr(literal), self.name), self.name)

		if not self.values.validateOptions(value):
			# as long as we still have shitty "PQL" (old SIA, SSA and such):
			# we need to skip pql-float and pql-int, as these can have
			# sane Value ranges/enumerations but values like "4/5".
			if self.type not in ["pql-int", "pql-float"]:
				raise base.ValidationError("%s is not a valid value for %s"%(
					repr(literal), self.name), self.name)

		# unify NULL value representation to the empty string
		if value is None:
			self.content_ = ""

		return value

	def _unparse(self, value):
		"""returns a string representation of value appropriate for this
		type.

		Actually, for certain types only handled internally (like file or raw),
		this is not a string representation at all but just the python stuff.

		Plus, right now, for sequences we're not doing anything.  We probably
		should; but we'll need to be much more careful in ContextGramar then.
		"""
		if self.type in self.unprocessedTypes:
			return value

		if value is None:
			return ""
		else:
			type, arraysize, xtype = self._getVOTableType()
			# deal as gracefully as possible with xtype collisions, and
			# prefer manually set xtype if possible
			if xtype and not "interval" in xtype:
				xtype = self.xtype or xtype
			else:
				xtype = xtype or self.xtype
			val = paramval.getVOTSerializer(type, arraysize, xtype)(value)
			return val
class Param(ParamBase):
	"""A table parameter.

	This is like a column, except that it conceptually applies to all
	rows in the table.  In VOTables, params will be rendered as
	PARAMs.

	While we validate the values passed using the DaCHS default parsers,
	at least the VOTable params will be literal copies of the string
	passed in.

	You can obtain a parsed value from the value attribute.

	Null value handling is a bit tricky with params.  An empty param (like
	``<param name="x"/>``) is always NULL (None in python).
	In order to allow setting NULL even where syntactically something has
	to stand, we also turn any __NULL__ to None.

	For floats, NaN will also yield NULLs.  For integers, you can also
	use

	<param name="x" type="integer"><values nullLiteral="-1"/>-1</param>

	For arrays, floats, and strings, the interpretation of values is
	undefined.  Following VOTable practice, we do not tell empty strings and
	NULLs apart; for internal usage, there is a little hack: __EMPTY__ as
	literal does set an empty string.  This is to allow defaulting of empty
	strings -- in VOTables, these cannot be distinguished from "true" NULLs.
	"""
	name_ = "param"

	_hidden = base.BooleanAttribute("hidden", default=False,
		description="Ignored on params, just present for constructor compatibility"
		" with column")

	def validate(self):
		"""makes sure the param has a parseable value and that a value
		is there if the param is required.

		A ValueError from parsing is turned into a LiteralParseError; a
		missing value on a required param raises a StructureError.
		"""
		super().validate()
		if self.content_ is base.NotGiven:
			self.set(None)

		# Parse before checking required: the value property will bomb on
		# malformed literals, and we want that reported as a
		# LiteralParseError whether or not the param is required.
		try:
			value = self.value
		except ValueError as msg:
			raise base.LiteralParseError(self.name, self.content_,
				hint="Param content must be parseable as in VOTable TABLEDATA."
				"  The value you passed caused the error: %s"%msg) from msg

		if self.required and value is None:
			raise base.StructureError("Required value not given for param"
				" %s"%self.name)

	def set(self, val):
		"""sets the value of the parameter.

		Macros will be expanded if the parent object supports macro
		expansion.  Expansion is only attempted for string values
		containing a backslash.
		"""
		if (isinstance(val, str)
				and "\\" in val
				and hasattr(self.parent, "expand")):
			val = self.parent.expand(val)
		return super().set(val)

	def getAnnotation(self, roleName, container, instance):
		"""returns a dm annotation for this param (i.e., a paramRef).

		container is not used here.
		"""
		# TODO: Figure out how to make sure that this param actually
		# ends up in the target VOTable.  Or do we want to enforce
		# params to be within the table?
		return dm.ParamAnnotation(roleName, self, instance)

	def copy(self, parent, ctx=None):
		"""returns a copy of this param that remembers the param it was
		copied from in its originalParam attribute.
		"""
		# overridden so we can set originalParam, which in turn we need
		# in DM serialisation to VOTables
		newInstance = super().copy(parent, ctx)
		newInstance.originalParam = self
		return newInstance