Source code for gavo.utils.serializers

A framework for pluggable serialisation of (python) values.

This module collects a set of basic (looking primarily towards
VOTables) serialiser factories.  These are just functions receiving
AnnotatedColumn objects and returning either None ("not responsible")
or a function taking a value and returning a string.  They may change
the AnnotatedColumn objects, for instance, when an MJD (float)
becomes a datetime.

These factories are registered in ValueMapperFactoryRegistry classes;
the one used for "normal" VOTables is the defaultMFRegistry.

Most factories are created here.  However, some depend on advanced
functionality not available here; they are registered when the
respective modules (for instance, stc) are imported.

In DaCHS, a second such factory registry is created in web.htmltable.

#c Copyright 2008-2023, the GAVO project <>
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.

import datetime
import re

from gavo.utils import algotricks
from gavo.utils import typeconversions

__docformat__ = "restructuredtext en"

class ValueMapperFactoryRegistry(object):
    """An object clients can ask for functions fixing up values for encoding.

    A mapper factory is just a function that takes an AnnotatedColumn
    instance.  It must return either None (for "I don't know how to make a
    function for this combination of column properties") or a callable
    that takes a value of the given type and returns a mapped value.

    To add a mapper, call registerFactory.  To find a mapper for a
    set of column properties, call getMapper -- column properties should
    be an instance of AnnotatedColumn, but for now a dictionary with the
    right keys should mostly do.

    Mapper factories are tried in the reverse order of registration,
    and the first that returns non-None wins, i.e., you should register
    more general factories first.  If no registered mapper declares
    itself responsible, getMapper returns an identity function.  If
    you want to catch such a situation, you can use something like
    res = vmfr.getMapper(...); if res is utils.identity ...
    """

    def __init__(self, factories=None):
        # Always work on a private list so changes to this registry never
        # leak back into the sequence passed in; list() also accepts
        # tuples and other iterables.
        if factories is None:
            self.factories = []
        else:
            self.factories = list(factories)

    def clone(self):
        """returns a clone of this registry.

        This is a copy, i.e., factories added to the clone will not change
        the original.
        """
        return self.__class__(self.factories)

    def getFactories(self):
        """returns the list of factories.

        This is *not* a copy.  It may be manipulated to remove or add
        factories.
        """
        return self.factories

    def registerFactory(self, factory):
        """adds factory such that it is tried before all factories
        registered so far.
        """
        self.factories.insert(0, factory)

    def appendFactory(self, factory):
        """adds factory such that it is tried after all factories
        registered so far.
        """
        self.factories.append(factory)

    def getMapper(self, colDesc):
        """returns a mapper for the values of the column described by colDesc.

        This method may change colDesc; in particular, the factory that
        produced the mapper is stored under the "winningFactory" key.

        When no factory feels responsible, algotricks.identity is
        returned (and colDesc gets no winningFactory).

        We do a linear search here, so you shouldn't call this function too
        frequently.
        """
        for factory in self.factories:
            mapper = factory(colDesc)
            # The contract is "None means not responsible"; do not rely on
            # truthiness, as callable objects may well be falsy.
            if mapper is not None:
                colDesc["winningFactory"] = factory
                return mapper
        return algotricks.identity
# The registry used for "normal" VOTable serialisation; the factories
# defined below are registered on it as this module is imported.
# registerFactory puts new factories in front, so factories registered
# later are tried first.
defaultMFRegistry = ValueMapperFactoryRegistry()
registerDefaultMF = defaultMFRegistry.registerFactory


def _timeMapperFactory(annCol):
    """A factory for mappers turning times into HH:MM:SS strings.

    It is responsible for columns with dbtype time or with a display hint
    of type humanTime.  The number of fractional digits of the seconds is
    taken from the sf display hint (default 0).

    The returned mapper formats datetime.time, datetime.datetime, and
    datetime.timedelta values; None and values of any other type are
    passed through unchanged.  When it formats a value, it also declares
    the column as char(*) in annCol.
    """
    # XXX TODO: Unify with analogous code in web.htmltable
    if not (annCol["dbtype"]=="time"
            or annCol["displayHint"].get("type")=="humanTime"):
        return None

    sf = int(annCol["displayHint"].get("sf", 0))
    # The field width covers two integer digits plus, if there are
    # fractional digits at all, the decimal point and sf digits.  Without
    # fractional digits no point is written, so the width must be 2
    # (a width of 3 would render three seconds as "003").
    if sf > 0:
        secondsWidth = sf+3
    else:
        secondsWidth = 2
    fmtStr = "%%02d:%%02d:%%0%d.%df"%(secondsWidth, sf)

    def mapper(val):
        if val is None:
            return val
        elif isinstance(val, (datetime.time, datetime.datetime)):
            # val.second is integral, so any fractional digits requested
            # through sf come out as zeros here.
            res = fmtStr%(val.hour, val.minute, val.second)
        elif isinstance(val, datetime.timedelta):
            # only the sub-day part (seconds, microseconds) of the
            # timedelta is used; val.days is ignored.
            hours = val.seconds//3600
            minutes = (val.seconds-hours*3600)//60
            seconds = (val.seconds-hours*3600-minutes*60
                )+val.microseconds/1e6
            res = fmtStr%(hours, minutes, seconds)
        else:
            return val
        annCol["datatype"], annCol["arraysize"] = "char", "*"
        return res

    return mapper

registerDefaultMF(_timeMapperFactory)


def _byteaMapperFactory(colDesc):
    """A factory for mappers turning bytea values into bytes.

    The returned mapper passes None through, encodes (legacy) str values
    as iso-8859-1, and calls bytes() on everything else.
    """
    if colDesc["dbtype"]=="bytea":
        # psycopg2 here returns buffers which are painful in some situations.
        def _(val):
            if val is None:
                # bytes(None) would raise a TypeError; pass NULLs through
                # like the other mappers in this module do.
                return None
            if isinstance(val, str):
                # that's legacy at best, but I don't want to break it just yet
                return val.encode("iso-8859-1")
            return bytes(val)
        return _

registerDefaultMF(_byteaMapperFactory)


# database types handled by _pgSphereMapperFactory
GEOMETRY_TYPES = {"spoint", "spoly", "scircle", "sbox", "smoc"}


def _pgSphereMapperFactory(colDesc):
    """A factory for functions turning pgsphere types to DALI arrays.

    For columns with a dbtype in GEOMETRY_TYPES, this sets datatype,
    arraysize, and xtype in colDesc to what
    typeconversions.sqltypeToVOTable returns for the dbtype and returns a
    mapper calling asDALI() on non-None values.
    """
    # even though the VOTable library can and will do this natively,
    # we still map this manually right now in order to help non-VOTable
    # output formats; this will not work (properly) when the output
    # format doesn't have array support.  For these, just create
    # a new mapper factory for GEOMETRY_TYPES.
    if colDesc["dbtype"] not in GEOMETRY_TYPES:
        return

    def mapper(val):
        if val is None:
            return None
        return val.asDALI()

    colDesc["datatype"], colDesc["arraysize"], colDesc["xtype"
        ] = typeconversions.sqltypeToVOTable(colDesc["dbtype"])

    return mapper

registerDefaultMF(_pgSphereMapperFactory)


def _legacyGeometryMapperFactory(colDesc):
    """A factory to support TAP 1.0-style (STC-S) geometry maps.

    These are requested through the semi-custom, legacy adql:REGION xtype
    (adql:POINT is handled here, too).  The column is declared as char(*),
    and the returned mapper calls asSTCS(systemString) on non-None values.
    """
    if colDesc["xtype"] not in ["adql:REGION", "adql:POINT"]:
        return

    systemString = None
    # NOTE(review): in this copy of the file, the statement that presumably
    # derived systemString from the column's metadata was garbled into
    # invalid syntax ("if systemString = ...") and could not be
    # reconstructed; restore it from upstream.  Until then, every column
    # ends up with the UNKNOWNFrame fallback below.
    if systemString is None:
        systemString = "UNKNOWNFrame"

    def mapper(val):
        if val is None:
            return None
        return val.asSTCS(systemString)

    colDesc["datatype"], colDesc["arraysize"] = "char", "*"

    return mapper

registerDefaultMF(_legacyGeometryMapperFactory)


def _castMapperFactory(colDesc):
    """is a factory that picks up castFunctions set up by user casts.

    It returns colDesc's castFunction item if there is one and declares
    itself not responsible otherwise.
    """
    if "castFunction" in colDesc:
        return colDesc["castFunction"]

registerDefaultMF(_castMapperFactory)


def _htmlScrubMapperFactory(colDesc):
    """A factory for mappers removing markup from keephtml columns.

    For columns with a display hint of type keephtml, the returned mapper
    deletes everything matching <...> from the value; empty or None
    values become the empty string.
    """
    if colDesc["displayHint"].get("type")!="keephtml":
        return

    tagPat = re.compile("<[^>]*>")

    def coder(data):
        if data:
            return tagPat.sub("", data)
        return ""

    return coder

registerDefaultMF(_htmlScrubMapperFactory)
def getMapperRegistry():
    """returns a new registry holding the factories currently registered
    in the default value mapper registry.
    """
    currentFactories = defaultMFRegistry.getFactories()
    return ValueMapperFactoryRegistry(currentFactories)
class AnnotatedColumn(object):
    """A collection of annotations for a column.

    AnnotatedColumns are constructed with columns and retain a
    reference to them ("original").

    In addition, they provide a getitem/setitem interface to a dictionary
    that contains "digested" information on the column.  This dictionary
    serves as an accumulator for information useful during the
    serialization process.

    The main reason for this class is that Columns are supposed to be
    immutable; thus, any ephemeral information needs to be kept in a
    different place.  In particular, the mapper factories receive such
    annotations.

    As a special service to coerce internal tables to external standards,
    you can pass a votCast dictionary to AnnotatedColumn.  Give any
    key/value pairs in there to override what AnnotatedColumn guesses or
    infers.  This is used to force the sometimes a bit funky SCS/SIAP
    types to standard values.

    The castMapperFactory enabled by default checks for the presence of
    a castFunction in an AnnotatedColumn.  If it is there, it will be used
    for mapping the values, so this is another thing you can have in
    votCast.

    The SerManager tries to obtain votCasts from a such-named
    attribute on the table passed in.

    Though of course clients can access original, the mapping facets
    should only be accessed through getitem/setitem since they
    may be updated wrt what is in original.

    Attributes available via the setitem/getitem interface include:

    - nullvalue -- a suitable nullvalue for this column, if provided by the
      column's values or otherwise obtained
    - name -- a name for the column
    - dbtype -- the column's database type
    - xtype -- the column's xtype (e.g., "timestamp")
    - datatype, arraysize -- a VOTable type for the column
    - displayHint -- a parsed display hint
    - note -- a reference to a table note (these get entered by SerManager)
    - ucd, utype, unit, description -- as for column
    - id -- a string suitable as XML id (externally managed)
    - votablewrite would evaluate min and max (but right now nothing adds
      this)
    """

    def __init__(self, column, votCast=None):
        self.original = column
        self._initAnnotation()
        # explicit casts win over whatever was inferred from the column
        if votCast is not None:
            self.annotations.update(votCast)

    def _initAnnotation(self):
        """fills self.annotations from the properties of self.original."""
        votType, arraysize, inferredXtype = typeconversions.sqltypeToVOTable(
            self.original.type)

        # the interval xtype clashes with other xtypes (right now, in
        # particular timestamp; the others aren't orderable).  Hence, we
        # need to be clever with the computation of the new xtype
        # (analogous code in rscdef.column; we should fix that)
        declaredXtype = self.original.xtype
        if declaredXtype=="interval":
            xtype = inferredXtype or declaredXtype
        else:
            xtype = declaredXtype or inferredXtype

        columnValues = self.original.values
        self.annotations = {
            "nullvalue": columnValues and columnValues.nullLiteral,
            "name": self.original.key,
            "dbtype": self.original.type,
            "xtype": xtype,
            "datatype": votType,
            "arraysize": arraysize,
            "displayHint": self.original.displayHint,
            "note": None,
            "ucd": self.original.ucd,
            "utype": self.original.utype,
            "unit": self.original.unit,
            "description": self.original.description,
            # id is managed by SerManager
            "id": None,
            "ref": None,  # used for legacy COOSYS only
        }

    def __getitem__(self, key):
        return self.annotations[key]

    def __setitem__(self, key, value):
        self.annotations[key] = value

    def __contains__(self, key):
        return key in self.annotations

    def get(self, key, default=None):
        """returns the annotation for key, or default if there is none."""
        return self.annotations.get(key, default)