Source code for

Parsing VO-DML files and (perhaps one day) validating against the
rules obtained in this way.

Validation is something we expect to do only fairly rarely, so none of
this code is expected to be efficient.

#c Copyright 2008-2023, the GAVO project <>
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.

import functools

from lxml import etree

from gavo import base
from gavo.votable import V

# maps the canonical prefix to the file name within resources/dm
# (openModelFile prepends "dm/" and opens the result as a distribution file)
KNOWN_MODELS = {
    "ivoa": "IVOA.vo-dml.xml",
    "meas": "meas.vo-dml.xml",
    "coords": "coords.vo-dml.xml",
    "dachstoy": "dachstoy.vo-dml.xml",
    "geojson": "geojson.vo-dml.xml",
    "phot": "phot.vo-dml.xml",
}

def openModelFile(prefix):
    """opens the VO-DML file belonging to a data model prefix.

    The file name is taken from KNOWN_MODELS; the file itself is opened
    in binary mode through base.openDistFile below dm/.

    Prefixes not in KNOWN_MODELS result in a base.NotFoundError.
    """
    try:
        modelFileName = KNOWN_MODELS[prefix]
    except KeyError:
        explanation = ("This can happen if there"
            " are new data models around or if data providers have defined"
            " custom data models. If this error was fatal during VOTable"
            " processing, please report it as an error; bad data model"
            " annotation should not be fatal in DaCHS.")
        raise base.NotFoundError(
            prefix,
            "VO-DML file for prefix",
            "data models known to DaCHS",
            hint=explanation)

    return base.openDistFile("dm/"+modelFileName, "rb")
class Model(object):
    """a vo-dml model.

    These are usually constructed using the fromPrefix constructor,
    which uses a built-in mapping from well-known prefix to VO-DML file
    to populate the model.

    Instances have the attributes prefix, title, version, uri, and
    description (the last four are None unless the VO-DML gives them or
    someone sets them later) and dmlTree, the parsed VO-DML.
    """
    # non-well-known models can be fed in through fromFile; they will
    # be entered here and can then be obtained through fromPrefix
    # as long as they don't clash with KNOWN_MODELS.
    _modelsReadFromFile = {}

    def __init__(self, prefix, dmlTree):
        self.prefix = prefix
        self.title = None
        self.version = None
        self.uri = None
        self.description = None
        self.dmlTree = dmlTree
        self.__idIndex = None
        self._getModelMeta()

    @classmethod
    def fromPrefix(cls, prefix):
        """returns a VO-DML model for a well-known prefix.

        Models previously read through fromFile take precedence over
        the files shipped with DaCHS.

        This raises whatever openModelFile raises for unknown prefixes
        and a base.StructureError if the VO-DML file cannot be parsed.

        User code should typically use the getModelForPrefix function.
        """
        if prefix in cls._modelsReadFromFile:
            return cls._modelsReadFromFile[prefix]

        inF = openModelFile(prefix)
        try:
            try:
                return cls(prefix, etree.parse(inF))
            except Exception as ex:
                raise base.ui.logOldExc(
                    base.StructureError(
                        "Failure to parse VO-DML for prefix %s: %s"%(
                            prefix, repr(ex))))
        finally:
            inF.close()

    @classmethod
    def fromFile(cls, src, srcURL="http://not.given/invalid"):
        """returns a VO-DML model from src.

        src can either be an open file (anything with a read attribute;
        it will be closed as a side effect of this function) or
        something that is handed on to openModelFile, which means it is
        looked up like a prefix in KNOWN_MODELS.

        The model's prefix is taken from the name element of the
        VO-DML, its uri is set to srcURL.  Unless the prefix is in
        KNOWN_MODELS, the model is remembered so fromPrefix will
        return it later.

        This is intended for documents using non-standard models with
        custom prefixes (i.e., not known to DaCHS).
        """
        if hasattr(src, "read"):
            inF = src
        else:
            inF = openModelFile(src)

        try:
            tree = etree.parse(inF)
            prefix = tree.find("name").text
            res = cls(prefix, tree)
            res.uri = srcURL

            if prefix not in KNOWN_MODELS:
                cls._modelsReadFromFile[prefix] = res
            return res
        finally:
            inF.close()

    def _getModelMeta(self):
        """sets some metadata on the model from the parsed VO-DML.

        This will fail silently (i.e., the metadata will remain on its
        default).  Each item is looked up independently, so a missing
        element does not keep the others from being set.

        Metadata obtained so far includes: title, version, description,
        uri.
        """
        try:
            for attName in ("title", "version", "description", "uri"):
                node = self.dmlTree.find(attName)
                if node is not None:
                    setattr(self, attName, node.text)
        except AttributeError:
            # probably the VO-DML file is bad; just fall through to
            # non-validatable model.
            pass

    @functools.lru_cache(200)
    def getByVODMLId(self, vodmlId):
        """returns the element with vodmlId.

        This raises a NotFoundError for elements that are not present.

        This can be used with or without a prefix.  The prefix is just
        discarded, though.

        Do not pass in unparsed ids; they are used in xpaths.
        """
        vodmlId = vodmlId.split(":")[-1]
        res = self.dmlTree.xpath(f"//*[vodml-id='{vodmlId}']")
        if res:
            return res[0]
        else:
            raise base.NotFoundError(vodmlId, "data model element",
                self.prefix+" data model")

    def _resolveVODMLId(self, vodmlId):
        """returns an etree element for vodmlId, which may include a prefix.

        Prefixed ids are resolved through the module-level
        resolveVODMLId (we'd probably be looking in a different DM),
        unprefixed ones through this model's getByVODMLId.

        This will raise a NotFoundError when the vodmlId points nowhere.
        """
        if ":" in vodmlId:
            return resolveVODMLId(vodmlId)
        else:
            return self.getByVODMLId(vodmlId)

    def _makeAttrDict(self, attrNode):
        """returns a dictionary of attribute metadata for an etree attrNode.

        The keys vodml-id and description hold the stripped text of the
        respective child (an empty string for empty elements), datatype
        holds the text of the first child of the datatype element.
        Children missing in attrNode have no key in the result.
        """
        res = {}
        for child in attrNode:
            if child.tag=="vodml-id":
                # text is None for empty elements
                res["vodml-id"] = (child.text or "").strip()
            elif child.tag=="description":
                res["description"] = (child.text or "").strip()
            elif child.tag=="datatype":
                res["datatype"] = child[0].text
        return res

    @functools.lru_cache(100)
    def getType(self, name):
        """returns an etree for a data or object type with the *name*
        name within this DM.

        dataType elements are tried first, then objectType elements; a
        NotFoundError is raised if neither matches.

        Any prefix on name will be discarded without further ado.  Don't
        pass in anything unparsed here; we are using xpaths.
        """
        name = name.split(":")[-1]
        for typeKind in ("dataType", "objectType"):
            found = self.dmlTree.xpath(f"{typeKind}[name='{name}']")
            if found:
                return found[0]

        raise base.NotFoundError(
            name, "VO-DML type", self.prefix+" data model")

    @functools.lru_cache(1000)
    def getAttributeMeta(self, typeName, attrName):
        """returns a metadata dictionary for the attribute attrName
        of the type referenced by typeName.

        attrName is the unqualified attribute name.  typeName may be
        qualified, but prefixes are ignored.  Do not pass in unparsed
        strings to typeName and attrName; they are blindly used in
        xpath expressions.

        The metadata returned includes datatype, description, and
        vodml-id.

        If the attribute is not defined on the type itself, the types
        it extends are searched in turn.

        If either the data model, or type, or the attribute cannot be
        found a NotFoundError is raised.
        """
        curType = self.getType(typeName)

        while True:
            attr = curType.xpath(
                f"(attribute|composition)[name='{attrName}']")
            if attr:
                return self._makeAttrDict(attr[0])

            # not defined here: climb to the parent type, if there is one
            extends = curType.xpath("extends/vodml-ref")
            if extends:
                curType = self._resolveVODMLId(extends[0].text)
            else:
                raise base.NotFoundError(
                    attrName, "attribute", f"VO-DML type {typeName}")

    def getVOT(self, ctx, instance):
        """returns xmlstan for a VOTable declaration of this DM.

        ctx and instance are not used here.
        """
        return V.MODEL(name=self.prefix, url=self.uri)
@functools.lru_cache(30)
def getModelForPrefix(prefix):
    """returns a vodml.Model instance for a well-known VO-DML prefix.

    Results are cached per prefix, which is why user code should
    usually obtain its models through this function.

    Note that for prefixes Model.fromPrefix reports as not found, this
    currently returns a stand-in shim with a urn:dachsjunk uri.  That
    behaviour will change to become a NotFoundError exception when
    there's actually useful data models.
    """
    try:
        return Model.fromPrefix(prefix)
    except base.NotFoundError:
        standinSource = (
            "<junk><title>DaCHS standin model</title>"
            " <description>This is used by DaCHS during the old west days"
            " of VO DM development. Any annotation using this will not be"
            " interoperable.</description>"
            " <version>invalid</version></junk>")
        shim = Model(prefix, etree.fromstring(standinSource))
        shim.uri = "urn:dachsjunk:not-model:"+prefix
        return shim
def getAttributeDefinition(qualifiedType, attrName):
    """returns attribute metadata for a type.

    qualifiedType is a type name of the form prefix:name; the prefix
    selects the data model (via getModelForPrefix), the rest is looked
    up in there.  attrName is the attribute's name (not vodml-id).
    """
    prefix, typeName = qualifiedType.split(":")
    model = getModelForPrefix(prefix)
    return model.getAttributeMeta(typeName, attrName)
def resolveVODMLId(vodmlId):
    """returns an etree element corresponding to the prefixed vodmlId.

    The part before the first colon selects the model (through
    getModelForPrefix), the rest is looked up within that model.

    Of course, this only works if vodmlId has a well-known prefix.
    """
    prefix, localId = vodmlId.split(":", 1)
    model = getModelForPrefix(prefix)
    return model.getByVODMLId(localId)