Source code for gavo.base.metavalidation

"""
Meta information validation.

The idea is that you define certain assertions about the meta information
of a given object type.  Defined assertions are

	- MetaExists -- a key is present
	- MetaIsAtomic -- a key is present and a "leaf", i.e., has a single value
	- MetaAtomicExistsOnSelf -- a key is present even without meta inheritance,
		and has a single value
	- MetaValueInVocabulary -- the value of a meta item is in an IVOA
	  vocabulary.

Validators are usually built using model descriptions.  These are enumerations
of meta keys, separated by commas, with an optional code in parentheses.
Whitespace is ignored.  Codes allowed in parens are:

	- empty (default): plain existence
	- !: atomic existence on self
	- 1: atomic existence
	- ?: optional (this only makes sense with other constraints, as
	  any meta value is allowed on anything)
	- voc:vocname: all values must come from the IVOA vocabulary
	  vocname.

Multiple assertions are separated by whitespace.

An example for a valid model description:
"publisher.name,creator.email(), identifier (!), dateUpdated(1)"

These model descriptions can come in metaModel attributes of structures.
If they are, you can use the validateStructure function below to validate
an entire structure tree.
"""

#c Copyright 2008-2025, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import functools

from gavo import utils
from gavo.base import meta

from gavo.utils.dachstypes import (cast,
	Optional, Sequence, Structure, Vocabulary)


class MetaValidationError(meta.MetaError):
	"""An error reporting that a meta carrier failed its validation.

	Instances have the following attributes:

		- failures -- the failure messages as passed in
		- carrierRepr -- the carrier's id if it has a non-empty one, otherwise
		  the carrier's class name followed by " item"
		- pos -- the result of the carrier's getSourcePosition method, or
		  None if the carrier has no such method

	@param carrier: the object that was validated.
	@param failures: user-displayable messages, one per failed assertion.
	"""
	def __init__(self, carrier: meta.MetaMixin, failures: Sequence[str]):
		# not every carrier can tell where it came from; default to None then
		locate = getattr(carrier, "getSourcePosition", lambda: None)
		self.pos = locate()
		self.failures = failures

		carrierId = getattr(carrier, "id", None)
		if carrierId:
			self.carrierRepr = carrierId
		else:
			self.carrierRepr = "%s item"%carrier.__class__.__name__

		meta.MetaError.__init__(self,
			"Meta structure on %s did not validate"%self.carrierRepr,
			carrier)

	def __str__(self) -> str:
		failureList = ", ".join(self.failures)
		return "Meta structure on %s (within %s) did not validate: %s"%(
			self.carrierRepr, self.pos, failureList)
class MetaAssertion:
	"""A single claim about the meta content of an object.

	This is a base class; concrete assertions must override C{check}.

	@param key: the meta key the assertion is about.
	"""
	def __init__(self, key: str):
		self.key = key

	def check(self, metaCarrier: meta.MetaMixin) -> Optional[str]:
		"""returns None if the assertion holds for metaCarrier, and a
		user-displayable description of what failed otherwise.

		The implementation here always reports a failure; derived classes
		must override it.

		@param metaCarrier: an object mixing in L{MetaMixin}.
		"""
		complaint = f"Null assertion on {self.key} always fails"
		return complaint
class MetaExists(MetaAssertion):
	"""An assertion that there is some meta item for key, in whatever form.
	"""
	def check(self, metaCarrier):
		"""returns None if metaCarrier.getMeta yields something for our key,
		a "missing" message otherwise.
		"""
		found = metaCarrier.getMeta(self.key)
		if found is not None:
			return None
		return "Meta key %s missing"%self.key
class MetaIsAtomic(MetaAssertion):
	"""An assertion that a meta item is present and has exactly one value.

	The class attribute propagate is handed through to getMeta; derived
	classes can override it.
	"""
	propagate = True

	def check(self, metaCarrier):
		"""returns None if the meta item for our key exists and has exactly
		one child, and a message saying what is wrong otherwise.
		"""
		item = metaCarrier.getMeta(self.key, propagate=self.propagate)
		if item is None:
			return "Meta key %s missing"%self.key

		childCount = len(item.children)
		if childCount!=1:
			return "Meta key %s is not atomic"%self.key
		return None
class MetaAtomicExistsOnSelf(MetaIsAtomic):
	"""An assertion that metaCarrier itself has exactly one value for key.

	This is MetaIsAtomic, except that getMeta is called with
	propagate=False.
	"""
	propagate = False
class NullAssertion(MetaAssertion):
	"""An assertion that is satisfied by any meta carrier.

	This exists for implementation convenience (it is what the "?" code
	in model descriptions maps to).
	"""
	def check(self, metaCarrier):
		"""returns None (i.e., success) regardless of metaCarrier.
		"""
		return None
class MetaValueInVocabulary(MetaAssertion):
	"""An assertion that all values of a meta item are terms of an IVOA
	vocabulary.

	The vocabulary is only retrieved when it is first needed, and each
	instance keeps what it has retrieved.

	@param key: the meta key to check.
	@param vocName: the vocabulary name, as passed on to
		gavo.protocols.vocabularies.get_vocabulary.
	"""
	# Per-instance cache filled by getVocabulary; None means "not fetched
	# yet".  This replaces a functools.cache on the method, which would key
	# a class-wide cache on self and so keep every instance alive forever.
	_vocabulary: Optional[Vocabulary] = None

	def __init__(self, key: str, vocName: str):
		self._vocName = vocName
		MetaAssertion.__init__(self, key)

	def getVocabulary(self) -> Vocabulary:
		"""returns the vocabulary our values are checked against.

		The vocabulary is fetched on the first call and then kept on the
		instance.
		"""
		if self._vocabulary is None:
			# local import; presumably this avoids an import cycle -- TODO confirm
			from gavo.protocols import vocabularies
			self._vocabulary = vocabularies.get_vocabulary(self._vocName)
		return self._vocabulary

	def check(self, metaCarrier: meta.MetaMixin) -> Optional[str]:
		"""returns None if the string form of every value metaCarrier has
		for our key is among the vocabulary's terms, and a message naming
		the first offending value otherwise.

		A carrier without any values for the key passes.

		@param metaCarrier: an object mixing in L{MetaMixin}.
		"""
		voc = self.getVocabulary()
		for val in metaCarrier.iterMeta(self.key):
			if str(val) not in voc["terms"]:
				return (f"{self.key} meta only admits values from {self._vocName}"
					f", but '{val}' is not in there.")
		return None
class MetaValidator:
	"""A metadata model that objects can be checked against.

	The model is quite simple: it is a sequence of MetaAssertions.

	The validate(metaCarrier) -> None method raises a MetaValidationError
	carrying the messages of all failed assertions in its failures
	attribute.

	@param model: the assertions making up the model.
	"""
	def __init__(self, model: Sequence[MetaAssertion]):
		self.model = model

	def validate(self, metaCarrier: meta.MetaMixin) -> None:
		"""checks metaCarrier against all assertions of the model.

		All assertions are evaluated, in model order, before anything is
		raised.

		@param metaCarrier: an object mixing in L{MetaMixin}.
		@raise MetaValidationError: if at least one assertion returned a
			message.
		"""
		failures = []
		for assertion in self.model:
			complaint = assertion.check(metaCarrier)
			if complaint:
				failures.append(complaint)

		if failures:
			raise MetaValidationError(metaCarrier, failures)
# maps the codes allowed in parentheses in model descriptions to the
# assertion classes implementing them; the empty string is the default.
_assertionCodes = {
	"": MetaExists,
	'!': MetaAtomicExistsOnSelf,
	'1': MetaIsAtomic,
	'?': NullAssertion,
}


@functools.lru_cache(1)
def _getModelGrammar() -> utils.ParserElement:
	"""returns a pyparsing grammar for model descriptions.

	The parse action on the complete model wraps the assertions built for
	the individual keys into a MetaValidator.

	Because of the lru_cache, the grammar is only built on the first call.
	"""
	# note that Optional here is pyparsing's, shadowing the typing name
	# within this function.
	from gavo.utils.parsetricks import (Literal, Optional, StringEnd, Suppress,
		Word, ZeroOrMore, alphas, pyparsingWhitechars, Regex)

	def _makeAssertions(s, p, toks):
		# one assertion for the (possibly empty) code, plus a vocabulary
		# assertion if a voc:... specification was given.
		key = str(toks["key"])
		flag = str(toks.get("mod", ""))
		built = [_assertionCodes[flag](key)]

		vocSpec = str(toks.get("voc", ""))
		if vocSpec:
			# the regex guarantees the leading "voc:"; only the name is passed on
			built.append(MetaValueInVocabulary(key, vocSpec[len("voc:"):]))
		return built

	with pyparsingWhitechars(" \t"):
		keyName = Word(alphas+"._")
		vocCode = Regex("voc:[a-z/_-]+")
		flagCode = Literal('!') | '1' | '?'
		parenthesized = (Suppress('(')
			+ Optional(flagCode)("mod")
			+ Optional(vocCode)("voc")
			+ Suppress(')'))
		assertion = keyName("key")+Optional(parenthesized)
		model = (assertion
			+ ZeroOrMore(Suppress(',') + assertion)
			+ StringEnd())

	assertion.addParseAction(_makeAssertions)
	model.addParseAction(lambda s,p,toks: MetaValidator(toks))
	return model
def parseModel(modelDescr: str) -> MetaValidator:
	"""returns a MetaValidator for a model description.

	Model descriptions are covered in the module docstring.

	Parse errors from the underlying grammar are not caught here.

	@param modelDescr: the model description to parse.
	"""
	# the grammar's top-level parse action leaves the validator as the
	# only token of the parse result.
	return utils.pyparseString(_getModelGrammar(), modelDescr)[0]
def _validateMetaCarrier(metaCarrier: meta.MetaMixin) -> None:
	"""helps _validateStructNode.

	This validates metaCarrier against the metaModel of its class, if
	there is one.  A metaModel that is still a string is parsed first, and
	the resulting validator is stored back on the class.

	@raise utils.StructureError: if the class' metaModel string cannot be
		parsed.
	"""
	carrierClass = metaCarrier.__class__
	metaModel = getattr(carrierClass, "metaModel", None)
	if metaModel is None:
		# no model declared (or explicitly disabled): nothing to check
		return

	if isinstance(metaModel, str):
		try:
			metaModel = parseModel(metaModel)
		except utils.ParseException as msg:
			raise utils.StructureError(
				f"Invalid meta model on {metaCarrier.__class__.__name__}:"
				f" {msg}")
		# keep the parsed model so the string is only parsed once per class
		carrierClass.metaModel = metaModel

	metaModel.validate(metaCarrier)


def _validateStructNode(aStruct: Structure) -> None:
	"""helps validateStructure.

	This validates aStruct itself, then the values returned by its
	getAllMetaPairs method (if it has one), and then recurses into
	whatever aStruct.iterChildren yields.

	A MetaValidationError passing through without a position gets the
	source position of aStruct.
	"""
	# NOTE(review): nesting reconstructed from collapsed source; children
	# are assumed to be walked whether or not getAllMetaPairs exists.
	try:
		_validateMetaCarrier(cast(meta.MetaMixin, aStruct))

		if hasattr(aStruct, "getAllMetaPairs"):
			for _, metaValue in aStruct.getAllMetaPairs():
				_validateMetaCarrier(metaValue)

		for child in aStruct.iterChildren():
			_validateStructNode(child)
	except MetaValidationError as exc:
		knownPos = getattr(exc, "pos", None)
		if knownPos is None:
			exc.pos = aStruct.getSourcePosition()
		raise
def validateStructure(aStruct: Structure) -> None:
	"""does a meta validation for a base.Structure.

	This traverses aStruct and its children, looking for nodes with a
	metaModel attribute.  Each such node is validated against its model.

	Nothing is returned.  Validation stops at the first node that fails.

	@param aStruct: the root of the structure tree to validate.
	@raise MetaValidationError: if aStruct is invalid.
	"""
	_validateStructNode(aStruct)