Package gavo :: Package utils :: Module stanxml
[frames | no frames]

Source Code for Module gavo.utils.stanxml

  1  """ 
  2  A stan-like model for building namespaced XML trees. 
  3   
  4  The main reason for this module is that much of the VO's XML mess is based 
  5  on XML schema and thus has namespaced attributes.  This single design 
  6  decision ruins the entire XML design.  To retain some remnants of 
  7  sanity, I treat the prefixes themselves as namespaces and maintain 
  8  a single central registry from prefixes to namespaces in this module. 
  9   
 10  Then, the elements only use these prefixes, and this module makes sure 
 11  that during serialization the instance document's root element contains 
 12  the namespace mapping (and the schema locations) required. 
 13  """ 
 14   
 15  #c Copyright 2008-2019, the GAVO project 
 16  #c 
 17  #c This program is free software, covered by the GNU GPL.  See the 
 18  #c COPYING file in the source distribution. 
 19   
 20   
 21  from cStringIO import StringIO 
 22   
 23  try: 
 24          from xml.etree import cElementTree as ElementTree 
 25  except ImportError: 
 26          from elementtree import ElementTree #noflake: conditional import 
 27   
 28  from gavo.utils import autonode 
 29  from gavo.utils import excs 
 30  from gavo.utils import misctricks 
 31  from gavo.utils import texttricks 
class Error(Exception):
    """The base class for the exceptions defined in this module."""
35
class ChildNotAllowed(Error):
    """Raised when a child is added to an element that restricts its
    children and does not admit that child.
    """
# The character encoding named in XML_HEADER.
encoding = "utf-8"

# An XML declaration announcing that encoding.
XML_HEADER = '<?xml version="1.0" encoding="%s"?>' % (encoding,)
class _Autoconstructor(autonode.AutoNodeType):
    """The metaclass of Elements.

    It adds two things to what autonode.AutoNodeType does:

    * classes with a non-None _childSequence get an _allowedChildren set
      built from it; all other classes get _childSequence = None.
    * indexing the class itself instantiates it (without arguments) and
      indexes the fresh instance -- that is what allows the stan-like
      notation SomeElement[child].
    """

    def __init__(cls, name, bases, dict):
        autonode.AutoNodeType.__init__(cls, name, bases, dict)
        childSequence = getattr(cls, "_childSequence", None)
        if childSequence is None:
            # make sure the attribute always exists
            cls._childSequence = None
        else:
            cls._allowedChildren = set(childSequence)

    def __getitem__(cls, items):
        instance = cls()
        return instance[items]
61
class Stub(object):
    """A placeholder for objects that do not exist yet while a stanxml
    tree is being built.

    A Stub carries a single opaque handle (dest); resolving it is left
    to the user.  One example of how these can be used is the ColRefs in
    stc to utype conversion.

    Two Stubs compare equal if their handles compare equal.
    """
    name_ = "stub"
    text_ = None

    def __init__(self, dest):
        self.dest = dest

    def __repr__(self):
        return "%s(%s)"%(type(self).__name__, repr(self.dest))

    def __eq__(self, other):
        # The Stub class itself is the marker for "other has no dest".
        otherDest = getattr(other, "dest", Stub)
        return self.dest==otherDest

    def __ne__(self, other):
        if self==other:
            return False
        return True

    def __hash__(self):
        # must agree with __eq__, hence hash the handle
        return hash(self.dest)

    def isEmpty(self):
        """returns False: stubs never count as empty.
        """
        return False

    def shouldBeSkipped(self):
        """returns False: stubs are never skipped.
        """
        return False

    def getChildDict(self):
        """returns an empty dictionary, as stubs have no children.
        """
        return {}

    def iterAttNames(self):
        """iterates over nothing, as stubs have no attributes.
        """
        return
        yield  # never reached; it only turns this into a generator

    def apply(self, func):
        """does nothing and returns None.

        Stubs don't have what Element.apply needs, so we don't even pretend.
        """
        return None
110
class Element(object):
    """An element for serialization into XML.

    This is loosely modelled after nevow stan.

    Don't add to the children attribute directly, use addChild or (more
    usually) __getitem__.

    Elements have attributes and children.  The attributes are defined,
    complete with defaults, in _a_<name> attributes as in AutoNodes.
    Attributes are checked.

    Children are not usually checked, but you can set a _childSequence
    attribute containing a list of (unqualified) element names.  These
    children will be emitted in the sequence given.

    When deriving from Elements, you may need attribute names that are not
    python identifiers (e.g., with dashes in them).  In that case, define
    an attribute _name_a_<att> and point it to any string you want as the
    attribute.

    When serializing these, empty elements (i.e. those having an empty text and
    having no non-empty children) are usually discarded.  If you need such an
    element (e.g., for attributes), set _mayBeEmpty to True.

    Since insane XSD mandates that local elements must not be qualified when
    elementFormDefault is unqualified, you need to set _local=True on
    such local elements to suppress the namespace prefix.  Attribute names
    are never qualified here.  If you need qualified attributes, you'll
    have to use attribute name translation.

    The content of the DOM may be anything recognized by addChild.
    In particular, you can give objects a serializeToXMLStan method returning
    strings or an Element to make them good DOM citizens.

    Elements cannot harbor mixed content (or rather, there is only
    one piece of text).
    """
    __metaclass__ = _Autoconstructor

    # The element name; None means "use the class name" (see _setupNode).
    name_ = None
    _a_id = None
    # The namespace prefix the element name is qualified with on output.
    _prefix = ""
    # Further prefixes (e.g., used in attributes) that need declaring.
    _additionalPrefixes = frozenset()
    _mayBeEmpty = False
    _local = False
    # If true, addChild stringifies children it cannot handle otherwise.
    _stringifyContent = False

    # should probably do this in the elements needing it (quite a lot of them
    # do, however...)
    _name_a_xsi_type = "xsi:type"

    # for type dispatching in addChild.
    _generator_t = type((x for x in ()))

    # see _setupNode below for __init__

    def __getitem__(self, children):
        """adds children via addChild and returns self.

        Returning self is what allows chaining in stan-like notation.
        """
        self.addChild(children)
        return self

    def __call__(self, **kw):
        """sets the attributes given as keyword arguments and returns self.

        An AttributeError is raised for names that are not already present
        as attributes of self.
        """
        if not kw:
            return self

        # XXX TODO: namespaced attributes?
        for k, v in kw.iteritems():
            # Only allow setting attributes already present
            # (the getattr raises an AttributeError otherwise)
            getattr(self, k)
            setattr(self, k, v)
        return self

    def __iter__(self):
        # Without this, the __getitem__ above would make python try
        # sequence-style iteration on Elements.
        raise NotImplementedError("Element instances are not iterable.")

    def __nonzero__(self):
        # NOTE(review): as written, an element is truthy exactly when it
        # is empty; this looks inverted -- confirm what callers expect
        # before touching it.
        return self.isEmpty()

    def _setupNodeNext(self, cls):
        """calls the _setupNode found after cls in self's MRO, if there
        is one.
        """
        try:
            pc = super(cls, self)._setupNode
        except AttributeError:
            pass
        else:
            pc()

    def _setupNode(self):
        """initialises the per-instance state (emptiness cache, children,
        text, and name).

        Presumably this is called by the autonode constructor machinery
        in place of an __init__ -- confirm in gavo.utils.autonode.
        """
        self._isEmptyCache = None
        self._children = []
        self.text_ = ""
        if self.name_ is None:
            self.name_ = self.__class__.__name__.split(".")[-1]
        self._setupNodeNext(Element)

    def _makeAttrDict(self):
        """returns a dictionary mapping XML attribute names to unicode
        values for all attributes of self that are not None.
        """
        res = {}
        for name, attName in self.iterAttNames():
            if getattr(self, name, None) is not None:
                res[attName] = unicode(getattr(self, name))
        return res

    def _iterChildrenInSequence(self):
        """iterates over the children in the order given by _childSequence.

        Children whose name_ is not in _childSequence are not returned.
        """
        cDict = self.getChildDict()
        for cName in self._childSequence:
            if cName in cDict:
                for c in cDict[cName]:
                    yield c

    def bailIfBadChild(self, child):
        """raises ChildNotAllowed if self has a _childSequence and neither
        child's name_ nor child's type is in it.
        """
        if (self._childSequence is not None
                and getattr(child, "name_", None) not in self._allowedChildren
                and type(child) not in self._allowedChildren):
            raise ChildNotAllowed("No %s children in %s"%(
                getattr(child, "name_", "text"), self.name_))

    def deepcopy(self):
        """returns a deep copy of self.

        Element children are copied recursively, all other children are
        shared with the copy.

        NOTE(review): only what iterChildren returns is copied, and
        addChild keeps strings in text_ rather than in the children, so
        the text content does not seem to make it into the copy; also,
        the constructor is called with XML attribute names, which differ
        from the python names for translated attributes -- confirm.
        """
        copy = self.__class__(**self._makeAttrDict())
        for child in self.iterChildren():
            if isinstance(child, Element):
                copy.addChild(child.deepcopy())
            else:
                copy.addChild(child)
        return copy

    def addChild(self, child):
        """adds child to the list of children.

        Child may be an Element, a Stub, a string, or a list, tuple, or
        generator of these.  Objects with a serializeToXMLStan method are
        replaced by that method's result, and Element classes are
        instantiated without arguments.  Finally, child may be None, in
        which case nothing will be added.

        A string child replaces any text set before.

        Anything else is added as its unicode representation if
        _stringifyContent is true and raises an Error otherwise.
        ChildNotAllowed is raised for children bailIfBadChild rejects.
        """
        # whatever we add may change our emptiness
        self._isEmptyCache = None
        if child is None:
            pass
        elif hasattr(child, "serializeToXMLStan"):
            self.addChild(child.serializeToXMLStan())
        elif isinstance(child, basestring):
            self.bailIfBadChild(child)
            self.text_ = child
        elif isinstance(child, (Element, Stub)):
            self.bailIfBadChild(child)
            self._children.append(child)
        elif isinstance(child, (list, tuple, self._generator_t)):
            for c in child:
                self.addChild(c)
        elif isinstance(child, _Autoconstructor):
            self.addChild(child())
        elif self._stringifyContent:
            self.addChild(unicode(child))
        else:
            raise Error("%s element %s cannot be added to %s node"%(
                type(child), repr(child), self.name_))

    def isEmpty(self):
        """returns true if the current node has no non-empty children and no
        non-whitespace text content.

        The result is cached until the next addChild.
        """
        if self._isEmptyCache is None:
            self._isEmptyCache = True

            if self.text_.strip():
                self._isEmptyCache = False
            if self._isEmptyCache:
                for c in self._children:
                    if not c.shouldBeSkipped():
                        self._isEmptyCache = False
                        break

        return self._isEmptyCache

    def shouldBeSkipped(self):
        """returns true if the current node should be left out of an output.

        That is the case if it is empty and _mayBeEmpty is false.
        An empty element is one that has only empty children and no
        non-whitespace text content.
        """
        if self._mayBeEmpty:
            return False
        return self.isEmpty()

    def iterAttNames(self):
        """iterates over the defined attribute names of this node.

        Each element returned is a pair of the node attribute name and the
        xml name (which may be translated via _name_a_<att>).
        """
        for name, default in self._nodeAttrs:
            xmlName = getattr(self, "_name_a_"+name, name)
            yield name, xmlName

    def addAttribute(self, attName, attValue):
        """adds attName, attValue to this Element's attributes when instanciated.

        You cannot add _a_<attname> attributes to instances.  Thus, when
        in a pinch, use this.

        NOTE(review): _nodeAttrs is not assigned per instance anywhere
        in this class; if it is a list living on the class, the append
        below is visible to all instances of that class -- confirm
        against gavo.utils.autonode.
        """
        attName = str(attName)
        if not hasattr(self, attName):
            self._nodeAttrs.append((attName, attValue))
        setattr(self, attName, attValue)

    def iterChildrenOfType(self, type):
        """iterates over all children that are instances of type.
        """
        for c in self._children:
            if isinstance(c, type):
                yield c

    def iterChildren(self):
        """returns an iterator over the children in the order they were
        added.
        """
        return iter(self._children)

    def getChildDict(self):
        """returns a dictionary mapping element names to lists of the
        children having that name.
        """
        cDict = {}
        for c in self._children:
            cDict.setdefault(c.name_, []).append(c)
        return cDict

    def iterChildrenWithName(self, elName):
        """iterates over children whose element name is elName.

        This always does a linear search through the children and hence
        may be slow.
        """
        for c in self._children:
            if c.name_==elName:
                yield c

    def _getChildIter(self):
        """returns an iterator over the children as they should be
        serialised: in _childSequence order if there is a _childSequence,
        in the order they were added otherwise.
        """
        if self._childSequence is None:
            return iter(self._children)
        else:
            return self._iterChildrenInSequence()

    def apply(self, func):
        """calls func(node, text, attrs, childIter).

        This is a building block for tree traversals; the expectation is that
        func does something like func(node, text, attrDict, childSequence).

        This returns whatever func returns, or None without calling func
        if the node should be skipped.
        """
        try:
            if self.shouldBeSkipped():
                return
            attrs = self._makeAttrDict()
            return func(self, self.text_,
                attrs, self._getChildIter())
        except Error:
            raise
        except Exception:
            # give some context on where in the tree things went wrong,
            # then let the exception propagate
            misctricks.sendUIEvent("Info",
                "Internal failure while building XML; context is"
                " %s node with children %s"%(
                    self.name_,
                    texttricks.makeEllipsis(repr(self._children), 60)))
            raise

    def asETree(self, prefixForEmpty=None):
        """returns an ElementTree instance for the tree below this node.

        Deprecated.  Use Serializer rather than ElementTree.
        """
        return DOMMorpher(prefixForEmpty, NSRegistry).getMorphed(self)

    def render(self, prefixForEmpty=None, includeSchemaLocation=True):
        """returns this and its children as a string.

        No XML declaration is included.
        """
        f = StringIO()
        write(self, f, prefixForEmpty=prefixForEmpty, xmlDecl=False,
            includeSchemaLocation=includeSchemaLocation)
        return f.getvalue()
384
class NSRegistry(object):
    """A container for a registry of namespace prefixes to namespaces.

    This is used to have fixed namespace prefixes (IMHO the only way
    to have namespaced attribute values and retain sanity).  The
    class is never instanciated.  It is used through the module-level
    functions registerPrefix and getPrefixInfo, by DOMMorpher, and by
    write.
    """
    # prefix -> namespace URI
    _registry = {}
    # namespace URI -> prefix
    _reverseRegistry = {}
    # prefix -> schema location (or None)
    _schemaLocations = {}

    @classmethod
    def registerPrefix(cls, prefix, ns, schemaLocation):
        """binds prefix to the namespace URI ns.

        schemaLocation is the URL of the schema for ns, or None.

        Registering a prefix again for the same namespace is allowed (and
        overwrites the schema location); trying to bind it to a different
        namespace raises a ValueError.
        """
        if prefix in cls._registry:
            if ns!=cls._registry[prefix]:
                # report the namespace the prefix is bound to already
                # rather than the one we just refused.
                raise ValueError("Prefix %s is already allocated for namespace %s"%
                    (prefix, cls._registry[prefix]))
        cls._registry[prefix] = ns
        cls._reverseRegistry[ns] = prefix
        cls._schemaLocations[prefix] = schemaLocation

    @classmethod
    def getPrefixForNS(cls, ns):
        """returns the prefix registered for the namespace URI ns.

        An excs.NotFoundError is raised for unknown namespaces.
        """
        try:
            return cls._reverseRegistry[ns]
        except KeyError:
            raise excs.NotFoundError(ns, "XML namespace",
                "registry of XML namespaces.", hint="The registry is filled"
                " by modules as they are imported -- maybe you need to import"
                " the right module?")

    @classmethod
    def getNSForPrefix(cls, prefix):
        """returns the namespace URI registered for prefix.

        An excs.NotFoundError is raised for unknown prefixes.
        """
        try:
            return cls._registry[prefix]
        except KeyError:
            raise excs.NotFoundError(prefix, "XML namespace prefix",
                "registry of prefixes.", hint="The registry is filled"
                " by modules as they are imported -- maybe you need to import"
                " the right module?")

    @classmethod
    def _iterNSAttrs(cls, prefixes, prefixForEmpty, includeSchemaLocation):
        """iterates over pairs of (attrName, attrVal) for declaring
        prefixes.

        prefixes is any iterable of registered prefixes; it is not changed.
        A KeyError is raised for prefixes that are not registered.
        """
        # null prefixes are ignored here; prefixForEmpty, if non-null, gives
        # the prefix the namespace would normally be bound to.
        # We work on a copy so we neither change the caller's set nor choke
        # on frozensets.
        prefixes = set(prefixes)
        prefixes.discard("")

        schemaLocations = []
        for pref in sorted(prefixes):
            yield "xmlns:%s"%pref, cls._registry[pref]
            if includeSchemaLocation and cls._schemaLocations[pref]:
                schemaLocations.append("%s %s"%(
                    cls._registry[pref],
                    cls._schemaLocations[pref]))

        if prefixForEmpty:
            yield "xmlns", cls._registry[prefixForEmpty]

        if schemaLocations:
            # xsi:schemaLocation needs the xsi prefix declared, too
            if "xsi" not in prefixes:
                yield "xmlns:xsi", cls._registry["xsi"]
            yield "xsi:schemaLocation", " ".join(schemaLocations)

    @classmethod
    def addNamespaceDeclarationsETree(cls, root, prefixes, prefixForEmpty=None,
            includeSchemaLocation=True):
        """adds xmlns declarations for prefixes to the etree node root.

        With stanxml and the global-prefix scheme, xmlns declarations
        only come at the root element; thus, root should indeed be root
        rather than some random element.

        Deprecated, don't use ElementTree with stanxml any more.
        """
        for attName, attVal in cls._iterNSAttrs(prefixes, prefixForEmpty,
                includeSchemaLocation):
            root.attrib[attName] = attVal

    @classmethod
    def addNamespaceDeclarations(cls, root, prefixes, prefixForEmpty=None,
            includeSchemaLocation=True):
        """adds xmlns declarations for prefixes to the stanxml node root.

        With stanxml and the global-prefix scheme, xmlns declarations
        only come at the root element; thus, root should indeed be root
        rather than some random element.
        """
        for attName, attVal in cls._iterNSAttrs(prefixes, prefixForEmpty,
                includeSchemaLocation):
            root.addAttribute(attName, attVal)

    @classmethod
    def getPrefixInfo(cls, prefix):
        """returns a pair of (namespace URI, schema location) for prefix.

        A KeyError is raised for unknown prefixes.
        """
        return (cls._registry[prefix], cls._schemaLocations[prefix])

    @classmethod
    def getSchemaForNS(cls, ns):
        """returns the schema location registered for the namespace URI ns.

        An excs.NotFoundError is raised for unknown namespaces.
        """
        try:
            return cls._schemaLocations[cls._reverseRegistry[ns]]
        except KeyError:
            raise excs.NotFoundError(ns, "XML namespace",
                "registry of XML namespaces.", hint="The registry is filled"
                " by modules as they are imported -- maybe you need to import"
                " the right module?")
# Module-level shortcuts to the central namespace registry.
getPrefixInfo = NSRegistry.getPrefixInfo
registerPrefix = NSRegistry.registerPrefix
def schemaURL(xsdName):
    """returns the URL of the local mirror of the schema file xsdName.

    This is used by the various xmlstan clients to make schemaLocations.
    """
    return "".join(("http://vo.ari.uni-heidelberg.de/docs/schemata/", xsdName))
# xsi has no schema location; it is registered here since the registry
# itself needs it for xsi:schemaLocation.
registerPrefix(
    "xsi",
    "http://www.w3.org/2001/XMLSchema-instance",
    None)

# convenience for _additionalPrefixes of elements needing the xsi prefix
# (and no others) in their attributes.
xsiPrefix = frozenset(("xsi",))
class DOMMorpher(object):
    """A one-shot converter from a stanxml.Element tree to an ElementTree.

    Instances collect the prefixes they encounter, so discard them after
    a single use.

    Deprecated, since the whole ElementTree-based serialization is deprecated.
    """

    def __init__(self, prefixForEmpty=None, nsRegistry=NSRegistry):
        self.prefixForEmpty = prefixForEmpty
        self.nsRegistry = nsRegistry
        # filled by _morphNode while the tree is traversed
        self.prefixesUsed = set()

    def _morphNode(self, stanEl, content, attrDict, childIter):
        """returns an ElementTree node for stanEl and its children.

        This has the signature Element.apply wants of its callback.
        """
        prefix = stanEl._prefix
        if prefix:
            self.prefixesUsed.add(prefix)

        # only qualify non-local elements that have a prefix which is not
        # the one bound to the empty prefix
        if prefix and not stanEl._local and prefix!=self.prefixForEmpty:
            name = "%s:%s"%(prefix, stanEl.name_)
        else:
            name = stanEl.name_

        if stanEl._additionalPrefixes:
            self.prefixesUsed.update(stanEl._additionalPrefixes)

        node = ElementTree.Element(name, **attrDict)
        if content:
            node.text = content
        for child in childIter:
            childNode = child.apply(self._morphNode)
            if childNode is None:
                # apply returned nothing for this child
                continue
            node.append(childNode)
        return node

    def getMorphed(self, stan):
        """returns an ElementTree node for the stanxml tree stan, with the
        namespace declarations on the root.
        """
        morphedRoot = stan.apply(self._morphNode)
        self.nsRegistry.addNamespaceDeclarationsETree(
            morphedRoot, self.prefixesUsed)
        if self.prefixForEmpty:
            defaultNS = self.nsRegistry.getNSForPrefix(self.prefixForEmpty)
            morphedRoot.attrib["xmlns"] = defaultNS
        return morphedRoot
551
class NillableMixin(object):
    """An Element mixin making the element XSD nillable.

    This element will automatically have an xsi:nil="true" attribute
    on empty elements (rather than leave them out entirely).  Empty here
    means that the element has no text; children of such elements are
    not passed on to the traversal.

    This overrides apply, so the mixin must be before the base class in
    the inheritance list.
    """
    _mayBeEmpty = True

    def apply(self, func):
        """calls func(node, text, attrs, childIter) as Element.apply
        does, except that textless nodes get an xsi:nil attribute, an empty
        text, and no children.

        Returns whatever func returns.
        """
        if self.text_:
            return Element.apply(self, func)

        # The attribute dictionary is only needed (and hence only built)
        # on the nil path.
        attrs = self._makeAttrDict()
        attrs["xsi:nil"] = "true"
        # make sure the serializer declares the xsi prefix
        self._additionalPrefixes = self._additionalPrefixes|set(["xsi"])
        return func(self, "", attrs, ())

    def isEmpty(self):
        """returns False, so nillable elements are never discarded as
        empty.
        """
        return False
576
def escapePCDATA(val):
    """returns val with &, <, and > replaced by their XML entities.

    NUL characters are replaced by the literal text &x00;.
    NOTE(review): that is not a reference XML parsers will accept;
    presumably it just keeps the NULs visible -- confirm before relying
    on it.
    """
    # the ampersand must come first, or we would escape our own escapes
    replacements = (
        ("&", "&amp;"),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ("\0", "&x00;"))
    for raw, escaped in replacements:
        val = val.replace(raw, escaped)
    return val
584
def escapeAttrVal(val):
    """returns val as a double-quoted, utf-8 encoded XML attribute value.

    In addition to what escapePCDATA does, double quotes are replaced
    by &quot;.
    """
    escaped = escapePCDATA(val).replace('"', '&quot;')
    return '"%s"'%(escaped.encode("utf-8"))
588
def _makeVisitor(outputFile, prefixForEmpty):
    """returns a function serialising nodes to outputFile.

    The function returned has the signature Element.apply wants of its
    callback.  Elements with the prefix prefixForEmpty are written
    without a prefix.
    """

    def visit(node, text, attrs, childIter):
        # attributes are sorted to have a reproducible serialisation
        attrRepr = " ".join(sorted(
            "%s=%s"%(attName, escapeAttrVal(attVal))
            for attName, attVal in attrs.items()))
        if attrRepr:
            attrRepr = " "+attrRepr

        fixedMaterial = getattr(node, "_fixedTagMaterial", None)
        if fixedMaterial:
            attrRepr = attrRepr+" "+fixedMaterial

        if node._prefix and not node._local and node._prefix!=prefixForEmpty:
            name = "%s:%s"%(node._prefix, node.name_)
        else:
            name = node.name_

        if node.isEmpty():
            # empty nodes become self-closing tags if they are allowed to
            # be empty; otherwise nothing is written for them.
            if node._mayBeEmpty:
                outputFile.write("<%s%s/>"%(name, attrRepr))
            return

        outputFile.write("<%s%s>"%(name, attrRepr))
        try:
            if text:
                outputFile.write(escapePCDATA(text).encode("utf-8"))

            for c in childIter:
                # children that have a write method serialise themselves
                if hasattr(c, "write"):
                    c.write(outputFile)
                else:
                    c.apply(visit)
        except Exception as ex:
            # let the node leave a note in the output if it can, then
            # pass the error on
            if hasattr(node, "writeErrorElement"):
                node.writeErrorElement(outputFile, ex)
            raise
        finally:
            # the element is closed even when things went wrong
            outputFile.write("</%s>"%name)

    return visit
def write(root, outputFile, prefixForEmpty=None, nsRegistry=NSRegistry,
        xmlDecl=True, includeSchemaLocation=True):
    """writes an xmlstan tree starting at root to outputFile.

    prefixForEmpty is a registered prefix; elements having it are
    written without a prefix, and its namespace is declared as the
    default namespace.

    With xmlDecl, an XML declaration is written first.  With
    includeSchemaLocation false, no xsi:schemaLocation is generated.
    """
    # since namespaces only enter here through prefixes, I just need to
    # figure out which ones are used.
    prefixesUsed = set()

    def collectPrefixes(node, text, attrs, childIter, found=prefixesUsed):
        # found is bound as a default argument so the augmented
        # assignment below works on the set of the enclosing function.
        found |= node._additionalPrefixes
        found.add(node._prefix)
        for child in childIter:
            child.apply(collectPrefixes)

    root.apply(collectPrefixes)

    # An incredibly nasty hack for VOTable generation; we need a better
    # way to handle with the 1.1/1.2 namespaces: Root may declare it
    # handles all NS declarations itself.  Die, die, die.
    rootHandlesDeclarations = (
        getattr(root, "_fixedTagMaterial", None) is not None)
    if not rootHandlesDeclarations:
        nsRegistry.addNamespaceDeclarations(
            root,
            prefixesUsed,
            prefixForEmpty,
            includeSchemaLocation)

    if xmlDecl:
        outputFile.write("<?xml version='1.0' encoding='utf-8'?>\n")

    visitor = _makeVisitor(outputFile, prefixForEmpty)
    root.apply(visitor)
def xmlrender(tree, prolog=None, prefixForEmpty=None):
    """returns tree in serialized form.

    tree can be any object with a render method, an ElementTree node,
    or some sort of string.  If it's a byte string, it must not contain
    any non-ASCII.  Anything else raises a ValueError.

    Strings are returned as unicode objects.
    NOTE(review): for the other cases, the result is whatever render
    or ElementTree.tostring return, which presumably are byte strings
    -- confirm before relying on getting unicode.

    If prolog is given, it must be a string that will be prepended to the
    serialization of tree.  You can use this for xml declarations or
    stylesheet processing instructions.
    """
    if hasattr(tree, "render"):
        serialized = tree.render(prefixForEmpty=prefixForEmpty)
    elif hasattr(tree, "getchildren"):  # hopefully an xml.etree Element
        serialized = ElementTree.tostring(tree)
    elif isinstance(tree, str):
        serialized = unicode(tree)
    elif isinstance(tree, unicode):
        serialized = tree
    else:
        raise ValueError("Cannot render %s"%repr(tree))

    return prolog+serialized if prolog else serialized
688