1 """
2 A stan-like model for building namespaced XML trees.
3
4 The main reason for this module is that much of the VO's XML mess is based
5 on XML schema and thus has namespaced attributes. This single design
decision ruins the entire XML design.  To retain some remnant of
sanity, I treat the prefixes themselves as namespaces and maintain
8 a single central registry from prefixes to namespaces in this module.
9
10 Then, the elements only use these prefixes, and this module makes sure
11 that during serialization the instance document's root element contains
12 the namespace mapping (and the schema locations) required.
13 """
14
15
16
17
18
19
20
21 from cStringIO import StringIO
22
23 try:
24 from xml.etree import cElementTree as ElementTree
25 except ImportError:
26 from elementtree import ElementTree
27
28 from gavo.utils import autonode
29 from gavo.utils import excs
30 from gavo.utils import misctricks
31 from gavo.utils import texttricks
32
33 -class Error(Exception):
35
39
40
# Name of the character encoding advertised in the XML declaration below.
encoding = "utf-8"
# XML declaration line; the encoding name is interpolated once, at import
# time.
XML_HEADER = '<?xml version="1.0" encoding="%s"?>' % (encoding,)
46 """A metaclass used for Elements.
47
48 On the one hand, it does autonode's constructor magic with _a_<attrname>
49 attributes, on the other, it will instanciate itself when indexed
50 -- that we want for convenient stan-like notation.
51 """
58
61
62
63 -class Stub(object):
64 """A sentinel class for embedding objects not yet existing into
65 stanxml trees.
66
67 These have a single opaque object and need to be dealt with by the
68 user. One example of how these can be used is the ColRefs in stc to
69 utype conversion.
70
    Stubs are equal to each other if their handles are identical.
72 """
73 name_ = "stub"
74 text_ = None
75
78
80 return "%s(%s)"%(self.__class__.__name__, repr(self.dest))
81
83 return self.dest==getattr(other, "dest", Stub)
84
86 return not self==other
87
89 return hash(self.dest)
90
93
96
99
103
105 """does nothing.
106
107 Stubs don't have what Element.apply needs, so we don't even pretend.
108 """
109 return
110
113 """An element for serialization into XML.
114
115 This is loosely modelled after nevow stan.
116
117 Don't add to the children attribute directly, use addChild or (more
118 usually) __getitem__.
119
120 Elements have attributes and children. The attributes are defined,
121 complete with defaults, in _a_<name> attributes as in AutoNodes.
122 Attributes are checked.
123
124 Children are not usually checked, but you can set a _childSequence
125 attribute containing a list of (unqualified) element names. These
126 children will be emitted in the sequence given.
127
128 When deriving from Elements, you may need attribute names that are not
129 python identifiers (e.g., with dashes in them). In that case, define
130 an attribute _name_a_<att> and point it to any string you want as the
131 attribute.
132
133 When serializing these, empty elements (i.e. those having an empty text and
134 having no non-empty children) are usually discarded. If you need such an
135 element (e.g., for attributes), set mayBeEmpty to True.
136
137 Since insane XSD mandates that local elements must not be qualified when
138 elementFormDefault is unqualified, you need to set _local=True on
139 such local elements to suppress the namespace prefix. Attribute names
140 are never qualified here. If you need qualified attributes, you'll
141 have to use attribute name translation.
142
143 The content of the DOM may be anything recognized by addChild.
144 In particular, you can give objects a serializeToXMLStan method returning
145 strings or an Element to make them good DOM citizens.
146
147 Elements cannot harbor mixed content (or rather, there is only
148 one piece of text).
149 """
150 __metaclass__ = _Autoconstructor
151
152 name_ = None
153 _a_id = None
154 _prefix = ""
155 _additionalPrefixes = frozenset()
156 _mayBeEmpty = False
157 _local = False
158 _stringifyContent = False
159
160
161
162 _name_a_xsi_type = "xsi:type"
163
164
165 _generator_t = type((x for x in ()))
166
167
168
170 self.addChild(children)
171 return self
172
174 if not kw:
175 return self
176
177
178 for k, v in kw.iteritems():
179
180 getattr(self, k)
181 setattr(self, k, v)
182 return self
183
186
189
191 try:
192 pc = super(cls, self)._setupNode
193 except AttributeError:
194 pass
195 else:
196 pc()
197
199 self._isEmptyCache = None
200 self._children = []
201 self.text_ = ""
202 if self.name_ is None:
203 self.name_ = self.__class__.__name__.split(".")[-1]
204 self._setupNodeNext(Element)
205
212
214 cDict = self.getChildDict()
215 for cName in self._childSequence:
216 if cName in cDict:
217 for c in cDict[cName]:
218 yield c
219
226
237
239 """adds child to the list of children.
240
241 Child may be an Element, a string, or a list or tuple of Elements and
242 strings. Finally, child may be None, in which case nothing will be
243 added.
244 """
245 self._isEmptyCache = None
246 if child is None:
247 pass
248 elif hasattr(child, "serializeToXMLStan"):
249 self.addChild(child.serializeToXMLStan())
250 elif isinstance(child, basestring):
251 self.bailIfBadChild(child)
252 self.text_ = child
253 elif isinstance(child, (Element, Stub)):
254 self.bailIfBadChild(child)
255 self._children.append(child)
256 elif isinstance(child, (list, tuple, self._generator_t)):
257 for c in child:
258 self.addChild(c)
259 elif isinstance(child, _Autoconstructor):
260 self.addChild(child())
261 elif self._stringifyContent:
262 self.addChild(unicode(child))
263 else:
264 raise Error("%s element %s cannot be added to %s node"%(
265 type(child), repr(child), self.name_))
266
268 """returns true if the current node has no non-empty children and no
269 non-whitespace text content.
270 """
271 if self._isEmptyCache is None:
272 self._isEmptyCache = True
273
274 if self.text_.strip():
275 self._isEmptyCache = False
276 if self._isEmptyCache:
277 for c in self._children:
278 if not c.shouldBeSkipped():
279 self._isEmptyCache = False
280 break
281
282 return self._isEmptyCache
283
285 """returns true if the current node should be part of an output.
286
287 That is true if it is either non-empty or _mayBeEmpty is true.
288 An empty element is one that has only empty children and no
289 non-whitespace text content.
290 """
291 if self._mayBeEmpty:
292 return False
293 return self.isEmpty()
294
296 """iterates over the defined attribute names of this node.
297
        Each element returned is a pair of the node attribute name and the
        xml name (which may be translated via _name_a_<att>).
300 """
301 for name, default in self._nodeAttrs:
302 xmlName = getattr(self, "_name_a_"+name, name)
303 yield name, xmlName
304
306 """adds attName, attValue to this Element's attributes when instanciated.
307
308 You cannot add _a_<attname> attributes to instances. Thus, when
309 in a pinch, use this.
310 """
311 attName = str(attName)
312 if not hasattr(self, attName):
313 self._nodeAttrs.append((attName, attValue))
314 setattr(self, attName, attValue)
315
317 """iterates over all children having type.
318 """
319 for c in self._children:
320 if isinstance(c, type):
321 yield c
322
324 return iter(self._children)
325
327 cDict = {}
328 for c in self._children:
329 cDict.setdefault(c.name_, []).append(c)
330 return cDict
331
333 """iterates over children whose element name is elName.
334
335 This always does a linear search through the children and hence
336 may be slow.
337 """
338 for c in self._children:
339 if c.name_==elName:
340 yield c
341
343 if self._childSequence is None:
344 return iter(self._children)
345 else:
346 return self._iterChildrenInSequence()
347
349 """calls func(node, text, attrs, childIter).
350
351 This is a building block for tree traversals; the expectation is that
352 func does something like func(node, text, attrDict, childSequence).
353 """
354 try:
355 if self.shouldBeSkipped():
356 return
357 attrs = self._makeAttrDict()
358 return func(self, self.text_,
359 attrs, self._getChildIter())
360 except Error:
361 raise
362 except Exception:
363 misctricks.sendUIEvent("Info",
364 "Internal failure while building XML; context is"
365 " %s node with children %s"%(
366 self.name_,
367 texttricks.makeEllipsis(repr(self._children), 60)))
368 raise
369
370 - def asETree(self, prefixForEmpty=None):
371 """returns an ElementTree instance for the tree below this node.
372
373 Deprecated. Use Serializer rather than ElementTree.
374 """
375 return DOMMorpher(prefixForEmpty, NSRegistry).getMorphed(self)
376
377 - def render(self, prefixForEmpty=None, includeSchemaLocation=True):
378 """returns this and its children as a string.
379 """
380 f = StringIO()
381 write(self, f, prefixForEmpty=prefixForEmpty, xmlDecl=False,
382 includeSchemaLocation=includeSchemaLocation)
383 return f.getvalue()
384
387 """A container for a registry of namespace prefixes to namespaces.
388
389 This is used to have fixed namespace prefixes (IMHO the only way
390 to have namespaced attribute values and retain sanity). The
391 class is never instanciated. It is used through the module-level
392 method registerPrefix and by DOMMorpher.
393 """
394 _registry = {}
395 _reverseRegistry = {}
396 _schemaLocations = {}
397
398 @classmethod
407
408 @classmethod
410 try:
411 return cls._reverseRegistry[ns]
412 except KeyError:
413 raise excs.NotFoundError(ns, "XML namespace",
414 "registry of XML namespaces.", hint="The registry is filled"
415 " by modules as they are imported -- maybe you need to import"
416 " the right module?")
417
418 @classmethod
420 try:
421 return cls._registry[prefix]
422 except KeyError:
423 raise excs.NotFoundError(prefix, "XML namespace prefix",
424 "registry of prefixes.", hint="The registry is filled"
425 " by modules as they are imported -- maybe you need to import"
426 " the right module?")
427
428 @classmethod
429 - def _iterNSAttrs(cls, prefixes, prefixForEmpty, includeSchemaLocation):
430 """iterates over pairs of (attrName, attrVal) for declaring
431 prefixes.
432 """
433
434
435 prefixes.discard("")
436
437 schemaLocations = []
438 for pref in sorted(prefixes):
439 yield "xmlns:%s"%pref, cls._registry[pref]
440 if includeSchemaLocation and cls._schemaLocations[pref]:
441 schemaLocations.append("%s %s"%(
442 cls._registry[pref],
443 cls._schemaLocations[pref]))
444
445 if prefixForEmpty:
446 yield "xmlns", cls._registry[prefixForEmpty]
447
448 if schemaLocations:
449 if not "xsi" in prefixes:
450 yield "xmlns:xsi", cls._registry["xsi"]
451 yield "xsi:schemaLocation", " ".join(schemaLocations)
452
453 @classmethod
456 """adds xmlns declarations for prefixes to the etree node root.
457
458 With stanxml and the global-prefix scheme, xmlns declarations
459 only come at the root element; thus, root should indeed be root
460 rather than some random element.
461
462 Deprecated, don't use ElementTree with stanxml any more.
463 """
464 for attName, attVal in cls._iterNSAttrs(prefixes, prefixForEmpty,
465 includeSchemaLocation):
466 root.attrib[attName] = attVal
467
468 @classmethod
471 """adds xmlns declarations for prefixes to the stanxml node root.
472
473 With stanxml and the global-prefix scheme, xmlns declarations
474 only come at the root element; thus, root should indeed be root
475 rather than some random element.
476 """
477 for attName, attVal in cls._iterNSAttrs(prefixes, prefixForEmpty,
478 includeSchemaLocation):
479 root.addAttribute(attName, attVal)
480
481 @classmethod
484
485 @classmethod
487 try:
488 return self._schemaLocations[self._reverseRegistry[ns]]
489 except KeyError:
490 raise excs.NotFoundError(ns, "XML namespace",
491 "registry of XML namespaces.", hint="The registry is filled"
492 " by modules as they are imported -- maybe you need to import"
493 " the right module?")
494
495
496
# Module-level shortcuts for the NSRegistry methods of the same names, so
# client code does not have to refer to the NSRegistry class itself.
registerPrefix = NSRegistry.registerPrefix
getPrefixInfo = NSRegistry.getPrefixInfo
501 """returns the URL to the local mirror of the schema xsdName.
502
503 This is used by the various xmlstan clients to make schemaLocations.
504 """
505 return "http://vo.ari.uni-heidelberg.de/docs/schemata/"+xsdName
506
507
# The xsi prefix is registered at import time since _iterNSAttrs looks it
# up for xsi:schemaLocation.
# NOTE(review): the third argument is presumably the schema location;
# None would keep xsi out of the generated schemaLocation -- confirm
# against registerPrefix.
registerPrefix("xsi","http://www.w3.org/2001/XMLSchema-instance", None)


# Immutable one-element prefix set containing just xsi.
# NOTE(review): presumably intended as an _additionalPrefixes value for
# elements using xsi attributes -- confirm against the users.
xsiPrefix = frozenset(["xsi"])
515 """An object encapsulating the process of turning a stanxml.Element
516 tree into an ElementTree.
517
518 Discard instances after single use.
519
520 Deprecated, since the whole ElementTree-based serialization is deprecated.
521 """
523 self.prefixForEmpty, self.nsRegistry = prefixForEmpty, nsRegistry
524 self.prefixesUsed = set()
525
526 - def _morphNode(self, stanEl, content, attrDict, childIter):
543
551
554 """An Element mixin making the element XSD nillable.
555
556 This element will automatically have an xsi:nil="true" attribute
557 on empty elements (rather than leave them out entirely).
558
559 This overrides apply, so the mixin must be before the base class in
560 the inheritance list.
561 """
562 _mayBeEmpty = True
563
573
576
584
588
591 """returns a function writing nodes to outputFile.
592 """
593
594 def visit(node, text, attrs, childIter):
595 attrRepr = " ".join(sorted("%s=%s"%(k, escapeAttrVal(attrs[k]))
596 for k in attrs))
597 if attrRepr:
598 attrRepr = " "+attrRepr
599
600 if getattr(node, "_fixedTagMaterial", None):
601 attrRepr = attrRepr+" "+node._fixedTagMaterial
602
603 if not node._prefix or node._local or node._prefix==prefixForEmpty:
604 name = node.name_
605 else:
606 name = "%s:%s"%(node._prefix, node.name_)
607
608 if node.isEmpty():
609 if node._mayBeEmpty:
610 outputFile.write("<%s%s/>"%(name, attrRepr))
611 else:
612 outputFile.write("<%s%s>"%(name, attrRepr))
613 try:
614 try:
615 if text:
616 outputFile.write(escapePCDATA(text).encode("utf-8"))
617
618 for c in childIter:
619 if hasattr(c, "write"):
620 c.write(outputFile)
621 else:
622 c.apply(visit)
623 except Exception as ex:
624 if hasattr(node, "writeErrorElement"):
625 node.writeErrorElement(outputFile, ex)
626 raise
627 finally:
628 outputFile.write("</%s>"%name)
629
630 return visit
631
632
def write(root, outputFile, prefixForEmpty=None, nsRegistry=NSRegistry,
        xmlDecl=True, includeSchemaLocation=True):
    """serializes the xmlstan tree below root into outputFile.

    The tree is first traversed to find out which prefixes are in use
    (each node's _prefix plus its _additionalPrefixes).  Unless root has
    a non-None _fixedTagMaterial attribute, nsRegistry is then asked to add
    the corresponding namespace declarations to root.

    prefixForEmpty is the prefix that is to be suppressed in the output;
    it is passed on to both nsRegistry and the writing visitor.

    With xmlDecl true, an XML declaration is written before the root
    element.  includeSchemaLocation is passed on to nsRegistry.
    """
    seenPrefixes = set()

    # the set is bound as a default argument so the in-place union below
    # operates on the outer set rather than on a fresh local name.
    def gatherPrefixes(node, text, attrs, childIter, seen=seenPrefixes):
        seen |= node._additionalPrefixes
        seen.add(node._prefix)
        for kid in childIter:
            kid.apply(gatherPrefixes)

    root.apply(gatherPrefixes)

    # roots bringing their own tag material get no generated declarations
    fixedTagMaterial = getattr(root, "_fixedTagMaterial", None)
    if fixedTagMaterial is None:
        nsRegistry.addNamespaceDeclarations(
            root, seenPrefixes, prefixForEmpty, includeSchemaLocation)

    if xmlDecl:
        outputFile.write("<?xml version='1.0' encoding='utf-8'?>\n")

    visitor = _makeVisitor(outputFile, prefixForEmpty)
    root.apply(visitor)
662
663
def xmlrender(tree, prolog=None, prefixForEmpty=None):
    """returns tree in serialized form.

    tree may be:

    - anything with a render method; it is called with prefixForEmpty,
    - anything with a getchildren method; it is passed to
      ElementTree.tostring,
    - a byte string, which is converted using unicode() and hence must not
      contain any non-ASCII,
    - a unicode string, which is returned as is.

    Anything else raises a ValueError.

    If prolog is non-empty, it is prepended to the serialization; use this
    for XML declarations or stylesheet processing instructions.
    """
    if hasattr(tree, "render"):
        serialized = tree.render(prefixForEmpty=prefixForEmpty)
    elif hasattr(tree, "getchildren"):
        serialized = ElementTree.tostring(tree)
    elif isinstance(tree, str):
        serialized = unicode(tree)
    elif isinstance(tree, unicode):
        serialized = tree
    else:
        raise ValueError("Cannot render %s"%repr(tree))

    if not prolog:
        return serialized
    return prolog+serialized
688