Package gavo :: Package base :: Module xmlstruct
[frames] | no frames]

Source Code for Module gavo.base.xmlstruct

  1  """ 
  2  Code to parse structures from XML sources. 
  3   
  4  The purpose of much of the mess here is to symmetrize XML attributes 
  5  and values.  Basically, we want start, value, end events whether 
  6  or not a piece of data comes in an element with a certain tag name or 
  7  via a named attribute. 
  8  """ 
  9   
 10  #c Copyright 2008-2019, the GAVO project 
 11  #c 
 12  #c This program is free software, covered by the GNU GPL.  See the 
 13  #c COPYING file in the source distribution. 
 14   
 15   
 16  import re 
 17  from cStringIO import StringIO 
 18   
 19  from gavo import utils 
 20  from gavo.base import activetags 
 21  from gavo.base import common 
 22  from gavo.base import parsecontext 
 23   
 24   
 25  ALL_WHITESPACE = re.compile("\s*$") 
 26   
 27   
class EventProcessor(object):
    """A dispatcher routing parse events to the structures being built.

    It is constructed with the root structure of the result tree, given
    either as a type or as an instance, plus a parse context.

    Once constructed, events are passed to the feed method, which takes
    care of handing them to whatever object is currently responsible.
    """

    # Atoms (a single value) and structured data are told apart through
    # the next attribute.  While it is not None, an incoming value is
    # turned into a "value" event named after next and sent to the current
    # parser.  While it is None, events go to the current structure as
    # they are.

    debug = False

    def __init__(self, rootStruct, ctx):
        self.rootStruct = rootStruct
        self.ctx = ctx
        self.result = None
        # until a root element has been seen, the processor parses itself
        # (see feedEvent)
        self.curParser = self
        self.next = None
        # events to be replayed once the current structured element
        # has been processed
        self.eventQueue = []

    def _processEventQueue(self):
        """replays all queued events through feed, oldest first.
        """
        # self.eventQueue is re-read on every round since feeding an
        # event may well touch the queue again.
        while self.eventQueue:
            queued = self.eventQueue.pop(0)
            self.feed(*queued)

    def _feedToAtom(self, type, name, value):
        """handles an event while an atomic value (named self.next) is open.

        Values are passed on to the current parser as "value" events for
        self.next, an end event closes the atom, and a start event raises
        a StructureError.
        """
        if type == "start":
            raise common.StructureError("%s elements cannot have %s children"%(
                self.next, name))

        if type in ("value", "parsedvalue"):
            self.curParser.feedEvent(self.ctx, 'value', self.next, value)
        elif type == "end":
            self.next = None

    def _feedToStructured(self, type, name, value):
        """hands an event to the current parser and follows up on its answer.

        A string coming back is the name of an atom now awaiting its value;
        anything else becomes the new current parser.  After end events,
        the event queue is replayed.
        """
        outcome = self.curParser.feedEvent(self.ctx, type, name, value)
        if isinstance(outcome, basestring):
            self.next = outcome
        else:
            self.curParser = outcome

        if type == "end":
            self._processEventQueue()

    def feed(self, type, name, value=None):
        """feeds an event.

        This is the main entry point for user calls.
        """
        # Active tags get special treatment: they may turn up anywhere, so
        # the element parsers do not handle them; we do.  A parser defining
        # ACTIVE_NOEXPAND switches this off and sees the active tag
        # events itself.
        if (type == "start"
                and activetags.isActive(name)
                and not hasattr(self.curParser, "ACTIVE_NOEXPAND")):
            makeTag = activetags.getActiveTag(name)
            self.curParser = makeTag(self.curParser)
            return

        if self.next is None:
            handler = self._feedToStructured
        else:
            handler = self._feedToAtom
        handler(type, name, value)

    def feedEvent(self, ctx, evType, name, value):
        """dispatches an event to the root structure.

        Do not call this yourself unless you know what you're doing.  The
        method to feed "real" events to is feed.

        Only a start event carrying the root structure's name is accepted;
        everything else raises a StructureError.  The root structure
        (instantiated first if it was given as a type) is returned.
        """
        if name != self.rootStruct.name_:
            raise common.StructureError("Expected root element %s, found %s"%(
                self.rootStruct.name_, name))
        if evType != "start":
            raise common.StructureError("Bad document structure")

        root = self.rootStruct
        if isinstance(root, type):
            root = root(None)
        self.result = root
        self.result.idmap = ctx.idmap
        ctx.setPositionOn(self.result)
        return self.result

    def setRoot(self, root):
        """artificially inserts an instantiated root element.

        In particular, this bypasses any checks that the event stream coming
        in is actually destined for root.  Use this for replay-type things
        (feedFrom, active tags) exclusively.
        """
        self.result = self.curParser = root
        self.result.idmap = self.ctx.idmap

    def clone(self):
        """returns a fresh EventProcessor for the same root structure and
        context.
        """
        return EventProcessor(self.rootStruct, self.ctx)
129 130
131 -def _synthesizeAttributeEvents(evProc, context, attrs):
132 """generates value events for the attributes in attrs. 133 """ 134 # original attributes must be fed first since they will ususally 135 # yield a different target object 136 original = attrs.pop("original", None) 137 if original: 138 evProc.feed("value", "original", original) 139 140 # mixins must be fed last as they might depend on stuff set 141 # in other attributes 142 mixin = attrs.pop("mixin", None) 143 144 for key, val in attrs.iteritems(): 145 evProc.feed("value", key, val) 146 147 if mixin: 148 evProc.feed("value", "mixin", mixin)
149
def feedTo(rootStruct, eventSource, context, feedInto=False):
    """feeds events from eventSource to rootStruct.

    eventSource must iterate over (type, name, payload) triples.  A new
    event processor is used for feeding.  No context exit functions are run.

    Consecutive "data" events are collected and, unless they consist of
    whitespace only, delivered as a single value event for content_ just
    before the next non-data event.  For start events with a non-empty
    payload, that payload is taken to be the element's attributes, and
    value events are synthesized for them.

    The processed root structure is returned.

    if feedInto is true, the event creating the root structure is not
    expected (TODO: this is crap; fix it so that this is always the
    case when rootStruct is an instance).

    Exceptions raised while feeding are re-raised.  Unless they say they
    already have position information (a true posInMsg or a non-None pos
    attribute), their pos attribute is set from eventSource.pos, or to None
    if the event source has no such attribute.
    """
    evProc = EventProcessor(rootStruct, context)
    if feedInto:
        evProc.setRoot(rootStruct)
    buf = []

    try:
        for evType, name, payload in eventSource:

            # buffer data
            if evType == "data":
                buf.append(payload)
                continue

            # any other event first flushes the character data seen so far
            if buf:
                res = "".join(buf)
                if not ALL_WHITESPACE.match(res):
                    evProc.feed("value", "content_", res)
                buf = []

            # "normal" event feed
            evProc.feed(evType, name, payload)

            # start event: Synthesize value events for attributes.
            if evType == "start" and payload:
                _synthesizeAttributeEvents(evProc, context, payload)

    except Exception as ex:
        if (not getattr(ex, "posInMsg", False)
                and getattr(ex, "pos", None) is None):
            # only add pos when the message string does not already have it.
            # getattr guards against event sources without a pos attribute
            # (e.g., plain sequences); an AttributeError raised here would
            # hide the exception we are actually handling.
            ex.pos = getattr(eventSource, "pos", None)
        raise
    return evProc.result
196 197
def parseFromStream(rootStruct, inputStream, context=None):
    """parses a tree rooted in rootStruct from the file-like object
    inputStream.

    The root element of the resulting tree is returned.  When rootStruct
    is a type, it is instantiated to obtain the root element; when it is
    an instance, that instance serves as the root.

    If no context is passed in, a new ParseContext is created.  The
    context's exit functions are run on the result before it is returned.
    """
    events = utils.iterparse(inputStream)
    ctx = parsecontext.ParseContext() if context is None else context
    ctx.setEventSource(events)

    tree = feedTo(rootStruct, events, ctx)
    ctx.runExitFuncs(tree)
    return tree
212 213
def parseFromString(rootStruct, inputString, context=None):
    """parses a DaCHS RD tree rooted in ``rootStruct`` from a string.

    The string is wrapped into a file-like object and handed on to
    parseFromStream; the root element of the resulting tree is returned.
    A typical call looks like this::

        parseFromString(rscdef.Column, "<column name='foo'/>")
    """
    stream = StringIO(inputString)
    return parseFromStream(rootStruct, stream, context)
223