1 """
2 Code to parse structures from XML sources.
3
4 The purpose of much of the mess here is to symmetrize XML attributes
5 and values. Basically, we want start, value, end events whether
6 or not a piece of data comes in an element with a certain tag name or
7 via a named attribute.
8 """
9
10
11
12
13
14
15
16 import re
17 from cStringIO import StringIO
18
19 from gavo import utils
20 from gavo.base import activetags
21 from gavo.base import common
22 from gavo.base import parsecontext
23
24
# Matches strings that are empty or consist of whitespace only.  The pattern
# is a raw string so that the backslash in \s reaches the regex engine
# verbatim instead of relying on Python passing unknown escapes through.
ALL_WHITESPACE = re.compile(r"\s*$")
26
27
29 """A dispatcher for parse events to structures.
30
31 It is constructed with the root structure of the result tree, either
32 as a type or as an instance.
33
34 After that, events can be fed to the feed method that makes sure
35 they are routed to the proper object.
36 """
37
38
39
40
41
42
43
44 debug = False
45
47 self.rootStruct = rootStruct
48 self.curParser, self.next = self, None
49 self.result, self.ctx = None, ctx
50
51
52 self.eventQueue = []
53
55 while self.eventQueue:
56 self.feed(*self.eventQueue.pop(0))
57
66
75
def feed(self, type, name, value=None):
    """feeds an event.

    This is the main entry point for user calls.

    A start event whose name activetags.isActive accepts makes the
    matching active tag (built on top of the current parser) the new
    current parser, unless the current parser has an ACTIVE_NOEXPAND
    attribute; such an event is consumed here.  All other events go to
    _feedToStructured while self.next is None and to _feedToAtom
    otherwise.
    """
    # The checks are ordered so that activetags is only consulted for
    # start events.
    opensActiveTag = (
        type == "start"
        and activetags.isActive(name)
        and not hasattr(self.curParser, "ACTIVE_NOEXPAND"))
    if opensActiveTag:
        makeTag = activetags.getActiveTag(name)
        self.curParser = makeTag(self.curParser)
        return

    deliver = (self._feedToStructured
        if self.next is None
        else self._feedToAtom)
    deliver(type, name, value)
95
def feedEvent(self, ctx, evType, name, value):
    """dispatches an event to the root structure.

    Do not call this yourself unless you know what you're doing.  The
    method to feed "real" events to is feed.

    Only a start event carrying the root structure's name_ is accepted.
    If rootStruct is a type, it is instantiated with None as its
    argument; otherwise rootStruct itself becomes the result.  The
    result receives ctx.idmap, is handed to ctx.setPositionOn, stored in
    self.result and returned.  Anything else raises a
    common.StructureError.  The value argument is not used.
    """
    if name != self.rootStruct.name_:
        raise common.StructureError("Expected root element %s, found %s"%(
            self.rootStruct.name_, name))
    if evType != "start":
        raise common.StructureError("Bad document structure")

    root = self.rootStruct
    if isinstance(root, type):
        # we were constructed with a class rather than an instance
        root = root(None)
    self.result = root
    root.idmap = ctx.idmap
    ctx.setPositionOn(root)
    return root
115
117 """artificially inserts an instantiated root element.
118
119 In particular, this bypasses any checks that the event stream coming
120 in is actually destined for root. Use this for replay-type things
121 (feedFrom, active tags) exclusively.
122 """
123 self.result = root
124 self.curParser = root
125 self.result.idmap = self.ctx.idmap
126
129
130
132 """generates value events for the attributes in attrs.
133 """
134
135
136 original = attrs.pop("original", None)
137 if original:
138 evProc.feed("value", "original", original)
139
140
141
142 mixin = attrs.pop("mixin", None)
143
144 for key, val in attrs.iteritems():
145 evProc.feed("value", key, val)
146
147 if mixin:
148 evProc.feed("value", "mixin", mixin)
149
def feedTo(rootStruct, eventSource, context, feedInto=False):
    """feeds events from eventSource to rootStruct.

    A new event processor is used for feeding.  No context
    exit functions are run.

    The processed root structure is returned.

    if feedInto is true, the event creating the root structure is not
    expected (TODO: this is crap; fix it so that this is always the
    case when rootStruct is an instance).

    eventSource must iterate over (type, name, payload) triples.
    Consecutive data events are joined and fed as a single content_
    value just before the next non-data event, unless the joined text
    is all whitespace.  A start event with a true payload additionally
    has its payload turned into value events by
    _synthesizeAttributeEvents.

    Exceptions raised while feeding are re-raised.  When they neither
    have a true posInMsg nor a non-None pos, their pos attribute is set
    from eventSource.pos (or to None when the event source has no such
    attribute).
    """
    evProc = EventProcessor(rootStruct, context)
    if feedInto:
        evProc.setRoot(rootStruct)
    pendingText = []

    try:
        for evType, evName, payload in eventSource:
            # character data is collected until some other event arrives
            if evType == "data":
                pendingText.append(payload)
                continue

            if pendingText:
                content = "".join(pendingText)
                pendingText = []
                if not ALL_WHITESPACE.match(content):
                    evProc.feed("value", "content_", content)

            # "normal" event feed
            evProc.feed(evType, evName, payload)

            # start event: synthesize value events for the attributes
            if evType == "start" and payload:
                _synthesizeAttributeEvents(evProc, context, payload)

    except Exception as ex:
        if (not getattr(ex, "posInMsg", False)
                and getattr(ex, "pos", None) is None):
            # Only add a position when the exception does not carry one
            # already.  getattr keeps an event source lacking a pos
            # attribute from replacing the original error with an
            # AttributeError raised in this handler.
            ex.pos = getattr(eventSource, "pos", None)
        raise
    return evProc.result
196
197
199 """parses a tree rooted in rootStruct from some file-like object inputStream.
200
201 It returns the root element of the resulting tree. If rootStruct is
202 a type subclass, it will be instantiated to create a root
203 element, if it is an instance, this instance will be the root.
204 """
205 eventSource = utils.iterparse(inputStream)
206 if context is None:
207 context = parsecontext.ParseContext()
208 context.setEventSource(eventSource)
209 res = feedTo(rootStruct, eventSource, context)
210 context.runExitFuncs(res)
211 return res
212
213
215 """parses a DaCHS RD tree rooted in ``rootStruct`` from a string.
216
217 It returns the root element of the resulting tree. You would use this like
218 this::
219
220 parseFromString(rscdef.Column, "<column name='foo'/>")
221 """
222 return parseFromStream(rootStruct, StringIO(inputString), context)
223