1 """
2 Parsing various forms of tabular data embedded in VOTables.
3
4 WARNING: This will fail if the parser exposes namespaces in its
5 events (utils.iterparse doesn't).
6 """
7
8
9
10
11
12
13
14 from gavo.votable import coding
15 from gavo.votable import common
16 from gavo.votable import dec_binary
17 from gavo.votable import dec_binary2
18 from gavo.votable import dec_tabledata
19
20
22 """A base for the classes actually doing the iteration.
23
24 You need to give a decoderModule attribute and implement _getRawRow.
25 """
def __init__(self, tableDefinition, nodeIterator):
    """sets up row decoding for the events coming from nodeIterator.

    tableDefinition is passed on to coding.buildDecoder together with
    the decoderModule class attribute (which concrete classes must
    provide); the callable returned is kept in _decodeRawRow and
    applied to every raw row by __iter__.
    """
    self.nodeIterator = nodeIterator
    self._decodeRawRow = coding.buildDecoder(
        tableDefinition,
        self.decoderModule)
31
def __iter__(self):
    """yields decoded rows until _getRawRow returns None.

    Only None ends the iteration; an empty raw row is still passed
    to the decoder.
    """
    while True:
        rawRow = self._getRawRow()
        if rawRow is None:
            return
        yield self._decodeRawRow(rawRow)
38
39
41 """An internal class used by Rows to actually iterate over rows
42 in TABLEDATA serialization.
43 """
44 decoderModule = dec_tabledata
45
def _getRawRow(self):
    """returns a row in strings or None.

    None is returned when TABLEDATA is closed before another TR is
    opened.  Otherwise, the result is a list with the concatenated
    character data of each TD of the next TR, in document order.

    A parse error obtained from nodeIterator.getParseError is raised
    when an element other than TR is opened while waiting for a row, or
    when an element other than TD is opened within a row.
    """
    # Wait for the next TR to open; anything that is not a start event
    # or the end of TABLEDATA (e.g., whitespace data) is skipped.
    for evType, tag, _ in self.nodeIterator:
        if evType=="end" and tag=="TABLEDATA":
            return None
        if evType=="start":
            if tag=="TR":
                break
            raise self.nodeIterator.getParseError(
                "Unexpected element %s"%tag)

    # Collect the character data of each TD until the TR is closed.
    rawRow, dataBuffer = [], []
    for evType, tag, payload in self.nodeIterator:
        if evType=="start":
            if tag!="TD":
                raise self.nodeIterator.getParseError(
                    "Unexpected element %s"%tag)
            # forget any data seen between cells
            dataBuffer = []

        elif evType=="data":
            dataBuffer.append(payload)

        elif evType=="end":
            if tag=="TR":
                break
            # only TDs can have been opened within a TR
            assert tag=="TD", "Unexpected end tag %s within TR"%tag
            rawRow.append("".join(dataBuffer))
            dataBuffer = []

        else:
            assert False, "Unknown parser event type %r"%(evType,)
    return rawRow
86
87
89 """A stand-in for a file that decodes VOTable stream data on
90 an as-needed basis.
91 """
92 minChunk = 20000
93 lastRes = None
94
def __init__(self, nodeIterator):
    """sets up an empty buffer over the events from nodeIterator.

    Nothing is pulled from nodeIterator here; that only happens when
    read needs more data.
    """
    self.nodeIterator = nodeIterator
    # decoded binary data; fPos is the read position within it
    self.curChunk = b""
    # base64 text after the last line break, not decoded yet
    self.leftover = ""
    self.fPos = 0
    self._eof = False
101
def _setEOF(self):
    """cleans up at end of stream and sets eof flag.

    Events are consumed from the node iterator up to and including
    the first one that is not character data (or until the iterator
    is exhausted).

    This is called by _fillBuffer exclusively.
    """
    for evType, _, _ in self.nodeIterator:
        if evType!="data":
            break
    self._eof = True
111
def _fillBuffer(self, nBytes):
    """obtains events from node iterator to fill curChunk.

    nBytes is the number of bytes the caller wants to read.  Encoded
    data is collected until more than max(2*nBytes, minChunk)
    characters have been gathered and a line break has been seen, or
    until an end event arrives (in which case _setEOF is called).
    The collected text, prefixed with what was left over from the last
    call, is then passed to _decodeBase64.

    This is a no-op once the _eof flag is set.
    """
    if self._eof:
        return
    destBytes = max(nBytes*2, self.minChunk)
    curBytes, hadLf = 0, False
    encoded = [self.leftover]

    for evType, _, payload in self.nodeIterator:
        if evType=="end":
            self._setEOF()
            break
        assert evType=="data"
        encoded.append(payload)
        curBytes += len(payload)
        # _decodeBase64 needs a line break to split at unless at eof
        hadLf = hadLf or "\n" in payload or "\r" in payload
        if hadLf and curBytes>destBytes:
            break

    return self._decodeBase64("".join(encoded))
133
def _decodeBase64(self, input):
    """decodes input and sets curChunk, leftover, and fPos accordingly.

    The method behaves slightly differently when the _eof attribute is
    true -- normally, it will leave anything after the last line feed
    alone (keeping it in leftover), but at _eof, it will decode even
    that.

    It is an error to pass in anything that has no line break unless
    at _eof; a ValueError is raised in that case.
    """
    # str.decode("base64") only exists in python 2; a2b_base64 is what
    # that codec uses internally, and it is available in python 3, too.
    import binascii

    if not self._eof:
        # split after the last \n; \r is only considered when there
        # is no \n at all.
        try:
            lastBreak = input.rindex("\n")+1
        except ValueError:
            lastBreak = input.rindex("\r")+1
        self.leftover = input[lastBreak:]
        input = input[:lastBreak]

    decoded = binascii.a2b_base64(input)
    unread = self.curChunk[self.fPos:]
    if unread:
        self.curChunk = unread+decoded
    else:
        # nothing left to keep; this also makes us independent of
        # the type of an empty initial buffer.
        self.curChunk = decoded
    self.fPos = 0
154
def read(self, nBytes):
    """returns a string containing the next nBytes of the input
    stream.

    If the buffer does not hold enough data, _fillBuffer is called
    once to obtain more.  The result is also kept in the lastRes
    attribute.

    The function raises an IOError if there's not enough data left;
    the read position is not changed in that case.
    """
    if self.fPos+nBytes>len(self.curChunk):
        self._fillBuffer(nBytes)
    if self.fPos+nBytes>len(self.curChunk):
        raise IOError("No data left")
    self.lastRes = self.curChunk[self.fPos:self.fPos+nBytes]
    self.fPos += nBytes
    return self.lastRes
168
def atEnd(self):
    """returns true if the encoded input has ended (the _eof flag is
    set) and everything decoded so far has been read.
    """
    return self._eof and self.fPos==len(self.curChunk)
171
172
174 """A base class used by Rows to actually iterate over rows
175 in BINARY(2) serialization.
176
177 Since the VOTable binary serialization has no framing, we need to
178 present the data stream coming from the parser as a file to the decoder.
179 """
180
181
182
def __iter__(self):
    """yields the rows decoded from a base64-encoded STREAM.

    Character data before the STREAM element is skipped.  The node
    iterator is then wrapped into a _StreamData instance, from which
    _decodeRawRow pulls one row at a time until the stream is at its
    end; rows for which the decoder returns None are not yielded.

    A common.VOTableError is raised unless the first non-data event
    opens a STREAM element with encoding base64.
    """
    # initialised so an exhausted node iterator yields the VOTableError
    # below rather than a NameError.
    evType = tag = payload = None
    for evType, tag, payload in self.nodeIterator:
        if evType!="data":
            break
    if not (evType=="start"
            and tag=="STREAM"
            and payload.get("encoding")=="base64"):
        raise common.VOTableError("Can only read BINARY data from base64"
            " encoded streams")

    inF = _StreamData(self.nodeIterator)
    while not inF.atEnd():
        row = self._decodeRawRow(inF)
        if row is not None:
            yield row
198
199
202
203
206
207
def _makeTableIterator(elementName, tableDefinition, nodeIterator):
    """returns an iterator for the rows contained within node.

    elementName is the name of the serialization element (TABLEDATA,
    BINARY, or BINARY2) and selects the iterator class; that class is
    instantiated with tableDefinition and nodeIterator.

    A common.VOTableError is raised for any other elementName.
    """
    if elementName=='TABLEDATA':
        iteratorClass = TableDataIterator
    elif elementName=='BINARY':
        iteratorClass = BinaryIterator
    elif elementName=='BINARY2':
        iteratorClass = Binary2Iterator
    else:
        raise common.VOTableError("Unknown table serialization: %s"%
            elementName, hint="We only support TABLEDATA, BINARY2,"
            " and BINARY coding")

    return iter(iteratorClass(tableDefinition, nodeIterator))
222
223
225 """a wrapper for data within a VOTable.
226
227 Tabledatas are constructed with a model.VOTable.TABLE instance and
228 the iterator maintained by parser.parse. They yield individual
229 table lines.
230
231 In reality, __iter__ just dispatches to the various deserializers.
232 """
def __init__(self, tableDefinition, nodeIterator):
    """keeps tableDefinition and nodeIterator for use by __iter__.

    No events are consumed from nodeIterator here.
    """
    self.tableDefinition = tableDefinition
    self.nodeIterator = nodeIterator
235
def __iter__(self):
    """returns an iterator over the rows of the table.

    Character data and INFO events are skipped; the tag of the first
    other event is taken as the name of the serialization element and
    handed to _makeTableIterator, which provides the actual iterator.

    If the node iterator runs out before such an event is seen, an
    empty iterator is returned.
    """
    for evType, tag, _ in self.nodeIterator:
        if evType=="data" or tag=="INFO":
            continue
        return _makeTableIterator(tag,
            self.tableDefinition, self.nodeIterator)
    # __iter__ must not return None
    return iter(())
245