1 """
2 Helpers for manipulating serialised RDs.
3
4 The problem here is that RDs typically are formatted with lots of love,
5 also within elements -- e.g., like this:
6
7 <column name="bla" type="text"
8 ucd="foo.bar"
9 description="A long text carefully
10 broken at the right place"
11 />
12
13 There's no way one can coax a normal XML parser into giving events that'd
14 allow us to preserve this formatting. Hence, when manipulating
RD sources, I need something less sophisticated -- the dumb XML parser
16 implemented here.
17
18 Note that this will accept non-well-formed documents; don't use this except
19 for the limited purpose of editing supposedly well-formed documents.
20 """
21
22
23
24
25
26
27
28 import os
29
30 from gavo import base
31 from gavo import rscdesc
32 from gavo import utils
33 from gavo.user import info
34 from pyparsing import (CharsNotIn, Forward, Literal, Optional,
35 ParseResults, QuotedString, SkipTo,
36 StringEnd, White, Word, ZeroOrMore, alphas, alphanums)
37
38
try:
    # Python 2: matches both str and unicode.
    _STRING_TYPES = basestring
except NameError:
    # Python 3 has no basestring.
    _STRING_TYPES = str


def flatten(arg):
    """returns a string from a (possibly edited) parse tree.

    Strings are returned unchanged, lists and pyparsing ParseResults
    are flattened recursively and concatenated, and anything else is
    asked to serialise itself through its flatten() method.
    """
    if isinstance(arg, _STRING_TYPES):
        return arg
    # lists are checked first so plain-list trees never need pyparsing
    if isinstance(arg, list) or isinstance(arg, ParseResults):
        return "".join(flatten(a) for a in arg)
    return arg.flatten()
48
49
51
52
53 return [t.asList()]
54
55
57 """a sentinel for XML attributes.
58 """
62
63
def getAttribute(parseResult, name):
    """returns the Attribute element with name within parseResult.

    parseResult is any iterable of parse tree items (typically an
    opening or empty tag); items that are not Attribute instances
    are skipped.

    If no such attribute exists, a KeyError is raised.
    """
    for el in parseResult:
        if isinstance(el, Attribute) and el.name==name:
            return el
    raise KeyError("No attribute %s in %s"%(name, flatten(parseResult)))
74
75
class NewElement(object):
    """an element to be inserted into a parsed xml tree.

    It is constructed with the element name and its (unescaped) text
    content; flatten() serialises it with the content PCDATA-escaped.
    """
    def __init__(self, elementName, textContent):
        self.elementName, self.textContent = elementName, textContent

    def flatten(self):
        """returns the serialised element as a string.
        """
        return "<%s>%s</%s>"%(
            self.elementName,
            utils.escapePCDATA(self.textContent),
            self.elementName)
87
88
class Element(list):
    """a sentinel for XML elements.

    These are constructed with lists of the type [tag,...]; the opening (or
    empty) tag is always item 0.  The element name is taken from the
    second item of that tag and is available in the name attribute.
    """
    def __init__(self, t):
        list.__init__(self, t)
        # tags look like ["<", name, ...]
        self.name = t[0][1]

    def append(self, newChild):
        """inserts newChild as the last child of this element.

        The last item will in general be the closing tag; as in the
        original code, the child goes in front of the last item before
        that (usually the whitespace indenting the closing tag).

        Empty tags are opened up first, and children of elements
        without content are put between opening and closing tag.
        """
        if len(self)==1 and self[0][-1]=="/>":
            # turn <foo .../> into <foo ...></foo>, keeping the attributes
            self[:] = [
                list(self[0][:-1])+[">"],
                ["</", self.name, ">"]]

        # never insert in front of the opening tag
        insertAt = max(1, len(self)-2)
        self[insertAt:insertAt] = [newChild]

    def getAttribute(self, name):
        """returns the Attribute element with name within self.

        If no such attribute exists, a KeyError is raised.
        """
        return getAttribute(self[0], name)

    def findElement(self, name):
        """returns the first element called name somewhere within the xml
        grammar-parsed parseResult

        This is a depth-first search, and it will return None if there
        is no such element.
        """
        for el in self:
            if not isinstance(el, Element):
                continue
            if el.name==name:
                return el

            res = el.findElement(name)
            if res is not None:
                return res
        return None

    def countElements(self, name):
        """returns the number of name elements that are direct children
        of self.
        """
        return sum(1 for el in self
            if isinstance(el, Element) and el.name==name)
136
137
139
def getXMLGrammar(manipulator):
    """returns a dictionary of pyparsing symbols for a forgiving XML grammar.

    The dictionary maps the names of the grammar symbols defined here
    ("document", "element", "attribute", ...) to the parser elements.

    manipulator's _openElement method is attached as parse action to
    tag openers and its _feedElement method to complete elements.
    Attributes are turned into Attribute instances, and each tag is
    wrapped into a list of its own by _nodify.

    All symbols are told to leave whitespace alone so the source
    formatting survives a parse/flatten round trip.
    """
    with utils.pyparsingWhitechars("\r"):
        name = Word(alphas+"_:", alphanums+".:_-")
        opener = Literal("<")
        closer = Literal(">")
        # quotes are kept on the values so they can be written back verbatim
        value = (
            QuotedString(quoteChar="'", multiline=True, unquoteResults=False)
            | QuotedString(quoteChar='"', multiline=True, unquoteResults=False))
        attribute = (name
            + Optional(White())
            + Literal("=")
            + Optional(White())
            + value)
        tagOpener = (opener
            + name
            + ZeroOrMore(White() + attribute)
            + Optional(White()))

        openingTag = (tagOpener
            + closer)
        closingTag = (opener
            + Literal("/")
            + name
            + Optional(White())
            + closer)
        emptyTag = (tagOpener
            + Optional(White())
            + Literal("/>"))

        processingInstruction = (opener
            + Literal("?")
            + SkipTo("?>", include=True))
        comment = (opener
            + Literal("!--")
            + SkipTo("-->", include=True))
        cdataSection = (opener
            + Literal("![CDATA[")
            + SkipTo("]]>", include=True))

        nonTagStuff = CharsNotIn("<", min=1)

        docItem = Forward()
        element = (
            (openingTag + ZeroOrMore(docItem) + closingTag)
            | emptyTag)
        docItem << (element
            | processingInstruction
            | comment
            | cdataSection
            | nonTagStuff)

        document = (ZeroOrMore(Optional(White()) + docItem)
            + Optional(White()) + StringEnd())
        document.parseWithTabs()

        element.addParseAction(manipulator._feedElement)
        tagOpener.addParseAction(manipulator._openElement)
        attribute.addParseAction(lambda s,p,t: [Attribute(t)])
        openingTag.addParseAction(_nodify)
        closingTag.addParseAction(_nodify)
        emptyTag.addParseAction(_nodify)

        # with manipulator gone, only grammar symbols are left in locals()
        del manipulator
        # iterate over a snapshot: the loop variable itself is a local, too
        for el in list(locals().values()):
            el.leaveWhitespace()
        del el

    return locals()
207
208
def processXML(document, manipulator):
    """processes an XML-document with manipulator.

    document is a string containing the XML, and the function returns
    the serialized, possibly changed XML as a string.  You're doing
    yourself a favour if document is a unicode string.

    manipulator is an instance of a derivation of Manipulator below.
    There's a secret handshake between Manipulator and the grammar, so
    you really need to inherit, just putting in the two methods won't do.
    """
    syms = getXMLGrammar(manipulator)
    res = utils.pyparseString(syms["document"], document)
    return flatten(res)
224
225
227 """a base class for processXML manipulators.
228
229 Pass instances of these into processXML. You must up-call the
230 constructor without arguments.
231
232 Override the gotElement(parseResult) method to do what you want. The
233 parseResult is a pyparsing object with the tag name in second position of the
234 first matched thing and the attributes barely parsed out (if you need them,
235 improve the parsing to get at the attributes with less effort.)
236
237 gotElement receives an entire element with opening tag, content, and
238 closing tag (or just an empty tag). To manipulate the thing, just
239 return what you want in the document.
240
241 There's also startElement(parsedOpener) that essentially works
242 analogously; you will, however *not* receive startElements for
243 empty elements, so that's really intended for bookkeeping.
244
245 You also have a hasParent(tagName) method on Manipulators returning
246 whether there's a tagName element somewhere among the ancestors
247 of the current tag.
248 """
251
256
258 return name in self.tagStack
259
261
262 self.tagStack.pop()
263 parsedElement = Element(parsedElement)
264 return [self.gotElement(parsedElement)]
265
268
271
272
class NROWS(object):
    """a singleton sentinel to communicate nrows in tableTriggers.

    The class itself (rather than an instance) is used as the dictionary
    key, so it cannot clash with a column name.
    """
276
class _ValuesChanger(Manipulator):
    """a manipulator fiddling in values limits as returned by
    iterLimitsForTable.

    Note again: this implementation just supports a single coverage
    element per RD.  We'll have to change limits contents when
    there can reasonably be more.
    """
    def __init__(self, limits):
        """sets up the per-table triggers and coverage items from limits.

        limits is an iterable of (kind, payload) pairs, where kind is one
        of "limits", "coverage", or "nrows".
        """
        self.tableTriggers, self.coverageItems = {}, {}
        self.curColumns = None
        # make sure _updateNRows has something to look at even before
        # any table has been opened.
        self.curNRows = None

        for kind, payload in limits:
            if kind=="limits":
                tableName, columnName, minVal, maxVal = payload
                self.tableTriggers.setdefault(tableName, {})[
                    columnName] = (minVal, maxVal)

            elif kind=="coverage":
                _, axis, value = payload
                self.coverageItems[axis] = value

            elif kind=="nrows":
                tableName, nrows = payload
                self.tableTriggers.setdefault(tableName, {})[
                    NROWS] = nrows

            else:
                # an explicit raise survives python -O, an assert doesn't
                raise AssertionError("Unknown limits kind: %r"%(kind,))

        Manipulator.__init__(self)

    # NOTE(review): startElement() and _fixValues() belong here; their
    # source lines are missing from the listing this was reviewed from
    # and have not been reconstructed.

    def _fixCoverage(self, coverageElement):
        """updates or adds the axis children of coverageElement from
        self.coverageItems.

        Axes without an element get a new child.  An existing element is
        only touched if it is the only direct child for its axis;
        otherwise, a ReportableError is raised.
        """
        for axisName, literal in self.coverageItems.items():
            destEl = coverageElement.findElement(axisName)
            if destEl is None:
                coverageElement.append(NewElement(axisName, literal))
                continue

            if coverageElement.countElements(axisName)!=1:
                raise base.ReportableError("Cannot replace coverage for"
                    " axis '%s': unsupported previous content."%axisName,
                    hint="DaCHS will only replace coverage if there is"
                    " just one element for an axis.  If you want DaCHS"
                    " to update the coverage on this axis, delete any"
                    " previous elements for this axis.")

            # escape as NewElement does for freshly inserted elements
            escaped = utils.escapePCDATA(literal)

            if len(destEl)==1:
                # empty tag: replace it with opening tag, text, closing tag
                elName = destEl.pop()[1]
                destEl[:] = [
                    ['<', elName, '>'],
                    escaped,
                    ['</', elName, '>'],]
            elif len(destEl)==2:
                # no content yet; insert the text as *one* item
                destEl[1:1] = [escaped]
            elif len(destEl)==3:
                # replace the existing content
                destEl[1] = escaped
            else:
                raise AssertionError("Unexpected content in coverage"
                    " element %s"%axisName)

    def _updateNRows(self, nRowsElement):
        """changes nRowsElement to postgres' current estimate of the table
        size, if available.
        """
        if not self.curNRows:
            return
        if len(nRowsElement)!=3:
            raise AssertionError("nrows element must have exactly one"
                " text child")
        nRowsElement[1] = str(self.curNRows)

    def gotElement(self, parsedElement):
        """dispatches closed elements to the methods updating them.

        NOTE(review): the nesting of the table/coverage/nrows branches
        could not be recovered from the listing; they are evaluated
        independently of curColumns here -- confirm against the original.
        """
        if self.curColumns is not None and parsedElement.name=="column":
            # the column is identified by name or, failing that, original
            for attrName in ["name", "original"]:
                try:
                    colName = parsedElement.getAttribute(attrName).value
                    if colName in self.curColumns:
                        self._fixValues(
                            parsedElement, self.curColumns[colName])
                except KeyError:
                    continue
                break

        if parsedElement.name=="table":
            # leaving the table: its triggers no longer apply
            self.curColumns = None
            self.curNRows = None

        elif parsedElement.name=="coverage":
            self._fixCoverage(parsedElement)

        elif parsedElement.name=="nrows":
            self._updateNRows(parsedElement)

        return parsedElement
389
390
def iterCoverageItems(updater):
    """yields coverage items for inclusion in RDs.

    The items are pairs ("coverage", (reserved, axis, literal)), where axis
    is one of "spatial", "temporal", and "spectral".  An axis is only
    considered if it is not None on updater.parent and if a source table
    (the axis-specific one or updater.sourceTable) is available.

    NOTE: so far, we can only have one coverage item.  So, it's enough
    to just say "fill this into axis x of coverage".  If and when we
    have more than one coverage items, we'll have to re-think that.
    That's why there's the "reserved" value in the tuples.  We'll have to
    put something in there (presumably the index of the coverage element,
    but perhaps we'll have a better identity at some point).
    """
    if updater.parent.spatial is not None:
        sourceTable = updater.spaceTable or updater.sourceTable
        if sourceTable:
            moc = info.getMOCForStdTable(sourceTable, updater.mocOrder)
            yield "coverage", ("reserved", "spatial", moc.asASCII())

    if updater.parent.temporal is not None:
        sourceTable = updater.timeTable or updater.sourceTable
        if sourceTable:
            for interval in info.iterScalarLimits(
                    sourceTable,
                    info.getTimeLimitsColumnNames):
                yield "coverage", ("reserved", "temporal", str(interval))

    if updater.parent.spectral is not None:
        sourceTable = updater.spectralTable or updater.sourceTable
        if sourceTable:
            for interval in info.iterScalarLimits(
                    sourceTable,
                    info.getSpectralLimitsColumnNames):
                yield "coverage", ("reserved", "spectral", str(interval))
424
425
def iterLimitsForTable(tableDef):
    """yields values to fill in into tableDef.

    Nothing is yielded if the table doesn't exist.  Otherwise, this yields
    a pair ("nrows", (table-id, row-estimate)) and then a pair
    ("limits", (table-id, column-name, min, max)) for every column that
    has annotations after info.annotateDBTable has run.

    The other thing that *could* come back (but currently only does for
    iterLimitsForRD) is ("coverage", (reserved, axis, literal)); see
    iterCoverageItems for details.
    """
    qName = tableDef.getQName()
    # fetch what we need first so the querier isn't held open while the
    # consumer processes our items.
    with base.AdhocQuerier() as q:
        if q.getTableType(qName) is None:
            return
        rowEstimate = q.getRowEstimate(qName)
    yield "nrows", (tableDef.id, rowEstimate)

    info.annotateDBTable(tableDef, extended=False, requireValues=True)
    for col in tableDef:
        if col.annotations:
            minVal, maxVal = col.annotations["min"], col.annotations["max"]
            yield "limits", (tableDef.id, col.name, minVal, maxVal)
447
448
465
466
def getChangedRD(rdId, limits):
    """returns a string corresponding to the RD with rdId with limits applied.

    limits is an iterable of (kind, payload) pairs as accepted by
    _ValuesChanger (e.g., what iterLimitsForTable yields).
    We assume the values elements already exist.
    """
    _, f = rscdesc.getRDInputStream(rdId)
    # close the stream even if reading fails
    try:
        content = f.read()
    finally:
        f.close()
    return processXML(content, _ValuesChanger(limits))
477
478
def parseCmdLine(args=None):
    """returns the parsed command line arguments.

    args is a list of argument strings; when None (the default), argparse
    takes them from sys.argv.  The result has an itemId attribute.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description="Updates existing values min/max items in a referenced"
            " table or RD.")
    parser.add_argument("itemId", help="Cross-RD reference of a table or"
        " RD to update, as in ds/q or ds/q#mytable; only RDs in inputsDir"
        " can be updated.")
    return parser.parse_args(args)
489
490
# NOTE(review): the def line is missing from the reviewed listing; the name
# main is the conventional one and should be confirmed against the caller.
def main():
    """updates the limits in the RD (or the RD of the table) referenced
    on the command line and writes the result back to inputsDir.

    A ReportableError is raised if the reference resolves to something
    that is neither a table definition nor an RD.
    """
    from gavo import api
    args = parseCmdLine()
    item = api.getReferencedElement(args.itemId)

    if isinstance(item, api.TableDef):
        changes = iterLimitsForTable(item)
        rd = item.rd

    elif isinstance(item, api.RD):
        changes = iterLimitsForRD(item)
        rd = item

    else:
        raise base.ReportableError(
            "%s references neither an RD nor a table definition"%args.itemId)

    newText = getChangedRD(rd.sourceId, changes)
    destFName = os.path.join(
        api.getConfig("inputsDir"),
        rd.sourceId+".rd")
    with utils.safeReplaced(destFName) as f:
        f.write(newText)
514