1 """
2 Parsing and generating STC-S
3
4 The general plan is to parse STC-S into some sort of tree (dictionaries
5 with list values, possibly containing more such dictionaries). These
6 trees can then be processed into something roughly resembling the data
7 model, furnished with defaults, and processed by what essentially is
8 user code.
9
10 Extensions to what the note says:
11
12 - After flavor, you can add an epoch using something like "Epoch J2000.0".
13 - After the FK5, FK4 and ECLIPTIC frame specs, you can add an optional
14 astroYear (Bnnnn, Jnnnn) designating a custom equinox.
15 - There is a system subphrase that lets you specify a system from the
16 STC library (without the ivo:// decoration). It starts with System
17 and is specified last. It will override all other system specifications.
18 - If enabled, you can use identifiers in double quotes wherever values
19 are allowed; this will generate column references.
20 - After the reference position, you can optionally mention the planetary
21 ephemeris used; currently, only JPL-DE200 and JPL-DE405 are allowed.
22 """
23
24
25
26
27
28
29
30 from pyparsing import (
31 Word, Literal, Optional, alphas, CaselessKeyword,
32 ZeroOrMore, OneOrMore, StringEnd,
33 Suppress, Forward,
34 Regex, alphanums,
35 ParseException, ParseResults,
36 ParseSyntaxException)
37
38 from gavo import utils
39 from gavo.stc import common
40 from gavo.stc import stcsdefaults
41 from gavo.stc import times
42
44 """A sentinel for computed default values.
45 """
46 pass
47
48
49
# Flavor keywords accepted by the grammar (see the ``flavor`` symbol).
# Each keyword maps to a pair; presumably (number of axes, name of the
# coordinate flavor) -- confirm against the code consuming the CST.
stcsFlavors = dict(
    SPHER2=(2, "SPHERICAL"),
    SPHER3=(3, "SPHERICAL"),
    UNITSPHER=(3, "UNITSPHERE"),
    CART1=(1, "CARTESIAN"),
    CART2=(2, "CARTESIAN"),
    CART3=(3, "CARTESIAN"),
)
58
59
# Unit words the grammar accepts after the ``unit`` keyword, one set per
# kind of coordinate.  "m" and "mm" appear in both the spatial and the
# spectral set.
spatialUnits = set(
    "deg arcmin arcsec m mm km AU pc kpc Mpc rad".split())
temporalUnits = set("yr cy s d a".split())
spectralUnits = set(
    "MHz GHz Hz Angstrom keV MeV eV mm um nm m".split())
65
69
70
72 """does iterNode's work for dict nodes.
73 """
74 for k, v in node.iteritems():
75 if isinstance(v, list):
76 subIter = _iterListNode(v, path+(k,))
77 elif isinstance(v, dict):
78 subIter = _iterDictNode(v, path+(k,))
79 else:
80 continue
81 for res in subIter:
82 yield res
83 yield path, node
84
86 """does iterNode's work for list nodes.
87 """
88 for subNode in node:
89 if isinstance(subNode, dict):
90 for res in _iterDictNode(subNode, path):
91 yield res
92
94 """traverses the concrete syntax tree in postorder, returning pairs of
95 paths and nodes.
96
97 A node returned here is always a dictionary. The path consists of the
98 keys leading to the node in a tuple.
99 """
100 if isinstance(tree, list):
101 return _iterListNode(tree, ())
102 elif isinstance(tree, dict):
103 return _iterDictNode(tree, ())
104 else:
105 raise common.STCInternalError("Bad node in tree %s"%tree)
106
107
117
118
128
129
131 """returns the pyparsing parseResult as a data structure consisting
132 of simple python dicts and lists.
133
134 The "tree" has two kinds of nodes: Dictionaries having lists as
135 values, and lists containing (as a rule) literals or (for more deeply
136 nested constructs, which are rare in STC-S) other dictionaries of
137 this kind.
138
139 A parse node becomes a dict node if it has named children. The root
140 always is a dict.
141
142 Note that unnamed children of nodes becoming dicts will be lost in
143 the result.
144 """
145 if not len(parseResult):
146 res = None
147 elif parseResult.keys():
148 res = {}
149 for k in parseResult.keys():
150 v = parseResult[k]
151
152 if isinstance(v, ParseResults):
153 v = makeTree(v)
154 if v is not None:
155 res[k] = v
156 else:
157 if isinstance(parseResult[0], ParseResults):
158 res = [makeTree(child) for child in parseResult]
159 else:
160 res = list(parseResult)
161 return res
162
163
165 """returns a regular expression matching any of the strings in iterable.
166
167 The trick is that the longest keys must come first.
168 """
169 return "|".join(sorted(iterable, key=lambda x:-len(x)))
170
171
173 """returns a dictionary of pyparsing symbols defined in the locals.
174
175 locals would be the value locals() as a rule.
176 """
177 syms = dict((n, v) for n, v in locals.iteritems()
178 if hasattr(v, "setName"))
179 if not exportAll:
180 syms = dict((n, v) for n, v in syms.iteritems()
181 if not n.startswith("_"))
182 return syms
183
184
186 """a parse action joining items in parse results with blanks.
187 """
188 return " ".join(t)
189
191 """a parse action joining items in parse results.
192 """
193 return "".join(t)
194
196 """a parse action that returns the first item of the tokens.
197
198 You typically want this when you know there's only one token, e.g.,
199 on Disjunctions or such
200 """
201 return t[0]
202
203
def _getSTCSGrammar(numberLiteral,
        timeLiteral,
        _exportAll=False,
        _addGeoReferences=False,
        _astroYearOverride=None):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _exportAll is handed through to _makeSymDict together with this
    function's locals(); the names of the local variables below therefore
    are the keys of the returned dictionary and must not be renamed
    casually.

    _addGeoReferences lets you write quoted references to vectors
    (like Circle "center" 20.).
    NOTE(review): the regular expression used below for these references
    matches an identifier in square brackets ([center]), not in double
    quotes -- confirm which syntax the example above should show.

    _astroYearOverride, if true, is the symbol used for the value of the
    Epoch clause instead of the literal Bnnnn/Jnnnn form.  The equinox
    after FK4/FK5/ECLIPTIC always uses the literal form.

    The top-level symbol is stcsPhrase.
    """
    with utils.pyparsingWhitechars("\n\t\r "):

        # From here on, only the name "number" refers to the number symbol;
        # the parameter name is removed from locals(), which is what gets
        # turned into the result dictionary at the end.
        number = numberLiteral
        del numberLiteral

        # units
        # The "-" operator is pyparsing's And with an error stop: once the
        # "unit" keyword has matched, a failure in what follows is raised
        # as a syntax error instead of letting the parser backtrack.
        _unitOpener = Suppress( CaselessKeyword("unit") )
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        # several space units (e.g., one per axis) are joined with blanks
        # into a single string
        spaceUnit = _unitOpener - OneOrMore( _spaceUnitWord ).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        # redshift units are either <space unit>/<time unit> or nil
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil") )("unit")
        # velocity units: one or more <space unit>/<time unit> items, each
        # concatenated into one string, all of them joined with blanks
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            ).addParseAction(_stringifyBlank))("unit")

        # astro years (Bnnnn, Jnnnn); astroYear is what the Epoch clause
        # uses and may be overridden by the caller
        literalAstroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        if _astroYearOverride:
            astroYear = _astroYearOverride
        else:
            astroYear = literalAstroYear

        # basic metadata: fill factor, frame, reference position, flavor
        fillfactor = (Suppress( CaselessKeyword("fillfactor")
            ) + number("fillfactor"))
        # frames that do not take an equinox
        noEqFrame = (CaselessKeyword("J2000")
            | CaselessKeyword("B1950")
            | CaselessKeyword("ICRS")
            | CaselessKeyword("GALACTIC")
            | CaselessKeyword("GALACTIC_I")
            | CaselessKeyword("GALACTIC_II")
            | CaselessKeyword("SUPER_GALACTIC")
            | CaselessKeyword("GEO_C")
            | CaselessKeyword("GEO_D")
            | CaselessKeyword("HPR")
            | CaselessKeyword("HGS")
            | CaselessKeyword("HGC")
            | CaselessKeyword("HPC")
            | CaselessKeyword("UNKNOWNFrame"))("frame")
        # frames that may be followed by a literal astro year as equinox
        eqFrameName = (CaselessKeyword("FK5")
            | CaselessKeyword("FK4")
            | CaselessKeyword("ECLIPTIC"))("frame")
        eqFrame = eqFrameName + Optional( literalAstroYear("equinox") )
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword("JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos")
            + Optional( plEphemeris("plEphemeris") ))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

        # properties that can follow any kind of coordinate; each is its
        # keyword followed by one or more numbers
        error = Suppress( CaselessKeyword("Error") ) + OneOrMore( number )
        resolution = Suppress( CaselessKeyword("Resolution")
            ) + OneOrMore( number )
        size = Suppress( CaselessKeyword("Size") ) + OneOrMore(number)
        pixSize = Suppress( CaselessKeyword("PixSize") ) + OneOrMore(number)
        cooProps = (Optional( error("error") )
            + Optional( resolution("resolution") )
            + Optional( size("size") )
            + Optional( pixSize("pixSize") ))

        # building blocks shared by the spatial (and other) sub-phrases
        _coos = ZeroOrMore( number )("coos")
        _pos = Optional( ZeroOrMore( number )("pos") )
        if _addGeoReferences: # include references to vectors, for getColrefSymbols
            # "[identifier]" becomes a common.GeometryColRef for the
            # identifier (the brackets are cut off) and can stand in for
            # the entire list of numbers.
            complexColRef = Regex('[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                lambda s,p,toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress( CaselessKeyword("Position") ) + _pos
        # error stop: after "Epoch", an astro year is mandatory
        epochSpec = Suppress( CaselessKeyword("Epoch") ) - astroYear
        _spatialProps = Optional( spaceUnit ) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type")
            + OneOrMore( number )("pos"))
        # every part of the velocity interval is optional; the result is
        # converted to a plain dict/list tree by makeTree
        velocityInterval = (
            Optional(
                CaselessKeyword("VelocityInterval")("type")
                + Optional( fillfactor )
                + _coos )
            + Optional( velocitySpec )
            + Optional( velocityUnit )
            + cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps +
            Optional( velocityInterval)("velocity"))
        _regionTail = Optional( positionSpec ) + _spatialTail
        # the parse action on the epoch reduces the match to its first
        # (and only unsuppressed) token
        _commonSpaceItems = ( frame + Optional( refpos ) +
            Optional( flavor ) + Optional(
                epochSpec("epoch").addParseAction(lambda s,p,t: t[0])))
        _commonRegionItems = Optional( fillfactor ) + _commonSpaceItems

        # times and time intervals
        # Note that this regular expression is joined directly, not via
        # _reFromKeys like the other keyword sets.
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress( CaselessKeyword("Time") ) + timeLiteral
        _commonTimeItems = Optional( timeUnit ) + cooProps
        _intervalOpener = ( Optional( fillfactor ) +
            Optional( timescale("timescale") ) +
            Optional( refpos ) )
        _intervalCloser = Optional( timephrase("pos") ) + _commonTimeItems

        timeInterval = (CaselessKeyword("TimeInterval")("type") +
            _intervalOpener + ZeroOrMore( timeLiteral )("coos") +
            _intervalCloser)
        # setResultsName's second argument is listAllMatches; with it, the
        # single time literal is stored under the name as a list.
        startTime = (CaselessKeyword("StartTime")("type") + _intervalOpener +
            timeLiteral.setResultsName("coos", True) + _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type") + _intervalOpener +
            timeLiteral.setResultsName("coos", True) + _intervalCloser)
        time = (CaselessKeyword("Time")("type") + Optional( timescale("timescale") ) +
            Optional( refpos ) + Optional(
                timeLiteral.setResultsName("pos", True) ) + _commonTimeItems)
        timeSubPhrase = (timeInterval
            | startTime
            | stopTime
            | time).addParseAction(makeTree)

        # atomic "geometries"; position and positionInterval are defined
        # separately further down.
        _atomicGeometryKey = ( CaselessKeyword("AllSky").setName("sub-geometry")
            | CaselessKeyword("Circle")
            | CaselessKeyword("Ellipse")
            | CaselessKeyword("Box")
            | CaselessKeyword("Polygon")
            | CaselessKeyword("Convex")
            | CaselessKeyword("PositionInterval") )
        atomicGeometry = ( _atomicGeometryKey("type")
            + _commonRegionItems
            + _coos
            + _regionTail )

        # compound "geometries"
        # Operands are either atomic geometries reduced to keyword and
        # coordinates (no frame or other metadata) or, recursively, further
        # compound expressions.  Each operand's named results are turned
        # into a plain dictionary.
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (( _atomicGeometryKey("subtype") + _coos )
            | _compoundGeoExpression ).addParseAction(lambda s,p,t: dict(t))

        # Not takes exactly one operand...
        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = ( Suppress( '(' )
            + _compoundGeoOperand + Suppress( ')' ) )
        _compoundGeoExprUnary = ( _compoundGeoOperatorUnary("subtype")
            + _compoundGeoOperandsUnary("children") )

        # ...Difference exactly two...
        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = ( Suppress( '(' )
            + _compoundGeoOperand + _compoundGeoOperand + Suppress( ')' ) )
        _compoundGeoExprBinary = ( _compoundGeoOperatorBinary("subtype")
            + _compoundGeoOperandsBinary("children") )

        # ...and Union and Intersection two or more.
        _compoundGeoOperatorNary = ( CaselessKeyword("Union")
            | CaselessKeyword("Intersection") )
        _compoundGeoOperandsNary = ( Suppress( '(' )
            + _compoundGeoOperand + _compoundGeoOperand
            + ZeroOrMore( _compoundGeoOperand ) + Suppress( ')' ) )
        _compoundGeoExprNary = ( _compoundGeoOperatorNary("subtype")
            + _compoundGeoOperandsNary("children") )

        _compoundGeoExpression << ( _compoundGeoExprUnary
            | _compoundGeoExprBinary
            | _compoundGeoExprNary )
        # At the top level, the operator is stored as "type" (rather than
        # "subtype") and is followed by the common region metadata.  Only
        # the n-ary branch uses the error-stop operator "-" before its
        # operand list.
        compoundGeoPhrase = ( _compoundGeoOperatorUnary("type")
                + _commonRegionItems
                + _compoundGeoOperandsUnary("children") + _regionTail
            | _compoundGeoOperatorBinary("type")
                + _commonRegionItems
                + _compoundGeoOperandsBinary("children") + _regionTail
            | _compoundGeoOperatorNary("type")
                + _commonRegionItems
                - _compoundGeoOperandsNary("children") + _regionTail )

        # space subphrase
        # positionInterval is tried before atomicGeometry, which also lists
        # the PositionInterval keyword with the same structure.
        positionInterval = ( CaselessKeyword("PositionInterval")("type")
            + _commonRegionItems
            + _coos
            + _regionTail )
        position = ( CaselessKeyword("Position")("type")
            + _commonSpaceItems
            + _pos
            + _spatialTail )
        spaceSubPhrase = ( positionInterval
            | position
            | atomicGeometry
            | compoundGeoPhrase ).addParseAction(makeTree)

        # spectral subphrase
        spectralSpec = (Suppress( CaselessKeyword("Spectral") )
            + number)("pos")
        _spectralTail = Optional( spectralUnit ) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type")
            + Optional( fillfactor )
            + Optional( refpos )
            + _coos
            + Optional( spectralSpec )
            + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type")
            + Optional( refpos )
            + _pos
            + _spectralTail)
        spectralSubPhrase = (spectralInterval | spectral ).addParseAction(
            makeTree)

        # redshift subphrase
        # redshiftType and dopplerdef are plain regular expressions and
        # hence, unlike the keywords, case sensitive.
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress( CaselessKeyword("Redshift") ) + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional( redshiftUnit ) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type")
            + Optional( fillfactor )
            + Optional( refpos )
            + Optional( redshiftType )
            + Optional( dopplerdef )
            + _coos
            + Optional( redshiftSpec )
            + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type")
            + Optional( refpos )
            + Optional( redshiftType )
            + Optional( dopplerdef )
            + _pos
            + _redshiftTail)
        redshiftSubPhrase = (redshiftInterval | redshift).addParseAction(
            makeTree)

        # system subphrase (an extension, see the module docstring): the
        # System keyword followed by a name, stored as libSystem
        xmlName = Word(alphas+"_:", alphanums+'.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress( CaselessKeyword("System") )
            + xmlName("libSystem"))

        # top level: all sub-phrases are optional but must come in this
        # order, and nothing may follow them
        stcsPhrase = (
            Optional( timeSubPhrase )("time") +
            Optional( spaceSubPhrase )("space") +
            Optional( spectralSubPhrase )("spectral") +
            Optional( redshiftSubPhrase )("redshift") +
            Optional( systemDefinition ) ) + StringEnd()

        return _makeSymDict(locals(), _exportAll)
457
458
def getSymbols(_exportAll=False, _colrefLiteral=None,
        _addGeoReferences=False):
    """returns an STC-S grammar with terminal values.

    This defines the terminals (numbers, JD, MJD and ISO time literals,
    astro years), has _getSTCSGrammar build the rest of the grammar on top
    of them and returns the combined symbol dictionary.

    If _colrefLiteral is given, it is accepted as an alternative wherever
    a number, a time literal or an astro year is expected.  _exportAll and
    _addGeoReferences are passed on to _getSTCSGrammar; _exportAll is also
    used when collecting the terminals defined here.
    """
    # Parse actions turning the matched text into python values.  These
    # are ordinary functions, so they are not picked up as grammar symbols
    # when the locals are collected below.
    def _asFloat(s, p, toks):
        return float(toks[0])

    def _jdToDateTime(s, p, toks):
        return times.jdnToDateTime(float(toks[0]))

    def _mjdToDateTime(s, p, toks):
        return times.mjdToDateTime(float(toks[0]))

    def _isoToDateTime(s, p, toks):
        return times.parseISODT(toks[0])

    with utils.pyparsingWhitechars("\n\t\r "):
        # numbers: plain decimals, optionally with an exponent
        _exactNumericRE = r"[+-]?\d+(\.(\d+)?)?|[+-]?\.\d+"
        exactNumericLiteral = Regex(_exactNumericRE)
        numberLiteral = Regex(r"(?i)(%s)(E[+-]?\d+)?"%_exactNumericRE)
        numberLiteral.addParseAction(_asFloat)

        # times: "JD <number>", "MJD <number>", or an ISO-like timestamp
        jdLiteral = Suppress( Literal("JD") ) + exactNumericLiteral
        jdLiteral.addParseAction(_jdToDateTime)
        mjdLiteral = Suppress( Literal("MJD") ) + exactNumericLiteral
        mjdLiteral.addParseAction(_mjdToDateTime)
        isoTimeLiteral = Regex(
            r"\d\d\d\d-?\d\d-?\d\d(T\d\d:?\d\d:?\d\d(\.\d*)?Z?)?")
        isoTimeLiteral.addParseAction(_isoToDateTime)
        timeLiteral = isoTimeLiteral | jdLiteral | mjdLiteral

        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")

        if _colrefLiteral:
            # "^" makes pyparsing try both alternatives and keep the
            # longest match.
            numberLiteral = _colrefLiteral ^ numberLiteral
            timeLiteral = _colrefLiteral ^ timeLiteral
            astroYear = _colrefLiteral ^ astroYear

        symbols = _getSTCSGrammar(numberLiteral, timeLiteral, _exportAll,
            _addGeoReferences=_addGeoReferences,
            _astroYearOverride=astroYear)
        # add the terminals defined in this function
        symbols.update(_makeSymDict(locals(), _exportAll))
        return symbols
490
491
493 """returns an STC-S grammar with column references as values.
494
495 The column references used here have the form "<colref>" to cut down
496 on ambiguities. We only accept simple identifiers (i.e., not quoted in
497 the SQL sense), though.
498 """
499 def makeColRef(s, p, toks):
500 return common.ColRef(toks[0][1:-1])
501 with utils.pyparsingWhitechars("\n\t\r "):
502 atomicColRef = Regex('"[A-Za-z_][A-Za-z_0-9]*"').addParseAction(
503 makeColRef)
504 return getSymbols(_colrefLiteral=atomicColRef, _addGeoReferences=True)
505
506
508 if not debugNames:
509 debugNames = syms
510 for name in debugNames:
511 ob = syms[name]
512 ob.setDebug(True)
513 ob.setName(name)
514
515
# Module-level accessors for the two symbol dictionaries.  getCST calls
# whichever of these it is given without arguments and looks up
# "stcsPhrase" in the result.
# NOTE(review): utils.CachedGetter is defined elsewhere; presumably it
# runs the wrapped function on first use only and hands out the cached
# result afterwards -- confirm.
getGrammar = utils.CachedGetter(getSymbols)
getColrefGrammar = utils.CachedGetter(getColrefSymbols)
518
519
def getCST(literal, grammarFactory=None):
    """returns a CST for an STC-S expression.

    literal is the STC-S string.  If it is empty or consists of whitespace
    only, an empty dictionary is returned without touching the grammar.

    grammarFactory is a function returning the grammar, in this case
    either getGrammar (which gets used if the argument is left out) or
    getColrefGrammar.

    Parse failures are reported as common.STCSParseError carrying the
    offending expression and the error position.  On success, addDefaults
    is called on the tree before it is returned.
    """
    if not literal.strip():
        return {}

    factory = getGrammar if grammarFactory is None else grammarFactory
    try:
        phrase = factory()["stcsPhrase"]
        parsed = utils.pyparseString(phrase, literal)
        cst = makeTree(parsed)
    except (ParseException, ParseSyntaxException) as parseError:
        message = "Invalid STCS expression (%s at %s)"%(
            parseError.msg, parseError.loc)
        raise common.STCSParseError(message,
            expr=literal, pos=parseError.loc)

    addDefaults(cst)
    return cst
542
543
if __name__=="__main__":
    # Ad-hoc debugging aid: parse a sample phrase with the column
    # reference grammar, with pyparsing debugging switched on, and
    # pretty-print the resulting tree.
    import pprint
    syms = getColrefSymbols()
    enableDebug(syms)
    parseResult = syms["stcsPhrase"].parseString(
        "Position ICRS Epoch J2000.0 20 21", parseAll=True)
    pprint.pprint(makeTree(parseResult))
551