Package gavo :: Package stc :: Module stcs
[frames | no frames]

Source Code for Module gavo.stc.stcs

  1  """ 
  2  Parsing and generating STC-S 
  3   
  4  The general plan is to parse STC-S into some sort of tree (dictionaries 
  5  with list values, possibly containing more such dictionaries).  These 
  6  trees can then be processed into something roughly resembling the data 
  7  model, furnished with defaults, and processed by what essentially is 
  8  user code. 
  9   
 10  Extensions to what the note says: 
 11   
 12          - After flavor, you can add an epoch using something like "Epoch J2000.0". 
 13          - After the FK5, FK4 and ECLIPTIC frame specs, you can add an optional 
 14                  astroYear (Bnnnn, Jnnnn) designating a custom equinox. 
 15          - There is a system subphrase that lets you specify a system from the 
 16                  STC library (without the ivo:// decoration).  It starts with System 
 17                  and is specified last.  It will override all other system specifications. 
 18          - If enabled, you can use identifiers in double quotes wherever values 
 19                  are allowed; this will generate column references. 
 20          - After the reference position, you can optionally mention the planetary 
 21                  ephemeris used; currently, only JPL-DE200 and JPL-DE405 are allowed. 
 22  """ 
 23   
 24  #c Copyright 2008-2019, the GAVO project 
 25  #c 
 26  #c This program is free software, covered by the GNU GPL.  See the 
 27  #c COPYING file in the source distribution. 
 28   
 29   
 30  from pyparsing import ( 
 31          Word, Literal, Optional, alphas, CaselessKeyword, 
 32                  ZeroOrMore, OneOrMore, StringEnd, 
 33                  Suppress, Forward,  
 34                  Regex, alphanums, 
 35                  ParseException, ParseResults,  
 36                  ParseSyntaxException) 
 37   
 38  from gavo import utils 
 39  from gavo.stc import common 
 40  from gavo.stc import stcsdefaults 
 41  from gavo.stc import times 
 42   
class AComputedDefault(object):
    """a marker class used as a sentinel for computed default values.

    The class has no attributes or methods of its own.
    """
# STC-S spatial flavors, each mapped to (number of dimensions, STC flavor)
stcsFlavors = dict(
    SPHER2=(2, "SPHERICAL"),
    SPHER3=(3, "SPHERICAL"),
    UNITSPHER=(3, "UNITSPHERE"),
    CART1=(1, "CARTESIAN"),
    CART2=(2, "CARTESIAN"),
    CART3=(3, "CARTESIAN"),
)


# unit names the grammar accepts after "unit", per kind of coordinate
spatialUnits = {
    "deg", "arcmin", "arcsec", "m", "mm", "km", "AU",
    "pc", "kpc", "Mpc", "rad",
}
temporalUnits = {"yr", "cy", "s", "d", "a"}
spectralUnits = {
    "MHz", "GHz", "Hz", "Angstrom", "keV", "MeV",
    "eV", "mm", "um", "nm", "m",
}
66 -def _assertGrammar(cond, msg, pos):
67 if not cond: 68 raise common.STCSParseError(msg, pos)
69 70
71 -def _iterDictNode(node, path):
72 """does iterNode's work for dict nodes. 73 """ 74 for k, v in node.iteritems(): 75 if isinstance(v, list): 76 subIter = _iterListNode(v, path+(k,)) 77 elif isinstance(v, dict): 78 subIter = _iterDictNode(v, path+(k,)) 79 else: 80 continue # content does not contain a subtree 81 for res in subIter: 82 yield res 83 yield path, node
84
85 -def _iterListNode(node, path):
86 """does iterNode's work for list nodes. 87 """ 88 for subNode in node: 89 if isinstance(subNode, dict): 90 for res in _iterDictNode(subNode, path): 91 yield res
92
def iterNodes(tree):
    """traverses the concrete syntax tree in postorder, returning pairs of
    paths and nodes.

    A node returned here is always a dictionary.  The path consists of the
    keys leading to the node in a tuple.

    tree must be a list or a dictionary; for anything else, an
    STCInternalError is raised.
    """
    if isinstance(tree, list):
        return _iterListNode(tree, ())
    elif isinstance(tree, dict):
        return _iterDictNode(tree, ())
    else:
        # tree is wrapped into a 1-tuple so a tuple-valued tree is formatted
        # as a whole rather than being unpacked as the format arguments
        # (which would make the % operator fail or misreport).
        raise common.STCInternalError("Bad node in tree %s"%(tree,))
106 107
def addDefaults(tree):
    """fills in defaults for values missing in a concrete syntax tree.

    Each node whose path ends in a key of stcsdefaults.defaultingFunctions
    is passed to the function registered for that key.  The tree is
    modified in place and returned.  For details, see stcsdefaults.
    """
    handlers = stcsdefaults.defaultingFunctions
    for path, node in iterNodes(tree):
        if not path:
            continue  # nodes with an empty path have no key to look up
        lastKey = path[-1]
        if lastKey in handlers:
            handlers[lastKey](node)
    return tree
117 118
def removeDefaults(tree):
    """strips defaults from a concrete syntax tree.

    Each node whose path ends in a key of stcsdefaults.undefaultingFunctions
    is passed to the function registered for that key.  The tree is
    modified in place and returned.  For details, see stcsdefaults.
    """
    handlers = stcsdefaults.undefaultingFunctions
    for path, node in iterNodes(tree):
        if not path:
            continue  # nodes with an empty path have no key to look up
        lastKey = path[-1]
        if lastKey in handlers:
            handlers[lastKey](node)
    return tree
128 129
def makeTree(parseResult):
    """returns the pyparsing parseResult as a data structure consisting
    of simple python dicts and lists.

    The "tree" has two kinds of nodes: Dictionaries having lists as
    values, and lists containing (as a rule) literals or (for more deeply
    nested constructs, which are rare in STC-S) other dictionaries of
    this kind.

    A parse node becomes a dict node if it has named children.  The root
    always is a dict.

    Note that unnamed children of nodes becoming dicts will be lost in
    the result.

    Empty parse results yield None, and named children that turn out
    to be None are left out of the resulting dict.
    """
    if not len(parseResult):  # empty parse results become Nones
        return None

    # keys() is materialised before testing it: depending on the pyparsing
    # and python versions it may be an iterator, and an iterator is true
    # even when it has nothing to yield.
    names = list(parseResult.keys())
    if names:  # named children, generate a dict
        res = {}
        for name in names:
            value = parseResult[name]
            if isinstance(value, ParseResults):
                value = makeTree(value)
            if value is not None:  # discard empty branches
                res[name] = value
        return res

    # no named children, generate a list
    if isinstance(parseResult[0], ParseResults):
        return [makeTree(child) for child in parseResult]
    return list(parseResult)
162 163
164 -def _reFromKeys(iterable):
165 """returns a regular expression matching any of the strings in iterable. 166 167 The trick is that the longest keys must come first. 168 """ 169 return "|".join(sorted(iterable, key=lambda x:-len(x)))
170 171
172 -def _makeSymDict(locals, exportAll):
173 """returns a dictionary of pyparsing symbols defined in the locals. 174 175 locals would be the value locals() as a rule. 176 """ 177 syms = dict((n, v) for n, v in locals.iteritems() 178 if hasattr(v, "setName")) 179 if not exportAll: 180 syms = dict((n, v) for n, v in syms.iteritems() 181 if not n.startswith("_")) 182 return syms
183 184
185 -def _stringifyBlank(s, p, t):
186 """a parse action joining items in parse results with blanks. 187 """ 188 return " ".join(t)
189
190 -def _stringify(s, p, t):
191 """a parse action joining items in parse results. 192 """ 193 return "".join(t)
194
195 -def _makeSingle(s, p, t):
196 """a parse action that returns the first item of the tokens. 197 198 You typically want this when you know there's only one token, e.g., 199 on Disjunctions or such 200 """ 201 return t[0]
202 203
def _getSTCSGrammar(numberLiteral,
        timeLiteral,
        _exportAll=False,
        _addGeoReferences=False,
        _astroYearOverride=None):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _exportAll is handed on to _makeSymDict; if true, symbols with names
    starting with an underscore are part of the result, too.

    _addGeoReferences lets you write references to vectors in square
    brackets (like Circle [center] 20.).

    _astroYearOverride, if given, replaces the literal astro year in the
    epoch specification (but not in the equinox following a frame).

    The result is built from this function's locals(); hence, every
    local name bound to a pyparsing symbol is part of the returned
    grammar under that name.
    """
    with utils.pyparsingWhitechars("\n\t\r "):

        number = numberLiteral
        # presumably so the symbol is not exported a second time under the
        # parameter's name when locals() is evaluated below
        del numberLiteral

        # units
        _unitOpener = Suppress( CaselessKeyword("unit") )
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        spaceUnit = _unitOpener - OneOrMore( _spaceUnitWord ).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil") )("unit")
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            ).addParseAction(_stringifyBlank))("unit")

        # basic productions common to most STC-S subphrases
        literalAstroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        if _astroYearOverride:
            astroYear = _astroYearOverride
        else:
            astroYear = literalAstroYear

        fillfactor = (Suppress( CaselessKeyword("fillfactor")
            ) + number("fillfactor"))
        noEqFrame = (CaselessKeyword("J2000")
            | CaselessKeyword("B1950")
            | CaselessKeyword("ICRS")
            | CaselessKeyword("GALACTIC")
            | CaselessKeyword("GALACTIC_I")
            | CaselessKeyword("GALACTIC_II")
            | CaselessKeyword("SUPER_GALACTIC")
            | CaselessKeyword("GEO_C")
            | CaselessKeyword("GEO_D")
            | CaselessKeyword("HPR")
            | CaselessKeyword("HGS")
            | CaselessKeyword("HGC")
            | CaselessKeyword("HPC")
            | CaselessKeyword("UNKNOWNFrame"))("frame")
        eqFrameName = (CaselessKeyword("FK5")
            | CaselessKeyword("FK4")
            | CaselessKeyword("ECLIPTIC"))("frame")
        # the equinox always is a literal astro year, regardless of
        # _astroYearOverride
        eqFrame = eqFrameName + Optional( literalAstroYear("equinox") )
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword("JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos")
            + Optional( plEphemeris("plEphemeris") ))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

        # properties of coordinates
        error = Suppress( CaselessKeyword("Error") ) + OneOrMore( number )
        resolution = Suppress( CaselessKeyword("Resolution")
            ) + OneOrMore( number )
        size = Suppress( CaselessKeyword("Size") ) + OneOrMore(number)
        pixSize = Suppress( CaselessKeyword("PixSize") ) + OneOrMore(number)
        cooProps = (Optional( error("error") )
            + Optional( resolution("resolution") )
            + Optional( size("size") )
            + Optional( pixSize("pixSize") ))

        # properties of most spatial specs
        _coos = ZeroOrMore( number )("coos")
        _pos = Optional( ZeroOrMore( number )("pos") )
        if _addGeoReferences: # include references to vectors, for getColrefSymbols
            # matches an identifier in square brackets; the parse action
            # strips the brackets before building the GeometryColRef
            complexColRef = Regex('[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                lambda s,p,toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress( CaselessKeyword("Position") ) + _pos
        epochSpec = Suppress( CaselessKeyword("Epoch") ) - astroYear
        _spatialProps = Optional( spaceUnit ) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type")
            + OneOrMore( number )("pos"))
        velocityInterval = (
            Optional(
                CaselessKeyword("VelocityInterval")("type")
                + Optional( fillfactor )
                + _coos )
            + Optional( velocitySpec )
            + Optional( velocityUnit )
            + cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps +
            Optional( velocityInterval)("velocity"))
        _regionTail = Optional( positionSpec ) + _spatialTail
        _commonSpaceItems = ( frame + Optional( refpos ) +
            Optional( flavor ) + Optional(
                epochSpec("epoch").addParseAction(lambda s,p,t: t[0])))
        _commonRegionItems = Optional( fillfactor ) + _commonSpaceItems

        # times and time intervals
        # NOTE(review): unlike the other keyword regexes, this one is not
        # built with _reFromKeys, so alternatives are not sorted longest
        # first -- confirm no time scale is a prefix of another one.
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress( CaselessKeyword("Time") ) + timeLiteral
        _commonTimeItems = Optional( timeUnit ) + cooProps
        _intervalOpener = ( Optional( fillfactor ) +
            Optional( timescale("timescale") ) +
            Optional( refpos ) )
        _intervalCloser = Optional( timephrase("pos") ) + _commonTimeItems

        timeInterval = (CaselessKeyword("TimeInterval")("type") +
            _intervalOpener + ZeroOrMore( timeLiteral )("coos") +
            _intervalCloser)
        startTime = (CaselessKeyword("StartTime")("type") + _intervalOpener +
            timeLiteral.setResultsName("coos", True) + _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type") + _intervalOpener +
            timeLiteral.setResultsName("coos", True) + _intervalCloser)
        time = (CaselessKeyword("Time")("type") + Optional( timescale("timescale") ) +
            Optional( refpos ) + Optional(
                timeLiteral.setResultsName("pos", True) ) + _commonTimeItems)
        timeSubPhrase = (timeInterval
            | startTime
            | stopTime
            | time).addParseAction(makeTree)

        # atomic "geometries"; I do not bother to specify their actual
        # arguments since, without knowing the frame, they may be basically
        # anything.  Also, I want to allow geometry column references.
        _atomicGeometryKey = ( CaselessKeyword("AllSky").setName("sub-geometry")
            | CaselessKeyword("Circle")
            | CaselessKeyword("Ellipse")
            | CaselessKeyword("Box")
            | CaselessKeyword("Polygon")
            | CaselessKeyword("Convex")
            | CaselessKeyword("PositionInterval") )
        atomicGeometry = ( _atomicGeometryKey("type")
            + _commonRegionItems
            + _coos
            + _regionTail )

        # compound "geometries"
        # the expression is recursive (operands may be expressions again),
        # hence the Forward that is filled in further down
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (( _atomicGeometryKey("subtype") + _coos )
            | _compoundGeoExpression ).addParseAction(lambda s,p,t: dict(t))

        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = ( Suppress( '(' )
            + _compoundGeoOperand + Suppress( ')' ) )
        _compoundGeoExprUnary = ( _compoundGeoOperatorUnary("subtype")
            + _compoundGeoOperandsUnary("children") )

        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = ( Suppress( '(' )
            + _compoundGeoOperand + _compoundGeoOperand + Suppress( ')' ) )
        _compoundGeoExprBinary = ( _compoundGeoOperatorBinary("subtype")
            + _compoundGeoOperandsBinary("children") )

        _compoundGeoOperatorNary = ( CaselessKeyword("Union")
            | CaselessKeyword("Intersection") )
        _compoundGeoOperandsNary = ( Suppress( '(' )
            + _compoundGeoOperand + _compoundGeoOperand
            + ZeroOrMore( _compoundGeoOperand ) + Suppress( ')' ) )
        _compoundGeoExprNary = ( _compoundGeoOperatorNary("subtype")
            + _compoundGeoOperandsNary("children") )

        _compoundGeoExpression << ( _compoundGeoExprUnary
            | _compoundGeoExprBinary
            | _compoundGeoExprNary )
        # at the top level, the operator is stored as "type" (rather than
        # "subtype"), and frame metadata and a region tail are admitted
        compoundGeoPhrase = ( _compoundGeoOperatorUnary("type")
                + _commonRegionItems
                + _compoundGeoOperandsUnary("children") + _regionTail
            | _compoundGeoOperatorBinary("type")
                + _commonRegionItems
                + _compoundGeoOperandsBinary("children") + _regionTail
            | _compoundGeoOperatorNary("type")
                + _commonRegionItems
                - _compoundGeoOperandsNary("children") + _regionTail )

        # space subphrase
        positionInterval = ( CaselessKeyword("PositionInterval")("type")
            + _commonRegionItems
            + _coos
            + _regionTail )
        position = ( CaselessKeyword("Position")("type")
            + _commonSpaceItems
            + _pos
            + _spatialTail )
        spaceSubPhrase = ( positionInterval
            | position
            | atomicGeometry
            | compoundGeoPhrase ).addParseAction(makeTree)

        # spectral subphrase
        spectralSpec = (Suppress( CaselessKeyword("Spectral") )
            + number)("pos")
        _spectralTail = Optional( spectralUnit ) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type")
            + Optional( fillfactor )
            + Optional( refpos )
            + _coos
            + Optional( spectralSpec )
            + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type")
            + Optional( refpos )
            + _pos
            + _spectralTail)
        spectralSubPhrase = (spectralInterval | spectral ).addParseAction(
            makeTree)

        # redshift subphrase
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress( CaselessKeyword("Redshift") ) + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional( redshiftUnit ) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type")
            + Optional( fillfactor )
            + Optional( refpos )
            + Optional( redshiftType )
            + Optional( dopplerdef )
            + _coos
            + Optional( redshiftSpec )
            + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type")
            + Optional( refpos )
            + Optional( redshiftType )
            + Optional( dopplerdef )
            + _pos
            + _redshiftTail)
        redshiftSubPhrase = (redshiftInterval | redshift).addParseAction(
            makeTree)

        # system subphrase (extension, see docs)
        # ids match Name from XML spec; we're not doing char refs and similar here
        xmlName = Word(alphas+"_:", alphanums+'.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress( CaselessKeyword("System") )
            + xmlName("libSystem"))


        # top level
        stcsPhrase = ( #noflake: stcsPhrase is returned through locals()
            Optional( timeSubPhrase )("time") +
            Optional( spaceSubPhrase )("space") +
            Optional( spectralSubPhrase )("spectral") +
            Optional( redshiftSubPhrase )("redshift") +
            Optional( systemDefinition ) ) + StringEnd()

    # everything bound above that looks like a pyparsing symbol is exported
    return _makeSymDict(locals(), _exportAll)
457 458
def getSymbols(_exportAll=False, _colrefLiteral=None,
        _addGeoReferences=False):
    """returns an STC-S grammar with terminal values.

    This defines the literals for numbers, times (ISO, JD, MJD) and
    astro years and passes them to _getSTCSGrammar.  The result is a
    dictionary mapping symbol names to pyparsing symbols; it contains
    both what _getSTCSGrammar returns and the symbols defined here.

    If _colrefLiteral is given, it is accepted as an alternative
    wherever a number, a time, or an astro year (in the epoch
    specification) is expected.

    _exportAll and _addGeoReferences are handed through to
    _getSTCSGrammar; _exportAll also governs whether underscore-prefixed
    names from this function are exported.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        _exactNumericRE = r"[+-]?\d+(\.(\d+)?)?|[+-]?\.\d+"
        exactNumericLiteral = Regex(_exactNumericRE)
        # numbers may have an exponent (in either case); they are parsed
        # to floats
        numberLiteral = Regex(r"(?i)(%s)(E[+-]?\d+)?"%_exactNumericRE
            ).addParseAction(lambda s,p,toks: float(toks[0]))

        # all time literals are converted through functions from
        # gavo.stc.times
        jdLiteral = (Suppress( Literal("JD") ) + exactNumericLiteral
            ).addParseAction(lambda s,p,toks: times.jdnToDateTime(float(toks[0])))
        mjdLiteral = (Suppress( Literal("MJD") ) + exactNumericLiteral
            ).addParseAction(lambda s,p,toks: times.mjdToDateTime(float(toks[0])))
        isoTimeLiteral = Regex(r"\d\d\d\d-?\d\d-?\d\d(T\d\d:?\d\d:?\d\d(\.\d*)?Z?)?"
            ).addParseAction(lambda s,p,toks: times.parseISODT(toks[0]))
        timeLiteral = (isoTimeLiteral | jdLiteral | mjdLiteral)
        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")

        if _colrefLiteral:
            # ^ is pyparsing's longest-match alternative
            numberLiteral = _colrefLiteral ^ numberLiteral
            timeLiteral = _colrefLiteral ^ timeLiteral
            astroYear = _colrefLiteral ^ astroYear

    res = _getSTCSGrammar(numberLiteral,
        timeLiteral,
        _exportAll,
        _addGeoReferences=_addGeoReferences,
        _astroYearOverride=astroYear)
    # add the terminals defined here; same-named entries coming from
    # _getSTCSGrammar are overwritten
    res.update(_makeSymDict(locals(), _exportAll))
    return res
490 491
def getColrefSymbols():
    """returns an STC-S grammar with column references as values.

    Column references are written as identifiers in double quotes to
    cut down on ambiguities.  Only simple identifiers (letters, digits,
    underscores, not starting with a digit) are accepted, i.e., nothing
    that would need quoting in the SQL sense.  The quotes are removed
    before the name is turned into a common.ColRef.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        quotedName = Regex('"[A-Za-z_][A-Za-z_0-9]*"')
        quotedName.addParseAction(
            lambda s, p, toks: common.ColRef(toks[0][1:-1]))
    return getSymbols(_colrefLiteral=quotedName, _addGeoReferences=True)
def enableDebug(syms, debugNames=None):
    """switches on debugging for symbols from the grammar dictionary syms.

    debugNames is a sequence of keys into syms; if it is empty or not
    given, all symbols in syms are treated.  Each symbol treated has
    setDebug(True) called on it and is then named after its key.
    """
    for name in (debugNames or syms):
        symbol = syms[name]
        symbol.setDebug(True)
        symbol.setName(name)
# Grammar factories, each wrapped in a utils.CachedGetter; getCST uses
# getGrammar unless told otherwise.
# NOTE(review): presumably the wrapper makes sure each grammar is only
# built once -- confirm against utils.CachedGetter.
getColrefGrammar = utils.CachedGetter(getColrefSymbols)
getGrammar = utils.CachedGetter(getSymbols)
def getCST(literal, grammarFactory=None):
    """returns a CST for an STC-S expression.

    grammarFactory is a function returning the grammar, in this case
    either getGrammar (which gets used if the argument is left out) or
    getColrefGrammar.

    Input consisting of whitespace only yields an empty dictionary.
    pyparsing's parse errors are turned into STCSParseErrors carrying
    the expression and the position of the problem.  Defaults are added
    to the tree before it is returned.
    """
    if not literal.strip():
        # special case: there is nothing to parse, and the CST is empty
        return {}

    factory = getGrammar if grammarFactory is None else grammarFactory
    try:
        parsed = utils.pyparseString(factory()["stcsPhrase"], literal)
        tree = makeTree(parsed)
    except (ParseException, ParseSyntaxException) as ex:
        raise common.STCSParseError(
            "Invalid STCS expression (%s at %s)"%(ex.msg, ex.loc),
            expr=literal, pos=ex.loc)
    addDefaults(tree)
    return tree
if __name__=="__main__":
    # ad-hoc debugging aid: parse a sample phrase with the column
    # reference grammar, with debugging switched on for all symbols,
    # and dump the resulting tree
    import pprint
    syms = getColrefSymbols()
    enableDebug(syms)
    sampleResult = syms["stcsPhrase"].parseString(
        "Position ICRS Epoch J2000.0 20 21", parseAll=True)
    pprint.pprint(makeTree(sampleResult))