Package gavo :: Package base :: Module unitconv
[frames] | no frames]

Source Code for Module gavo.base.unitconv

  1  """ 
  2  A python module to parse VOUnit strings and compute conversion factors. 
  3   
  4  We believe this implements the full VOUnit specification. 
  5   
  6  To use this, you must have the gavo utils package  installed.  For 
  7  details, see http://soft.g-vo.org.  The changes required to make this 
  8  work without gavo.utils are minute, though.  If you want that, talk to 
  9  the authors. 
 10   
 11  Unit tests for this are at 
 12   
 13  http://svn.ari.uni-heidelberg.de/svn/gavo/python/trunk/tests/unitconvtest.py 
 14  """ 
 15   
 16  #c Copyright 2008-2019, the GAVO project 
 17  #c 
 18  #c This program is free software, covered by the GNU GPL.  See the 
 19  #c COPYING file in the source distribution. 
 20   
 21   
 22  from __future__ import print_function 
 23   
 24  import math 
 25   
 26  import pyparsing 
 27   
 28  from gavo import utils 
29 30 31 -class IncompatibleUnits(utils.Error):
32 pass
33
34 -class BadUnit(utils.Error):
35 pass
36 37 PLANCK_H = 6.626070040e-34 38 LIGHT_C = 299792458.0 39 40 41 # We can't yet restructure the tree, so we don't do SI-base-casting for 42 # compound units. Also, we don't change anything when a change of exponent 43 # would be necessary 44 PLAIN_UNITS = units = { 45 "a": (3600*24*365.25, "s"), # This is the SI/julian year! 46 "A": (1, "A"), # *Ampere* not Angstrom 47 "adu": (1, "adu"), 48 u"\xc5": (1e-10, "m"), 49 "Angstrom": (1e-10, "m"), 50 "angstrom": (1e-10, "m"), 51 "AU": (1.49598e11, "m"), 52 "arcmin": (math.pi/180./60., "rad"), 53 "arcsec": (math.pi/180./3600., "rad"), 54 "barn": (1, "barn"), # 1e-28 m2 55 "beam": (1, "beam"), 56 "bit": (1, "bit"), 57 "bin": (1, "bin"), 58 "byte": (1, "byte"), # I don't think we ever want to unify bit and byte 59 "C": (1, "C"), # A.s 60 "cd": (1, "cd"), 61 "ct": (1, "ct"), 62 "count": (1, "ct"), 63 "chan": (1, "chan"), 64 "D": (1e-19/3., "D"), # C.m 65 "d": (3600*24, "s"), 66 "deg": (math.pi/180., "rad"), 67 "erg": (1e-7, "J"), 68 "eV": (1.6021766208e-19, "J"), 69 "F": (1, "F"), # C/V 70 "g": (1e-3, "kg"), 71 "G": (1e-4, "T"), 72 "h": (3600., "s"), 73 "H": (1, "H"), # Wb/A 74 "Hz": (1, "Hz"), # s-1 75 "J": (1, "J"), # kg m^2/s^2 76 "Jy": (1, "Jy"), # 1e-26 W/m2/Hz 77 "K": (1, "K"), 78 "lm": (1, "lm"), 79 "lx": (1, "lx"), 80 "lyr": (2627980686828.0, "m"), 81 "m": (1, "m"), 82 "mag": (1, "mag"), # correlate that with, erm, lux? 83 "mas": (math.pi/180./3.6e6, "rad"), 84 "min": (60, "s"), 85 "mol": (1, "mol"), 86 "N": (1, "N"), # kg.m/s2 87 "Ohm": (1, "Ohm"), # V/A 88 "Pa": (1, "Pa"), # N/m^2 89 "pc": (3.0857e16, "m"), 90 "ph": (1, "ph"), 91 "photon": (1, "ph"), 92 "pix": (1, "pix"), 93 "pixel": (1, "pix"), 94 "R": (1, "R"), # Rayleigh 95 "rad": (1, "rad"), 96 "Ry": (2.17989e-18, "J"), 97 "s": (1, "s"), 98 "S": (1, "S"), # A/V 99 "solLum": (3.826e26, "W"), 100 "solMass": (1.989e30, "kg"), 101 "solRad": (6.9559e8, "m"), 102 "sr": (1, "sr"), 103 "T": (1, "T"), # V.s/m2 104 "u": (1.66053886e-27, "kg"), 105 "V": (1, "V"), 106 "voxel": (1, "voxel"), 107 "W": (1, "W"), # kg.m2/s3 or A.V -- that's going to be a tough one 108 "Wb": (1, "Wb"), 109 "yr": (3600*24*365.25, "s"), # This is the SI/julian year! 110 } 111 112 113 EXCEPTIONAL_CONVERSIONS = { 114 # tuple-form of units: conversions that are still linear but 115 # somehow ideosyncratic. 116 117 # wave number to energy 118 ((('m', -1.0),), (('J', 1),)): PLANCK_H*LIGHT_C, 119 } 120 121 # These are the keys from PLAIN_UNITS that cannot take SI prefixes 122 NON_PREFIXABLE = frozenset([ 123 "AU", "au", "D", "Ry", "arcmin", "beam", "bin", "chan", "d", "h", "mas", 124 "min", "ph", "photon", "pix", "pixel", "solLum", "solMass", "solRad", 125 "voxel"]) 126 127 128 PREFIXES = prefixes = {"d": 1e-1, "c": 1e-2, "m":1e-3, "u":1e-6, 129 "n":1e-9, "p":1e-12, "f":1e-15, "a":1e-18, "z":1e-21, "y":1e-24, 130 "da": 10., "h":1e2, "k":1e3, "M":1e6, "G":1e9, "T":1e12, "P":1e15, 131 "E":1e18, "Z":1e21, "Y":1e24, "": 1} 132 133 134 # these map VOUnit Table 5 "magic" units to UnitNode constructor arguments 135 AMBIGUOUS_STRINGS = { 136 'Pa': ('Pa', ''), 137 'ha': ('yr', 'h'), 138 'cd': ('cd', ''), 139 'dB': ('Bel', 'd'), 140 'B': ('Byte', ''), 141 'au': ('AU', ''), 142 'dadu': ('adu', 'd'), 143 }
144 145 146 -def formatScaleFactor(aFloat):
147 """returns a reasonable decorative but python-readable representation 148 of aFloat. 149 150 Floats looking good as simple decimals (modulus between 0.01 and 1000) 151 are returned without exponent. 152 """ 153 if 0.01<=abs(aFloat)<=1000: 154 return ("%f"%aFloat).rstrip("0") 155 156 exponent = int(math.log10(aFloat)) 157 mantissa = ("%f"%(aFloat/10**exponent)).rstrip("0") 158 return "%se%+d"%(mantissa, exponent)
159
160 161 -class _Node(object):
162 """the abstract base for a node in a Unit tree. 163 164 All these must implement fromToks methods that can be called 165 as pyparsing callbacks. 166 """ 167 @classmethod
168 - def fromToks(cls, s, p, toks):
169 raise NotImplementedError("%s objects have no implementation of" 170 " fromToks (i.e., they are broken)."%cls.__name__)
171
172 173 -class UnitNode(_Node):
174 """a terminal node containing a unit, possibly with a prefix 175 176 This is normally constructed through the fromToks constructor. 177 178 Check out the unit, prefix, and isUnknown attributes. The prefix 179 can be turned to a factor using the PREFIXES dictionary. An empty 180 prefix (factor 1) is represented by an empty string. 181 """
182 - def __init__(self, unit, prefix=""):
183 self.unit = unit 184 self.isUnknown = self.unit not in PLAIN_UNITS 185 self.prefix = prefix
186 187 @classmethod
188 - def fromToks(cls, s, p, toks):
189 assert len(toks)==1 190 unitString = toks[0] 191 192 if unitString in AMBIGUOUS_STRINGS: 193 return cls(*AMBIGUOUS_STRINGS[unitString]) 194 195 elif unitString in PLAIN_UNITS: 196 return cls(unitString) 197 198 elif len(unitString)>2 and unitString[:2]=="da": 199 return cls(unitString[2:], "da") 200 201 elif len(unitString)>1 and unitString[0] in PREFIXES: 202 if unitString[1:] in NON_PREFIXABLE: 203 raise BadUnit("No Prefixes allowed on %s"%unitString[1:]) 204 return cls(unitString[1:], unitString[0]) 205 206 else: 207 return cls(unitString, '')
208
209 - def __str__(self):
210 return "%s%s"%(self.prefix, self.unit)
211
212 - def __repr__(self):
213 if self.isUnknown: 214 return "U?(%s ; %s)"%(self.prefix, repr(self.unit)) 215 else: 216 return "U(%s ; %s)"%(self.prefix, repr(self.unit))
217
218 - def getSI(self):
219 """returns a pair of factor and basic unit. 220 221 Basic units are what's in the defining pairs in the PLAIN_UNITS dict. 222 """ 223 if self.isUnknown: 224 # these don't hurt if they're on both sides of an equation 225 return PREFIXES[self.prefix], {self.unit: 1} 226 return 227 factor, basic = PLAIN_UNITS[self.unit] 228 return PREFIXES[self.prefix]*factor, {basic: 1}
229
230 231 -class FunctionApplication(_Node):
232 """A function applied to a term. 233 """ 234 _pythonFunc = { 235 "ln": math.log, 236 "log": math.log10, 237 "exp": math.exp, 238 "sqrt": math.sqrt, 239 } 240
241 - def __init__(self, funcName, term):
242 self.funcName = funcName 243 self.term = term 244 self.isUnknown = (self.funcName not in self._pythonFunc 245 ) or self.term.isUnknown
246 247 @classmethod
248 - def fromToks(cls, s, p, toks):
249 assert len(toks)==2 250 return cls(toks[0], toks[1])
251
252 - def __str__(self):
253 return "%s(%s)"%(self.funcName, self.term)
254
255 - def __repr__(self):
256 return "A(%s ; %s)"%(repr(self.funcName), repr(self.term))
257
258 - def getSI(self):
259 factor, powers = self.term.getSI() 260 if self.funcName=="sqrt": 261 powers = dict((key, value/2.) 262 for key, value in powers.iteritems()) 263 else: 264 powers = dict(((self.funcName, key), value) 265 for key, value in powers.iteritems()) 266 return self._pythonFunc[self.funcName](factor), powers
267
268 269 -class QuotedUnitNode(_Node):
270 """a quoted ("defined unknown") unit. 271 """
272 - def __init__(self, unit):
273 self.unit = unit 274 self.isUnknown = True
275 276 @classmethod
277 - def fromToks(cls, s, p, toks):
278 return cls(toks[1])
279
280 - def __str__(self):
281 return "'%s'"%(self.unit)
282
283 - def __repr__(self):
284 return "U?('%s')"%(repr(self.unit))
285
286 - def getSI(self):
287 # These units stand for themselves. They don't really hurt if 288 # they're in both terms of a conversion 289 return 1, {self.unit: 1}
290
291 292 -class Factor(_Node):
293 """A UnitNode with a power. 294 """
295 - def __init__(self, unit, power):
296 self.unit, self.power = unit, power 297 self.isUnknown = self.unit.isUnknown
298 299 @classmethod
300 - def fromToks(cls, s, p, toks):
301 if len(toks)==2: 302 return cls(toks[0], toks[1]) 303 elif len(toks)==1: 304 return cls(toks[0], 1) 305 else: 306 raise Exception("This cannot happen")
307
308 - def __str__(self):
309 powerLit = repr(self.power).rstrip("0").rstrip(".") 310 if "." in powerLit: 311 # see if we can come up with a nice fraction 312 for denom in range(2, 8): 313 if abs(int(self.power*denom)-self.power*denom)<1e-13: 314 powerLit = "(%d/%d)"%(round(self.power*denom), denom) 315 break 316 317 if powerLit=="1": 318 powerLit = "" 319 else: 320 powerLit = "**"+powerLit 321 322 return "%s%s"%(self.unit, powerLit)
323
324 - def __repr__(self):
325 return "F(%s ; %s)"%(repr(self.unit), repr(self.power))
326
327 - def getSI(self):
328 factor, powers = self.unit.getSI() 329 powers[powers.keys()[0]] = self.power 330 return factor**self.power, powers
331
332 333 -class Term(_Node):
334 """A Node containing two factors and an operator. 335 336 The operator here is either . (product) or / (division). 337 """
338 - def __init__(self, op1, operator, op2):
339 self.op1, self.operator, self.op2 = op1, operator, op2 340 self.isUnknown = self.op1.isUnknown or self.op2.isUnknown
341 342 @classmethod
343 - def fromToks(cls, s, p, toks):
344 if len(toks)==1: 345 # avoid to many internal nodes: a 1-operand expression is the operand 346 return toks[0] 347 return cls(toks[0], toks[1], toks[2])
348
349 - def __str__(self):
350 op1Lit, op2Lit = str(self.op1), str(self.op2) 351 if self.operator=='/' and isinstance(self.op2, Term): 352 op2Lit = "(%s)"%op2Lit 353 return "%s%s%s"%(op1Lit, self.operator, op2Lit)
354
355 - def __repr__(self):
356 return "T(%s ; %s ; %s)"%(repr(self.op1), 357 repr(self.operator), repr(self.op2))
358
359 - def getSI(self):
360 factor1, powers1 = self.op1.getSI() 361 factor2, powers2 = self.op2.getSI() 362 newPowers = powers1 363 if self.operator==".": 364 for si, power in powers2.iteritems(): 365 newPowers[si] = newPowers.get(si, 0)+power 366 return factor1*factor2, newPowers 367 elif self.operator=="/": 368 for si, power in powers2.iteritems(): 369 newPowers[si] = newPowers.get(si, 0)-power 370 return factor1/factor2, newPowers 371 else: 372 raise Exception("This can't happen")
373
374 375 -class Expression(_Node):
376 """The root node of an expression tree. 377 378 This contains a term and optionally a scale factor. 379 """
380 - def __init__(self, term, scaleFactor):
381 self.term, self.scaleFactor = term, scaleFactor 382 self.isUnknown = self.term.isUnknown
383 384 @classmethod
385 - def fromToks(cls, s, p, toks):
386 if len(toks)==2: 387 return cls(toks[1], float(toks[0])) 388 389 elif len(toks)==1: 390 return cls(toks[0], 1) 391 392 else: 393 raise Exception("This can't happen")
394
395 - def __str__(self):
396 if self.scaleFactor==1: 397 return str(self.term) 398 else: 399 return "%s %s"%(formatScaleFactor(self.scaleFactor), self.term)
400
401 - def __repr__(self):
402 return "R(%s ; %s)"%(repr(self.scaleFactor), repr(self.term))
403
404 - def getSI(self):
405 """returns a pair of a numeric factor and a dict mapping SI units to 406 their powers. 407 """ 408 factor, siPowers = self.term.getSI() 409 return factor*self.scaleFactor, siPowers
410
411 412 -def _buildTerm(s, pos, toks):
413 """a parseAction for terms, making trees out of parse lists 414 left-associatively. 415 """ 416 toks = list(toks) 417 curOperand = toks.pop(0) 418 while len(toks)>1: 419 curOperand = Term.fromToks(s, pos, [curOperand, toks[0], toks[1]]) 420 del toks[:2] 421 return curOperand
422
423 424 -def evalAll(s, p, toks):
425 """a parse action evaluating the whole match as a python expression. 426 427 Obviously, this should only be applied to carefully screened nonterminals. 428 """ 429 return eval("".join(str(tok) for tok in toks))
430
431 432 -class getUnitGrammar(utils.CachedResource):
433 """the grammar to parse VOUnits. 434 435 After initialization, the class has a "symbols" dictionary containing 436 the individual nonterminals. 437 """ 438 @classmethod
439 - def impl(cls):
440 from pyparsing import (Word, Literal, Regex, 441 Optional, ZeroOrMore, alphas, 442 Suppress, Forward, White) 443 444 with utils.pyparsingWhitechars(''): 445 unit_atom = Word(alphas).addParseAction(UnitNode.fromToks) 446 unit_atom.setName("atomic unit") 447 quoted_unit_atom = ("'" + Word(alphas) + "'" 448 ).addParseAction(QuotedUnitNode.fromToks) 449 quoted_unit_atom.setName("quoted atomic unit") 450 451 OPEN_P = Literal('(') 452 CLOSE_P = Literal(')') 453 SIGN = Literal('+') | Literal('-') 454 FUNCTION_NAME = Word(alphas) 455 UNSIGNED_INTEGER = Word("01234567890") 456 SIGNED_INTEGER = SIGN + UNSIGNED_INTEGER 457 FLOAT = Regex(r"[+-]?([0-9]+(\.[0-9]*)?)") 458 VOFLOAT = Regex(r"0.[0-9]+([eE][+-]?[0-9]+)?" 459 "|[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?") 460 461 integer = (SIGNED_INTEGER | UNSIGNED_INTEGER).setName("integer") 462 power_operator = Literal('**') 463 multiplication_operator = Literal(".") 464 division_operator = Literal("/") 465 numeric_power = (integer 466 | OPEN_P + integer + CLOSE_P 467 | OPEN_P + FLOAT + CLOSE_P 468 | OPEN_P + integer + '/' 469 + UNSIGNED_INTEGER.addParseAction(lambda s, p, t: t[0]+".") 470 + CLOSE_P) 471 numeric_power.setParseAction(evalAll) 472 473 pow_10 = Literal("10") + power_operator + numeric_power 474 scale_factor = (pow_10 | VOFLOAT 475 ).setName("scale_factor" 476 ).setParseAction(evalAll) 477 478 any_unit_atom = unit_atom | quoted_unit_atom 479 factor = (any_unit_atom 480 + Optional( Suppress(power_operator) + numeric_power ) 481 ).setName("factor" 482 ).addParseAction(Factor.fromToks) 483 484 complete_expression = Forward() 485 function_application = (FUNCTION_NAME + 486 Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P) 487 ).setName("function application") 488 function_application.addParseAction(FunctionApplication.fromToks) 489 490 unit_expression = ( 491 Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P) 492 | ( factor 493 ^ function_application )).setName("unit expression") 494 495 product_of_units = (unit_expression 496 + ZeroOrMore(multiplication_operator + unit_expression) 497 ).setName("units term" 498 ).setParseAction(_buildTerm) 499 500 complete_expression << ( 501 product_of_units + Optional(division_operator + unit_expression) ) 502 complete_expression.setParseAction(Term.fromToks) 503 504 input = (Optional(scale_factor) 505 + Optional(Suppress(White())) 506 + complete_expression 507 ).setName("unit expression" 508 ).setParseAction(Expression.fromToks) 509 510 cls.symbols = locals() 511 return input
512 513 @classmethod
514 - def enableDebuggingOutput(cls):
515 """(not user-servicable) 516 """ 517 from pyparsing import ParserElement 518 for name, sym in cls.symbols.iteritems(): 519 if isinstance(sym, ParserElement): 520 sym.setDebug(True) 521 sym.setName(name)
522
523 524 -def asSequence(unitDict):
525 """returns a sorted tuple of (si, power) for a result of getSI(). 526 527 These should be more suitable for dimensional analysis. 528 """ 529 return tuple(sorted(unitDict.iteritems()))
530
531 532 -def parseUnit(unitStr, unitGrammar=getUnitGrammar()):
533 try: 534 return utils.pyparseString(unitGrammar, unitStr, parseAll=True)[0] 535 except pyparsing.ParseException as msg: 536 raise utils.logOldExc( 537 BadUnit("%s at col. %d"%(repr(unitStr), msg.column)))
538
539 540 -def computeConversionFactor(unitStr1, unitStr2):
541 """returns the factor needed to get from quantities given in unitStr1 542 to unitStr2. 543 544 Both must be given in VOUnits form. 545 546 This function may raise a BadUnit if one of the strings are 547 malformed, or an IncompatibleUnit exception if the units don't have 548 the same SI base. 549 550 If the function is successful, unitStr1 = result*unitStr2 551 """ 552 if unitStr1==unitStr2: 553 return 1 554 factor1, powers1 = parseUnit(unitStr1).getSI() 555 factor2, powers2 = parseUnit(unitStr2).getSI() 556 powers1, powers2 = asSequence(powers1), asSequence(powers2) 557 558 if (powers1, powers2) in EXCEPTIONAL_CONVERSIONS: 559 return factor1/factor2*EXCEPTIONAL_CONVERSIONS[powers1, powers2] 560 if (powers2, powers1) in EXCEPTIONAL_CONVERSIONS: 561 return factor1/factor2/EXCEPTIONAL_CONVERSIONS[powers2, powers1] 562 563 if powers1!=powers2: 564 raise IncompatibleUnits("%s and %s do not have the same SI base"%( 565 unitStr1, unitStr2)) 566 567 # tuples as keys in powers come from non-polynomial function 568 # applications; in such cases, multiplication is not good enough 569 # for conversions, and thus we give up. 570 for u, _ in powers1: 571 if isinstance(u, tuple): 572 raise IncompatibleUnits("%s has a non-polynomial function. No" 573 " conversion by multiplication possible"%(unitStr1)) 574 for u, _ in powers2: 575 if isinstance(u, tuple): 576 raise IncompatibleUnits("%s has a non-polynomial function. No" 577 " conversion by multiplication possible"%(unitStr2)) 578 579 return factor1/factor2
580
581 582 -def computeColumnConversions(newColumns, oldColumns):
583 """returns a dict of conversion factors between newColumns and oldColumns. 584 585 Both arguments are iterables of columns. 586 587 For every column in newColumn, the function sees if the units of 588 newColumn and oldColumn match. If they don't, compute a conversion 589 factor to be multiplied to oldColumns values to make them newColumns 590 values and add it to the result dict. 591 592 The function raises a DataError if a column in newColumns has no 593 match in oldColumns. 594 """ 595 res = {} 596 for newCol in newColumns: 597 if not newCol.name in oldColumns: 598 raise utils.DataError( 599 "Request for column %s from %s cannot be satisfied in %s"%( 600 newCol.name, newColumns, oldColumns)) 601 oldCol = oldColumns.getColumnByName(newCol.name) 602 try: 603 if newCol.unit!=oldCol.unit: 604 res[newCol.name] = computeConversionFactor(oldCol.unit, newCol.unit) 605 except BadUnit: # we ignore bad units, assume they'll be handled by 606 # valuemappers. 607 pass 608 return res
609 610 611 if __name__=="__main__": 612 getUnitGrammar.enableDebuggingOutput() 613 g = getUnitGrammar() 614 res = g.parseString("10**28 s", parseAll=True)[0] 615 print(res) 616