1 """
2 A python module to parse VOUnit strings and compute conversion factors.
3
4 We believe this implements the full VOUnit specification.
5
6 To use this, you must have the gavo utils package installed. For
7 details, see http://soft.g-vo.org. The changes required to make this
8 work without gavo.utils are minute, though. If you want that, talk to
9 the authors.
10
11 Unit tests for this are at
12
13 http://svn.ari.uni-heidelberg.de/svn/gavo/python/trunk/tests/unitconvtest.py
14 """
15
16
17
18
19
20
21
22 from __future__ import print_function
23
24 import math
25
26 import pyparsing
27
28 from gavo import utils
33
36
37 PLANCK_H = 6.626070040e-34
38 LIGHT_C = 299792458.0
39
40
41
42
43
44 PLAIN_UNITS = units = {
45 "a": (3600*24*365.25, "s"),
46 "A": (1, "A"),
47 "adu": (1, "adu"),
48 u"\xc5": (1e-10, "m"),
49 "Angstrom": (1e-10, "m"),
50 "angstrom": (1e-10, "m"),
51 "AU": (1.49598e11, "m"),
52 "arcmin": (math.pi/180./60., "rad"),
53 "arcsec": (math.pi/180./3600., "rad"),
54 "barn": (1, "barn"),
55 "beam": (1, "beam"),
56 "bit": (1, "bit"),
57 "bin": (1, "bin"),
58 "byte": (1, "byte"),
59 "C": (1, "C"),
60 "cd": (1, "cd"),
61 "ct": (1, "ct"),
62 "count": (1, "ct"),
63 "chan": (1, "chan"),
64 "D": (1e-19/3., "D"),
65 "d": (3600*24, "s"),
66 "deg": (math.pi/180., "rad"),
67 "erg": (1e-7, "J"),
68 "eV": (1.6021766208e-19, "J"),
69 "F": (1, "F"),
70 "g": (1e-3, "kg"),
71 "G": (1e-4, "T"),
72 "h": (3600., "s"),
73 "H": (1, "H"),
74 "Hz": (1, "Hz"),
75 "J": (1, "J"),
76 "Jy": (1, "Jy"),
77 "K": (1, "K"),
78 "lm": (1, "lm"),
79 "lx": (1, "lx"),
80 "lyr": (2627980686828.0, "m"),
81 "m": (1, "m"),
82 "mag": (1, "mag"),
83 "mas": (math.pi/180./3.6e6, "rad"),
84 "min": (60, "s"),
85 "mol": (1, "mol"),
86 "N": (1, "N"),
87 "Ohm": (1, "Ohm"),
88 "Pa": (1, "Pa"),
89 "pc": (3.0857e16, "m"),
90 "ph": (1, "ph"),
91 "photon": (1, "ph"),
92 "pix": (1, "pix"),
93 "pixel": (1, "pix"),
94 "R": (1, "R"),
95 "rad": (1, "rad"),
96 "Ry": (2.17989e-18, "J"),
97 "s": (1, "s"),
98 "S": (1, "S"),
99 "solLum": (3.826e26, "W"),
100 "solMass": (1.989e30, "kg"),
101 "solRad": (6.9559e8, "m"),
102 "sr": (1, "sr"),
103 "T": (1, "T"),
104 "u": (1.66053886e-27, "kg"),
105 "V": (1, "V"),
106 "voxel": (1, "voxel"),
107 "W": (1, "W"),
108 "Wb": (1, "Wb"),
109 "yr": (3600*24*365.25, "s"),
110 }
111
112
113 EXCEPTIONAL_CONVERSIONS = {
114
115
116
117
118 ((('m', -1.0),), (('J', 1),)): PLANCK_H*LIGHT_C,
119 }
120
121
122 NON_PREFIXABLE = frozenset([
123 "AU", "au", "D", "Ry", "arcmin", "beam", "bin", "chan", "d", "h", "mas",
124 "min", "ph", "photon", "pix", "pixel", "solLum", "solMass", "solRad",
125 "voxel"])
126
127
128 PREFIXES = prefixes = {"d": 1e-1, "c": 1e-2, "m":1e-3, "u":1e-6,
129 "n":1e-9, "p":1e-12, "f":1e-15, "a":1e-18, "z":1e-21, "y":1e-24,
130 "da": 10., "h":1e2, "k":1e3, "M":1e6, "G":1e9, "T":1e12, "P":1e15,
131 "E":1e18, "Z":1e21, "Y":1e24, "": 1}
132
133
134
135 AMBIGUOUS_STRINGS = {
136 'Pa': ('Pa', ''),
137 'ha': ('yr', 'h'),
138 'cd': ('cd', ''),
139 'dB': ('Bel', 'd'),
140 'B': ('Byte', ''),
141 'au': ('AU', ''),
142 'dadu': ('adu', 'd'),
143 }
159
162 """the abstract base for a node in a Unit tree.
163
164 All these must implement fromToks methods that can be called
165 as pyparsing callbacks.
166 """
167 @classmethod
171
174 """a terminal node containing a unit, possibly with a prefix
175
176 This is normally constructed through the fromToks constructor.
177
178 Check out the unit, prefix, and isUnknown attributes. The prefix
179 can be turned to a factor using the PREFIXES dictionary. An empty
180 prefix (factor 1) is represented by an empty string.
181 """
186
187 @classmethod
189 assert len(toks)==1
190 unitString = toks[0]
191
192 if unitString in AMBIGUOUS_STRINGS:
193 return cls(*AMBIGUOUS_STRINGS[unitString])
194
195 elif unitString in PLAIN_UNITS:
196 return cls(unitString)
197
198 elif len(unitString)>2 and unitString[:2]=="da":
199 return cls(unitString[2:], "da")
200
201 elif len(unitString)>1 and unitString[0] in PREFIXES:
202 if unitString[1:] in NON_PREFIXABLE:
203 raise BadUnit("No Prefixes allowed on %s"%unitString[1:])
204 return cls(unitString[1:], unitString[0])
205
206 else:
207 return cls(unitString, '')
208
210 return "%s%s"%(self.prefix, self.unit)
211
213 if self.isUnknown:
214 return "U?(%s ; %s)"%(self.prefix, repr(self.unit))
215 else:
216 return "U(%s ; %s)"%(self.prefix, repr(self.unit))
217
219 """returns a pair of factor and basic unit.
220
221 Basic units are what's in the defining pairs in the PLAIN_UNITS dict.
222 """
223 if self.isUnknown:
224
225 return PREFIXES[self.prefix], {self.unit: 1}
226 return
227 factor, basic = PLAIN_UNITS[self.unit]
228 return PREFIXES[self.prefix]*factor, {basic: 1}
229
232 """A function applied to a term.
233 """
234 _pythonFunc = {
235 "ln": math.log,
236 "log": math.log10,
237 "exp": math.exp,
238 "sqrt": math.sqrt,
239 }
240
242 self.funcName = funcName
243 self.term = term
244 self.isUnknown = (self.funcName not in self._pythonFunc
245 ) or self.term.isUnknown
246
247 @classmethod
249 assert len(toks)==2
250 return cls(toks[0], toks[1])
251
253 return "%s(%s)"%(self.funcName, self.term)
254
256 return "A(%s ; %s)"%(repr(self.funcName), repr(self.term))
257
267
270 """a quoted ("defined unknown") unit.
271 """
273 self.unit = unit
274 self.isUnknown = True
275
276 @classmethod
279
281 return "'%s'"%(self.unit)
282
284 return "U?('%s')"%(repr(self.unit))
285
287
288
289 return 1, {self.unit: 1}
290
293 """A UnitNode with a power.
294 """
296 self.unit, self.power = unit, power
297 self.isUnknown = self.unit.isUnknown
298
299 @classmethod
301 if len(toks)==2:
302 return cls(toks[0], toks[1])
303 elif len(toks)==1:
304 return cls(toks[0], 1)
305 else:
306 raise Exception("This cannot happen")
307
309 powerLit = repr(self.power).rstrip("0").rstrip(".")
310 if "." in powerLit:
311
312 for denom in range(2, 8):
313 if abs(int(self.power*denom)-self.power*denom)<1e-13:
314 powerLit = "(%d/%d)"%(round(self.power*denom), denom)
315 break
316
317 if powerLit=="1":
318 powerLit = ""
319 else:
320 powerLit = "**"+powerLit
321
322 return "%s%s"%(self.unit, powerLit)
323
325 return "F(%s ; %s)"%(repr(self.unit), repr(self.power))
326
328 factor, powers = self.unit.getSI()
329 powers[powers.keys()[0]] = self.power
330 return factor**self.power, powers
331
332
333 -class Term(_Node):
334 """A Node containing two factors and an operator.
335
336 The operator here is either . (product) or / (division).
337 """
338 - def __init__(self, op1, operator, op2):
339 self.op1, self.operator, self.op2 = op1, operator, op2
340 self.isUnknown = self.op1.isUnknown or self.op2.isUnknown
341
342 @classmethod
344 if len(toks)==1:
345
346 return toks[0]
347 return cls(toks[0], toks[1], toks[2])
348
350 op1Lit, op2Lit = str(self.op1), str(self.op2)
351 if self.operator=='/' and isinstance(self.op2, Term):
352 op2Lit = "(%s)"%op2Lit
353 return "%s%s%s"%(op1Lit, self.operator, op2Lit)
354
356 return "T(%s ; %s ; %s)"%(repr(self.op1),
357 repr(self.operator), repr(self.op2))
358
360 factor1, powers1 = self.op1.getSI()
361 factor2, powers2 = self.op2.getSI()
362 newPowers = powers1
363 if self.operator==".":
364 for si, power in powers2.iteritems():
365 newPowers[si] = newPowers.get(si, 0)+power
366 return factor1*factor2, newPowers
367 elif self.operator=="/":
368 for si, power in powers2.iteritems():
369 newPowers[si] = newPowers.get(si, 0)-power
370 return factor1/factor2, newPowers
371 else:
372 raise Exception("This can't happen")
373
376 """The root node of an expression tree.
377
378 This contains a term and optionally a scale factor.
379 """
381 self.term, self.scaleFactor = term, scaleFactor
382 self.isUnknown = self.term.isUnknown
383
384 @classmethod
386 if len(toks)==2:
387 return cls(toks[1], float(toks[0]))
388
389 elif len(toks)==1:
390 return cls(toks[0], 1)
391
392 else:
393 raise Exception("This can't happen")
394
400
402 return "R(%s ; %s)"%(repr(self.scaleFactor), repr(self.term))
403
405 """returns a pair of a numeric factor and a dict mapping SI units to
406 their powers.
407 """
408 factor, siPowers = self.term.getSI()
409 return factor*self.scaleFactor, siPowers
410
413 """a parseAction for terms, making trees out of parse lists
414 left-associatively.
415 """
416 toks = list(toks)
417 curOperand = toks.pop(0)
418 while len(toks)>1:
419 curOperand = Term.fromToks(s, pos, [curOperand, toks[0], toks[1]])
420 del toks[:2]
421 return curOperand
422
425 """a parse action evaluating the whole match as a python expression.
426
427 Obviously, this should only be applied to carefully screened nonterminals.
428 """
429 return eval("".join(str(tok) for tok in toks))
430
433 """the grammar to parse VOUnits.
434
435 After initialization, the class has a "symbols" dictionary containing
436 the individual nonterminals.
437 """
438 @classmethod
440 from pyparsing import (Word, Literal, Regex,
441 Optional, ZeroOrMore, alphas,
442 Suppress, Forward, White)
443
444 with utils.pyparsingWhitechars(''):
445 unit_atom = Word(alphas).addParseAction(UnitNode.fromToks)
446 unit_atom.setName("atomic unit")
447 quoted_unit_atom = ("'" + Word(alphas) + "'"
448 ).addParseAction(QuotedUnitNode.fromToks)
449 quoted_unit_atom.setName("quoted atomic unit")
450
451 OPEN_P = Literal('(')
452 CLOSE_P = Literal(')')
453 SIGN = Literal('+') | Literal('-')
454 FUNCTION_NAME = Word(alphas)
455 UNSIGNED_INTEGER = Word("01234567890")
456 SIGNED_INTEGER = SIGN + UNSIGNED_INTEGER
457 FLOAT = Regex(r"[+-]?([0-9]+(\.[0-9]*)?)")
458 VOFLOAT = Regex(r"0.[0-9]+([eE][+-]?[0-9]+)?"
459 "|[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?")
460
461 integer = (SIGNED_INTEGER | UNSIGNED_INTEGER).setName("integer")
462 power_operator = Literal('**')
463 multiplication_operator = Literal(".")
464 division_operator = Literal("/")
465 numeric_power = (integer
466 | OPEN_P + integer + CLOSE_P
467 | OPEN_P + FLOAT + CLOSE_P
468 | OPEN_P + integer + '/'
469 + UNSIGNED_INTEGER.addParseAction(lambda s, p, t: t[0]+".")
470 + CLOSE_P)
471 numeric_power.setParseAction(evalAll)
472
473 pow_10 = Literal("10") + power_operator + numeric_power
474 scale_factor = (pow_10 | VOFLOAT
475 ).setName("scale_factor"
476 ).setParseAction(evalAll)
477
478 any_unit_atom = unit_atom | quoted_unit_atom
479 factor = (any_unit_atom
480 + Optional( Suppress(power_operator) + numeric_power )
481 ).setName("factor"
482 ).addParseAction(Factor.fromToks)
483
484 complete_expression = Forward()
485 function_application = (FUNCTION_NAME +
486 Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P)
487 ).setName("function application")
488 function_application.addParseAction(FunctionApplication.fromToks)
489
490 unit_expression = (
491 Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P)
492 | ( factor
493 ^ function_application )).setName("unit expression")
494
495 product_of_units = (unit_expression
496 + ZeroOrMore(multiplication_operator + unit_expression)
497 ).setName("units term"
498 ).setParseAction(_buildTerm)
499
500 complete_expression << (
501 product_of_units + Optional(division_operator + unit_expression) )
502 complete_expression.setParseAction(Term.fromToks)
503
504 input = (Optional(scale_factor)
505 + Optional(Suppress(White()))
506 + complete_expression
507 ).setName("unit expression"
508 ).setParseAction(Expression.fromToks)
509
510 cls.symbols = locals()
511 return input
512
513 @classmethod
515 """(not user-servicable)
516 """
517 from pyparsing import ParserElement
518 for name, sym in cls.symbols.iteritems():
519 if isinstance(sym, ParserElement):
520 sym.setDebug(True)
521 sym.setName(name)
522
525 """returns a sorted tuple of (si, power) for a result of getSI().
526
527 These should be more suitable for dimensional analysis.
528 """
529 return tuple(sorted(unitDict.iteritems()))
530
538
541 """returns the factor needed to get from quantities given in unitStr1
542 to unitStr2.
543
544 Both must be given in VOUnits form.
545
546 This function may raise a BadUnit if one of the strings are
547 malformed, or an IncompatibleUnit exception if the units don't have
548 the same SI base.
549
550 If the function is successful, unitStr1 = result*unitStr2
551 """
552 if unitStr1==unitStr2:
553 return 1
554 factor1, powers1 = parseUnit(unitStr1).getSI()
555 factor2, powers2 = parseUnit(unitStr2).getSI()
556 powers1, powers2 = asSequence(powers1), asSequence(powers2)
557
558 if (powers1, powers2) in EXCEPTIONAL_CONVERSIONS:
559 return factor1/factor2*EXCEPTIONAL_CONVERSIONS[powers1, powers2]
560 if (powers2, powers1) in EXCEPTIONAL_CONVERSIONS:
561 return factor1/factor2/EXCEPTIONAL_CONVERSIONS[powers2, powers1]
562
563 if powers1!=powers2:
564 raise IncompatibleUnits("%s and %s do not have the same SI base"%(
565 unitStr1, unitStr2))
566
567
568
569
570 for u, _ in powers1:
571 if isinstance(u, tuple):
572 raise IncompatibleUnits("%s has a non-polynomial function. No"
573 " conversion by multiplication possible"%(unitStr1))
574 for u, _ in powers2:
575 if isinstance(u, tuple):
576 raise IncompatibleUnits("%s has a non-polynomial function. No"
577 " conversion by multiplication possible"%(unitStr2))
578
579 return factor1/factor2
580
583 """returns a dict of conversion factors between newColumns and oldColumns.
584
585 Both arguments are iterables of columns.
586
587 For every column in newColumn, the function sees if the units of
588 newColumn and oldColumn match. If they don't, compute a conversion
589 factor to be multiplied to oldColumns values to make them newColumns
590 values and add it to the result dict.
591
592 The function raises a DataError if a column in newColumns has no
593 match in oldColumns.
594 """
595 res = {}
596 for newCol in newColumns:
597 if not newCol.name in oldColumns:
598 raise utils.DataError(
599 "Request for column %s from %s cannot be satisfied in %s"%(
600 newCol.name, newColumns, oldColumns))
601 oldCol = oldColumns.getColumnByName(newCol.name)
602 try:
603 if newCol.unit!=oldCol.unit:
604 res[newCol.name] = computeConversionFactor(oldCol.unit, newCol.unit)
605 except BadUnit:
606
607 pass
608 return res
609
610
611 if __name__=="__main__":
612 getUnitGrammar.enableDebuggingOutput()
613 g = getUnitGrammar()
614 res = g.parseString("10**28 s", parseAll=True)[0]
615 print(res)
616