Source code for gavo.base.literals

"""
Functions taking strings and returning python values.

All of them accept None and return None for Nullvalue processing.

All of them leave values alone if they already have the right type.

This is usually used in conjunction with
base.typesystems.ToPythonCodeConverter.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import functools
import re

# let's depend on psycopg2 for now as regards ranges.  We can always
# provide a thin shim here if we want to use different databases.
# Advise users to only use base.NumericRange, though.
from psycopg2.extras import NumericRange

from gavo import utils
from gavo.stc import parseSimpleSTCS
from gavo.utils import pgsphere
from gavo.utils import identity #noflake: exported name
from gavo.utils import (parseDefaultDatetime,  #noflake: exported names
	parseDefaultDate, parseDefaultTime)

@utils.document
def parseInt(literal):
	"""returns an int for literal; None and blank strings yield None.

	Values that are not strings are passed to int() as they are.

	>>> parseInt("32")
	32
	>>> parseInt("")
	>>> parseInt(None)
	"""
	if literal is None:
		return None
	# whitespace-only strings count as NULL, just like empty ones
	if isinstance(literal, str) and not literal.strip():
		return None
	return int(literal)
# Floating point positive infinity.
# NOTE(review): nothing in the visible part of this module references _inf;
# confirm there are no external users before removing it.
_inf = float("Inf")
@utils.document
def parseFloat(literal):
	"""returns a float for literal; None and blank strings yield None.

	Everything else is handed to float(), so malformed strings raise a
	ValueError.

	>>> parseFloat("   5e9 ")
	5000000000.0
	>>> parseFloat(None)
	>>> parseFloat("  ")
	>>> parseFloat("wobbadobba")
	Traceback (most recent call last):
	ValueError: could not convert string to float: 'wobbadobba'
	"""
	if literal is None:
		return None
	# whitespace-only strings count as NULL, just like empty ones
	if isinstance(literal, str) and not literal.strip():
		return None
	return float(literal)
# Lowercased spellings that parseBooleanLiteral maps to True and False,
# respectively.
_trueLiterals = {"true", "yes", "t", "on", "enabled", "1"}
_falseLiterals = {"false", "no", "f", "off", "disabled", "0"}
@utils.document
def parseBooleanLiteral(literal):
	"""returns a python boolean from some string.

	Boolean literals are strings like True, false, on, Off, yes, No in
	some capitalization.

	None and python booleans are returned unchanged; objects having an
	item method are unwrapped by calling it.  Strings that are not a known
	boolean literal raise a ValueError.
	"""
	if literal is None or isinstance(literal, bool):
		return literal

	if hasattr(literal, "item"):
		# numpy _bool: unwrap to whatever the scalar holds
		return literal.item()

	lowered = literal.lower()
	# true spellings are checked before false ones
	for value, spellings in ((True, _trueLiterals), (False, _falseLiterals)):
		if lowered in spellings:
			return value

	raise ValueError(
		"'%s' is no recognized boolean literal."%lowered)
def parseUnicode(literal):
	"""returns literal as a str; None stays None.

	bytes are decoded as ASCII, with undecodable bytes replaced rather
	than raising; everything else is passed through str().
	"""
	if literal is None:
		return None

	text = (literal.decode("ascii", "replace")
		if isinstance(literal, bytes)
		else literal)
	return str(text)
def parseBytes(literal):
	r"""returns bytes from a literal.

	This will interpret hex (``\xHH``) and octal (``\OOO``) byte escapes
	in strings, and it'll support lists of integer-like things; not sure if
	that's actually more harmful than good.  But then people can always
	override the default behaviour.

	None is returned as None, bytes are returned unchanged.  Strings must
	be pure ASCII.  Anything that is not None, bytes, a list, or a str
	raises a ValueError, as do malformed escapes and out-of-range values.

	All escapes are expanded in a single left-to-right pass; hence, a
	byte produced by one escape is never re-read as the start of another
	escape.

	>>> parseBytes("abc")
	b'abc'
	>>> parseBytes(r"\xab\000")
	b'\xab\x00'
	>>> parseBytes(r"\x5c101")
	b'\\101'
	>>> parseBytes([123, 231, 23])
	b'{\xe7\x17'
	>>> parseBytes([10002])
	Traceback (most recent call last):
	ValueError: bytes must be in range(0, 256)
	"""
	if literal is None:
		return None
	if isinstance(literal, bytes):
		return literal
	if isinstance(literal, list):
		return bytes(literal)

	if isinstance(literal, str):
		def expandEscape(mat):
			# exactly one of the two groups has matched
			hexDigits, octDigits = mat.groups()
			if hexDigits is not None:
				return bytes([int(hexDigits, 16)])
			return bytes([int(octDigits, 8)])

		# The hex branch deliberately accepts any two alphanumerics so
		# that bad escapes like \xzz keep raising a ValueError rather than
		# being passed through silently.
		return re.sub(
			br"\\(?:x([a-zA-Z0-9][a-zA-Z0-9])|(\d\d\d))",
			expandEscape,
			literal.encode("ascii"))

	raise ValueError("No idea how to make bytes from %s"%repr(literal))
def parseCooPair(soup):
	"""returns a pair of RA, DEC floats if they can be made out in soup
	or raises a ValueError.

	None is returned as None, in line with the other parsers in this
	module.

	Three notations are tried in turn: a pair of plain floats, sexagesimal
	with blank-separated fields, and sexagesimal with colon-separated
	fields.  For the sexagesimal forms, the first coordinate is converted
	with utils.hmsToDeg, the second with utils.dmsToDeg.

	No range checking is done (yet), i.e., as long as two numbers can be
	made out, the function is happy.

	>>> parseCooPair("23 12")
	(23.0, 12.0)
	>>> parseCooPair("23.5,-12.25")
	(23.5, -12.25)
	>>> parseCooPair("3.75 -12.125")
	(3.75, -12.125)
	>>> parseCooPair("3 25,-12 30")
	(51.25, -12.5)
	>>> ["{:.9f}".format(v) for v in parseCooPair("12 15 30.5 +52 18 27.5")]
	['183.877083333', '52.307638889']
	>>> parseCooPair(None)
	>>> parseCooPair("3.39 -12 39")
	Traceback (most recent call last):
	ValueError: Invalid time with sepChar None: '3.39'
	>>> parseCooPair("12 15 30.5 +52 18 27.5e")
	Traceback (most recent call last):
	ValueError: 12 15 30.5 +52 18 27.5e has no discernible position in it
	>>> parseCooPair("QSO2230+44.3")
	Traceback (most recent call last):
	ValueError: QSO2230+44.3 has no discernible position in it
	"""
	if soup is None:
		return None
	soup = soup.strip()

	# All patterns below are raw strings: the regex escapes in them are
	# invalid escape sequences in ordinary string literals.

	def parseFloatPair(soup):
		# two floats separated by whitespace, a comma, or a slash
		mat = re.match(r"(%s)\s*[\s,/]\s*(%s)$"%(utils.floatRE, utils.floatRE),
			soup)
		if mat:
			return float(mat.group(1)), float(mat.group(2))

	def parseSexa(soup):
		# up to three blank-separated fields per coordinate
		timeangleRE = r"(?:\d+\s+)?(?:\d+\s+)?\d+(?:\.\d*)?"
		dmsRE = r"[+-]?\s*(?:\d+\s+)?(?:\d+\s+)?\d+(?:\.\d*)?"
		mat = re.match(r"(%s)\s*[\s,/]?\s*(%s)$"%(timeangleRE, dmsRE), soup)
		if mat:
			try:
				return utils.hmsToDeg(mat.group(1)), utils.dmsToDeg(
					mat.group(2))
			except utils.Error as msg:
				raise utils.logOldExc(ValueError(str(msg)))

	def parseSexaColon(soup):
		# up to three colon-separated fields per coordinate
		timeangleRE = r"(?:\d+:)?(?:\d+:)?\d+(?:\.\d*)?"
		dmsRE = r"[+-]?\s*(?:\d+:)?(?:\d+:)?\d+(?:\.\d*)?"
		mat = re.match(r"(%s)\s*[\s,/]?\s*(%s)$"%(timeangleRE, dmsRE), soup)
		if mat:
			try:
				return (utils.hmsToDeg(mat.group(1), sepChar=":"),
					utils.dmsToDeg(mat.group(2), sepChar=":"))
			except utils.Error as msg:
				raise utils.logOldExc(ValueError(str(msg)))

	# the first notation yielding a pair wins
	for func in [parseFloatPair, parseSexa, parseSexaColon]:
		res = func(soup)
		if res:
			return res
	raise ValueError("%s has no discernible position in it"%soup)
def parseSPoint(soup):
	"""returns an ``SPoint`` for a coordinate pair.

	The coordinate pair can be formatted in a variety of ways; see the
	`function parseCooPair`_.  Input is always in degrees.

	None and values that already are SPoints are returned unchanged.
	"""
	if soup is None:
		return soup
	if isinstance(soup, pgsphere.SPoint):
		return soup

	coos = parseCooPair(soup)
	return pgsphere.SPoint.fromDegrees(*coos)
def parseInterval(soup):
	"""tries to parse a numeric interval out of soup.

	Technically, we expect a whitespace-separated pair of numeric
	somethings.  If a part can be parsed as an int, it is; else it becomes
	a float.

	What's returned is a NumericRange object (currently from psycopg2, but
	you should only import NumericRange from base).

	As for the other parsers in this module, None is returned as None, and
	values that already are NumericRanges are returned unchanged.

	Invalid literals raise some sort of ValueError.

	>>> parseInterval('3 4')
	NumericRange(3, 4, '[)')
	>>> parseInterval('3.5 4.75')
	NumericRange(3.5, 4.75, '[)')
	>>> parseInterval(None)
	>>> parseInterval('20')
	Traceback (most recent call last):
	ValueError: Not a valid numeric interval literal: '20'
	>>> parseInterval('gabba gubbu')
	Traceback (most recent call last):
	ValueError: could not convert string to float: 'gabba'
	"""
	if soup is None:
		return None
	if isinstance(soup, NumericRange):
		return soup

	def parseBound(literal):
		# ints are preferred; anything int() rejects is tried as a float
		try:
			return int(literal)
		except ValueError:
			return float(literal)

	try:
		lower, upper = soup.split()
	except ValueError:
		# wrong number of parts
		raise ValueError("Not a valid numeric interval literal: %s"%repr(soup))

	return NumericRange(parseBound(lower), parseBound(upper))
def originalOrIdentity(soup):
	"""returns soup.original or soup if there is no original attribute.

	This is for cooperation with BinaryItem coming in from the web into
	ContextGrammars.
	"""
	try:
		return soup.original
	except AttributeError:
		return soup
def _numericRangeFactory(colDesc):
	"""A factory to serialise psycopg numeric ranges into VOTables.

	A mapper is only returned for columns with xtype interval and one of
	the datatypes int, long, float, or double; for everything else, the
	factory returns None.

	The mapper turns NumericRange values into (lower, upper) 2-tuples and
	leaves everything else (including None) alone.  For int and long
	columns, an inclusive upper bound is incremented by one.

	(as of 1.2, DaCHS can't really read the result again properly)
	"""
	if colDesc["xtype"]!="interval":
		return None
	if colDesc["datatype"] not in ["int", "long", "float", "double"]:
		return None

	def mapper(val):
		if val is None:
			return None
		if not isinstance(val, NumericRange):
			return val

		# the datatype is looked up per call, as colDesc is closed over
		widen = val.upper_inc and colDesc["datatype"] in ["int", "long"]
		return (val.lower, val.upper+1 if widen else val.upper)

	return mapper


utils.registerDefaultMF(_numericRangeFactory)
@functools.lru_cache(1)
def getDefaultValueParsers():
	"""returns a dict containing all exported names from this module.

	Exported names are the ones listed in __all__; the dict maps each of
	them to the corresponding module-level object.

	This is useful with typesystems.ToPythonCodeConverter; see
	rscdef.column.Parameter for an example.

	This is always the same dict (the result is cached); thus, if you
	change it, copy it first.
	"""
	exported = set(__all__)
	return {name: value
		for name, value in globals().items()
		if name in exported}
def _test():
	"""runs the doctests embedded in this module's docstrings."""
	from doctest import testmod
	testmod()


if __name__=="__main__":
	_test()


# The names this module exports; getDefaultValueParsers returns exactly
# these.
__all__ = [
	"parseInt", "parseFloat", "parseBooleanLiteral", "parseUnicode",
	"parseDefaultDate", "parseDefaultTime", "parseDefaultDatetime",
	"parseCooPair", "getDefaultValueParsers", "parseSPoint",
	"parseSimpleSTCS", "NumericRange", "originalOrIdentity",
	"parseBytes"]