Package gavo :: Package adql :: Module fieldinfo
[frames] | no frames]

Source Code for Module gavo.adql.fieldinfo

  1  """ 
  2  Field Infos -- annotations to ADQL parse nodes carrying values. 
  3   
  4  To do this, we have a set of naive heuristics for how types, ucds, and units 
  5  behave when such "fields" are combined.  Since right now we don't parse 
  6  out enough and, at least for ucds and units, we don't have enough data 
  7  to begin with, much of this is conjecture. 
  8  """ 
  9   
 10  #c Copyright 2008-2019, the GAVO project 
 11  #c 
 12  #c This program is free software, covered by the GNU GPL.  See the 
 13  #c COPYING file in the source distribution. 
 14   
 15   
 16  import re 
class _CoercNode(object):
    """A single type in the coercion tree.

    A node stores its name, a sequence of alternative names (aliases),
    its child nodes and a reference to its parent.  The parent reference
    is None until some other node is constructed with this node among its
    children.
    """

    def __init__(self, name, children=(), aliases=()):
        self.name = name
        self.aliases = aliases
        self.children = children
        # Stays None for the root; any other node is adopted by the node
        # that lists it among its children.
        self.parent = None
        for kid in children:
            kid.parent = self

    def getAncestorNames(self):
        """returns the names on the path from the root down to this node.

        The root's name comes first, this node's own name last.  A fresh
        list is returned on every call.
        """
        names = []
        node = self
        # Walk upwards collecting names, then flip to root-first order.
        while node is not None:
            names.append(node.name)
            node = node.parent
        names.reverse()
        return names
34
class Coercions(object):
    """A tree of types that can be used to infer common types.

    The tree is passed in as its root node.  Nodes must offer the
    attributes name, aliases, children and parent as well as a
    getAncestorNames() method (as _CoercNode does).

    >>> c = Coercions(_CoercNode('bar', (_CoercNode('foo'), _CoercNode('baz',
    ...   (_CoercNode('quux'),)))))
    >>> c.getSubsuming([])
    'bar'
    >>> c.getSubsuming(['foo'])
    'foo'
    >>> c.getSubsuming(['foo', 'foo'])
    'foo'
    >>> c.getSubsuming(['foo', 'quux'])
    'bar'
    >>> c.getSubsuming(['foo', 'weird'])
    'bar'
    """

    def __init__(self, typeTree):
        # typesIndex maps every name and every alias to its node.
        self.typesIndex = {}
        self.root = typeTree

        def index(node):
            self.typesIndex[node.name] = node
            for a in node.aliases:
                self.typesIndex[a] = node
            for c in node.children:
                index(c)

        index(self.root)

    def _unify(self, n1, n2):
        """returns the first node that is an ancestor to both n1 and n2.

        A node counts as its own ancestor here, so unifying a node with
        one of its descendants yields that node itself.
        """
        ancestors = set(n1.getAncestorNames())
        # Climb from n2 towards the root until we hit n1's lineage.
        while n2 is not None:
            if n2.name in ancestors:
                return n2
            n2 = n2.parent
        return self.root

    def getSubsuming(self, typeSeq):
        """returns the name of the least general type being able to
        represent all types within typeSeq.

        The method returns the root type for both an empty typeSeq or
        a typeSeq containing an unknown type.  We don't want to fail here,
        and the "all-encompassing" type should handle any crap.
        """
        try:
            startNodes = [self.typesIndex[t] for t in typeSeq]
        except KeyError:  # don't know at least one type
            return self.root.name

        # Test for emptiness explicitly rather than catching a TypeError
        # from a fold over an empty sequence; that would also hide genuine
        # TypeErrors.  The plain loop also avoids reduce, which is not a
        # builtin on Python 3.
        if not startNodes:
            return self.root.name

        common = startNodes[0]
        for node in startNodes[1:]:
            common = self._unify(common, node)
        return common.name
# Shorthand that keeps the tree literal below readable; it is deleted again
# right after the tree has been built.
N = _CoercNode
# The coercion tree consulted by getSubsumingType.  Each node's parent is
# the next more general type; 'raw' is the root and hence what comes back
# for unknown types or an empty type list.
_coercions = Coercions(
    N('raw', (
        N('unicode', (
            N('text', (
                N("double precision", aliases=("double",), children=(
                    N("real", aliases=("float",), children=(
                        N("bigint", (
                            N("integer", aliases=("int",), children=(
                                N("smallint", (
                                    N('bytea'),
                                    N('boolean'),)),)),)),)),)),
                N('timestamp', (
                    N('date'),
                    N('time'),)),
                N('file'),
                N('box'),
                N('spoint'),
                N('scircle'),
                N('spoly', (
                    N('sbox'),)),
            ),),),),)))
del N


# Matches "character varying", "varchar" or "char" followed by a
# parenthesised, possibly empty, run of digits, e.g. char(10) or varchar().
_stringRE = re.compile(r"(?:character varying|varchar|char)\(\d*\)")
# Matches a type with one or more trailing [n] or [] markers; group 1 is
# everything in front of the first opening bracket.
_arrayRE = re.compile(r"([^[]*)(?:\[\d*\])+")
def getSubsumingType(sqlTypes):
    """returns an approximate sql type for a value composed of the types
    mentioned in the sequence sqlTypes.

    Basically, we have the coercion sequence int -> float -> text,
    where earlier types get clobbered by later ones.  And then there's
    messy stuff like dates.  We don't want to fail here; types the
    coercion tree does not know (and an empty sqlTypes) yield the tree's
    root type, 'raw'.

    Since we don't know what operation is being performed, this can never
    be accurate; the idea is to come up with something usable to generate
    VOTables from ADQL results.

    Arrays are handled by stripping the [..] markers, subsuming the element
    types and gluing a single [] to the result.  As soon as a char(x),
    varchar(x) or character varying(x) is encountered, the result is plain
    "text", regardless of anything else in sqlTypes.

    All input is supposed to be lower case.

    >>> getSubsumingType(["smallint", "integer"])
    'integer'
    """
    baseTypes = []
    sawArray = False
    for sqlType in sqlTypes:
        # Any string type with a length immediately decides the result.
        if _stringRE.match(sqlType):
            return "text"
        arrayMatch = _arrayRE.match(sqlType)
        if arrayMatch is not None:
            # Keep only the element type; remember to re-add the [] later.
            sqlType = arrayMatch.group(1)
            sawArray = True
        baseTypes.append(sqlType)

    commonType = _coercions.getSubsuming(baseTypes)
    return (commonType + "[]") if sawArray else commonType
159
class FieldInfo(object):
    """is a container for meta information on columns.

    It is constructed with a type, a unit, a ucd and userData.  UserData is
    a sequence of opaque objects.  A FieldInfo combined from more than
    one FieldInfo will have all userDatas of the combined FieldInfos in
    its userData attribute.

    There's also a properties dictionary you can use to set arbitrary
    keys in.  These should not be inherited.  This is used for:

    - xtype -- where applicable, write an ADQL xtype.
    """

    def __init__(self, type, unit, ucd, userData=(), tainted=False, stc=None,
            sqlName=None):
        self.type, self.unit, self.ucd = type, unit, ucd
        self.userData = userData
        self.tainted = tainted
        self.stc = stc
        self.sqlName = sqlName
        # Always starts out empty; there is no constructor argument for it.
        self.properties = {}

    def __eq__(self, other):
        # Only these attributes take part in equality; userData, sqlName
        # and properties are ignored.  Anything lacking one of the compared
        # attributes is simply unequal.
        compared = ("type", "ucd", "unit", "stc", "tainted")
        try:
            return all(getattr(self, attName) == getattr(other, attName)
                for attName in compared)
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        shown = (self.type, self.unit, self.ucd, self.userData)
        return "FieldInfo(%s, %s, %s, %s)" % tuple(repr(v) for v in shown)

    @staticmethod
    def combineUserData(fi1, fi2):
        """returns fi1's userData followed by fi2's userData.
        """
        first, second = fi1.userData, fi2.userData
        return first + second

    @staticmethod
    def combineSTC(fi1, fi2):
        """tries to find a common STC system for fi1 and fi2.

        Two STC systems are compatible if at least one is None or if they
        are equal.

        If this method discovers incompatible systems, it returns the
        result of fi1.stc.change() with an explanatory message stored in
        its broken attribute.
        """
        if fi1.stc is None and fi2.stc is None:
            return None
        if fi2.stc is None or fi1.stc == fi2.stc:
            return fi1.stc
        if fi1.stc is None:
            return fi2.stc

        # Trouble: stcs not equal but given; hand back a marked version of
        # fi1's stc.
        # NOTE(review): the message text was taken from a listing with
        # normalized whitespace -- confirm spacing against the repository.
        res = fi1.stc.change()
        res.broken = ("This STC info is bogus. It is the STC from an"
            " expression combining two different systems.")
        return res

    @classmethod
    def fromMulExpression(cls, opr, fi1, fi2):
        """returns a new FieldInfo built from the multiplication-like operator
        opr and the two field infos.

        If exactly one operand is dimless (empty unit), unit and ucd of the
        other operand are kept.  If both are dimless, unit and ucd are
        empty.  Otherwise the unit is unit1 opr unit2 (with unit2
        parenthesized for "/") and the ucd is empty.

        The result is tainted in every case.
        """
        leftUnit, rightUnit = fi1.unit, fi2.unit
        userData = cls.combineUserData(fi1, fi2)
        stc = cls.combineSTC(fi1, fi2)
        newType = getSubsumingType([fi1.type, fi2.type])

        if leftUnit == "" and rightUnit == "":
            unit, ucd = "", ""
        elif leftUnit == "":
            unit, ucd = rightUnit, fi2.ucd
        elif rightUnit == "":
            unit, ucd = leftUnit, fi1.ucd
        else:
            if opr == "/":
                # Protect a compound divisor.
                rightUnit = "(%s)" % rightUnit
            unit, ucd = leftUnit + opr + rightUnit, ""

        return cls(newType, unit, ucd, userData, tainted=True, stc=stc)

    @classmethod
    def fromAddExpression(cls, opr, fi1, fi2, forceType=None):
        """returns a new FieldInfo built from the addition-like operator
        opr and the two field infos.

        Equal units are kept; if only one operand has a unit, that unit is
        tentatively accepted; otherwise the unit is cleared.  The ucd is
        kept if both ucds are equal and cleared otherwise.

        If forceType is given, it becomes the type of the result; otherwise
        the type is worked out using getSubsumingType.

        Note that the result is tainted even when units and ucds agree.
        """
        stc = cls.combineSTC(fi1, fi2)

        if fi1.unit == fi2.unit:
            unit = fi1.unit
        elif fi1.unit and not fi2.unit:
            unit = fi1.unit
        elif fi2.unit and not fi1.unit:
            unit = fi2.unit
        else:
            unit = ""

        ucd = fi1.ucd if fi1.ucd == fi2.ucd else ""

        if forceType is None:
            newType = getSubsumingType([fi1.type, fi2.type])
        else:
            newType = forceType

        # The taint flag is unconditionally on for add-like expressions.
        return cls(newType, unit, ucd, cls.combineUserData(fi1, fi2),
            True, stc)

    def change(self, **kwargs):
        """returns a new FieldInfo with type, unit, ucd, userData, tainted
        and stc taken from self unless overridden in kwargs.

        sqlName and properties are not copied over; sqlName can be passed
        in through kwargs.
        """
        consArgs = dict(type=self.type, unit=self.unit, ucd=self.ucd,
            userData=self.userData, tainted=self.tainted, stc=self.stc)
        consArgs.update(kwargs)
        return FieldInfo(**consArgs)
299
def _test():
    """runs the doctests embedded in this module.
    """
    import doctest
    import fieldinfo
    doctest.testmod(fieldinfo)
# Executing this module as a script runs its doctests.
if __name__=="__main__":
    _test()