1 """
2 Field Infos -- annotations to ADQL parse nodes carrying values.
3
4 To do this, we have a set of naive heuristics for how types, ucds, and units
5 behave when such "fields" are combined. Since right now we don't parse
6 out enough and, at least for ucds and units, we don't have enough data
7 to begin with, much of this is conjecture.
8 """
9
10
11
12
13
14
15
16 import re
20 """An entry in the coercion tree.
21 """
22 - def __init__(self, name, children=(), aliases=()):
27
34
37 """A tree of types that can be used to infer common types.
38
39 The tree is passed in as nested sequences.
40
41 >>> c = Coercions(_CoercNode('bar', (_CoercNode('foo'), _CoercNode('baz',
42 ... (_CoercNode('quux'),)))))
43 >>> c.getSubsuming([])
44 'bar'
45 >>> c.getSubsuming(['foo'])
46 'foo'
47 >>> c.getSubsuming(['foo', 'foo'])
48 'foo'
49 >>> c.getSubsuming(['foo', 'quux'])
50 'bar'
51 >>> c.getSubsuming(['foo', 'weird'])
52 'bar'
53 """
55 self.typesIndex = {}
56 self.root = typeTree
57 def index(node):
58 self.typesIndex[node.name] = node
59 for a in node.aliases:
60 self.typesIndex[a] = node
61 for c in node.children:
62 index(c)
63 index(self.root)
64
66 """returns the first node that is an ancestor to both n1 and n2.
67 """
68 ancestors = set(n1.getAncestorNames())
69 while n2:
70 if n2.name in ancestors:
71 return n2
72 n2 = n2.parent
73 return self.root
74
76 """returns the least general type being able to represent all types
77 within typeSeq.
78
79 The method returns the root type for both an empty typeSeq or
80 a typeSeq containing an unknown type. We don't want to fail here,
81 and the "all-encompassing" type should handle any crap.
82 """
83 try:
84 startNodes = [self.typesIndex[t] for t in typeSeq]
85 except KeyError:
86 return self.root.name
87 try:
88 return reduce(self._unify, startNodes).name
89 except TypeError:
90 return self.root.name
91
92
# The coercion tree used by getSubsumingType.  A parent is the type a
# combination of its children gets subsumed to; 'raw' at the root takes
# everything that has no closer common ancestor.
# N is a shorthand that only exists while the table is being built.
N = _CoercNode
_coercions = Coercions(
    N('raw', (
        N('unicode', (
            N('text', (
                # the numeric chain, most general type outermost
                N("double precision", aliases=("double",), children=(
                    N("real", aliases=("float",), children=(
                        N("bigint", (
                            N("integer", aliases=("int",), children=(
                                N("smallint", (
                                    N('bytea'),
                                    N('boolean'),)),)),)),)),)),
                N('timestamp', (
                    N('date'),
                    N('time'),)),
                N('file'),
                N('box'),
                N('spoint'),
                N('scircle'),
                N('spoly', (
                    N('sbox'),)),
            ),),),),)))
del N
116
117
# char(n), varchar(n), character varying(n) -- all of these become text
_stringRE = re.compile(r"(?:character varying|varchar|char)\(\d*\)")
# a base type followed by one or more [n] or [] array markers
_arrayRE = re.compile(r"([^[]*)(?:\[\d*\])+")


def getSubsumingType(sqlTypes):
    """returns an approximate sql type for a value composed of the types
    mentioned in the sequence sqlTypes.

    Basically, we have the coercion sequence int -> float -> text,
    where earlier types get clobbered by later ones.  And then there's
    messy stuff like dates.  We don't want to fail here, so types the
    coercion table does not know end up as the table's root type (raw).

    Since we don't know what operation is being performed, this can never
    be accurate; the idea is to come up with something usable to generate
    VOTables from ADQL results.

    We do arrays (and subsume them by subsuming all base types and gluing
    a [] to the result); char(x) and friends are all subsumed to text.  As
    soon as one such string type is encountered, the result is a plain
    "text", regardless of any arrays seen before.

    All input is supposed to be lower case.

    >>> getSubsumingType(["smallint", "integer"])
    'integer'
    """
    cleanedTypes, wasArray = [], False
    for sqlType in sqlTypes:
        if _stringRE.match(sqlType):
            return "text"
        arrayMatch = _arrayRE.match(sqlType)
        if arrayMatch:
            # only the base type takes part in the coercion
            sqlType = arrayMatch.group(1)
            wasArray = True
        cleanedTypes.append(sqlType)

    subsType = _coercions.getSubsuming(cleanedTypes)

    if wasArray:
        return subsType + "[]"
    return subsType
159
162 """is a container for meta information on columns.
163
164 It is constructed with a type, a unit, a ucd, and userData. UserData is
165 a sequence of opaque objects. A FieldInfo combined from more than
166 one FieldInfo will have all userDatas of the combined FieldInfos in
167 its userData attribute.
168
169 There's also a properties dictionary you can use to set arbitrary
170 keys in. These should not be inherited. This is used for:
171
172 - xtype -- where applicable, write an ADQL xtype.
173 """
174 - def __init__(self, type, unit, ucd, userData=(), tainted=False, stc=None,
175 sqlName=None):
176 self.type = type
177 self.ucd = ucd
178 self.unit = unit
179 self.stc = stc
180 self.sqlName = sqlName
181 self.userData = userData
182 self.tainted = tainted
183 self.properties = {}
184
186 try:
187 return (self.type==other.type
188 and self.ucd==other.ucd
189 and self.unit==other.unit
190 and self.stc==other.stc
191 and self.tainted==other.tainted)
192 except AttributeError:
193 return False
194
196 return not self==other
197
199 return "FieldInfo(%s, %s, %s, %s)"%(
200 repr(self.type),
201 repr(self.unit),
202 repr(self.ucd),
203 repr(self.userData))
204
205 @staticmethod
207 return fi1.userData+fi2.userData
208
209 @staticmethod
211 """tries to find a common STC system for fi1 and fi2.
212
213 Two STC systems are compatible if at least one is None or if they
214 are equal.
215
216 If this method discovers incompatible systems, it returns the result of
217 fi1.stc.change() with its broken attribute set to an explanatory message.
218 """
219 if fi1.stc is None and fi2.stc is None:
220 return None
221 elif fi2.stc is None or fi1.stc==fi2.stc:
222 return fi1.stc
223 elif fi1.stc is None:
224 return fi2.stc
225 else:
226
227 res = fi1.stc.change()
228 res.broken = ("This STC info is bogus. It is the STC from an"
229 " expression combining two different systems.")
230 return res
231
232 @classmethod
234 """returns a new FieldInfo built from the multiplication-like operator opr
235 and the two field infos.
236
237 The unit is unit1 opr unit2 unless we have a dimless (empty unit), in
238 which case we keep the unit but turn the tainted flag on, unless both
239 are empty.
240
241 The ucd is always empty unless it's a simple dimless multiplication,
242 in which case the ucd of the non-dimless is kept (but the info is
243 tainted).
244 """
245 unit1, unit2 = fi1.unit, fi2.unit
246 newUserData = cls.combineUserData(fi1, fi2)
247 stc = cls.combineSTC(fi1, fi2)
248 newType = getSubsumingType([fi1.type, fi2.type])
249
250 if unit1=="" and unit2=="":
251 return cls(newType, "", "", newUserData, stc=stc, tainted=True)
252 elif unit1=="":
253 return cls(newType, unit2, fi2.ucd, newUserData, tainted=True, stc=stc)
254 elif unit2=="":
255 return cls(newType, unit1, fi1.ucd, newUserData, tainted=True, stc=stc)
256 else:
257 if opr=="/":
258 unit2 = "(%s)"%unit2
259 return cls(newType, unit1+opr+unit2, "", newUserData,
260 tainted=True, stc=stc)
261
262 @classmethod
264 """returns a new FieldInfo built from the addition-like operator
265 opr and the two field infos.
266
267 If both UCDs and units are the same, they are kept. Otherwise,
268 they are cleared and the fieldInfo is tainted.
269 """
270 unit, ucd, taint = "", "", True
271 stc = cls.combineSTC(fi1, fi2)
272 if fi1.unit==fi2.unit:
273 unit = fi1.unit
274 else:
275
276
277 if fi1.unit and not fi2.unit:
278 unit = fi1.unit
279 elif fi2.unit and not fi1.unit:
280 unit = fi2.unit
281 taint = True
282
283 if fi1.ucd==fi2.ucd:
284 ucd = fi1.ucd
285 else:
286 taint = True
287
288 if forceType is not None:
289 newType = forceType
290 else:
291 newType = getSubsumingType([fi1.type, fi2.type])
292 return cls(newType, unit, ucd, cls.combineUserData(fi1, fi2), taint, stc)
293
295 consArgs = {"type": self.type, "unit": self.unit, "ucd": self.ucd,
296 "userData": self.userData, "tainted": self.tainted, "stc": self.stc}
297 consArgs.update(kwargs)
298 return FieldInfo(**consArgs)
299
304
305
306 if __name__=="__main__":
307 _test()
308