1 """
2 Tables, base and in memory.
3
Basically, a table consists of a list of dictionaries (the rows) and a
table definition (rscdef.TableDef).
6
7 You should, in general, not construct the tables directly but use
8 the tables.TableForDef factory. The reason is that some classes ignore
9 certain aspects of TableDefs (indices, uniqueForceness) or may not be
10 what TableDef requires at all (onDisk). Arguably there should be
11 different TableDefs for all these aspects, but then I'd have a plethora
12 of TableDef elements, which I think is worse than a factory function.
13 """
14
15
16
17
18
19
20
21 import sys
22 import weakref
23
24 from gavo import base
25 from gavo import rscdef
26 from gavo.rsc import common
27
28
31
32
34 """Column statistics as exposed by Limits.
35
These have min, max, and values attributes, all of which can be None.
Otherwise, min and max are values of the column type, and values is a set
of those.
39 """
41 self.min, self.max = None, None
42 self.values = None
43
44
46 """Column statistics (min/max, values) for an in-memory table.
47
48 These are constructed with the rows attribute and a list each for
49 columns for which you want min/max and the values present. Note
50 that None in min/max indicates no non-None values were found. An
51 empty set in values indicates that all values were None.
52
53 This then exposes a dictionary interface
54 """
	def __init__(self, rows, minmaxColumns, enumColumns):
		# Start out as an empty dict mapping column name -> ColumnStat.
		dict.__init__(self)
		# Order matters: _addEnums consults the dict via self.get, so
		# the min/max stats must already be in place when it runs.
		self._addMinmax(rows, minmaxColumns)
		self._addEnums(rows, enumColumns)
59
		# One fresh (empty) ColumnStat per requested column, exposed
		# through our dictionary interface.
		stats = [(name, ColumnStat()) for name in minmaxColumns]
		self.update(dict(stats))

		# Single pass over the rows, widening min/max as we go.  None
		# values are skipped entirely, so min/max remain None for a
		# column that only contains NULLs.
		for row in rows:
			for name, stat in stats:
				val = row[name]
				if val is None:
					continue
				if stat.min is None or stat.min>val:
					stat.min = val
				if stat.max is None or stat.max<val:
					stat.max = val
73
75 stats = [(name, self.get("name", ColumnStat())) for name in enumColumns]
76 self.update(dict(stats))
77 for _, stat in stats:
78 stat.values = set()
79
80 for row in rows:
81 for name, stat in stats:
82 if row[name] is not None:
83 stat.values.add(row[name])
84
85
87 """A device for getting data into a table.
88
89 A feeder is a context manager that rejects all action from without
90 the controlled section. Within the controlled section, you can use:
91
92 - add(row) -> None -- add row to table. This may raise all kinds
93 of crazy exceptions.
94 - flush() -> None -- flush out all data that may be cached to the table
95 (this is done automatically on a successful exit)
96 - reset() -> None -- discard any data that may still wait to be
97 flushed to the table
98
At the end of the controlled block, the importFinished or importFailed
methods of the parent table are called depending on whether all is
well or an exception happened. If importFinished raises an
exception, it is handed on to importFailed and re-raised if importFailed
returns False.
104
105 The batch size constructor argument is for the benefit of DBTables.
106
107 The flush and reset methods are necessary when you do explicit buffering and
108 connection management; you will need to call flush before committing a
109 transaction and reset before rolling one back.
110 """
	def __init__(self, table, batchSize=1024):
		# table: the table rows get fed into.  batchSize is not used
		# here; per the class docstring it is for the benefit of
		# DBTable-based feeders.
		self.table = table
		# running count of rows fed through this feeder
		self.nAffected = 0
		# feeders only accept operations within the controlled section;
		# using a dormant feeder raises base.DataError (see guard below)
		self.active = False
115
117 if not self.active:
118 raise base.DataError("Trying to feed a dormant feeder.")
119
121 return self.nAffected
122
123 - def add(self, row):
129
132
133
136
137
139 self.active = True
140 return self
141
142 - def __exit__(self, excType=None, excVal=None, excTb=None):
155
156
	# NOTE(review): `name` is closed over from the enclosing factory's
	# argument (its def line is not visible in this chunk); fail() is the
	# placeholder bound under that method name on incomplete Table classes.
	def fail(self, *args, **kwargs):
		raise NotImplementedError("%s is an incomplete Table implementation."
			" No method '%s' defined."%(self.__class__.__name__, name))
	return fail
162
163
164 -class BaseTable(base.MetaMixin, common.ParamMixin):
165 """is a container for row data.
166
167 Tables consist of rows, where each row maps column names to their
168 value for that row. The rows are accessible at least by iterating
169 over a table.
170
Tables get constructed with a tableDef and keyword arguments. For
convenience, tables must accept any keyword argument and only pluck
out those they want.
174
175 Here's a list of keywords used by BaseTables or known subclasses:
176
177 - validateRows -- have rows be validated by the tableDef before addition
178 (all Tables)
179 - rows -- a list of rows the table has at start (InMemoryTables; DbTables
180 will raise an error on these).
181 - connection -- a database connection to use for accessing DbTables.
182 - votCasts -- a dictionary mapping column names to dictionaries overriding
  keys of valuemappers.AnnotatedColumn.
184 - params -- a dictionary mapping param keys to values, where python
185 values and literals allowed.
186
187 You can add rows using the addRow method. For bulk additions, however,
188 it may be much more efficient to call getFeeder (though for in-memory
189 tables, there is no advantage).
190
191 Tables can run "scripts" if someone furnishes them with a _runScripts
192 method. This currently is only done for DBTables. See Scripting_.
193
194 Initial Metadata is populated from the tableDef.
195
196 Tables have to implement the following methods:
197
198 - __iter__
199 - __len__
200 - __getitem__(n) -- returns the n-th row or raises an IndexError
201 - removeRow(row) removes a row from the table or raises an
202 IndexError if the row does not exist. This is a slow, O(n) operation.
203 - addRow(row) -- appends new data to the table
204 - getRow(*args) -- returns a row by the primary key. If no primary key
205 is defined, a ValueError is raised, if the key is not present, a
206 KeyError. An atomic primary key is accessed through its value,
207 for compound primary keys a tuple must be passed.
208 - getFeeder(**kwargs) -> feeder object -- returns an object with add and
209 exit methods. See feeder above.
210 - importFinished() -> None -- called when a feeder exits successfully
211 - importFailed(*excInfo) -> boolean -- called when feeding has failed;
212 when returning True, the exception that has caused the failure
213 is not propagated.
214 - close() -> may be called by clients to signify the table will no
215 longer be used and resources should be cleared (e.g., for DBTables
216 with private connections).
217 """
218 _runScripts = None
219
220 - def __init__(self, tableDef, **kwargs):
230
	# Abstract interface: concrete table classes must override these.
	# Until they do, calling any of them raises NotImplementedError
	# (via the placeholder produced by _makeFailIncomplete).
	__iter__ = _makeFailIncomplete("__iter__")
	__len__ = _makeFailIncomplete("__len__")
	removeRow = _makeFailIncomplete("removeRow")
	addRow = _makeFailIncomplete("addRow")
	getRow = _makeFailIncomplete("getRow")
	getFeeder = _makeFailIncomplete("getFeeder")
237
240
243
246
249
253
255 """raises a ValidationError if any required parameters of this
256 tables are None.
257 """
258 for par in self.iterParams():
259 if par.required and par.value is None:
260 raise base.ValidationError(
261 "Value is required but was not provided", par.name)
262
263
265 """is a table kept in memory.
266
This table only keeps an index for the primary key. All other indices
268 are ignored.
269 """
270 - def __init__(self, tableDef, **kwargs):
273
275 return iter(self.rows)
276
278 return len(self.rows)
279
281 return bool(self.rows)
282
285
293
295 raise ValueError("Cannot use getRow in index-less table")
296
298 return _Feeder(self, **kwargs)
299
301 """returns a limits instance for this table.
302
303 This is a characterisation of the ranges of things in this table,
pretty much as what dachs info does; if you fix things here, you probably
305 want to fix things there, too.
306 """
		# Partition the table's columns: enumerated ones get a value
		# set, orderable ones (plus char-typed ones) get min/max.
		minmaxColumns, enumColumns = [], []
		for col in self.tableDef:
			if col.isEnumerated():
				enumColumns.append(col.name)
			elif col.type in base.ORDERED_TYPES or col.type.startswith("char"):
				minmaxColumns.append(col.name)
		# Limits does the actual per-row statistics gathering.
		return Limits(self.rows, minmaxColumns, enumColumns)
314
315
317 """is an InMemoryTable for a TableDef with a primary key.
318 """
	def __init__(self, tableDef, **kwargs):
		InMemoryTable.__init__(self, tableDef, **kwargs)
		# This table class is only usable with a primary key; refuse
		# construction otherwise.
		if not self.tableDef.primary:
			raise Error("No primary key given for InMemoryIndexedTable")
		# builds self.rowIndex (primary key -> row); see _makeRowIndex
		self._makeRowIndex()
324
329
338
340 return self.rowIndex[args]
341
343 """recreates the index of primary keys to rows.
344 """
345 self.rowIndex = {}
346 for r in self.rows:
347 self.rowIndex[self.tableDef.getPrimaryIn(r)] = r
348
349
351 """is an InMemoryTable with an enforced policy on duplicate
352 primary keys.
353
See rscdef.TableDef for a discussion of the policies.
355 """
356 - def __init__(self, tableDef, **kwargs):
357
358
359 rows = kwargs.pop("rows", [])
360 InMemoryIndexedTable.__init__(self, tableDef, **kwargs)
361 try:
362 self.resolveConflict = {
363 "check": self._ensureRowIdentity,
364 "drop": self._dropNew,
365 "overwrite": self._overwriteOld,
366 "dropOld": self._overwriteOld,
367 }[self.tableDef.dupePolicy]
368 except KeyError as msg:
369 raise base.ui.logOldExc(
370 Error("Invalid conflict resolution strategy: %s"%str(msg)))
371 for row in rows:
372 self.addRow(row)
373
375 """raises an exception if row is not equivalent to the row stored
376 for key.
377
378 This is one strategy for resolving primary key conflicts.
379 """
380 storedRow = self.rowIndex[key]
381 if row.keys()!=storedRow.keys():
382 raise Error("Differing rows for primary key %s: %s vs. %s"%(
383 key, self.rowIndex[key], row))
384 for colName in row:
385 if row[colName] is None or storedRow[colName] is None:
386 continue
387 if row[colName]!=storedRow[colName]:
388 raise base.ValidationError(
389 "Differing rows for primary key %s;"
390 " %s vs. %s"%(key, row[colName],
391 storedRow[colName]), colName=colName, row=row)
392
394 """does nothing.
395
396 This is for resolution of conflicting rows (the "drop" strategy).
397 """
398 pass
399
401 """overwrites the existing rows with key in table with rows.
402
403 This is for resolution of conflicting rows (the "overwrite"
404 strategy).
405
406 Warning: This is typically rather slow.
407 """
408 storedRow = self.rowIndex[key]
409 self.removeRow(storedRow)
410 return self.addRow(row)
411
424