1 """
2 Definition of rowmakers.
3
4 rowmakers are objects that take a dictionary of some kind and emit
5 a row suitable for inclusion into a table.
6 """
7
8
9
10
11
12
13
14 import bisect
15 import fnmatch
16 import re
17 import sys
18 import traceback
19
20 from gavo import base
21 from gavo import utils
22 from gavo.rscdef import common
23 from gavo.rscdef import procdef
24 from gavo.rscdef import rmkfuncs
25 from gavo.rscdef import rowtriggers
26
27
28 __docformat__ = "restructuredtext en"
29
30
31 -class Error(base.Error):
33
36 """a base class for map and var.
37
38 You must give a destDict class attribute to make these work.
39 """
40
41 destDict = None
42 restrictedMode = False
43
44 _dest = base.UnicodeAttribute("key",
45 default=base.Undefined,
46 description="Name of the column the value is to end up in.",
47 copyable=True,
48 strip=True,
49 aliases=["dest", "name"])
50
51 _src = base.UnicodeAttribute("source",
52 default=None,
53 description="Source key name to convert to column value (either a grammar"
54 " key or a var).",
55 copyable=True,
56 strip=True,
57 aliases=["src"])
58
59 _nullExcs = base.UnicodeAttribute("nullExcs",
60 default=base.NotGiven,
61 description="Exceptions that should be caught and"
62 " cause the value to be NULL, separated by commas.")
63
64 _expr = base.DataContent(
65 description="A python expression giving the value for key.",
66 copyable=True,
67 strip=True)
68
69 _nullExpr = base.UnicodeAttribute("nullExpr",
70 default=base.NotGiven,
71 description="A python expression for a value that is mapped to"
72 " NULL (None). Equality is checked after building the value, so"
73 " this expression has to be of the column type. Use map with"
74 " the parseWithNull function to catch null values before type"
75 " conversion.")
76
90
114
116 """returns python source code for this map.
117 """
118 code = []
119
120 if self.content_:
121 code.append('%s["%s"] = %s'%(self.destDict, self.key, self.content_))
122 else:
123 colDef = columns.getColumnByName(self.key)
124 try:
125 code.append('%s["%s"] = %s'%(self.destDict,
126 self.key,
127 base.sqltypeToPythonCode(colDef.type)%'vars["%s"]'%
128 self.source.replace("\\", r"\\").replace('"', '\\"')))
129 except base.ConversionError:
130 raise base.ui.logOldExc(base.LiteralParseError("map", colDef.type,
131 hint="Auto-mapping to %s is impossible since"
132 " no default map for %s is known"%(self.key, colDef.type)))
133
134 if self.nullExpr is not base.NotGiven:
135 code.append('\nif %s["%s"]==%s: %s["%s"] = None'%(
136 self.destDict,
137 self.key,
138 self.nullExpr,
139 self.destDict,
140 self.key))
141 code = "".join(code)
142
143 if self.nullExcs is not base.NotGiven:
144 code = 'try:\n%s\nexcept (%s): %s["%s"] = None'%(
145 re.sub("(?m)^", " ", code),
146 self.nullExcs,
147 self.destDict,
148 self.key)
149 return code
150
151
class MapRule(MappedExpression):
    """A mapping rule.

    Each map rule generates one key/value pair in the result record.
    The value can come from one of two places:

    - the source attribute, which names something emitted by the grammar
      or defined using var.  The value found there is converted to a
      python value and stored.
    - a python expression given in the body.  In that case, no further
      type conversion will be attempted.

    If neither source nor a body is given, map uses the key attribute
    as its source attribute.
    """
    name_ = "map"
    # the generated mapping code assigns into the dictionary of this name
    destDict = "result"
170
171
class VarDef(MappedExpression):
    """A definition of a rowmaker variable.

    A var has a name and a python expression, which may include function
    calls.  The resulting value is entered into the input row coming
    from the grammar.

    var elements are evaluated before apply elements, in the sequence
    they have in the RD.  Keys defined by vars that have already been
    evaluated can be referred to in the usual @key manner.
    """
    name_ = "var"
    # values are written into the vars dictionary, i.e. the input row
    destDict = "vars"
185
188 """A code fragment to manipulate the result row (and possibly more).
189
190 Apply elements allow embedding python code in rowmakers.
191
192 The current input fields from the grammar (including the rowmaker's vars)
193 are available in the vars dictionary and can be changed there. You can
194 also add new keys.
195
196 You can add new keys for shipping out in the result dictionary.
197
198 The active rowmaker is available as parent. It is also used to
199 expand macros.
200
201 The table that the rowmaker feeds to can be accessed as targetTable.
202 You probably only want to change meta information here (e.g., warnings
203 or infos).
204
205 As always in procApps, you can get the embedding RD as rd; this is
206 useful to, e.g., resolve references using rd.getByRD, and specify
207 resdir-relative file names using rd.getAbsPath.
208 """
209 name_ = "apply"
210 requiredType = "apply"
211 formalArgs = "vars, result, targetTable, _self"
212
215
218 """A collection of macros available to rowmakers.
219
220 NOTE: All macros should return only one single physical python line,
221 or they will mess up the calculation of what constructs caused errors.
222 """
224 r"""returns the "standard publisher DID" for the current product.
225
226 The publisher dataset identifier (PubDID) is important in protocols like
227 SSAP and obscore. If you use this macro, the PubDID will be your
228 authority, the path compontent ~, and the current value of @prodtblAccref.
229 It thus will only work where products#define (or a replacement) is in
230 action. If it isn't, a normal function call
231 getStandardPubDID(\\inputRelativePath) would be an obvious alternative.
232
233 You *can* of course define your PubDIDs in a different way.
234 """
235 return ('getStandardPubDID(vars["prodtblAccref"])')
236
249
251 """returns an expression expanding to the full path of the current
252 input file.
253 """
254 return 'vars["parser_"].sourceToken'
255
262
264 """returns an expression giving the number of records already
265 delivered by the grammar.
266 """
267 return 'vars["parser_"].recNo'
268
270 """returns an expression giving the number of records already
271 returned by this row maker.
272
273 This number excludes failed and skipped rows.
274 """
275 return '_self.rowsMade'
276
278 """returns an expression giving the value of the property propName
279 on the current DD.
280 """
281 return 'curDD_.getProperty("%s")'%propName
282
284 """returns an expression giving the timestamp of the current source.
285
286 This is a timestamp of the modification date; use dateTimeToJdn or
287 dateTimeToMJD to turn this into JD or MJD (which is usually preferred
288 in database tables). See also the sourceCDate macro.
289 """
290 return ('datetime.datetime.utcfromtimestamp('
291 'os.path.getmtime(vars["parser_"].sourceToken))')
292
294 """returns an expression giving the timestamp for the create
295 date of the current source.
296
297 Use dateTimeToJdn or dateTimeToMJD to turn this into JD or MJD (which
298 is usually preferred in database tables). See also the sourceDate macro.
299 """
300 return ('datetime.datetime.utcfromtimestamp('
301 'os.path.getctime(vars["parser_"].sourceToken))')
302
304 """returns python code for the stem of the source file currently parsed in a rowmaker.
305
306 Example: if you're currently parsing /tmp/foo.bar.gz, the stem is foo.
307 """
308 return ('getFileStem(vars["parser_"].sourceToken)')
309
311 """returns an expression calling rmkfuncs.lastSourceElements on
312 the current input path.
313 """
314 return 'lastSourceElements(vars["parser_"].sourceToken, %d)'%(
315 int(numElements))
316
318 """returns an expression giving the current source's path with
319 the resource descriptor's root removed.
320 """
321 return 'utils.getRelativePath(vars["parser_"].sourceToken, rd_.resdir)'
322
327
329 """returns an expression giving the value of the column name in the
330 document row.
331 """
332 return '_parser.getParameters()[fieldName]'
333
335 """returns the qName of the table we are currently parsing into.
336 """
337 return "tableDef_.getQName()"
338
339
340 -class RowmakerDef(base.Structure, RowmakerMacroMixin):
341 """A definition of the mapping between grammar input and finished rows
342 ready for shipout.
343
344 Rowmakers consist of variables, procedures and mappings. They
345 result in a python callable doing the mapping.
346
347 RowmakerDefs double as macro packages for the expansion of various
348 macros. The standard macros will need to be quoted, the rowmaker macros
349 above yield python expressions.
350
351 Within map and var bodies as well as late apply pars and apply bodies,
352 you can refer to the grammar input as vars["name"] or, shorter @name.
353
354 To add output keys, use map or, in apply bodies, add keys to the
355 result dictionary.
356 """
357 name_ = "rowmaker"
358
359 _maps = base.StructListAttribute("maps", childFactory=MapRule,
360 description="Mapping rules.", copyable=True)
361 _vars = base.StructListAttribute("vars", childFactory=VarDef,
362 description="Definitions of intermediate variables.",
363 copyable=True)
364 _apps = base.StructListAttribute("apps",
365 childFactory=ApplyDef, description="Procedure applications.",
366 copyable=True)
367 _rd = common.RDAttribute()
368 _idmaps = base.StringListAttribute("idmaps", description="List of"
369 ' column names that are just "mapped through" (like map with key'
370 " only); you can use shell patterns to select multiple colums at once.",
371 copyable=True)
372 _simplemaps = base.IdMapAttribute("simplemaps", description=
373 "Abbreviated notation for <map source>; each pair is destination:source",
374 copyable=True)
375 _ignoreOn = base.StructAttribute("ignoreOn", default=None,
376 childFactory=rowtriggers.IgnoreOn, description="Conditions on the"
377 " input record coming from the grammar to cause the input"
378 " record to be dropped by the rowmaker, i.e., for this specific"
379 " table. If you need to drop a row for all tables being fed,"
380 " use a trigger on the grammar.", copyable=True)
381 _original = base.OriginalAttribute()
382
383 @classmethod
385 """returns a rowmaker that just maps input names to column names.
386 """
387 if "id" not in kwargs:
388 kwargs["id"] = "autogenerated rowmaker for table %s"%table.id
389 return base.makeStruct(cls, idmaps=[c.key for c in table], **kwargs)
390
391 @classmethod
393 """returns a rowmaker that maps input names to column names without
394 touching them.
395
396 This is for crazy cases in which the source actually provides
397 pre-parsed data that any treatment would actually ruin.
398 """
399 if "id" not in kwargs:
400 kwargs["id"] = "autogenerated rowmaker for table %s"%table.id
401 return base.makeStruct(cls, maps=[
402 base.makeStruct(MapRule, key=c.name, content_="vars[%s]"%repr(c.name))
403 for c in table],
404 **kwargs)
405
416
418 """returns the source code for a mapper to a column set.
419 """
420 lineMap, line = {}, 0
421 source = []
422
423 def appendToSource(srcLine, line, lineMarker):
424 source.append(srcLine)
425 line += 1
426 lineMap[line] = lineMarker
427 line += source[-1].count("\n")
428 return line
429
430 if self.ignoreOn:
431 line = appendToSource("if checkTrigger(vars):\n"
432 " raise IgnoreThisRow(vars)",
433 line, "Checking ignore")
434 for v in self.vars:
435 line = appendToSource(v.getCode(columns), line, "assigning "+v.key)
436 for a in self.apps:
437 line = appendToSource(
438 "%s(vars, result, targetTable, _self)"%a.name,
439 line, "executing "+a.name)
440 for m in self.maps:
441 line = appendToSource(m.getCode(columns), line, "building "+m.key)
442 return "\n".join(source), lineMap
443
445 """returns the source code for a mapper to tableDef's columns.
446 """
447 return self._getSourceFromColset(tableDef.columns)
448
450 globals = {}
451 for a in self.apps:
452 globals[a.name] = a.compile()
453 if self.ignoreOn:
454 globals["checkTrigger"] = self.ignoreOn
455 globals["tableDef_"] = tableDef
456 globals["rd_"] = tableDef.rd
457 globals["curDD_"] = tableDef.parent
458 return globals
459
461 """adds mappings for self's idmap within column set.
462 """
463 existingMaps = set(m.key for m in self.maps)
464 baseNames = [c.key for c in columns]
465 for colName in self.idmaps:
466 matching = fnmatch.filter(baseNames, colName)
467 if not matching:
468 raise base.NotFoundError(colName, "columns matching", "unknown")
469 for dest in matching:
470 if dest not in existingMaps:
471 self.maps.append(MapRule(self, key=dest).finishElement(None))
472 self.idmaps = []
473
475 """raises a LiteralParseError if we try to map to non-existing
476 columns.
477 """
478 for map in self.maps:
479 try:
480 columns.getColumnByName(map.key)
481 except KeyError:
482 raise base.ui.logOldExc(base.LiteralParseError(self.name_, map.key,
483 "Cannot map to '%s' since it does not exist in %s"%(
484 map.key, id)))
485
487 """returns a RowmakerDef with everything expanded and checked for
488 tableDef.
489
490 This may raise LiteralParseErrors if self's output is incompatible
491 with tableDef.
492 """
493 res = self.copyShallowly()
494 try:
495 res._resolveIdmaps(tableDef.columns)
496 res._checkTable(tableDef.columns, tableDef.id)
497 except base.NotFoundError as ex:
498 ex.within = "table %s's columns"%tableDef.id
499 raise
500 return res
501
503 """helps compileForTableDef.
504 """
505 rmk = self._buildForTable(tableDef)
506 source, lineMap = rmk._getSource(tableDef)
507 globals = rmk._getGlobals(tableDef)
508 return Rowmaker(common.replaceProcDefAt(source),
509 self.id or "<rowmaker without id>",
510 globals, tableDef.getDefaults(), lineMap)
511
513 """returns a function receiving a dictionary of raw values and
514 returning a row ready for adding to a tableDef'd table.
515
516 To do this, we first make a rowmaker instance with idmaps resolved
517 and then check if the rowmaker result and the table structure
518 are compatible.
519 """
520 return utils.memoizeOn(tableDef, self, self._realCompileForTableDef,
521 tableDef)
522
524 return base.makeStruct(self.__class__, maps=self.maps[:],
525 vars=self.vars[:], idmaps=self.idmaps[:],
526 apps=self.apps[:], ignoreOn=self.ignoreOn)
527
530 name_ = "parmaker"
531
533 res = self.copyShallowly()
534 try:
535 res._resolveIdmaps(tableDef.params)
536 res._checkTable(tableDef.params, tableDef.id)
537 except base.NotFoundError as ex:
538 ex.within = "table %s's params"%tableDef.id
539 raise
540 return res
541
543 """returns the source code for a mapper to tableDef's columns.
544 """
545 return self._getSourceFromColset(tableDef.params)
546
547
# A ready-made rowmaker definition whose only content is the idmaps
# pattern "*", i.e. a shell pattern that matches every column name.
identityRowmaker = base.makeStruct(
    RowmakerDef,
    idmaps="*")
552 """A callable that arranges for the mapping of key/value pairs to
553 other key/value pairs.
554
555 Within DaCHS, Rowmakers generate database rows (and parameter dictionaries)
556 from the output of grammars.
557
558 They are constructed with the source of the mapping function, a dictionary of
559 globals the function should see, a dictionary of defaults, giving keys to be
560 inserted into the incoming rowdict before the mapping function is called, and
561 a map of line numbers to names handled in that line.
562
563 It is called with a dictionary of locals for the functions (i.e.,
564 usually the result of a grammar iterRows).
565 """
566 - def __init__(self, source, name, globals, defaults, lineMap):
578
580 """returns an educated guess as to which mapping should have
581 caused that traceback in tb.
582
583 This is done by inspecting the second-topmost stackframe. It
584 must hold the generated line that, possibly indirectly, caused
585 the exception. This line should be in the lineMap generated by
586 RowmakerDef._getSource.
587 """
588 if tb.tb_next:
589 excLine = tb.tb_next.tb_lineno
590 base.ui.notifyDebug(
591 "Here's the traceback:\n%s"%"".join(traceback.format_tb(tb)))
592 else:
593 return "in toplevel (internal failure)"
594 destInd = min(len(self.lineMap)-1,
595 bisect.bisect_left(self.lineMap, (excLine, "")))
596
597
598 if self.lineMap[destInd][0]>excLine and destInd:
599 destInd -= 1
600 return self.lineMap[destInd][1]
601
603 """tries to shoehorn a ValidationError out of ex.
604 """
605 base.ui.notifyDebug("Rowmaker failed. Exception below. Failing source"
606 " is:\n%s"%self.source)
607 destName = self._guessExSourceName(tb)
608 if isinstance(ex, KeyError):
609 msg = "Key %s not found in a mapping."%unicode(ex)
610 hint = ("This probably means that your grammar did not yield the"
611 " field asked for. Alternatively, bugs in procs might also"
612 " cause this.")
613 else:
614 msg = unicode(str(ex), "iso-8859-1", "replace")
615 hint = ("This is a failure in more-or-less user-provided code."
616 " If you run again with the global --debug flag, the source of"
617 " the failing code should be in the logs/dcInfos (but make"
618 " sure it's the source the error is reported for; with procs,"
619 " this might not be the case).")
620 raise base.ui.logOldExc(base.ValidationError("While %s in %s: %s"%(
621 destName, self.name, msg), destName.split()[-1], rowdict,
622 hint=hint))
623
625 try:
626 locals = {
627 "vars": vars,
628 "result": {},
629 "_self": self,
630 "targetTable": table
631 }
632 missingKeys = self.keySet-set(vars)
633 for k in missingKeys:
634 vars[k] = self.defaults[k]
635 exec self.code in self.globals, locals
636
637 self.rowsMade += 1
638 return locals["result"]
639 except base.ExecutiveAction:
640 raise
641 except base.ValidationError:
642 raise
643 except Exception as ex:
644 self._guessError(ex, locals["vars"], sys.exc_info()[2])
645