Package gavo :: Package rscdef :: Module rmkdef
[frames | no frames]

Source Code for Module gavo.rscdef.rmkdef

  1  """ 
  2  Definition of rowmakers. 
  3   
  4  rowmakers are objects that take a dictionary of some kind and emit 
  5  a row suitable for inclusion into a table. 
  6  """ 
  7   
  8  #c Copyright 2008-2019, the GAVO project 
  9  #c 
 10  #c This program is free software, covered by the GNU GPL.  See the 
 11  #c COPYING file in the source distribution. 
 12   
 13   
 14  import bisect 
 15  import fnmatch 
 16  import re 
 17  import sys 
 18  import traceback 
 19   
 20  from gavo import base 
 21  from gavo import utils 
 22  from gavo.rscdef import common 
 23  from gavo.rscdef import procdef 
 24  from gavo.rscdef import rmkfuncs 
 25  from gavo.rscdef import rowtriggers 
 26   
 27   
 28  __docformat__ = "restructuredtext en" 
class Error(base.Error):
    """An exception type local to the rowmaker module.

    It adds nothing to base.Error.
    """
33
class MappedExpression(base.Structure):
    """a base class for map and var.

    You must give a destDict class attribute to make these work.

    destDict is the name (as it appears in the generated python source)
    of the dictionary the computed value is assigned into.
    """

    destDict = None
    restrictedMode = False

    _dest = base.UnicodeAttribute("key",
        default=base.Undefined,
        description="Name of the column the value is to end up in.",
        copyable=True,
        strip=True,
        aliases=["dest", "name"])

    _src = base.UnicodeAttribute("source",
        default=None,
        description="Source key name to convert to column value (either a grammar"
        " key or a var).",
        copyable=True,
        strip=True,
        aliases=["src"])

    _nullExcs = base.UnicodeAttribute("nullExcs",
        default=base.NotGiven,
        description="Exceptions that should be caught and"
        " cause the value to be NULL, separated by commas.")

    _expr = base.DataContent(
        description="A python expression giving the value for key.",
        copyable=True,
        strip=True)

    _nullExpr = base.UnicodeAttribute("nullExpr",
        default=base.NotGiven,
        description="A python expression for a value that is mapped to"
        " NULL (None). Equality is checked after building the value, so"
        " this expression has to be of the column type. Use map with"
        " the parseWithNull function to catch null values before type"
        " conversion.")

    def completeElement(self, ctx):
        """fills in defaults and enforces restricted mode.

        In restricted mode (ctx.restricted is true), elements with a body,
        a nullExpr or a nullValue are rejected with a RestrictedElement
        error.  Without body and source, source defaults to key; a body
        containing a backslash is macro-expanded by the parent.
        """
        self.restrictedMode = getattr(ctx, "restricted", False)
        # nullValue is not declared on this class; fetch it defensively so
        # that a plain source-only map does not fail with an AttributeError
        # when the two preceding operands are false.
        # NOTE(review): nullExcs is also spliced verbatim into the generated
        # code (see getCode) but is not checked here -- confirm whether it
        # should be rejected in restricted mode, too.
        if self.restrictedMode and (
                self.content_
                or self.nullExpr
                or getattr(self, "nullValue", None)):
            raise base.RestrictedElement("map", hint="In restricted mode, only"
                " maps with a source attribute are allowed; nullExpr or nullValue"
                " are out, too, since they can be used to inject raw code.")
        if not self.content_ and not self.source:
            self.source = self.key
        if self.content_ and "\\" in self.content_:
            self.content_ = self.parent.expand(self.content_)
        # NOTE(review): unlike validate, this does not chain to a next
        # completeElement -- presumably intentional; confirm.

    def validate(self):
        """checks that exactly one of source and body is given, that key is
        a python identifier, and that nullExpr, the body, and nullExcs are
        parseable python expressions.

        This raises a StructureError or LiteralParseError when source/body
        or key are bad; what ensureExpression raises is up to utils.
        """
        self._validateNext(MappedExpression)

        if (self.content_ and self.source) or not (self.content_ or self.source):
            raise base.StructureError("Map must have exactly one of source attribute"
                " or element content")

        if not utils.identifierPattern.match(self.key):
            raise base.LiteralParseError("name", self.key,
                hint="Var keys must be valid python"
                " identifiers, and '%s' is not"%self.key)

        if self.nullExpr is not base.NotGiven:
            utils.ensureExpression(self.nullExpr)

        if self.content_:
            utils.ensureExpression(common.replaceProcDefAt(self.content_), self.name_)

        if self.nullExcs is not base.NotGiven:
            utils.ensureExpression(self.nullExcs, "%s.nullExcs"%(self.name_))

    def getCode(self, columns):
        """returns python source code for this map.

        columns must have a getColumnByName method; it is only consulted
        when there is no body, in which case the type of the column named
        by key selects the conversion code wrapped around vars[source].

        A LiteralParseError is raised if no default conversion is known for
        the column type.
        """
        code = []

        if self.content_:
            code.append('%s["%s"] = %s'%(self.destDict, self.key, self.content_))
        else:
            colDef = columns.getColumnByName(self.key)
            try:
                # The two % operators apply left to right: the conversion
                # template first receives 'vars["%s"]', then the result is
                # filled with the escaped source name.
                code.append('%s["%s"] = %s'%(self.destDict,
                    self.key,
                    base.sqltypeToPythonCode(colDef.type)%'vars["%s"]'%
                        self.source.replace("\\", r"\\").replace('"', '\\"')))
            except base.ConversionError:
                raise base.ui.logOldExc(base.LiteralParseError("map", colDef.type,
                    hint="Auto-mapping to %s is impossible since"
                    " no default map for %s is known"%(self.key, colDef.type)))

        if self.nullExpr is not base.NotGiven:
            # a second statement nulling out the value if it equals nullExpr
            code.append('\nif %s["%s"]==%s: %s["%s"] = None'%(
                self.destDict,
                self.key,
                self.nullExpr,
                self.destDict,
                self.key))
        code = "".join(code)

        if self.nullExcs is not base.NotGiven:
            # indent every line built so far and wrap it into try/except
            code = 'try:\n%s\nexcept (%s): %s["%s"] = None'%(
                re.sub("(?m)^", " ", code),
                self.nullExcs,
                self.destDict,
                self.key)
        return code
150
class MapRule(MappedExpression):
    """A mapping rule.

    A map generates one key/value pair in the result record.  Its value
    comes from one of two places:

    - the source attribute, naming a value emitted by the grammar or
      defined using var; that value is converted to a python value and
      stored,
    - or a python expression given in the body; in that case, no further
      type conversion will be attempted.

    A map having neither source nor body uses its key attribute as its
    source attribute.
    """
    destDict = "result"
    name_ = "map"
170
class VarDef(MappedExpression):
    """A definition of a rowmaker variable.

    A var has a name and a python expression (which may include function
    calls).  Its value is entered into the input row coming from the
    grammar.

    var elements are evaluated before apply elements, in the sequence
    they are in the RD.  Keys defined by vars already evaluated can be
    referred to in the usual @key manner.
    """
    destDict = "vars"
    name_ = "var"
185
186 187 -class ApplyDef(procdef.ProcApp):
188 """A code fragment to manipulate the result row (and possibly more). 189 190 Apply elements allow embedding python code in rowmakers. 191 192 The current input fields from the grammar (including the rowmaker's vars) 193 are available in the vars dictionary and can be changed there. You can 194 also add new keys. 195 196 You can add new keys for shipping out in the result dictionary. 197 198 The active rowmaker is available as parent. It is also used to 199 expand macros. 200 201 The table that the rowmaker feeds to can be accessed as targetTable. 202 You probably only want to change meta information here (e.g., warnings 203 or infos). 204 205 As always in procApps, you can get the embedding RD as rd; this is 206 useful to, e.g., resolve references using rd.getByRD, and specify 207 resdir-relative file names using rd.getAbsPath. 208 """ 209 name_ = "apply" 210 requiredType = "apply" 211 formalArgs = "vars, result, targetTable, _self" 212
213 - def getFuncCode(self):
215
class RowmakerMacroMixin(base.StandardMacroMixin):
    """A collection of macros available to rowmakers.

    NOTE: All macros should return only one single physical python line,
    or they will mess up the calculation of what constructs caused errors.
    """
    def macro_standardPubDID(self):
        r"""returns the "standard publisher DID" for the current product.

        The publisher dataset identifier (PubDID) is important in protocols like
        SSAP and obscore.  If you use this macro, the PubDID will be your
        authority, the path component ~, and the current value of @prodtblAccref.
        It thus will only work where products#define (or a replacement) is in
        action.  If it isn't, a normal function call
        getStandardPubDID(\\inputRelativePath) would be an obvious alternative.

        You *can* of course define your PubDIDs in a different way.
        """
        return ('getStandardPubDID(vars["prodtblAccref"])')

    def macro_dlMetaURI(self, dlId):
        """returns a link to the datalink document for the current product.

        This assumes you're assigning standard pubDIDs (see also standardPubDID,
        which is used by this).

        dlId is the XML id of the datalink service, which is supposed to
        be in the same RD as the rowmaker.
        """
        # Only dlId is interpolated here; the doubled percent signs survive
        # as a format expression in the generated code.
        return ('"%%s?ID=%%s"%%('
            'rd_.getById(%s).getURL("dlmeta", absolute=True),'%repr(dlId)+
            'urllib.quote(getStandardPubDID(vars["prodtblAccref"])))')

    def macro_fullPath(self):
        """returns an expression expanding to the full path of the current
        input file.
        """
        return 'vars["parser_"].sourceToken'

    def macro_inputRelativePath(self, liberalChars="True"):
        """see grammars.common.GrammarMacroMixin
        """
        return ('getInputsRelativePath('
            'vars["parser_"].sourceToken, liberalChars=%s)'
            )%base.parseBooleanLiteral(liberalChars)

    def macro_rowsProcessed(self):
        """returns an expression giving the number of records already
        delivered by the grammar.
        """
        return 'vars["parser_"].recNo'

    def macro_rowsMade(self):
        """returns an expression giving the number of records already
        returned by this row maker.

        This number excludes failed and skipped rows.
        """
        return '_self.rowsMade'

    def macro_property(self, propName):
        """returns an expression giving the value of the property propName
        on the current DD.
        """
        return 'curDD_.getProperty("%s")'%propName

    def macro_sourceDate(self):
        """returns an expression giving the timestamp of the current source.

        This is a timestamp of the modification date; use dateTimeToJdn or
        dateTimeToMJD to turn this into JD or MJD (which is usually preferred
        in database tables).  See also the sourceCDate macro.
        """
        return ('datetime.datetime.utcfromtimestamp('
            'os.path.getmtime(vars["parser_"].sourceToken))')

    def macro_sourceCDate(self):
        """returns an expression giving the timestamp for the create
        date of the current source.

        Use dateTimeToJdn or dateTimeToMJD to turn this into JD or MJD (which
        is usually preferred in database tables).  See also the sourceDate macro.
        """
        return ('datetime.datetime.utcfromtimestamp('
            'os.path.getctime(vars["parser_"].sourceToken))')

    def macro_srcstem(self):
        """returns python code for the stem of the source file currently parsed in a rowmaker.

        Example: if you're currently parsing /tmp/foo.bar.gz, the stem is foo.
        """
        return ('getFileStem(vars["parser_"].sourceToken)')

    def macro_lastSourceElements(self, numElements):
        """returns an expression calling rmkfuncs.lastSourceElements on
        the current input path.
        """
        return 'lastSourceElements(vars["parser_"].sourceToken, %d)'%(
            int(numElements))

    def macro_rootlessPath(self):
        """returns an expression giving the current source's path with
        the resource descriptor's root removed.
        """
        return 'utils.getRelativePath(vars["parser_"].sourceToken, rd_.resdir)'

    def macro_inputSize(self):
        """returns an expression giving the size of the current source.
        """
        return 'os.path.getsize(vars["parser_"].sourceToken)'

    def macro_docField(self, name):
        """returns an expression giving the value of the column name in the
        document row.
        """
        # The argument used to be ignored in favour of an undefined name
        # (fieldName); the parser is now addressed as vars["parser_"] as in
        # the other macros of this class.
        # NOTE(review): assumes the parser object provides getParameters()
        # -- confirm against the grammars expected to support this macro.
        return 'vars["parser_"].getParameters()[%s]'%repr(name)

    def macro_qName(self):
        """returns the qName of the table we are currently parsing into.
        """
        return "tableDef_.getQName()"
338
class RowmakerDef(base.Structure, RowmakerMacroMixin):
    """A definition of the mapping between grammar input and finished rows
    ready for shipout.

    Rowmakers consist of variables, procedures and mappings.  They
    result in a python callable doing the mapping.

    RowmakerDefs double as macro packages for the expansion of various
    macros.  The standard macros will need to be quoted, the rowmaker macros
    above yield python expressions.

    Within map and var bodies as well as late apply pars and apply bodies,
    you can refer to the grammar input as vars["name"] or, shorter @name.

    To add output keys, use map or, in apply bodies, add keys to the
    result dictionary.
    """
    name_ = "rowmaker"

    _maps = base.StructListAttribute("maps", childFactory=MapRule,
        description="Mapping rules.", copyable=True)
    _vars = base.StructListAttribute("vars", childFactory=VarDef,
        description="Definitions of intermediate variables.",
        copyable=True)
    _apps = base.StructListAttribute("apps",
        childFactory=ApplyDef, description="Procedure applications.",
        copyable=True)
    _rd = common.RDAttribute()
    _idmaps = base.StringListAttribute("idmaps", description="List of"
        ' column names that are just "mapped through" (like map with key'
        " only); you can use shell patterns to select multiple colums at once.",
        copyable=True)
    _simplemaps = base.IdMapAttribute("simplemaps", description=
        "Abbreviated notation for <map source>; each pair is destination:source",
        copyable=True)
    _ignoreOn = base.StructAttribute("ignoreOn", default=None,
        childFactory=rowtriggers.IgnoreOn, description="Conditions on the"
        " input record coming from the grammar to cause the input"
        " record to be dropped by the rowmaker, i.e., for this specific"
        " table. If you need to drop a row for all tables being fed,"
        " use a trigger on the grammar.", copyable=True)
    _original = base.OriginalAttribute()

    @classmethod
    def makeIdentityFromTable(cls, table, **kwargs):
        """returns a rowmaker that just maps input names to column names.

        Unless an id is passed in kwargs, one is generated from table.id.
        """
        if "id" not in kwargs:
            kwargs["id"] = "autogenerated rowmaker for table %s"%table.id
        return base.makeStruct(cls, idmaps=[c.key for c in table], **kwargs)

    @classmethod
    def makeTransparentFromTable(cls, table, **kwargs):
        """returns a rowmaker that maps input names to column names without
        touching them.

        This is for crazy cases in which the source actually provides
        pre-parsed data that any treatment would actually ruin.
        """
        if "id" not in kwargs:
            kwargs["id"] = "autogenerated rowmaker for table %s"%table.id
        return base.makeStruct(cls, maps=[
            base.makeStruct(MapRule, key=c.name, content_="vars[%s]"%repr(c.name))
                for c in table],
            **kwargs)

    def completeElement(self, ctx):
        """turns simplemaps into proper map children.

        A source given as @name has the @ stripped and yields a map that
        nulls the value on KeyError.
        """
        if self.simplemaps:
            for k,v in self.simplemaps.iteritems():
                nullExcs = base.NotGiven
                if v.startswith("@"):
                    v = v[1:]
                    nullExcs = "KeyError,"
                self.feedObject("maps", base.makeStruct(MapRule,
                    key=k, source=v, nullExcs=nullExcs))
        self._completeElementNext(RowmakerDef, ctx)

    def _getSourceFromColset(self, columns):
        """returns the source code for a mapper to a column set.

        The return value is a pair of the source and a dictionary mapping
        the (1-based) number of the first line of each generated fragment
        to a description of what that fragment does.  Fragments come in the
        order ignore trigger, vars, apps, maps.
        """
        lineMap, line = {}, 0
        source = []

        def appendToSource(srcLine, line, lineMarker):
            # line is the number of the last line already emitted; the
            # return value accounts for multi-line fragments.
            source.append(srcLine)
            line += 1
            lineMap[line] = lineMarker
            line += source[-1].count("\n")
            return line

        if self.ignoreOn:
            line = appendToSource("if checkTrigger(vars):\n"
                " raise IgnoreThisRow(vars)",
                line, "Checking ignore")
        for v in self.vars:
            line = appendToSource(v.getCode(columns), line, "assigning "+v.key)
        for a in self.apps:
            line = appendToSource(
                "%s(vars, result, targetTable, _self)"%a.name,
                line, "executing "+a.name)
        for m in self.maps:
            line = appendToSource(m.getCode(columns), line, "building "+m.key)
        return "\n".join(source), lineMap

    def _getSource(self, tableDef):
        """returns the source code for a mapper to tableDef's columns.
        """
        return self._getSourceFromColset(tableDef.columns)

    def _getGlobals(self, tableDef):
        """returns the global namespace for the generated mapper code.

        It contains the compiled apps under their names, checkTrigger if
        there is an ignoreOn, and tableDef_, rd_, and curDD_.
        """
        namespace = {}
        for a in self.apps:
            namespace[a.name] = a.compile()
        if self.ignoreOn:
            namespace["checkTrigger"] = self.ignoreOn
        namespace["tableDef_"] = tableDef
        namespace["rd_"] = tableDef.rd
        namespace["curDD_"] = tableDef.parent
        return namespace

    def _resolveIdmaps(self, columns):
        """adds mappings for self's idmap within column set.

        Columns that already have a map are left alone.  A NotFoundError
        is raised for an idmaps pattern matching no column.  idmaps is
        emptied when this is done.
        """
        existingMaps = set(m.key for m in self.maps)
        baseNames = [c.key for c in columns]
        for colName in self.idmaps:
            matching = fnmatch.filter(baseNames, colName)
            if not matching:
                raise base.NotFoundError(colName, "columns matching", "unknown")
            for dest in matching:
                if dest not in existingMaps:
                    self.maps.append(MapRule(self, key=dest).finishElement(None))
                    # remember what we added, or overlapping patterns would
                    # yield several maps for one column
                    existingMaps.add(dest)
        self.idmaps = []

    def _checkTable(self, columns, id):
        """raises a LiteralParseError if we try to map to non-existing
        columns.

        id is only used in the error message.
        """
        for mapRule in self.maps:
            try:
                columns.getColumnByName(mapRule.key)
            except KeyError:
                raise base.ui.logOldExc(base.LiteralParseError(self.name_,
                    mapRule.key,
                    "Cannot map to '%s' since it does not exist in %s"%(
                        mapRule.key, id)))

    def _buildForTable(self, tableDef):
        """returns a RowmakerDef with everything expanded and checked for
        tableDef.

        This may raise LiteralParseErrors if self's output is incompatible
        with tableDef.
        """
        res = self.copyShallowly()
        try:
            res._resolveIdmaps(tableDef.columns)
            res._checkTable(tableDef.columns, tableDef.id)
        except base.NotFoundError as ex:
            ex.within = "table %s's columns"%tableDef.id
            raise
        return res

    def _realCompileForTableDef(self, tableDef):
        """helps compileForTableDef.
        """
        rmk = self._buildForTable(tableDef)
        source, lineMap = rmk._getSource(tableDef)
        namespace = rmk._getGlobals(tableDef)
        # the id is taken from self since the shallow copy is built
        # without one
        return Rowmaker(common.replaceProcDefAt(source),
            self.id or "<rowmaker without id>",
            namespace, tableDef.getDefaults(), lineMap)

    def compileForTableDef(self, tableDef):
        """returns a function receiving a dictionary of raw values and
        returning a row ready for adding to a tableDef'd table.

        To do this, we first make a rowmaker instance with idmaps resolved
        and then check if the rowmaker result and the table structure
        are compatible.

        The result is memoized through utils.memoizeOn.
        """
        return utils.memoizeOn(tableDef, self, self._realCompileForTableDef,
            tableDef)

    def copyShallowly(self):
        """returns a new instance of self's class sharing the children of
        self's maps, vars, idmaps, and apps in fresh lists, and self's
        ignoreOn.
        """
        return base.makeStruct(self.__class__, maps=self.maps[:],
            vars=self.vars[:], idmaps=self.idmaps[:],
            apps=self.apps[:], ignoreOn=self.ignoreOn)
527
class ParmakerDef(RowmakerDef):
    """A RowmakerDef variant that maps to a table definition's params
    rather than to its columns.
    """
    name_ = "parmaker"

    def _getSource(self, tableDef):
        """returns the source code for a mapper to tableDef's params.
        """
        return self._getSourceFromColset(tableDef.params)

    def _buildForTable(self, tableDef):
        """returns a copy of self with idmaps resolved against, and maps
        checked against, tableDef's params.
        """
        parmaker = self.copyShallowly()
        try:
            parmaker._resolveIdmaps(tableDef.params)
            parmaker._checkTable(tableDef.params, tableDef.id)
        except base.NotFoundError as notFound:
            # say where the lookup failed before passing the error on
            notFound.within = "table %s's params"%tableDef.id
            raise
        return parmaker
# a rowmaker definition with the single idmaps pattern *
identityRowmaker = base.makeStruct(
    RowmakerDef,
    idmaps="*")
class Rowmaker(object):
    """A callable that arranges for the mapping of key/value pairs to
    other key/value pairs.

    Within DaCHS, Rowmakers generate database rows (and parameter dictionaries)
    from the output of grammars.

    They are constructed with the source of the mapping function, a dictionary of
    globals the function should see, a dictionary of defaults, giving keys to be
    inserted into the incoming rowdict before the mapping function is called, and
    a map of line numbers to names handled in that line.

    It is called with a dictionary of locals for the functions (i.e.,
    usually the result of a grammar iterRows).
    """
    def __init__(self, source, name, globals, defaults, lineMap):
        """compiles source and sets up the namespace it is run in.

        A BadCode error is raised if source does not compile.  Note that
        the globals dictionary passed in is updated in place with the
        names from rmkfuncs.
        """
        try:
            self.code = compile(source, "generated mapper code", "exec")
        except SyntaxError as msg:
            raise base.ui.logOldExc(
                base.BadCode(source, "rowmaker", msg))
        self.source, self.name = source, name
        globals.update(rmkfuncs.__dict__)
        self.globals, self.defaults = globals, defaults
        self.keySet = set(self.defaults)
        # sorted (line number, description) pairs for bisecting
        self.lineMap = sorted(lineMap.items())
        self.rowsMade = 0

    def _guessExSourceName(self, tb):
        """returns an educated guess as to which mapping should have
        caused that traceback in tb.

        This is done by inspecting the second-topmost stackframe.  It
        must hold the generated line that, possibly indirectly, caused
        the exception.  This line should be in the lineMap generated by
        RowmakerDef._getSource.
        """
        if not tb.tb_next:
            # toplevel failure, internal
            return "in toplevel (internal failure)"

        excLine = tb.tb_next.tb_lineno
        base.ui.notifyDebug(
            "Here's the traceback:\n%s"%"".join(traceback.format_tb(tb)))

        if not self.lineMap:
            # no generated code we could attribute the failure to; without
            # this, the indexing below would raise an IndexError.
            return "in toplevel (internal failure)"

        destInd = min(len(self.lineMap)-1,
            bisect.bisect_left(self.lineMap, (excLine, "")))
        # If we're between lineMap entries, the one before the guessed one
        # is the one we want
        if self.lineMap[destInd][0]>excLine and destInd:
            destInd -= 1
        return self.lineMap[destInd][1]

    def _guessError(self, ex, rowdict, tb):
        """tries to shoehorn a ValidationError out of ex.

        This always raises a ValidationError; its column name is the last
        word of the guessed source description, its row is rowdict.
        """
        base.ui.notifyDebug("Rowmaker failed. Exception below. Failing source"
            " is:\n%s"%self.source)
        destName = self._guessExSourceName(tb)
        if isinstance(ex, KeyError):
            msg = "Key %s not found in a mapping."%unicode(ex)
            hint = ("This probably means that your grammar did not yield the"
                " field asked for. Alternatively, bugs in procs might also"
                " cause this.")
        else:
            try:
                msg = unicode(str(ex), "iso-8859-1", "replace")
            except UnicodeError:
                # str() fails on exceptions carrying non-ASCII unicode
                # messages; don't let that mask the actual problem.
                msg = unicode(ex)
            hint = ("This is a failure in more-or-less user-provided code."
                " If you run again with the global --debug flag, the source of"
                " the failing code should be in the logs/dcInfos (but make"
                " sure it's the source the error is reported for; with procs,"
                " this might not be the case).")
        raise base.ui.logOldExc(base.ValidationError("While %s in %s: %s"%(
            destName, self.name, msg), destName.split()[-1], rowdict,
            hint=hint))

    def __call__(self, vars, table):
        """returns the result dictionary the generated code builds from vars.

        vars is changed in place: keys from the defaults missing in it
        are added before the generated code runs.  ExecutiveActions and
        ValidationErrors are passed on, all other exceptions are
        translated by _guessError.
        """
        # built outside of the try block so the handler below can rely on it
        namespace = {
            "vars": vars,
            "result": {},
            "_self": self,
            "targetTable": table
        }
        try:
            missingKeys = self.keySet-set(vars)
            for k in missingKeys:
                vars[k] = self.defaults[k]
            # the tuple form is equivalent to the exec statement in python 2
            # and is also what python 3 understands
            exec(self.code, self.globals, namespace)

            self.rowsMade += 1
            return namespace["result"]
        except base.ExecutiveAction: # pass these on
            raise
        except base.ValidationError: # hopefully downstream knows better than we
            raise
        except Exception as ex:
            self._guessError(ex, namespace["vars"], sys.exc_info()[2])
645