1  """ 
  2  Definition of rowmakers. 
  3   
  4  rowmakers are objects that take a dictionary of some kind and emit 
  5  a row suitable for inclusion into a table. 
  6  """ 
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14  import bisect 
 15  import fnmatch 
 16  import re 
 17  import sys 
 18  import traceback 
 19   
 20  from gavo import base 
 21  from gavo import utils 
 22  from gavo.rscdef import common 
 23  from gavo.rscdef import procdef 
 24  from gavo.rscdef import rmkfuncs 
 25  from gavo.rscdef import rowtriggers 
 26   
 27   
 28  __docformat__ = "restructuredtext en" 
 29   
 30   
 31 -class Error(base.Error): 
  33   
 36          """a base class for map and var. 
 37   
 38          You must give a destDict class attribute to make these work. 
 39          """ 
 40           
 41          destDict = None 
 42          restrictedMode = False 
 43   
 44          _dest = base.UnicodeAttribute("key",  
 45                  default=base.Undefined,  
 46                  description="Name of the column the value is to end up in.", 
 47                  copyable=True,  
 48                  strip=True,  
 49                  aliases=["dest", "name"]) 
 50   
 51          _src = base.UnicodeAttribute("source",  
 52                  default=None, 
 53                  description="Source key name to convert to column value (either a grammar" 
 54                  " key or a var).",  
 55                  copyable=True,  
 56                  strip=True, 
 57                  aliases=["src"]) 
 58   
 59          _nullExcs = base.UnicodeAttribute("nullExcs",  
 60                  default=base.NotGiven, 
 61                  description="Exceptions that should be caught and" 
 62                  " cause the value to be NULL, separated by commas.") 
 63   
 64          _expr = base.DataContent( 
 65                  description="A python expression giving the value for key.",  
 66                  copyable=True,  
 67                  strip=True) 
 68   
 69          _nullExpr = base.UnicodeAttribute("nullExpr",  
 70                  default=base.NotGiven, 
 71                  description="A python expression for a value that is mapped to" 
 72                  " NULL (None).  Equality is checked after building the value, so" 
 73                  " this expression has to be of the column type.  Use map with" 
 74                  " the parseWithNull function to catch null values before type" 
 75                  " conversion.") 
 76   
 90   
114   
116                  """returns python source code for this map. 
117                  """ 
118                  code = [] 
119   
120                  if self.content_: 
121                          code.append('%s["%s"] = %s'%(self.destDict, self.key, self.content_)) 
122                  else: 
123                          colDef = columns.getColumnByName(self.key) 
124                          try: 
125                                  code.append('%s["%s"] = %s'%(self.destDict, 
126                                          self.key,  
127                                          base.sqltypeToPythonCode(colDef.type)%'vars["%s"]'% 
128                                                  self.source.replace("\\", r"\\").replace('"', '\\"'))) 
129                          except base.ConversionError: 
130                                  raise base.ui.logOldExc(base.LiteralParseError("map", colDef.type, 
131                                          hint="Auto-mapping to %s is impossible since" 
132                                          " no default map for %s is known"%(self.key, colDef.type))) 
133   
134                  if self.nullExpr is not base.NotGiven: 
135                          code.append('\nif %s["%s"]==%s: %s["%s"] = None'%( 
136                                  self.destDict, 
137                                  self.key,  
138                                  self.nullExpr,  
139                                  self.destDict, 
140                                  self.key)) 
141                  code = "".join(code) 
142   
143                  if self.nullExcs is not base.NotGiven: 
144                          code = 'try:\n%s\nexcept (%s): %s["%s"] = None'%( 
145                                  re.sub("(?m)^", "  ", code),  
146                                  self.nullExcs,  
147                                  self.destDict, 
148                                  self.key) 
149                  return code 
  150   
151   
class MapRule(MappedExpression):
        """A rule filling one key of the result record.

        The value to store can be specified in one of two ways:

        - through the source attribute, which names a value emitted by the
          grammar or defined using var.  The value given for source is
          converted to a python value and stored.
        - through a python expression in the element body.  In that case,
          no further type conversion will be attempted.

        A map having neither a source nor a body uses its key attribute as
        its source attribute.

        Each map rule generates one key/value pair in the result record.
        """
        destDict = "result"
        name_ = "map"
 170   
171   
class VarDef(MappedExpression):
        """A rowmaker variable definition.

        A var has a name and a python expression (which may include function
        calls).  The resulting value is entered into the input row coming
        from the grammar.

        Within a rowmaker, var elements are evaluated in the sequence they
        have in the RD, and all of them before any apply element.  Keys
        defined by vars already evaluated can be referred to in the usual
        @key manner.
        """
        destDict = "vars"
        name_ = "var"
 185   
188          """A code fragment to manipulate the result row (and possibly more). 
189   
190          Apply elements allow embedding python code in rowmakers. 
191   
192          The current input fields from the grammar (including the rowmaker's vars)  
193          are available in the vars dictionary and can be changed there.  You can  
194          also add new keys. 
195   
196          You can add new keys for shipping out in the result dictionary. 
197   
198          The active rowmaker is available as parent.  It is also used to 
199          expand macros. 
200   
201          The table that the rowmaker feeds to can be accessed as targetTable.   
202          You probably only want to change meta information here (e.g., warnings  
203          or infos). 
204   
205          As always in procApps, you can get the embedding RD as rd; this is 
206          useful to, e.g., resolve references using rd.getByRD, and specify  
207          resdir-relative file names using rd.getAbsPath. 
208          """ 
209          name_ = "apply" 
210          requiredType = "apply" 
211          formalArgs = "vars, result, targetTable, _self" 
212           
 215   
218          """A collection of macros available to rowmakers. 
219   
220          NOTE: All macros should return only one single physical python line, 
221          or they will mess up the calculation of what constructs caused errors. 
222          """ 
224                  r"""returns the "standard publisher DID" for the current product. 
225   
226                  The publisher dataset identifier (PubDID) is important in protocols like 
227                  SSAP and obscore.  If you use this macro, the PubDID will be your 
                authority, the path component ~, and the current value of @prodtblAccref.
229                  It thus will only work where products#define (or a replacement) is in 
230                  action.  If it isn't, a normal function call 
231                  getStandardPubDID(\\inputRelativePath) would be an obvious alternative. 
232   
233                  You *can* of course define your PubDIDs in a different way. 
234                  """ 
235                  return ('getStandardPubDID(vars["prodtblAccref"])') 
 236   
249   
251                  """returns an expression expanding to the full path of the current 
252                  input file. 
253                  """ 
254                  return 'vars["parser_"].sourceToken' 
 255   
262           
264                  """returns an expression giving the number of records already 
265                  delivered by the grammar. 
266                  """ 
267                  return 'vars["parser_"].recNo' 
 268   
270                  """returns an expression giving the number of records already 
271                  returned by this row maker. 
272   
273                  This number excludes failed and skipped rows. 
274                  """ 
275                  return '_self.rowsMade' 
 276   
278                  """returns an expression giving the value of the property propName 
279                  on the current DD. 
280                  """ 
281                  return 'curDD_.getProperty("%s")'%propName 
 282   
284                  """returns an expression giving the timestamp of the current source. 
285   
286                  This is a timestamp of the modification date; use dateTimeToJdn or 
287                  dateTimeToMJD to turn this into JD or MJD (which is usually preferred 
288                  in database tables).  See also the sourceCDate macro. 
289                  """ 
290                  return ('datetime.datetime.utcfromtimestamp(' 
291                          'os.path.getmtime(vars["parser_"].sourceToken))') 
 292   
294                  """returns an expression giving the timestamp for the create 
295                  date of the current source. 
296   
297                  Use dateTimeToJdn or dateTimeToMJD to turn this into JD or MJD (which  
298                  is usually preferred in database tables).  See also the sourceDate macro. 
299                  """ 
300                  return ('datetime.datetime.utcfromtimestamp(' 
301                          'os.path.getctime(vars["parser_"].sourceToken))') 
 302           
304                  """returns python code for the stem of the source file currently parsed in a rowmaker. 
305                   
306                  Example: if you're currently parsing /tmp/foo.bar.gz, the stem is foo. 
307                  """ 
308                  return ('getFileStem(vars["parser_"].sourceToken)') 
 309   
311                  """returns an expression calling rmkfuncs.lastSourceElements on 
312                  the current input path. 
313                  """ 
314                  return 'lastSourceElements(vars["parser_"].sourceToken, %d)'%( 
315                          int(numElements)) 
 316   
318                  """returns an expression giving the current source's path with  
319                  the resource descriptor's root removed. 
320                  """ 
321                  return 'utils.getRelativePath(vars["parser_"].sourceToken, rd_.resdir)' 
 322   
327   
329                  """returns an expression giving the value of the column name in the  
330                  document row. 
331                  """ 
332                  return '_parser.getParameters()[fieldName]' 
 333   
335                  """returns the qName of the table we are currently parsing into. 
336                  """ 
337                  return "tableDef_.getQName()" 
  338   
339   
340 -class RowmakerDef(base.Structure, RowmakerMacroMixin): 
 341          """A definition of the mapping between grammar input and finished rows 
342          ready for shipout. 
343   
344          Rowmakers consist of variables, procedures and mappings.  They 
345          result in a python callable doing the mapping. 
346   
347          RowmakerDefs double as macro packages for the expansion of various 
348          macros.  The standard macros will need to be quoted, the rowmaker macros 
349          above yield python expressions. 
350   
351          Within map and var bodies as well as late apply pars and apply bodies,  
352          you can refer to the grammar input as vars["name"] or, shorter @name. 
353   
354          To add output keys, use map or, in apply bodies, add keys to the 
355          result dictionary. 
356          """ 
357          name_ = "rowmaker" 
358   
359          _maps = base.StructListAttribute("maps", childFactory=MapRule, 
360                  description="Mapping rules.", copyable=True) 
361          _vars = base.StructListAttribute("vars", childFactory=VarDef, 
362                  description="Definitions of intermediate variables.", 
363                  copyable=True) 
364          _apps = base.StructListAttribute("apps", 
365                  childFactory=ApplyDef, description="Procedure applications.", 
366                  copyable=True) 
367          _rd = common.RDAttribute() 
368          _idmaps = base.StringListAttribute("idmaps", description="List of" 
369                  ' column names that are just "mapped through" (like map with key' 
370                  " only); you can use shell patterns to select multiple colums at once.", 
371                  copyable=True) 
372          _simplemaps = base.IdMapAttribute("simplemaps", description= 
373                  "Abbreviated notation for <map source>; each pair is destination:source",  
374                  copyable=True) 
375          _ignoreOn = base.StructAttribute("ignoreOn", default=None, 
376                  childFactory=rowtriggers.IgnoreOn, description="Conditions on the" 
377                  " input record coming from the grammar to cause the input" 
378                  " record to be dropped by the rowmaker, i.e., for this specific" 
379                  " table.  If you need to drop a row for all tables being fed," 
380                  " use a trigger on the grammar.", copyable=True) 
381          _original = base.OriginalAttribute() 
382   
383          @classmethod 
385                  """returns a rowmaker that just maps input names to column names. 
386                  """ 
387                  if "id" not in kwargs: 
388                          kwargs["id"] = "autogenerated rowmaker for table %s"%table.id 
389                  return base.makeStruct(cls, idmaps=[c.key for c in table], **kwargs) 
 390   
391          @classmethod 
393                  """returns a rowmaker that maps input names to column names without 
394                  touching them. 
395   
396                  This is for crazy cases in which the source actually provides  
397                  pre-parsed data that any treatment would actually ruin. 
398                  """ 
399                  if "id" not in kwargs: 
400                          kwargs["id"] = "autogenerated rowmaker for table %s"%table.id 
401                  return base.makeStruct(cls, maps=[ 
402                                  base.makeStruct(MapRule, key=c.name, content_="vars[%s]"%repr(c.name)) 
403                                          for c in table], 
404                          **kwargs) 
 405   
416   
418                  """returns the source code for a mapper to a column set. 
419                  """ 
420                  lineMap, line = {}, 0 
421                  source = [] 
422   
423                  def appendToSource(srcLine, line, lineMarker): 
424                          source.append(srcLine) 
425                          line += 1 
426                          lineMap[line] = lineMarker 
427                          line += source[-1].count("\n") 
428                          return line 
 429   
430                  if self.ignoreOn: 
431                          line = appendToSource("if checkTrigger(vars):\n" 
432                                  "  raise IgnoreThisRow(vars)", 
433                                  line, "Checking ignore") 
434                  for v in self.vars: 
435                          line = appendToSource(v.getCode(columns), line, "assigning "+v.key) 
436                  for a in self.apps: 
437                          line = appendToSource( 
438                                  "%s(vars, result, targetTable, _self)"%a.name,  
439                                  line, "executing "+a.name) 
440                  for m in self.maps: 
441                          line = appendToSource(m.getCode(columns), line, "building "+m.key) 
442                  return "\n".join(source), lineMap 
 443   
445                  """returns the source code for a mapper to tableDef's columns. 
446                  """ 
447                  return self._getSourceFromColset(tableDef.columns) 
 448   
450                  globals = {} 
451                  for a in self.apps: 
452                          globals[a.name] = a.compile() 
453                  if self.ignoreOn: 
454                          globals["checkTrigger"] = self.ignoreOn 
455                  globals["tableDef_"] = tableDef 
456                  globals["rd_"] = tableDef.rd 
457                  globals["curDD_"] = tableDef.parent 
458                  return globals 
 459   
461                  """adds mappings for self's idmap within column set. 
462                  """ 
463                  existingMaps = set(m.key for m in self.maps) 
464                  baseNames = [c.key for c in columns] 
465                  for colName in self.idmaps: 
466                          matching = fnmatch.filter(baseNames, colName) 
467                          if not matching: 
468                                  raise base.NotFoundError(colName, "columns matching", "unknown") 
469                          for dest in matching: 
470                                  if dest not in existingMaps: 
471                                          self.maps.append(MapRule(self, key=dest).finishElement(None)) 
472                  self.idmaps = [] 
 473   
475                  """raises a LiteralParseError if we try to map to non-existing 
476                  columns. 
477                  """ 
478                  for map in self.maps: 
479                          try: 
480                                  columns.getColumnByName(map.key) 
481                          except KeyError: 
482                                  raise base.ui.logOldExc(base.LiteralParseError(self.name_, map.key,  
483                                          "Cannot map to '%s' since it does not exist in %s"%( 
484                                                  map.key, id))) 
 485   
487                  """returns a RowmakerDef with everything expanded and checked for 
488                  tableDef. 
489   
490                  This may raise LiteralParseErrors if self's output is incompatible 
491                  with tableDef. 
492                  """ 
493                  res = self.copyShallowly() 
494                  try: 
495                          res._resolveIdmaps(tableDef.columns) 
496                          res._checkTable(tableDef.columns, tableDef.id) 
497                  except base.NotFoundError as ex: 
498                          ex.within = "table %s's columns"%tableDef.id 
499                          raise 
500                  return res 
 501   
503                  """helps compileForTableDef. 
504                  """ 
505                  rmk = self._buildForTable(tableDef) 
506                  source, lineMap = rmk._getSource(tableDef) 
507                  globals = rmk._getGlobals(tableDef) 
508                  return Rowmaker(common.replaceProcDefAt(source),  
509                          self.id or "<rowmaker without id>",  
510                          globals, tableDef.getDefaults(), lineMap) 
 511   
513                  """returns a function receiving a dictionary of raw values and 
514                  returning a row ready for adding to a tableDef'd table. 
515   
516                  To do this, we first make a rowmaker instance with idmaps resolved 
517                  and then check if the rowmaker result and the table structure 
518                  are compatible. 
519                  """ 
520                  return utils.memoizeOn(tableDef, self, self._realCompileForTableDef, 
521                          tableDef) 
 522   
524                  return base.makeStruct(self.__class__, maps=self.maps[:],  
525                          vars=self.vars[:], idmaps=self.idmaps[:],  
526                          apps=self.apps[:], ignoreOn=self.ignoreOn) 
 527   
530          name_ = "parmaker" 
531   
533                  res = self.copyShallowly() 
534                  try: 
535                          res._resolveIdmaps(tableDef.params) 
536                          res._checkTable(tableDef.params, tableDef.id) 
537                  except base.NotFoundError as ex: 
538                          ex.within = "table %s's params"%tableDef.id 
539                          raise 
540                  return res 
 541   
543                  """returns the source code for a mapper to tableDef's columns. 
544                  """ 
545                  return self._getSourceFromColset(tableDef.params) 
  546   
547   
# A ready-made rowmaker definition with idmaps="*"; idmaps entries are
# shell patterns, so this selects every column of the target table to be
# mapped through from the input key of the same name.
identityRowmaker = base.makeStruct(RowmakerDef, idmaps="*")
552          """A callable that arranges for the mapping of key/value pairs to  
553          other key/value pairs. 
554   
555          Within DaCHS, Rowmakers generate database rows (and parameter dictionaries) 
556          from the output of grammars. 
557   
558          They are constructed with the source of the mapping function, a dictionary of 
559          globals the function should see, a dictionary of defaults, giving keys to be 
560          inserted into the incoming rowdict before the mapping function is called, and 
561          a map of line numbers to names handled in that line. 
562   
563          It is called with a dictionary of locals for the functions (i.e., 
564          usually the result of a grammar iterRows). 
565          """ 
566 -        def __init__(self, source, name, globals, defaults, lineMap): 
 578   
580                  """returns an educated guess as to which mapping should have 
581                  caused that traceback in tb. 
582   
583                  This is done by inspecting the second-topmost stackframe.  It 
584                  must hold the generated line that, possibly indirectly, caused 
585                  the exception.  This line should be in the lineMap generated by 
586                  RowmakerDef._getSource. 
587                  """ 
588                  if tb.tb_next: 
589                          excLine = tb.tb_next.tb_lineno 
590                          base.ui.notifyDebug( 
591                                  "Here's the traceback:\n%s"%"".join(traceback.format_tb(tb))) 
592                  else:  
593                          return "in toplevel (internal failure)" 
594                  destInd = min(len(self.lineMap)-1,  
595                          bisect.bisect_left(self.lineMap, (excLine, ""))) 
596                   
597                   
598                  if self.lineMap[destInd][0]>excLine and destInd: 
599                          destInd -= 1 
600                  return self.lineMap[destInd][1] 
 601   
603                  """tries to shoehorn a ValidationError out of ex. 
604                  """ 
605                  base.ui.notifyDebug("Rowmaker failed.  Exception below.  Failing source" 
606                          " is:\n%s"%self.source) 
607                  destName = self._guessExSourceName(tb) 
608                  if isinstance(ex, KeyError): 
609                          msg = "Key %s not found in a mapping."%unicode(ex) 
610                          hint = ("This probably means that your grammar did not yield the" 
611                                  " field asked for.  Alternatively, bugs in procs might also" 
612                                  " cause this.") 
613                  else: 
614                          msg = unicode(str(ex), "iso-8859-1", "replace") 
615                          hint = ("This is a failure in more-or-less user-provided code." 
616                                  "  If you run again with the global --debug flag, the source of" 
617                                  " the failing code should be in the logs/dcInfos (but make" 
618                                  " sure it's the source the error is reported for; with procs," 
619                                  " this might not be the case).") 
620                  raise base.ui.logOldExc(base.ValidationError("While %s in %s: %s"%( 
621                          destName, self.name, msg), destName.split()[-1], rowdict, 
622                          hint=hint)) 
 623   
625                  try: 
626                          locals = { 
627                                  "vars": vars, 
628                                  "result": {}, 
629                                  "_self": self, 
630                                  "targetTable": table 
631                          } 
632                          missingKeys = self.keySet-set(vars) 
633                          for k in missingKeys: 
634                                  vars[k] = self.defaults[k] 
635                          exec self.code in self.globals, locals 
636   
637                          self.rowsMade += 1 
638                          return locals["result"] 
639                  except base.ExecutiveAction:  
640                          raise 
641                  except base.ValidationError:    
642                          raise 
643                  except Exception as ex: 
644                          self._guessError(ex, locals["vars"], sys.exc_info()[2]) 
  645