Package gavo :: Package grammars :: Module mysqldumpgrammar
[frames | no frames]

Source Code for Module gavo.grammars.mysqldumpgrammar

  1  """ 
  2  A q'n'd grammar for reading MySQL dumps of moderate size. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  import re 
 12   
 13  from gavo import base 
 14  from gavo.grammars import common 
 15   
 16   
def guessFieldNames(dump):
    """returns the table name, the column names, and the end position of
    the column list for the first CREATE TABLE statement in a MySQL dump.

    dump is the complete dump as a single string.

    The result is a triple (tableName, names, curPos): tableName is the
    backquoted name from the CREATE TABLE statement, names is the list of
    column names in order of definition, and curPos is the index into dump
    just behind the last column definition that could be parsed.

    A base.DataError is raised if dump contains no CREATE TABLE statement
    of the expected form.
    """
    # Raw string: the pattern contains \(, which is an invalid escape
    # sequence in an ordinary string literal.
    mat = re.search(r"CREATE TABLE `([^`]*)` \(", dump)
    if not mat:
        raise base.DataError("No proper CREATE TABLE statement found")
    tableName = mat.group(1)
    curPos = mat.end()
    names = []

    # One column definition: `name` type flags[,].  Only the name is used;
    # type and flags are matched just to get past the definition.
    rowPat = re.compile(
        r"\s*`(?P<name>[^`]*)` (?P<type>[^ ]*) (?P<flags>[^,)]*),?")
    while True:
        mat = rowPat.match(dump, curPos)
        if not mat:
            # The first thing that does not look like a column definition
            # (keys, the closing paren, ...) ends the column list.
            # A sanity check would be great here.
            break
        names.append(mat.group("name"))
        curPos = mat.end()

    return tableName, names, curPos
39 40
def makeRecord(fieldNames, fieldValues):
    """returns a rawdict mapping fieldNames to the matching fieldValues.

    The literal NULL becomes None; any other value has leading and trailing
    single quotes stripped.  Names and values are paired positionally, and
    surplus items on either side are ignored.

    Ideally, this would also undo whatever escaping MySQL applies inside
    string literals.  It does not do that yet, as this code was not
    written against actual documentation of the dump format.
    """
    return {
        key: None if literal == "NULL" else literal.strip("'")
        for key, literal in zip(fieldNames, fieldValues)}
55 56
class RowIterator(common.FileRowIterator):
    """A row iterator yielding one rawdict per row inserted into the first
    table defined in a MySQL dump.
    """
    def _iterRows(self):
        """yields rawdicts for all rows in the INSERT statements for the
        first table created in the dump.

        The entire input file is read into memory.  A base.DataError is
        raised when a VALUES list is followed by something other than a
        semicolon, i.e., when a row could not be parsed.
        """
        dumpContents = self.inputFile.read()

        tableName, fieldNames, curPos = guessFieldNames(dumpContents)
        # The table name must match literally, so protect any regular
        # expression metacharacters it may contain.
        insertionPat = re.compile(
            "INSERT INTO `%s` VALUES " % re.escape(tableName))

        # TODO: handle embedded quotes ('')
        # A value is either a quoted string or an unquoted literal.  The
        # unquoted form is matched lazily: a greedy match would, for the
        # last column of the last row, run on to the last closing paren
        # before the next comma anywhere in the dump.
        valueRE = r"('[^']*'|[^',][^,]*?)"
        # A row's closing paren must be followed by a comma (more rows) or
        # a semicolon (end of statement); this keeps unquoted values that
        # themselves contain a closing paren intact.
        rowPat = re.compile(r"\s*\(%s\)\s*(?:,|(?=;))" % (
            ",".join(valueRE for _ in fieldNames)))

        while True:
            mat = insertionPat.search(dumpContents, curPos)
            if not mat:
                break
            curPos = mat.end()

            while True:
                mat = rowPat.match(dumpContents, curPos)
                if not mat:
                    # sanity check that we really reached the end of the
                    # VALUES statement
                    if not dumpContents[curPos:curPos+30].strip(
                            ).startswith(";"):
                        raise base.DataError(
                            "Expected VALUES-ending ; at char %s;"
                            " found %s instead." % (
                                curPos,
                                repr(dumpContents[curPos:curPos+30])))
                    break

                yield makeRecord(fieldNames, mat.groups())
                curPos = mat.end()
88 89 90
class MySQLDumpGrammar(common.Grammar, common.FileRowAttributes):
    """A grammar extracting rows from MySQL dump files.

    WARNING: This is a quick hack.  If you want or need it, please contact
    the authors.

    Right now, this is merely an ugly collection of regular expressions
    that makes many assumptions about the dump file and is therefore
    easily fooled.  Also, the whole dump file is read into memory.

    As grammar semantics do not allow anything else, this only ever
    iterates over a single table.  For now, that is always the first one
    in the dump, though making the table selectable is conceivable.

    Database NULLs come out of this grammar as Nones.

    To summarise: This may be good enough for simple cases.  For anything
    else, improve this or complain to the authors.
    """
    name_ = "mySQLDumpGrammar"
    rowIterator = RowIterator
112