1 """
A quick-and-dirty grammar for reading MySQL dumps of moderate size.
3 """
4
5
6
7
8
9
10
11 import re
12
13 from gavo import base
14 from gavo.grammars import common
15
16
def guessFieldNames(dump):
    """returns the table name, the column names, and the parse position
    for the first CREATE TABLE statement in a MySQL dump.

    dump is the text of the dump.  The result is a tuple
    (tableName, names, curPos): names is the list of column names in
    definition order, and curPos is the index into dump just behind the
    last column definition that was recognised, so callers can continue
    scanning from there.

    A base.DataError is raised if dump contains no CREATE TABLE statement
    of the form this function expects.
    """
    mat = re.search(r"CREATE TABLE `([^`]*)` \(", dump)
    if not mat:
        raise base.DataError("No proper CREATE TABLE statement found")
    tableName = mat.group(1)
    curPos = mat.end()
    names = []

    # One column definition: `name` type [flags][,].  The type is a single
    # whitespace-free token, and the flags part is optional so that a
    # flag-less column ("`c` text,") cannot run across the line end and
    # swallow the following definition.
    rowPat = re.compile(
        r"\s*`(?P<name>[^`]*)` (?P<type>\S*)(?: (?P<flags>[^,)]*))?,?")
    while True:
        mat = rowPat.match(dump, curPos)
        if not mat:
            # Whatever does not start with a backquoted name (keys, indices,
            # the closing paren) ends the column list.
            break
        names.append(mat.group("name"))
        curPos = mat.end()

    return tableName, names, curPos
39
40
def makeRecord(fieldNames, fieldValues):
    """returns a rawdict mapping fieldNames to the matching fieldValues.

    The literal NULL is turned into None; all other values have single
    quotes removed from either end.  No further MySQL unquoting (e.g., of
    backslash escapes) is attempted here.
    """
    def convert(literal):
        if literal == "NULL":
            return None
        return literal.strip("'")

    return dict(
        (name, convert(literal))
        for name, literal in zip(fieldNames, fieldValues))
55
56
# NOTE(review): the class and method header lines are missing from the file
# as received.  The class name is taken from its use as the grammar's
# rowIterator; the base class (something providing self.inputFile) and the
# method name are reconstructed assumptions -- confirm against the
# gavo.grammars.common API before merging.
class RowIterator(common.FileRowIterator):
    def _iterRows(self):
        """yields a rawdict for each row in the INSERT statements for the
        first table of the dump.

        The entire content of self.inputFile is read into memory.  Values
        are recognised by a simple regular expression: either a
        single-quoted string without embedded quotes or an unquoted run of
        non-comma characters.

        A base.DataError is raised if no CREATE TABLE statement is found
        or if a VALUES list is followed by something other than a
        semicolon.
        """
        dumpContents = self.inputFile.read()

        tableName, fieldNames, curPos = guessFieldNames(dumpContents)
        # The table name must be matched literally; without escaping, a
        # name containing a regex metacharacter (e.g., $) never matches.
        insertionPat = re.compile(
            "INSERT INTO `%s` VALUES " % re.escape(tableName))

        valueRE = "('[^']*'|[^',][^,]*)"
        # One parenthesised row: exactly one value per known column.
        rowPat = re.compile(
            r"\s*\(%s\),?" % ",".join([valueRE]*len(fieldNames)))

        while True:
            mat = insertionPat.search(dumpContents, curPos)
            if not mat:
                break
            curPos = mat.end()

            # Consume the rows of this INSERT statement one by one.
            while True:
                mat = rowPat.match(dumpContents, curPos)
                if not mat:
                    # No further row: the statement must end here, or the
                    # dump has something our patterns cannot cope with.
                    upcoming = dumpContents[curPos:curPos+30]
                    if not upcoming.strip().startswith(";"):
                        raise base.DataError("Expected VALUES-ending ; char %s;"
                            " found %s instead."%(
                            curPos, repr(dumpContents[curPos: curPos+30])))
                    break

                yield makeRecord(fieldNames, mat.groups())
                curPos = mat.end()
88
89
90
# NOTE(review): the class statement that this docstring and the two
# attributes below belong to is missing from the file as received; it has
# to be restored above this line, with the block indented under it.
"""A grammar pulling information from MySQL dump files.

WARNING: This is a quick hack.  If you want/need it, please contact the
authors.

At this point this is nothing but an ugly RE mess that makes lots of
assumptions about the dump file and is easily fooled.  Also, the entire
dump file will be pulled into memory.

Since grammar semantics cannot do anything else, this will always only
iterate over a single table.  This currently is fixed to the first,
but it's conceivable to make that selectable.

Database NULLs are already translated into Nones.

In other words: It might do for simple cases.  If you have something else,
improve this or complain to the authors.
"""
# presumably the element name this grammar is registered under -- confirm
name_ = "mySQLDumpGrammar"
# presumably the iterator class the grammar machinery uses to produce
# rows -- confirm
rowIterator = RowIterator
112