1 """
2 A grammar supporting C language boosters (or possibly other mechanisms
3 bypassing internal dbtable).
4
5 These actually bypass most of our machinery and should only be used if
6 performance is paramount. Otherwise, CustomGrammars play much nicer with
7 the rest of the DC software.
8
9 Currently, only one kind of DirectGrammar is supported: C boosters.
10 """
11
12
13
14
15
16
17
18 from __future__ import print_function
19
20 import os
21 import pkg_resources
22 import re
23 import shutil
24 import subprocess
25
26 from gavo import base
27 from gavo import utils
28 from gavo import rscdef
29 from gavo.grammars import common
30
31
33 """is a wrapper for an import booster written in C using the DC booster
34 infrastructure.
35
36 Warning: If you change the booster description, you'll need to touch
37 the source to recompile.
38 """
39 silence_for_test = False
40
def __init__(self, srcName, dataDesc, recordSize=4000, gzippedInput=False,
        autoNull=None, preFilter=None, ignoreBadRecords=False,
        customFlags=""):
    """sets up a booster for the C source srcName and makes sure its
    binary is available.

    srcName is interpreted relative to the resource directory found
    in dataDesc.rd.resdir.  The binary is expected in the bin
    subdirectory of that directory; its name is the source's base name
    (without extension), a dash, and the configured platform.

    All remaining arguments are stored as attributes of the same name.
    """
    resdir = dataDesc.rd.resdir
    bindir = os.path.join(resdir, "bin")
    sourceStem = os.path.splitext(os.path.basename(srcName))[0]

    self.dataDesc = dataDesc
    self.recordSize = recordSize
    self.resdir = resdir
    self.srcName = os.path.join(resdir, srcName)
    self.autoNull = autoNull
    self.preFilter = preFilter
    self.ignoreBadRecords = ignoreBadRecords
    self.gzippedInput = gzippedInput
    self.customFlags = customFlags
    self.bindir = bindir
    self.binaryName = os.path.join(bindir,
        sourceStem+"-"+base.getConfig("platform"))

    # this may build the binary if it is missing or outdated
    self._ensureBinary()
57
def getResource(src, dest):
    """copies the resource src from the gavo package to the file dest
    in the directory wd.

    wd is taken from the enclosing scope.  Both streams are closed
    even if the copy fails.
    """
    inF = pkg_resources.resource_stream('gavo', src)
    try:
        # resource_stream hands out bytes, so the target must be
        # opened in binary mode.
        outF = open(os.path.join(wd, dest), "wb")
        try:
            shutil.copyfileobj(inF, outF)
        finally:
            outF.close()
    finally:
        inF.close()
65 getResource("resources/src/boosterskel.c", "boosterskel.c")
66 getResource("resources/src/boosterskel.h", "boosterskel.h")
67 shutil.copyfile(self.srcName, os.path.join(wd, "func.c"))
68
69
70
71 mat = re.search("(?m)^#define QUERY_N_PARS\s+(\d+)",
72 open(self.srcName).read())
73 if not mat:
74 raise base.ReportableError("Booster function doesn't define QUERY_N_PARS")
75 query_n_pars = mat.group(1)
76
77 f = open(os.path.join(wd, "Makefile"), "w")
78
79 if self.dataDesc.grammar.type=="fits":
80 f.write("LDFLAGS += -lcfitsio\n")
81
82 f.write("LDFLAGS += -lm\n"
83 "CFLAGS += -Wall -DINPUT_LINE_MAX=%d -DQUERY_N_PARS=%s\n"%(
84 self.recordSize, query_n_pars))
85 if self.autoNull:
86 f.write("CFLAGS += -DAUTO_NULL='%s'\n"%self.autoNull.replace(
87 "\\", "\\\\"))
88 if self.ignoreBadRecords:
89 f.write("CFLAGS += -DIGNORE_BAD_RECORDS\n")
90 f.write("CFLAGS += -g\n")
91
92 f.write("booster: boosterskel.c func.c\n"
93 "\t$(CC) $(CFLAGS) %s -o booster $^ $(LDFLAGS)\n"%self.customFlags)
94 f.close()
95
97 callArgs = {}
98 if self.silence_for_test:
99
100 callArgs["stdout"] = open("/dev/null", "w")
101 if subprocess.call("make", **callArgs):
102 raise base.ReportableError("Booster build failed, messages above.")
103
105 shutil.copyfile("booster", self.binaryName)
106 os.chmod(self.binaryName, 0775)
107
def _ensureBinary(self):
    """makes sure the booster binary exists and is up-to-date.

    The directory self.bindir is created if it is missing.  Nothing
    else happens if the binary's modification time is later than the
    source's.  Otherwise, the binary is rebuilt in a sandbox.

    A base.ReportableError is raised if the booster source is missing.
    """
    if not os.path.exists(self.bindir):
        os.makedirs(self.bindir)

    try:
        upToDate = (os.path.getmtime(self.srcName)
            <os.path.getmtime(self.binaryName))
    except os.error:
        # source or binary missing; the checks below sort out which
        upToDate = False
    if upToDate:
        return

    if not os.path.exists(self.srcName):
        raise base.ReportableError("Booster source does not exist."
            " You will not be able to import the enclosing data.",
            hint="Use gavo mkboost to create a skeleton for the booster.")

    utils.runInSandbox(self._copySources, self._build, self._retrieveBinary)
124
126 """returns a pipe you can read the booster's output from.
127
128 As a side effect, it also sets the attribute self.pipe. We need
129 this to be able to retrieve the command status below.
130 """
131 if self.preFilter:
132 shellCommand = "%s '%s' | %s"%(self.preFilter, argName, self.binaryName)
133 elif self.gzippedInput:
134 shellCommand = "zcat '%s' | %s"%(argName, self.binaryName)
135 else:
136 shellCommand = "%s '%s'"%(self.binaryName, argName)
137
138 pipeArgs = {"shell": True, "stdout": subprocess.PIPE}
139 if self.silence_for_test:
140
141 pipeArgs["stderr"] = open("/dev/null", "w")
142 self.pipe = subprocess.Popen(shellCommand, **pipeArgs)
143 return self.pipe.stdout
144
146 return self.pipe.wait()
147
148
150 """A user-defined external grammar.
151
152 See the `separate document`_ on user-defined code on more on direct grammars.
153
154 You will almost always use these in connection with C code generated
155 by ``dachs mkboost``.
156
157 .. _separate document: :dachsdoc:booster.html
158 """
159 name_ = "directGrammar"
160
161 _cbooster = rscdef.ResdirRelativeAttribute("cBooster",
162 default=base.Undefined,
163 description="resdir-relative path to the booster C source.",
164 copyable=True)
165
166 _gzippedInput = base.BooleanAttribute("gzippedInput", default=False,
167 description="Pipe gzip before booster? (will not work for FITS)",
168 copyable=True)
169
170 _autoNull = base.UnicodeAttribute("autoNull", default=None,
171 description="Use this string as general NULL value (when reading"
172 " from plain text).",
173 copyable=True)
174
175 _ignoreBadRecords = base.BooleanAttribute("ignoreBadRecords",
176 default=False, description="Let booster ignore invalid records?",
177 copyable=True)
178
179 _recordSize = base.IntAttribute("recordSize", default=4000,
180 description="For bin boosters, read this many bytes to make"
181 " up a record; for line-based boosters, this is the maximum"
182 " length of an input line.",
183 copyable=True)
184
185 _preFilter = base.UnicodeAttribute("preFilter", default=None,
186 description="Pipe input through this program before handing it to"
187 " the booster; this string is shell-expanded (will not work for FITS).",
188 copyable=True)
189
190 _customFlags = base.UnicodeAttribute("customFlags", default="",
191 description="Pass these flags to the C compiler when building the"
192 " booster.",
193 copyable=True)
194
195 _type = base.EnumeratedUnicodeAttribute("type", default="col",
196 validValues=["col", "bin", "fits", "split"],
197 description="Make code for a booster parsing by column indices (col),"
198 " by splitting along separators (split), by reading fixed-length"
199 " binary records (bin), for from FITS binary tables (fits).",
200 copyable=True)
201
202 _splitChar = base.UnicodeAttribute("splitChar", default="|",
203 description="For split boosters, use this as the separator.",
204 copyable=True)
205
206 _ext = base.IntAttribute("extension", default=1,
207 description="For FITS table boosters, get the table from this extension.",
208 copyable=True)
209
210 _mapKeys = base.StructAttribute("mapKeys", childFactory=common.MapKeys,
211 default=None, copyable=True,
212 description="For a FITS booster, map DB table column names"
213 " to FITS column names (e.g., if the FITS table name flx is to"
214 " end up in the DB column flux, say flux:flx).")
215
216 _rd = rscdef.RDAttribute()
217
218 isDispatching = False
219
221 self._validateNext(DirectGrammar)
222 if self.type=='bin':
223 if not self.recordSize:
224 raise base.StructureError("DirectGrammars reading from binary need"
225 " a recordSize attribute")
226 if self.mapKeys is not None:
227 if self.type!="fits":
228 raise base.StructureError("mapKeys is only allowed for FITS"
229 " boosters.")
230
232 if self.type=="fits":
233 if self.mapKeys:
234 self.keyMap = self.mapKeys.maps
235 else:
236 self.keyMap = {}
237
239 return CBooster(self.cBooster, self.parent,
240 gzippedInput=self.gzippedInput,
241 preFilter=self.preFilter,
242 autoNull=self.autoNull,
243 ignoreBadRecords=self.ignoreBadRecords,
244 customFlags=self.customFlags)
245
246 - def parse(self, sourceToken, targetData=None):
260 return copyIn
261
262
263
264
265
266 import sys
267
270
271
272
273 COMMON_MAIN_HEADER = """
274 /* Common main header */
275 void createDumpfile(int argc, char **argv)
276 {
277 FILE *destination=stdout;
278 char inputLine[INPUT_LINE_MAX];
279 int recCount = 0;
280 /* /Common main header */
281 """
282
283 NONSEEK_MAIN_INTRO = """
284 FILE *inF;
285
286 /* seekable main intro */
287 if (argc>2) {
288 die(USAGE);
289 }
290 if (argc==2) {
291 if (!(inF = fopen(argv[1], "r"))) {
292 die(strerror(errno));
293 }
294 } else {
295 inF = stdin;
296 }
297 /* /seekable main intro */
298 """
299
300 COMMON_MAIN_INTRO = """
301 /* common main intro */
302 writeHeader(destination);
303 /* /common main intro */
304 """
305
306
307 LOOP_BODY_INTRO = """
308 Field *tuple;
309 context = inputLine;
310 if (!setjmp(ignoreRecord)) {
311 """
312
313
314 LOOP_BODY_FOOT = """
315 if (!tuple) {
316 handleBadRecord("Bad input line at record %d", recCount);
317 }
318 writeTuple(tuple, QUERY_N_PARS, destination);
319 context = NULL;
320 recCount ++;
321 if (!(recCount%1000)) {
322 fprintf(stderr, "%08d\\r", recCount);
323 fflush(stderr);
324 }
325 }
326 """
327
328
329 COMMON_MAIN_FOOT = """
330 writeEndMarker(destination);
331 fprintf(stderr, "%08d records done.\\n", recCount);
332 }
333 """
334
335
def _getMakeMacro(item):
    """returns the name of the C maker macro matching item.type.

    item is any object with a type attribute.  For types without a
    known macro, the placeholder MAKE_somethingelse is returned.
    """
    macroForType = {
        "integer": "MAKE_INT",
        "smallint": "MAKE_SHORT",
        "double precision": "MAKE_DOUBLE",
        "real": "MAKE_FLOAT",
        "char": "MAKE_CHAR_NULL",
        "bytea": "MAKE_BYTE",
        "text": "MAKE_TEXT",
    }
    return macroForType.get(item.type, "MAKE_somethingelse")
352
353
355 """a base class for code generators.
356
357 You must at least override getItemParser.
358 """
361
363 """returns a sequence of C lines for code between an item parser.
364 """
365 return []
366
368 """returns code that parses item (a Column instance) at column index
369 index.
370
371 You're free to ignore index.
372 """
373 return []
374
376 """returns a list of lines that make up the top of the booster.
377 """
378 return [
379 '#include <stdio.h>',
380 '#include <math.h>',
381 '#include <string.h>',
382 '#include <errno.h>',
383 '#include "boosterskel.h"',
384 '',
385 '#define USAGE "Usage: don\'t."',]
386
388 """returns the prototype of the getTuple function.
389 """
390 return "Field *getTuple(char *inputLine, int recNo)"
391
400
401
403 """a base class for code generators for reading line-based text files.
404 """
422
423
426 t = item.type
427 if "smallint" in t:
428 func = "parseShort"
429 elif "int" in t:
430 func = "parseInt"
431 elif t in ["real", "float"]:
432 func = "parseFloat"
433 elif "double" in t:
434 func = "parseDouble"
435 elif "char" in t:
436 func = "parseString"
437 elif "bool" in t:
438 func = "parseBlankBoolean"
439 else:
440 func = "parseWhatever"
441 return ["%s(inputLine, F(%s), start, len);"%(func, getNameForItem(item))]
442
444 """returns the prototype of the getTuple function.
445 """
446 return "Field *getTuple(char *inputLine, int recNo)"
447
448
450 """a code generator for parsing files with lines and separators.
451 """
455
457 return _LineBasedCodeGenerator.getPreamble(self)+[
458 "/* delete the next line for POSIX strtok */",
459 "#define strtok strtok_u"]
460
462 return _LineBasedCodeGenerator.getSetupCode(self)+[
463 'char *curCont;',
464 'curCont = strtok(inputLine, "%s");'%self.splitChar]
465
467 t = item.type
468 fi = getNameForItem(item)
469 if t=="text":
470 parse = ["F(%s)->type = VAL_TEXT;"%fi,
471 "F(%s)->length = strlen(curCont);"%fi,
472 "F(%s)->val.c_ptr = curCont;"%fi,]
473 else:
474 if t=="smallint":
475 cType = "VAL_SHORT"
476 elif t=="bigint":
477 cType = "VAL_BIGINT"
478 elif "int" in t:
479 cType = "VAL_INT"
480 elif t in ["real", "float"]:
481 cType = "VAL_FLOAT"
482 elif "double" in t:
483 cType = "VAL_DOUBLE"
484 elif "char"==t:
485 cType = "VAL_CHAR"
486 elif "char" in t:
487 cType = "VAL_TEXT"
488 elif "bool" in t:
489 cType = "VAL_BOOL"
490 else:
491 cType = "###No appropriate type###"
492 parse = ["fieldscanf(curCont, %s, %s);"%(fi, cType)]
493 parse.append('curCont = strtok(NULL, "%s");'%self.splitChar)
494 return parse
495
496
498 """a code generator for reading fixed-length binary records.
499 """
501 t = item.type
502 if t=="integer":
503 pline = "%s(%s, *(int32_t*)(inputLine+));"
504 elif t=="smallint":
505 pline = "%s(%s, *(int16_t*)(inputLine+ ));"
506 elif t=="double precision":
507 pline = "%s(%s, *(double*)(inputLine+ ));"
508 elif t=="real":
509 pline = "%s(%s, *(float*)(inputLine+ ));"
510 elif t=="char":
511 pline = "%s(%s, *(char*)(inputLine+ ), '<nil>');"
512 elif t=="bytea":
513 pline = "%s(%s, *(double*)(inputLine+ ), '<nil>');"
514 else:
515 pline = "%s %s"
516 return ["/* %s (%s) */"%(item.description, t),
517 pline%(_getMakeMacro(item), getNameForItem(item))]
518
520 return _CodeGenerator.getPreamble(self)+[
521 "#define FIXED_RECORD_SIZE %d"%self.grammar.recordSize]
522
543
544
546 """A code generator for reading from FITS binary tables.
547 """
548 fitsTypes = {
549 "B": ("TBYTE", "char"),
550 "A": ("TSTRING", "char *"),
551 "I": ("TSHORT", "short"),
552 "J": ("TLONG", "long"),
553 "K": ("TLONGLONG", "long long"),
554 "E": ("TFLOAT", "float"),
555 "L": ("TLOGICAL", "unsigned char"),
556 "D": ("TDOUBLE", "double")}
557 makers = {
558 "bigint": "MAKE_BIGINT",
559 "smallint": "MAKE_SHORT",
560 "bytea": "MAKE_BYTE",
561 "text": "MAKE_TEXT",
562 "integer": "MAKE_INT",
563 "real": "MAKE_FLOAT",
564 "double precision": "MAKE_DOUBLE",
565 }
566
568 from gavo.utils import pyfits
569 _CodeGenerator.__init__(self, grammar, tableDef)
570
571 if self.grammar.parent.sources is None:
572 raise base.StructureError("Cannot make FITS bintable booster without"
573 " a sources element on the embedding data.")
574
575 self.forExtension = grammar.extension
576
577 try:
578 self.fitsTable = pyfits.open(
579 self.grammar.parent.sources.iterSources().next())[self.forExtension]
580 except StopIteration:
581 raise base.StructureError("Buliding a FITS bintable booster requires"
582 " at least one matching source.")
583
584 self._computeMatches()
585
def _computeMatches(self):
    """adds .colForFITSIndex and .fitsIndexForColName attributes.

    colForFITSIndex maps the index of each column of self.fitsTable
    to the table column matching it, or to None if there is no such
    column.  fitsIndexForColName maps lowercased table column names
    to the index of the matching FITS column; table columns without
    a FITS counterpart have no entry there.

    FITS column names are first translated through the grammar's
    keyMap; the comparison with table column names is then done
    case-insensitively.

    If several FITS columns match the same table column, the last
    one in FITS column order ends up in fitsIndexForColName.

    A base.StructureError is raised if table column names collide
    after case folding.
    """
    tableColumns = dict((col.name.lower(), col)
        for col in self.tableDef)
    if len(tableColumns)!=len(self.tableDef.columns):
        raise base.StructureError("Table unsuitable for FITS boosting as"
            " column names identical after case folding are present.",
            hint="Use mapKeys to adapt FITS table names to resolve"
            " the ambiguity")

    self.colForFITSIndex = {}
    self.fitsIndexForColName = {}
    # a single pass in FITS column order fills both mappings, so the
    # result does not depend on dict iteration order.
    for index, fitsCol in enumerate(self.fitsTable.columns):
        columnName = self.grammar.keyMap.get(
            fitsCol.name, fitsCol.name).lower()
        col = tableColumns.get(columnName)
        self.colForFITSIndex[index] = col
        if col is not None:
            self.fitsIndexForColName[col.name.lower()] = index
614
616 try:
617 fitsIndex = self.fitsIndexForColName[item.name.lower()]
618 fitsCol = self.fitsTable.columns[fitsIndex]
619 castTo = self.fitsTypes[
620 self._parseFITSFormat(fitsCol.format, fitsCol.name)[1]
621 ][1]
622
623 return [
624 "/* %s (%s) */"%(item.description, item.type),
625 "if (nulls[%d][rowIndex]) {"%fitsIndex,
626 " MAKE_NULL(%s);"%getNameForItem(item),
627 "} else {",
628 " %s(%s, ((%s*)(data[%d]))[rowIndex]);"%(
629 self.makers[item.type],
630 getNameForItem(item),
631 castTo,
632 fitsIndex),
633 "}",]
634
635 except KeyError:
636
637 return ["MAKE_NULL(%s); /* %s(%s, FILL IN VALUE); */"%(
638 getNameForItem(item),
639 _getMakeMacro(item),
640 getNameForItem(item))]
641
643 return _CodeGenerator.getPreamble(self)+[
644 "#include <fitsio.h>",
645 "#include <assert.h>",
646 "#define FITSCATCH(x) if (x) {fatalFitsError(status);}",
647 "void fatalFitsError(int status) {",
648 " if (status==0) {",
649 " return;",
650 " }",
651 " fits_report_error(stderr, status);",
652 " abort();",
653 "}",
654 ]
655
657 return "Field *getTuple(void *data[], char *nulls[], int rowIndex)"
658
673
675 """returns a C initializer for an array of FITSColDescs.
676 """
677 res = []
678 for index, fcd in enumerate(self.fitsTable.columns):
679 col = self.colForFITSIndex[index]
680 if col is None:
681
682
683 res.append("{.cSize = 0, .fitsType = 0, .index=0, .arraysize=0}")
684 continue
685
686 length, typecode = self._parseFITSFormat(fcd.format, fcd.name)
687
688 if typecode=="A":
689
690
691 res.append(
692 "{.cSize = %d, .fitsType = TSTRING, .index=%d, .arraysize=1}"%(
693 length, index+1))
694
695 else:
696 res.append(
697 "{.cSize = sizeof(%s), .fitsType = %s, .index=%d, .arraysize=%d}"%(
698 self.fitsTypes[typecode][1],
699 self.fitsTypes[typecode][0],
700 index+1,
701 length))
702
703 return res
704
783
784
794
795
808
809
822
823
def indent(stringList, indentChar):
    """returns a new list with indentChar prepended to each string in
    stringList.
    """
    indented = []
    for line in stringList:
        indented.append(indentChar+line)
    return indented
826
827
838
839
853
854
def parseCmdLine():
    """returns the parsed options and the single positional argument
    from the command line.

    If the number of positional arguments is not exactly one, the
    usage message is printed and the program exits with status 1.
    """
    from optparse import OptionParser

    parser = OptionParser(usage = "%prog <id-of-directGrammar>")
    opts, args = parser.parse_args()
    if len(args)==1:
        return opts, args[0]

    parser.print_help()
    sys.exit(1)
863
864
871
872
874 from gavo import rscdesc
875 try:
876 opts, grammarId = parseCmdLine()
877 print(getSource(grammarId))
878 except SystemExit as msg:
879 sys.exit(msg.code)
880