1 """
2 Creation of resource descriptors
3
4 The first part of this is an early experiment to automatically create
5 resource descriptors from structured data representations.
6
7 While parts of this may be recoverable for smarter gavo start functionality,
8 doing this so that the result is actually useful is hard.
9
10 Instead, the new gavo start functionality just fetches one of a few
11 commented RD templates, fills out a thing or two and leaves the rest to
12 the operator.
13 """
14
15
16
17
18
19
20
21 from __future__ import print_function
22
23 import datetime
24 import os
25 import re
26 import sys
27
28 import pkg_resources
29
30 from gavo import base
31 from gavo import grammars
32 from gavo import rscdef
33 from gavo import votable
34 from gavo import utils
35 from gavo.base import macros
36 from gavo.grammars import fitsprodgrammar
37 from gavo.grammars import fitstablegrammar
38 from gavo.formats import votableread
39 from gavo.utils import ElementTree
40 from gavo.utils import fitstools
41
42 MS = base.makeStruct
43
44
45
46
# Header keywords that are never turned into table columns.
ignoredFITSHeaders = set([
    "COMMENT", "SIMPLE", "BITPIX", "EXTEND", "NEXTEND",
    "SOFTNAME", "SOFTVERS", "SOFTDATE", "SOFTAUTH", "SOFTINST",
    "HISTORY", "BZERO", "BSCALE", "DATAMIN", "DATAMAX"])

# Keywords treated as WCS-related.  The pattern is used with match(), so
# it is only anchored at the start of the keyword.  Raw strings keep the
# \d escapes from being interpreted (and warned about) by the Python
# string parser.
wcsKey = re.compile(
    r"CD.*|CRVAL.*|CDELT.*|NAXIS.*|CRPIX.*|CTYPE.*|CUNIT.*"
    r"|CROTA.*|RADECSYS|AP?_\d_\d|BP?_\d_\d|LATPOLE|LONPOLE")


def isIgnoredKeyword(kw):
    """returns True if kw should not be translated or put into the table.

    This is important for all WCS keywords when you want to compute
    SIAP bboxes; these keywords must not be translated.

    kw is a FITS header keyword (a string).  The result is always a
    proper boolean, never a regular expression match object.
    """
    return kw in ignoredFITSHeaders or wcsKey.match(kw) is not None
61
62
def structToETree(aStruct):
    """returns an ElementTree element for the copyable content of aStruct.

    The tree is built by replaying the (type, name, value) events
    aStruct.iterEvents() yields: "start" opens a child element, "end"
    closes the current one, "value" sets either the text content (for
    the name "content_") or an attribute.

    Note that due to manipulations at parse time and non-copyable content,
    this will, in general, not reproduce the original XML trees.

    Any exception is reported through base.ui.notifyError together with
    the offending event and then re-raised.
    """
    openNodes = [ElementTree.Element(aStruct.name_)]
    for evType, elName, value in aStruct.iterEvents():
        try:
            if evType == "start":
                openNodes.append(
                    ElementTree.SubElement(openNodes[-1], elName))
            elif evType == "end":
                openNodes.pop()
            elif evType != "value":
                raise base.Error("Invalid struct event: %s"%evType)
            elif value is None or value is base.NotGiven:
                # nothing to serialise for unset values
                pass
            elif elName == "content_":
                openNodes[-1].text = value
            else:
                if not isinstance(value, basestring):
                    # presumably a reference to another element; it is
                    # written out through its id -- TODO confirm
                    value = value.id
                openNodes[-1].set(elName, value)
        except:
            base.ui.notifyError("Badness occurred in element %s, event %s,"
                " value %s\n"%(elName, evType, value))
            raise
    return openNodes[-1]
94
95
# Maps the type letter of a FITS table column format to a DaCHS
# (SQL) column type.
FT_TYPE_MAP = {
    "B": "bytea",
    "A": "text",
    "I": "smallint",
    "J": "integer",
    "K": "bigint",
    "E": "real",
    "D": "double precision",
}


def getTypeForFTColumn(fitsCol):
    """returns a DaCHS type for FITS table column.

    fitsCol needs format and name attributes; format is expected to
    consist of an optional repeat count followed by a single type letter.

    This currently ignores array sizes and such.  Well, people can always
    fix things manually.

    A base.ReportableError is raised for formats that do not have this
    shape or whose type letter is not in FT_TYPE_MAP.
    """
    # raw string: \d must reach the regex engine untouched
    mat = re.match(r"(\d*)(.)$", fitsCol.format)
    if mat is None or mat.group(2) not in FT_TYPE_MAP:
        raise base.ReportableError("FITS type code '%s' of %s not handled"
            " by gavo mkrd; add handling if you can."%(
                fitsCol.format, fitsCol.name))
    return FT_TYPE_MAP[mat.group(2)]
117
118
def makeTableFromFT(rd, srcName, opts):
    """returns a TableDef with one column per column of the FITS table
    in the first extension of the file srcName.

    Column names are produced by a base.VOTNameMaker.  Where the
    generated name differs from the FITS column name, the pair is
    recorded in the nameMaps attribute of the returned table (generated
    name -> FITS name) so a later step can build the matching maps.

    The table id is taken from opts.tableName; ucds are left empty and
    descriptions are set to a FILL IN placeholder.
    """
    from gavo.utils import pyfits

    cols, nameMaps = [], {}
    nameMaker = base.VOTNameMaker()

    hdus = pyfits.open(srcName)
    try:
        for fitsCol in hdus[1].columns:
            destName = nameMaker.makeName(fitsCol)
            if destName != fitsCol.name:
                nameMaps[destName] = fitsCol.name
            cols.append(MS(rscdef.Column,
                type=getTypeForFTColumn(fitsCol),
                name=destName,
                unit=fitsCol.unit,
                ucd="",
                description="FILL IN"))
    finally:
        # only column metadata is needed; do not keep the file open
        hdus.close()

    table = rscdef.TableDef(rd, id=opts.tableName, onDisk=True,
        columns=cols)
    table.nameMaps = nameMaps
    return table
138
139
def makeDataForFT(rd, srcName, opts):
    """returns a DataDescriptor feeding FITS tables into rd's first table.

    The rowmaker maps identically except for the columns listed in the
    table's nameMaps attribute; for these, explicit maps pulling the
    value from the original FITS column name are generated.
    """
    targetTable = rd.tables[0]

    rmkMaps = []
    for destName, fitsName in targetTable.nameMaps.iteritems():
        rmkMaps.append(MS(rscdef.MapRule,
            key=destName, content_="vars['%s']"%fitsName))

    grammar = MS(fitstablegrammar.FITSTableGrammar)
    sources = MS(rscdef.SourceSpec, pattern=["*.fits"], recurse=True)
    # NOTE(review): rmkMaps is itself a list and gets wrapped in another
    # list here -- kept as found; confirm this is what makeStruct expects.
    rowmaker = MS(rscdef.RowmakerDef,
        id="gen_rmk", idmaps="*", maps=[rmkMaps])
    make = MS(rscdef.Make, table=targetTable, rowmaker=rowmaker)

    return MS(rscdef.DataDescriptor,
        id="import",
        sources=sources,
        grammar=grammar,
        rowmakers=[rowmaker],
        make=make)
160
161
def makeTableFromFITS(rd, srcName, opts):
    """returns a TableDef with one column per usable header card in the
    primary header of the FITS file srcName.

    Cards for which isIgnoredKeyword is true are skipped.  Column names
    are the lowercased keywords with everything but a-z replaced by
    underscores; the column type is guessed from the python type of the
    card value, the description is the card comment.

    As a side effect, the keyword/column name pairs are stored as a
    "keyword:column" list in rd's mapKeys property.
    """
    table = rscdef.TableDef(rd, id=opts.tableName, onDisk=True)
    keyMappings = []

    for card in fitstools.openFits(srcName)[0].header.cards:
        if isIgnoredKeyword(card.keyword):
            continue
        colName = re.sub("[^a-z]", "_", card.keyword.lower())
        if not colName:
            continue

        cardValue = card.value
        if isinstance(cardValue, basestring):
            colType = "text"
        elif isinstance(cardValue, int):
            colType = "integer"
        else:
            colType = "real"

        table.feedObject("column", MS(rscdef.Column,
            name=colName, unit="FILLIN", ucd="FILLIN", type=colType,
            description=card.comment))
        keyMappings.append((colName, card.keyword))

    rd.setProperty("mapKeys", ", ".join(
        "%s:%s"%(keyword, column) for column, keyword in keyMappings))
    return table.finishElement()
186
187
def makeDataForFITS(rd, srcName, opts):
    """returns a DataDescriptor importing FITS headers into rd's first table.

    The grammar is a FITSProdGrammar with qnd enabled, a
    //products#define row filter bound to the target table, and the key
    mappings found in rd's mapKeys property.  Sources are all *.fits
    files, searched recursively; rows are mapped identically.
    """
    targetTable = rd.tables[0]
    dd = rscdef.DataDescriptor(rd, id="import_"+opts.tableName)

    grammar = fitsprodgrammar.FITSProdGrammar(dd)
    grammar.feedObject("qnd", True)

    # owner and embargo are placeholders for the operator to fill in
    rowfilterSource = (
        '\n<rowfilter procDef="//products#define">'
        '\n<bind key="table">"%s"</bind>'
        '\n<bind key="owner">"FILLIN"</bind>'
        '\n<bind key="embargo">"FILLIN"</bind>'
        '\n</rowfilter>')%(targetTable.getQName())
    grammar.feedObject("rowfilter",
        base.parseFromString(grammars.Rowfilter, rowfilterSource))

    grammar.feedObject("mapKeys",
        MS(grammars.MapKeys, content_=rd.getProperty("mapKeys")))
    grammar.finishElement()
    dd.grammar = grammar

    dd.feedObject("sources",
        MS(rscdef.SourceSpec, pattern=["*.fits"], recurse=True))
    dd.feedObject("rowmaker",
        MS(rscdef.RowmakerDef, idmaps="*", id="gen_rmk"))
    dd.feedObject("make",
        MS(rscdef.Make, table=targetTable, rowmaker="gen_rmk"))
    return dd
209
210
215
216
233
234
# Dispatch tables keyed by the source format given in opts.srcForm.  All
# entries are called as maker(rd, srcName, opts).

# format -> function producing the table definition
tableMakers = dict(
    FITS=makeTableFromFITS,
    VOT=makeTableFromVOTable,
    FT=makeTableFromFT,
)

# format -> function producing the data descriptor filling that table
dataMakers = dict(
    FITS=makeDataForFITS,
    VOT=makeDataForVOTable,
    FT=makeDataForFT,
)
246
247
def makeRD(args, opts):
    """returns a finished RD generated from the sample file args[0].

    Schema and resdir come from opts.resdir, the kind of sample from
    opts.srcForm.  The RD receives a set of placeholder meta items for
    the operator to edit, plus one table and one data element produced
    by the makers registered for the source format.
    """
    from gavo import rscdesc

    rd = rscdesc.RD(None,
        schema=os.path.basename(opts.resdir),
        resdir=opts.resdir)

    placeholderMeta = (
        ("title", "FILL-IN"),
        ("creationDate", utils.formatISODT(datetime.datetime.utcnow())),
        ("description", "FILL-IN a long text (and maybe do format='plain'"
            " or even format='rst'"),
        ("copyright", "Free to use."),
        ("creator.name", "Author, S."),
        ("creator", ""),
        ("creator.name", "Other, A."),
        ("subject", "One Keyword"),
        ("subject", "Two Words"),
        ("content.type", "Catalog"),
        ("coverage.waveband", "Optical"),
        ("coverage.profile", "AllSky ICRS"),
    )
    for metaKey, metaValue in placeholderMeta:
        rd.addMeta(metaKey, metaValue)

    # the table has to be fed first: the data makers read rd.tables[0]
    makeTable = tableMakers[opts.srcForm]
    rd.feedObject("table", makeTable(rd, args[0], opts))
    makeData = dataMakers[opts.srcForm]
    rd.feedObject("data", makeData(rd, args[0], opts))
    return rd.finishElement()
272
273
def indent(elem, level=0):
    """adds whitespace to the ElementTree element elem (in place) such
    that it serialises with one element per line, indented by one tab
    per nesting level.

    Text and tails that already contain non-whitespace are left alone.
    level is the nesting depth of elem; nothing is returned.
    """
    def isBlank(s):
        return not s or not s.strip()

    ownPad = "\n" + "\t"*level

    if not len(elem):
        # leaf: only the tail matters, and the root gets none
        if level and isBlank(elem.tail):
            elem.tail = ownPad
        return

    if isBlank(elem.text):
        elem.text = ownPad + "\t"
    for child in elem:
        indent(child, level+1)
    # child now is the last child; its tail must lead back to elem's own
    # level so elem's closing tag lines up with its opening tag.
    if isBlank(child.tail):
        child.tail = ownPad
    if isBlank(elem.tail):
        elem.tail = ownPad
290
291
295
296
def parseCommandLine():
    """returns (opts, args) for the deprecated mkrd command line.

    Exactly one positional argument (the sample file) is required.
    Unless given with -f, opts.srcForm is guessed from the sample's
    extension: .xml and .vot mean VOT, .fits means FITS.  On a wrong
    argument count or an extension that cannot be guessed, help is
    written to stderr and the program exits with status 1.
    """
    from optparse import OptionParser

    parser = OptionParser(usage = "%prog [options] <sample>"
        " DEPRECATED, use dachs start instead")
    parser.add_option("-f", "--format",
        dest="srcForm", default=None, action="store", type="str",
        help="Input file format: "
            " FITS, VOT or FT (FITS table)"
            " Default: Detect from file name")
    parser.add_option("-t", "--table-name",
        dest="tableName", default="main", action="store", type="str",
        help="Name of the generated table")
    parser.add_option("-r", "--resdir",
        dest="resdir", default=os.getcwd(), action="store", type="str",
        help="Override resdir (and schema)")

    opts, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    if opts.srcForm:
        return opts, args

    # no explicit format: fall back to the file name extension
    formatForExtension = {".xml": "VOT", ".vot": "VOT", ".fits": "FITS"}
    extension = os.path.splitext(args[0])[1].lower()
    guessed = formatForExtension.get(extension)
    if guessed is None:
        sys.stderr.write("Cannot guess format, use -f option: %s\n"%args[0])
        parser.print_help(file=sys.stderr)
        sys.exit(1)
    opts.srcForm = guessed
    return opts, args
324
325
327
def main():
    """entry point of the deprecated mkrd command: builds an RD from the
    sample named on the command line and hands its XML tree to
    writePrettyPrintedXML.
    """
    # structToETree only serialises copyable content, so the attributes
    # that must show up in the generated RD are marked copyable here.
    for attDef in (
            rscdef.TableDef._id,
            rscdef.TableDef._onDisk,
            rscdef.DataDescriptor._id):
        attDef.copyable = True

    opts, args = parseCommandLine()
    rd = makeRD(args, opts)
    rd._metaAttr.copyable = True
    writePrettyPrintedXML(structToETree(rd))
335
336
337
338
def _listProtocols():
    """writes to stdout a list of protocols we can generate RD templates
    for.

    The templates are the template-<protocol>.rd_ files in resources/src/
    of the gavo package.  A template is only listed if a
    ``\\tpldesc{Description}`` marker is found within the first 1000
    bytes of the file; the marker's content is shown as the description.
    """
    templateDir = "resources/src/"
    for fName in pkg_resources.resource_listdir("gavo", templateDir):
        nameMatch = re.match(r"template-(.*)\.rd_$", fName)
        if not nameMatch:
            continue

        with pkg_resources.resource_stream('gavo', templateDir+fName) as f:
            tplHead = f.read(1000)

        descMatch = re.search(r"\\tpldesc{(.*?)}", tplHead)
        if not descMatch:
            continue

        print("%s -- %s"%(nameMatch.group(1), descMatch.group(1)))
362
363
365 import argparse
366 parser = argparse.ArgumentParser(description = "Write a template"
367 " q.rd for a certain data type")
368 parser.add_argument("protocol", metavar="PROTO", action="store",
369 type=str, help="Generate an RD template for PROTO; use list to"
370 " see what is available.")
371 args = parser.parse_args()
372 if args.protocol=="list":
373 _listProtocols()
374 sys.exit(0)
375 return args
376
377
379 """Macros for RD templates.
380 """
382 """A silent macro used for self-documentation.
383 """
384 return ""
385
387 """returns an ISO representation of just about now UTC.
388 """
389 return utils.formatISODT(datetime.datetime.utcnow())
390
392 """returns the last element of the current path.
393
394 This is assumed to be the intended resource directory.
395 """
396 return os.path.split(os.getcwd())[-1]
397
404
405
407 """a macro expander for RD templates.
408 """
411
412
433
434
# Run the (deprecated) mkrd command when executed as a script.
if __name__ == "__main__":
    main()
437