Package gavo :: Package user :: Module mkrd
[frames | no frames]

Source Code for Module gavo.user.mkrd

  1  """ 
  2  Creation of resource descriptors 
  3   
  4  The first part of this is an early experiment to automatically create 
  5  resource descriptors from structured data representations. 
  6   
  7  While parts of this may be recoverable for smarter gavo start functionality, 
  8  doing this so that the result is actually useful is hard. 
  9   
 10  Instead, the new gavo start functionality just fetches one of a few 
 11  commented RD templates, fills out a thing or two and leaves the rest to 
 12  the operator. 
 13  """ 
 14   
 15  #c Copyright 2008-2019, the GAVO project 
 16  #c 
 17  #c This program is free software, covered by the GNU GPL.  See the 
 18  #c COPYING file in the source distribution. 
 19   
 20   
 21  from __future__ import print_function 
 22   
 23  import datetime 
 24  import os 
 25  import re 
 26  import sys 
 27   
 28  import pkg_resources 
 29    
 30  from gavo import base 
 31  from gavo import grammars 
 32  from gavo import rscdef 
 33  from gavo import votable 
 34  from gavo import utils 
 35  from gavo.base import macros 
 36  from gavo.grammars import fitsprodgrammar 
 37  from gavo.grammars import fitstablegrammar 
 38  from gavo.formats import votableread 
 39  from gavo.utils import ElementTree 
 40  from gavo.utils import fitstools 
 41   
 42  MS = base.makeStruct 
 43   
 44   
 45  # ======================= Begin deprecated ui and implementation =========== 
 46   
 47  ignoredFITSHeaders = set(["COMMENT", "SIMPLE", "BITPIX", "EXTEND",  
 48          "NEXTEND", "SOFTNAME", "SOFTVERS", "SOFTDATE", "SOFTAUTH", "SOFTINST", 
 49          "HISTORY", "BZERO", "BSCALE", "DATAMIN", "DATAMAX"]) 
 50  wcsKey = re.compile("CD.*|CRVAL.*|CDELT.*|NAXIS.*|CRPIX.*|CTYPE.*|CUNIT.*" 
 51          "|CROTA.*|RADECSYS|AP?_\d_\d|BP?_\d_\d|LATPOLE|LONPOLE") 
 52   
 53   
def isIgnoredKeyword(kw):
    """returns something true if kw must not become a table column.

    That is the case for the keywords listed in ignoredFITSHeaders and
    for everything wcsKey matches.  Keeping the WCS keywords untranslated
    matters when SIAP bboxes are to be computed from them.

    The result is True, a match object, or None; use it as a boolean only.
    """
    if kw in ignoredFITSHeaders:
        return True
    return wcsKey.match(kw)
61 62
def structToETree(aStruct):
    """returns an ElementTree element built from the events of aStruct.

    The events come from aStruct.iterEvents(): a start event opens a child
    of the current element, an end event closes the current element, and a
    value event sets either the text (for content_) or an attribute of the
    current element.  Values that are None or base.NotGiven are skipped.

    Note that due to manipulations at parse time and non-copyable content,
    this will, in general, not reproduce the original XML trees.

    Any exception is reported through base.ui.notifyError and then
    re-raised; unknown event types raise a base.Error.
    """
    openElements = [ElementTree.Element(aStruct.name_)]

    for evType, elName, value in aStruct.iterEvents():
        try:
            if evType=="start":
                child = ElementTree.SubElement(openElements[-1], elName)
                openElements.append(child)
            elif evType=="end":
                openElements.pop()
            elif evType!="value":
                raise base.Error("Invalid struct event: %s"%evType)
            elif value is None or value is base.NotGiven:
                continue
            elif elName=="content_":
                openElements[-1].text = value
            else:
                if not isinstance(value, basestring):
                    # TODO: figure out if something is a reference by
                    # inspecting the attribute definition; meanwhile, just
                    # assume it is:
                    value = value.id
                openElements[-1].set(elName, value)
        except:
            # bare on purpose: whatever went wrong, say where, then let
            # the original exception propagate unchanged.
            base.ui.notifyError("Badness occurred in element %s, event %s,"
                " value %s\n"%(elName, evType, value))
            raise

    return openElements[-1]
# Maps the type letter of a FITS table column format to the type
# getTypeForFTColumn returns for it.
FT_TYPE_MAP = {
    "B": "bytea",
    "A": "text",
    "I": "smallint",
    "J": "integer",
    "K": "bigint",
    "E": "real",
    "D": "double precision",
}


def getTypeForFTColumn(fitsCol):
    """returns a DaCHS type for FITS table column.

    fitsCol needs a format attribute (an optional repeat count followed
    by a single type letter) and a name attribute (used in the error
    message only).

    This currently ignores array sizes and such.  Well, people can always
    fix things manually.

    A base.ReportableError is raised for formats not of that form or with
    a type letter missing from FT_TYPE_MAP.
    """
    # raw string: \d is meant for the regex engine, not as a string escape
    mat = re.match(r"(\d*)(.)$", fitsCol.format)
    if not mat or mat.group(2) not in FT_TYPE_MAP:
        raise base.ReportableError("FITS type code '%s' of %s not handled"
            " by gavo mkrd; add handling if you can."%(
                fitsCol.format, fitsCol.name))
    return FT_TYPE_MAP[mat.group(2)]
117 118
def makeTableFromFT(rd, srcName, opts):
    """returns a table definition for the FITS table in srcName.

    One column is generated per column of HDU 1 of srcName; the table id
    is opts.tableName.  Column names are produced by a base.VOTNameMaker;
    where the result differs from the FITS column name, the mapping from
    the new to the FITS name is recorded in the nameMaps attribute of the
    returned table (makeDataForFT reads that).

    Column types come from getTypeForFTColumn, which raises a
    base.ReportableError for formats it does not handle.
    """
    from gavo.utils import pyfits
    cols, nameMaps = [], {}
    nameMaker = base.VOTNameMaker()

    hdus = pyfits.open(srcName)
    try:
        for fitsCol in hdus[1].columns:
            destName = nameMaker.makeName(fitsCol)
            if destName!=fitsCol.name:
                nameMaps[destName] = fitsCol.name
            cols.append(MS(rscdef.Column,
                type=getTypeForFTColumn(fitsCol),
                name=destName,
                unit=fitsCol.unit,
                ucd="",
                description="FILL IN"))
    finally:
        # don't leak the file handle, in particular not when a column
        # type is unsupported and we bail out with an exception.
        hdus.close()

    table = rscdef.TableDef(rd, id=opts.tableName, onDisk=True,
        columns=cols)
    table.nameMaps = nameMaps
    return table
138 139
def makeDataForFT(rd, srcName, opts):
    """returns a data descriptor importing FITS tables into rd's first table.

    The descriptor has the id import, reads *.fits recursively through a
    FITSTableGrammar, and uses a rowmaker gen_rmk with idmaps="*" plus one
    map rule per entry in the target table's nameMaps.

    srcName and opts are not used here.
    """
    targetTable = rd.tables[0]

    # nameMaps left by makeTableFromFT
    renameRules = []
    for destName, fitsName in targetTable.nameMaps.iteritems():
        renameRules.append(MS(rscdef.MapRule,
            key=destName, content_="vars['%s']"%fitsName))

    grammar = MS(fitstablegrammar.FITSTableGrammar)
    sources = MS(rscdef.SourceSpec, pattern=["*.fits"], recurse=True)
    # NOTE(review): maps receives a list containing the list of rules;
    # kept as found -- confirm this nesting is what RowmakerDef expects.
    rowmaker = MS(rscdef.RowmakerDef,
        id="gen_rmk", idmaps="*", maps=[renameRules])
    make = MS(rscdef.Make, table=targetTable, rowmaker=rowmaker)

    descriptorArgs = {
        "id": "import",
        "sources": sources,
        "grammar": grammar,
        "rowmakers": [rowmaker],
        "make": make,
    }
    return MS(rscdef.DataDescriptor, **descriptorArgs)
160 161
def makeTableFromFITS(rd, srcName, opts):
    """returns a table definition with a column per header card of srcName.

    The cards are those of the header of HDU 0.  Cards for which
    isIgnoredKeyword returns true are skipped.  Column names are the
    lowercased keywords with every character outside a-z replaced by an
    underscore; string values give text columns, int values integer
    columns, everything else real columns.

    As a side effect, the keyword-to-column mapping is stored in rd's
    mapKeys property, where makeDataForFITS picks it up.
    """
    table = rscdef.TableDef(rd, id=opts.tableName, onDisk=True)
    mapSpecs = []

    for card in fitstools.openFits(srcName)[0].header.cards:
        if isIgnoredKeyword(card.keyword):
            continue
        colName = re.sub("[^a-z]", "_", card.keyword.lower())
        if not colName:
            continue

        # NOTE(review): bool is a subclass of int, so cards with boolean
        # values end up as integer columns here.
        if isinstance(card.value, basestring):
            colType = "text"
        elif isinstance(card.value, int):
            colType = "integer"
        else:
            colType = "real"

        column = MS(rscdef.Column,
            name=colName, unit="FILLIN", ucd="FILLIN", type=colType,
            description=card.comment)
        table.feedObject("column", column)
        mapSpecs.append("%s:%s"%(card.keyword, colName))

    rd.setProperty("mapKeys", ", ".join(mapSpecs))
    return table.finishElement()
186 187
def makeDataForFITS(rd, srcName, opts):
    """returns a data descriptor importing FITS headers into rd's first table.

    The descriptor's id is import_ plus opts.tableName.  Its grammar is a
    FITSProdGrammar with qnd set, a //products#define row filter bound to
    the target table, and the mapKeys taken from rd's mapKeys property
    (set by makeTableFromFITS).  Sources are *.fits, searched recursively,
    and rows are made by a rowmaker gen_rmk with idmaps="*".

    srcName is not used here.
    """
    targetTable = rd.tables[0]
    dd = rscdef.DataDescriptor(rd, id="import_"+opts.tableName)

    # the grammar is built step by step and finished before it is
    # attached to the data descriptor
    grammar = fitsprodgrammar.FITSProdGrammar(dd)
    grammar.feedObject("qnd", True)
    rowfilterSource = """
        <rowfilter procDef="//products#define">
            <bind key="table">"%s"</bind>
            <bind key="owner">"FILLIN"</bind>
            <bind key="embargo">"FILLIN"</bind>
        </rowfilter>"""%(targetTable.getQName())
    grammar.feedObject("rowfilter",
        base.parseFromString(grammars.Rowfilter, rowfilterSource))
    keyMap = MS(grammars.MapKeys, content_=rd.getProperty("mapKeys"))
    grammar.feedObject("mapKeys", keyMap)
    grammar.finishElement()
    dd.grammar = grammar

    sources = MS(rscdef.SourceSpec, pattern=["*.fits"], recurse=True)
    dd.feedObject("sources", sources)
    rowmaker = MS(rscdef.RowmakerDef, idmaps="*", id="gen_rmk")
    dd.feedObject("rowmaker", rowmaker)
    # the rowmaker is referenced by id here, not by object
    make = MS(rscdef.Make, table=targetTable, rowmaker="gen_rmk")
    dd.feedObject("make", make)
    return dd
209 210
def makeTableFromVOTable(rd, srcName, opts):
    """returns a table definition for the first table in the VOTable srcName.

    The table is named opts.tableName and is marked as onDisk.  rd is not
    used here.

    A base.ReportableError is raised if votable.parse yields no table
    for srcName.
    """
    # everything needing the parser happens inside the with block so the
    # file is still open while the first table is being pulled, and is
    # closed reliably afterwards.
    with open(srcName) as src:
        rawTable = next(votable.parse(src), None)
        if rawTable is None:
            raise base.ReportableError(
                "No table found in VOTable %s"%srcName)
        return votableread.makeTableDefForVOTable(opts.tableName,
            rawTable.tableDefinition, onDisk=True)
215 216
def makeDataForVOTable(rd, srcName, opts):
    """returns a data descriptor feeding the VOTable srcName to rd's first
    table.

    The descriptor uses the voTableGrammar, has srcName as its source
    pattern, and makes the table through a rowmaker called makerows_ plus
    opts.tableName with idmaps="*".
    """
    targetTable = rd.tables[0]
    rowmaker = MS(rscdef.RowmakerDef,
        id="makerows_"+opts.tableName, idmaps="*")

    # The qualifiedId monkeying is necessary since otherwise the
    # ReferenceAttribute.unparse thinks it's ok to return the objects raw.
    # Face it: I've not really had serialization in mind when writing all
    # this.
    rowmaker.qualifiedId = rowmaker.id
    targetTable.qualifiedId = targetTable.id

    grammar = MS(rscdef.getGrammar("voTableGrammar"))
    sources = MS(rscdef.SourceSpec, pattern=srcName)
    make = MS(rscdef.Make, table=targetTable, rowmaker=rowmaker)
    return MS(rscdef.DataDescriptor,
        grammar=grammar,
        sources=sources,
        rowmaker=rowmaker,
        makes=[make])
# Dispatch tables from the source format code (as in opts.srcForm) to the
# functions building the table definition and the data descriptor for that
# format.  Both tables need to have the same keys.
tableMakers = dict(
    FITS=makeTableFromFITS,
    VOT=makeTableFromVOTable,
    FT=makeTableFromFT,
)

dataMakers = dict(
    FITS=makeDataForFITS,
    VOT=makeDataForVOTable,
    FT=makeDataForFT,
)
def makeRD(args, opts):
    """returns a finished RD for the sample file args[0].

    opts needs the attributes resdir (used as the RD's resdir; its last
    path component becomes the schema), srcForm (a key of tableMakers and
    dataMakers), and whatever the selected makers read (e.g., tableName).

    The RD gets placeholder metadata, one table and one data element.

    A base.ReportableError is raised for a srcForm no maker is registered
    for.
    """
    from gavo import rscdesc

    try:
        makeTable = tableMakers[opts.srcForm]
        makeData = dataMakers[opts.srcForm]
    except KeyError:
        raise base.ReportableError("Unknown source format '%s'; known"
            " formats: %s"%(opts.srcForm, ", ".join(sorted(tableMakers))))

    # os.path.basename returns an empty string for paths ending in a
    # separator, which would leave us with an empty schema name.
    schema = os.path.basename(opts.resdir.rstrip(os.sep))
    rd = rscdesc.RD(None, schema=schema, resdir=opts.resdir)

    # The sequence matters here: the empty creator between the two
    # creator.name items separates the two creators.
    for key, value in [
            ("title", "FILL-IN"),
            ("creationDate", utils.formatISODT(datetime.datetime.utcnow())),
            ("description", "FILL-IN a long text (and maybe do format='plain'"
                " or even format='rst'"),
            ("copyright", "Free to use."),
            ("creator.name", "Author, S."),
            ("creator", ""),
            ("creator.name", "Other, A."),
            ("subject", "One Keyword"),
            ("subject", "Two Words"),
            ("content.type", "Catalog"),
            ("coverage.waveband", "Optical"),
            ("coverage.profile", "AllSky ICRS"),]:
        rd.addMeta(key, value)

    # the table must be fed first, as the data makers look at rd.tables[0]
    rd.feedObject("table", makeTable(rd, args[0], opts))
    rd.feedObject("data", makeData(rd, args[0], opts))
    return rd.finishElement()
272 273
def indent(elem, level=0):
    """adds whitespace to the ElementTree element elem in place so it
    serialises with one tab of indentation per nesting level.

    Only text and tail attributes that are empty or pure whitespace are
    touched.  level is the nesting depth of elem; leave it at 0 when
    calling this for a root element.
    """
    def isBlank(s):
        return not s or not s.strip()

    ownPad = "\n" + level*"\t"

    if not len(elem):
        # leaf: only what follows it needs whitespace, and not for the root
        if level and isBlank(elem.tail):
            elem.tail = ownPad
        return

    if isBlank(elem.text):
        elem.text = ownPad + "\t"
    if isBlank(elem.tail):
        elem.tail = ownPad

    lastChild = None
    for lastChild in elem:
        indent(lastChild, level+1)
    # the closing tag of elem follows the last child, so that one has to
    # step back to elem's own indentation.
    if isBlank(lastChild.tail):
        lastChild.tail = ownPad
290 291
def writePrettyPrintedXML(root):
    """writes the element root to stdout as utf-8 encoded XML.

    root is re-indented in place (see indent) before it is written.
    """
    indent(root)
    tree = ElementTree.ElementTree(root)
    tree.write(sys.stdout, encoding="utf-8")
295 296
def parseCommandLine():
    """returns (opts, args) for the deprecated mkrd command line.

    opts has the attributes srcForm (one of FITS, VOT, FT), tableName, and
    resdir; args is a list containing exactly one item, the sample file.

    When no format is given, it is guessed from the extension of the
    sample file (.xml and .vot for VOT, .fits for FITS).  A format given
    explicitly is accepted regardless of case.

    The program is exited with status 1 after printing the help to stderr
    if there is not exactly one argument, if the format cannot be guessed,
    or if an unknown format is given.
    """
    from optparse import OptionParser
    knownFormats = ("FITS", "VOT", "FT")

    parser = OptionParser(usage = "%prog [options] <sample>"
        " DEPRECATED, use dachs start instead")
    parser.add_option("-f", "--format", help="Input file format: "
        " FITS, VOT or FT (FITS table)"
        " Default: Detect from file name", dest="srcForm", default=None,
        action="store", type="str")
    parser.add_option("-t", "--table-name", help="Name of the generated table",
        dest="tableName", default="main", action="store", type="str")
    parser.add_option("-r", "--resdir", help="Override resdir (and schema)",
        dest="resdir", default=os.getcwd(), action="store", type="str")
    opts, args = parser.parse_args()

    def bailOut(message=None):
        if message:
            sys.stderr.write(message)
        parser.print_help(file=sys.stderr)
        sys.exit(1)

    if len(args)!=1:
        bailOut()

    if opts.srcForm:
        # the format is later used as a dictionary key, so normalise it
        # and reject junk here rather than failing with a KeyError.
        opts.srcForm = opts.srcForm.upper()
        if opts.srcForm not in knownFormats:
            bailOut("Unknown format %s, use one of %s\n"%(
                opts.srcForm, ", ".join(knownFormats)))
    else:
        ext = os.path.splitext(args[0])[1].lower()
        if ext in (".xml", ".vot"):
            opts.srcForm = "VOT"
        elif ext==".fits":
            opts.srcForm = "FITS"
        else:
            bailOut("Cannot guess format, use -f option: %s\n"%args[0])

    return opts, args
324 325
def main():
    """runs the deprecated mkrd user interface: an RD is generated for
    the sample file named on the command line and written to stdout as XML.
    """
    # hack to make id and onDisk copyable so we see them on iterEvent
    for attDef in (
            rscdef.TableDef._id,
            rscdef.TableDef._onDisk,
            rscdef.DataDescriptor._id):
        attDef.copyable = True

    opts, args = parseCommandLine()
    rd = makeRD(args, opts)
    # same hack for the metadata, on the instance this time
    rd._metaAttr.copyable = True
    writePrettyPrintedXML(structToETree(rd))
335 336 # ======================= End deprecated ui and implementation =========== 337 338
def _listProtocols():
    """writes to stdout a list of protocols we can generate RD templates
    for.

    This is read from the resources coming with DaCHS: every file named
    template-<protocol>.rd_ in resources/src is a candidate.  A candidate
    is listed if a tpldesc macro call (a backslash followed by
    tpldesc{Description}) is found in its first 1000 bytes; the macro
    argument becomes the description printed next to the protocol name.

    The list is sorted by file name so the output does not depend on the
    order in which the resources happen to be enumerated.
    """
    templateDir = "resources/src/"
    for fName in sorted(
            pkg_resources.resource_listdir("gavo", templateDir)):
        nameMat = re.match(r"template-(.*)\.rd_$", fName)
        if not nameMat:
            continue

        with pkg_resources.resource_stream('gavo', templateDir+fName) as f:
            tplHead = f.read(1000)
        descMat = re.search(r"\\tpldesc{(.*?)}", tplHead)
        if not descMat:
            # templates without a description are not advertised
            continue

        print("%s -- %s"%(nameMat.group(1), descMat.group(1)))
362 363
def parseNewCommandLine():
    """returns the parsed arguments for the start subcommand.

    The result has a single attribute, protocol.  If the protocol given
    is list, the available protocols are written to stdout and the
    program exits with status 0 instead of returning.
    """
    import argparse

    argParser = argparse.ArgumentParser(
        description="Write a template q.rd for a certain data type")
    argParser.add_argument("protocol",
        metavar="PROTO",
        action="store",
        type=str,
        help="Generate an RD template for PROTO; use list to see what"
            " is available.")

    parsed = argParser.parse_args()
    if parsed.protocol!="list":
        return parsed

    _listProtocols()
    sys.exit(0)
376 377
class TemplateMacroPackage(macros.MacroPackage):
    """The macros that can be used within RD templates.
    """

    def macro_tpldesc(self, description):
        """expands to nothing; the argument only serves to document
        the template.
        """
        return ""

    def macro_now(self):
        """returns the current UTC time as formatted by utils.formatISODT.
        """
        utcNow = datetime.datetime.utcnow()
        return utils.formatISODT(utcNow)

    def macro_resdir(self):
        """returns the last path element of the current working directory.

        This is assumed to be the intended resource directory.
        """
        return os.path.basename(os.getcwd())

    def macro_commonmeta(self):
        """returns the content of the resource
        resources/src/tpart-commonmeta.xml.
        """
        metaStream = pkg_resources.resource_stream('gavo',
            "resources/src/tpart-commonmeta.xml")
        with metaStream as f:
            return f.read()
406 -class MkrdMacroProcessor(macros.MacroExpander):
407 """a macro expander for RD templates. 408 """
409 - def __init__(self):
411 412
def start():
    """writes a template RD for the protocol given on the command line
    to q.rd in the current directory.

    The template is taken from the resource
    resources/src/template-<protocol>.rd_ and run through a
    MkrdMacroProcessor before it is written.

    The program is exited with status 1 (after a notifyError) if there is
    no template for the protocol or if q.rd already exists; existing
    files are never overwritten.
    """
    args = parseNewCommandLine()
    outputName = "q.rd"

    try:
        source = pkg_resources.resource_stream('gavo',
            "resources/src/template-%s.rd_"%args.protocol)
    except IOError:
        base.ui.notifyError("No template for %s."%args.protocol)
        sys.exit(1)

    # from here on, the template stream is open; make sure it is closed
    # again on all paths, including the sys.exit below.
    try:
        if os.path.exists(outputName):
            base.ui.notifyError(
                "Output %s already exists. Move it away and try again."
                %outputName)
            sys.exit(1)

        rdSource = MkrdMacroProcessor().expand(source.read())
    finally:
        source.close()

    with open(outputName, "wb") as dest:
        dest.write(rdSource)
# Running this module as a script invokes main(), i.e., the deprecated
# mkrd interface; start() is not reachable from here.
if __name__=="__main__":
    main()