Package gavo :: Package user :: Module info
[frames] | no frames]

Source Code for Module gavo.user.info

  1  """ 
  2  Commands for obtaining information about various things in the data center. 
  3  """ 
  4   
  5  #c Copyright 2008-2019, the GAVO project 
  6  #c 
  7  #c This program is free software, covered by the GNU GPL.  See the 
  8  #c COPYING file in the source distribution. 
  9   
 10   
 11  # XXX: TODO: we should throw the AnnotationMaker stuff away and 
 12  # move the core of annotateDBTable to rsc.dbtable, and rsc.Limits. 
 13  # There's now inMemoryTable.getLimits that does a similar thing. 
 14   
 15   
 16  from __future__ import print_function 
 17   
 18  import datetime 
 19   
 20  from argparse import ArgumentParser 
 21   
 22  from gavo import api 
 23  from gavo import base 
 24  from gavo import stc 
 25  from gavo import svcs 
 26  from gavo import utils 
 27  from gavo.protocols import scs 
 28   
 29   
class Interval(object):
    """an interval given by its lower and its upper limit.

    Instances just hold the two limits in the attributes min and max;
    their string form is both limits, %g-formatted and separated by a
    single blank.

    This should probably move to utils as soon as it is really used
    outside of this module; I suppose we should have a native
    implementation of xtype="interval".
    """

    def __init__(self, min, max):
        # min and max shadow the builtins, but they are part of the
        # constructor's signature, so they have to stay.
        self.min = min
        self.max = max

    def __str__(self):
        limits = (self.min, self.max)
        return "%g %g" % limits
class AnnotationMaker(object):
    """A class for producing column annotations.

    An annotation simply is a dictionary with some well-known keys.  They
    are generated from DB queries.  It is this class' responsibility
    to collect the DB query result columns pertaining to a column and
    produce the annotation dictionary from them.

    To make this happen, it is constructed with the column; then, for
    each property queried, getOutputFieldFor is called.  Finally, annotate
    is called with the DB result row (see annotateDBTable) to actually
    fill the column's annotations dictionary.
    """

    def __init__(self, column):
        self.column = column
        # don't clobber annotations left by an earlier run on the same column
        if not hasattr(self.column, "annotations"):
            self.column.annotations = {}
        # maps names of result columns to the annotation keys they feed
        self.propDests = {}

    def doesWork(self):
        """returns something true if at least one property has been
        requested through getOutputFieldFor.

        (What's actually returned is the propDests mapping itself.)
        """
        return self.propDests

    def getOutputFieldFor(self, propName, propFunc, nameMaker):
        """returns an OutputField that will generate a propName annotation
        from the propFunc function.

        propFunc for now has a %(name)s where the column name must be
        inserted.

        nameMaker is something like a base.VOTNameMaker; it is asked for
        the name of the result column, which is also remembered in
        propDests so annotate can find the value later.
        """
        destCol = nameMaker.makeName(propName+"_"+self.column.name)
        self.propDests[destCol] = propName
        return api.makeStruct(svcs.OutputField,
            name=destCol,
            select=propFunc%{"name": self.column.name},
            type=self.column.type)

    def annotate(self, resultRow):
        """builds an annotation of the column from resultRow.

        resultRow is a dictionary containing values for all result columns
        registered through getOutputFieldFor; a missing one raises a
        KeyError.

        If the column already has an annotation, only the keys handled
        here will be overwritten.
        """
        # items() rather than iteritems(): works on both python 2 and 3
        for srcKey, destKey in self.propDests.items():
            self.column.annotations[destKey] = resultRow[srcKey]
def annotateDBTable(td, extended=True, requireValues=False):
    """adds domain annotations to the columns of the TableDef td.

    td must be an existing on-Disk table; a ReportableError is raised
    if the table made for td cannot be queried.

    The annotations come in a dictionary-valued attribute (annotations)
    on each column annotated.  Possible keys include "min", "max", "avg",
    and "hasnulls".  Only columns with the appropriate types (i.e.,
    orderable, and, for avg, numeric) are annotated.

    Without extended, only min and max are annotated.  With
    requireValues, only numeric columns that already have a values child
    are annotated.

    This works in place on td's columns; the function itself returns None.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened listing -- confirm against the upstream source.
    outputFields, annotators = [], []
    nameMaker = base.VOTNameMaker()
    for col in td:
        # constructing the maker already makes sure col.annotations exists
        annotator = AnnotationMaker(col)

        if col.type in base.ORDERED_TYPES or col.type.startswith("char"):
            if requireValues:
                if (col.type not in base.NUMERIC_TYPES
                        or not col.values
                        or col.values.min is None):
                    # skip the column entirely (no extended properties either)
                    continue

            outputFields.append(annotator.getOutputFieldFor("max",
                "MAX(%(name)s)", nameMaker))
            outputFields.append(annotator.getOutputFieldFor("min",
                "MIN(%(name)s)", nameMaker))

        if extended:
            if col.type in base.NUMERIC_TYPES:
                outputFields.append(annotator.getOutputFieldFor("avg",
                    "AVG(%(name)s)", nameMaker))

            outputFields.append(annotator.getOutputFieldFor("hasnulls",
                "BOOL_OR(%(name)s IS NULL)", nameMaker))

        if annotator.doesWork():
            annotators.append(annotator)

    if not annotators:
        # no columns present for which we want or can get limits
        return

    with base.getTableConn() as conn:
        dbtable = api.TableForDef(td, connection=conn)

        if not hasattr(dbtable, "getTableForQuery"):
            raise api.ReportableError("Table %s cannot be queried."%td.getQName(),
                hint="This is probably because it is an in-memory table. Add"
                " onDisk='True' to make tables reside in the database.")

        # all properties are aggregates selected in one query, and only
        # its first row is used
        limitsTable = dbtable.getTableForQuery(outputFields, "")
        resultRow = limitsTable.rows[0]

    for annotator in annotators:
        annotator.annotate(resultRow)
def getSCSCoverageQuery(td, order):
    """returns a database query for getting a MOC for a table suitable
    for cone search.

    The positions are taken from the columns scs.getConeColumns finds
    in td; rows in which either of them is NULL are ignored.

    This will return None if no such query can be built.
    """
    try:
        raCol, decCol = scs.getConeColumns(td)
    except (ValueError, base.NotFoundError):
        return None

    raName, decName = str(raCol.name), str(decCol.name)
    hpxSelect = (" SELECT DISTINCT healpix_nest(%d,"
        " spoint(RADIANS(%s), RADIANS(%s))) AS hpx "%(
            order, raName, decName))

    return "\n".join([
        "SELECT smoc('%d/' || string_agg(format('%%%%s', hpx), ','))"%order,
        "FROM (",
        hpxSelect,
        "FROM %s"%td.getQName(),
        "WHERE %s IS NOT NULL AND %s IS NOT NULL"%(raName, decName),
        "GROUP BY hpx",
        ") as q"])
def getSSAPCoverageQuery(td, order):
    """returns a database query for getting a MOC for a table using
    one of our standard SSAP mixins.

    The positions come from the ssa_location column; rows where it is
    NULL are ignored.

    This will return None if no such query can be built, i.e., if td
    mixes in neither //ssap#hcd nor //ssap#mixc.
    """
    handledMixins = ("//ssap#hcd", "//ssap#mixc")
    if not any(td.mixesIn(mixinId) for mixinId in handledMixins):
        return None

    outerSelect = (
        "SELECT smoc('%d/' || string_agg(format('%%%%s', hpx), ','))"%order)
    hpxSelect = " SELECT DISTINCT healpix_nest(%d, ssa_location) AS hpx"%order
    source = ("FROM %s WHERE ssa_location IS NOT NULL GROUP BY hpx"
        %td.getQName())

    return "\n".join([outerSelect, "FROM (", hpxSelect, source, ") as q"])
def getSIAPCoverageQuery(td, order):
    """returns a database query for getting a MOC for a table using
    //siap#pgs (i.e., SIAv1)

    TODO pgsphere is missing features to make this really work properly,
    so so far we're just using the image centers (centerAlpha,
    centerDelta).  Make sure you're using small orders here.

    This will return None if no such query can be built, which is when
    td does not mix in //siap#pgs.

    For SIAv2, no such thing is available yet; the assumption is that
    for the time being, for all image collections there's always a
    SIAv1 service, too.
    """
    if not td.mixesIn("//siap#pgs"):
        return None

    outerSelect = (
        "SELECT smoc('%d/' || string_agg(format('%%%%s', hpx), ','))"%order)
    hpxSelect = (" SELECT DISTINCT healpix_nest(%d, "
        " spoint(RADIANS(centerAlpha), RADIANS(centerDelta))) AS hpx "%(order))
    source = "FROM %s WHERE centerAlpha*centerDelta IS NOT NULL"%td.getQName()

    return "\n".join([
        outerSelect,
        "FROM (",
        hpxSelect,
        source,
        "GROUP BY hpx",
        ") as q"])
def getObscoreCoverageQuery(td, order):
    """returns a database query for getting a MOC for tables with obscore
    columns

    TODO pgsphere is missing features to make this really work properly,
    so so far we're just using s_ra and s_dec; we should move to s_region.

    This will return None if no such query can be built, which is when
    td lacks s_ra or s_dec.
    """
    if "s_ra" not in td or "s_dec" not in td:
        return None

    outerSelect = (
        "SELECT smoc('%d/' || string_agg(format('%%%%s', hpx), ','))"%order)
    hpxSelect = (" SELECT DISTINCT healpix_nest(%d, "
        " spoint(RADIANS(s_ra), RADIANS(s_dec))) AS hpx "%(order))
    source = "FROM %s WHERE s_ra*s_dec IS NOT NULL"%td.getQName()

    return "\n".join([
        outerSelect,
        "FROM (",
        hpxSelect,
        source,
        "GROUP BY hpx",
        ") as q"])
def getMOCQuery(td, order):
    """returns a MOC-generating query for a tableDef with standard
    columns.

    The query generators for SIAP, SSAP, SCS, and obscore tables are
    tried in that order, and the first one returning a query wins.  If
    none of them can deal with td, a ReportableError is raised.

    (this is a helper for getMOCForStdTable)
    """
    queryMakers = (
        getSIAPCoverageQuery,
        getSSAPCoverageQuery,
        getSCSCoverageQuery,
        getObscoreCoverageQuery)

    for makeQuery in queryMakers:
        candidate = makeQuery(td, order)
        if candidate is not None:
            return candidate

    # we only get here if none of the generators produced a query
    raise base.ReportableError("Table %s does not have columns DaCHS knows"
        " how to get a coverage from."%td.getFullId())
def getMOCForStdTable(td, order=6):
    """returns a MOC for a tableDef with one of the standard protocol mixins.

    The query run is whatever getMOCQuery builds for td and order (see
    there for which kinds of tables are supported); the MOC is the
    first column of the first row the database returns for it.
    """
    with base.getTableConn() as conn:
        resultRows = list(conn.query(getMOCQuery(td, order)))
        firstRow = resultRows[0]
        moc = firstRow[0]
    return moc
def _getTimeTransformer(col):
    """returns a function turning values in col to MJD.

    This is very much a matter of heuristics; we build upon what's happening
    in utils.serializers.  The rules, first match wins:

    * timestamp and date columns are passed through stc.dateTimeToMJD
    * columns with MJD in their UCD, an xtype of mjd, or mjd in their
      name are left alone
    * columns with unit yr or a are taken as julian years
    * columns with unit d have stc.JD_MJD subtracted
    * columns with unit s are interpreted as (UTC) unix timestamps

    For anything else, a base.NotImplementedError is raised.
    """
    if col.type in ["timestamp", "date"]:
        return lambda val: stc.dateTimeToMJD(val)

    elif ((col.ucd and "MJD" in col.ucd.upper())
            or col.xtype=="mjd"
            or "mjd" in col.name):
        return utils.identity

    elif col.unit=="yr" or col.unit=="a":
        return lambda val: stc.dateTimeToMJD(stc.jYearToDateTime(val))

    elif col.unit=="d":
        return lambda val: val-stc.JD_MJD

    elif col.unit=="s":
        # datetime is the module here, so we need to go through the class
        return lambda val: stc.dateTimeToMJD(
            datetime.datetime.utcfromtimestamp(val))

    else:
        # NOTE(review): this relies on gavo.base exporting a
        # NotImplementedError -- confirm.
        raise base.NotImplementedError("Cannot figure out how to get an MJD"
            " from column %s"%col.name)
def getTimeLimitsColumnNames(td):
    """returns the names of columns hopefully containing minimal and
    maximal time coverage of each row of a table defined by td.

    As required by iterScalarLimits, what's returned is a triple
    (minColName, maxColName, transformer), where transformer is a
    function applied to the limits coming back from the database.

    This tries a couple of known criteria for columns containing times
    in some order, and the first one matching wins.

    This will raise a NotFoundError if none of our heuristics work.
    """
    # obscore and friends
    try:
        return (td.getColumnByName("t_min").name,
            td.getColumnByName("t_max").name,
            _getTimeTransformer(td.getColumnByName("t_min")))
    except base.NotFoundError:
        pass

    # SSAP
    try:
        col = td.columns.getColumnByUtype(
            "ssa:Char.TimeAxis.Coverage.Location.Value"
            )
        return col.name, col.name, utils.identity
    except base.NotFoundError:
        pass

    # our SIAP mixins
    try:
        col = td.getColumnByName("dateObs"
            )
        return col.name, col.name, utils.identity
    except base.NotFoundError:
        pass

    # Any table with appropriate, sufficiently unique UCDs
    try:
        col = td.getColumnByUCD("time.start")
        # we assume time.start and time.end work the same way and one
        # transformer is enough.
        # This must be a flat triple like everywhere else in here; callers
        # unpack three values.
        return (col.name,
            td.getColumnByUCD("time.end").name,
            _getTimeTransformer(col))
    except ValueError:
        pass

    for obsUCD in ["time.epoch", "time.epoch;obs"]:
        try:
            col = td.getColumnByUCD(obsUCD)
            return col.name, col.name, _getTimeTransformer(col)
        except ValueError:
            pass

    raise base.NotFoundError("temporal coverage", "Columns to figure out",
        "table "+td.getFullId())
def getSpectralLimitsColumnNames(td):
    """returns the names of columns hopefully containing minimal and
    maximal spectral coverage.

    What's returned is a triple (minColName, maxColName, transformer).
    As transformer function, we currently return the identity, as we're
    only using IVOA standard columns anyway.  Based on unit and ucd,
    we could pretty certainly do better.

    If this doesn't find any, it raises a NotFoundError.
    """
    knownColumnPairs = (
        # obscore and friends
        ("em_min", "em_max"),
        # SSAP
        ("ssa_specstart", "ssa_specend"),
        # SIAv1
        ("bandpassLo", "bandpassHi"))

    for minName, maxName in knownColumnPairs:
        try:
            return (td.getColumnByName(minName).name,
                td.getColumnByName(maxName).name,
                utils.identity)
        except base.NotFoundError:
            # try the next convention
            continue

    raise base.NotFoundError("spectral coverage", "Columns to figure out",
        "table "+td.getFullId())
def iterScalarLimits(td, columnsDeterminer):
    """yields Interval instances for time or spectral coverage.

    columnsDeterminer is a function td -> (mincol, maxcol, transformer)
    expected to raise a NotFoundError if no appropriate columns can be
    found.  This is either getTimeLimitsColumnNames or
    getSpectralLimitsColumnNames at this point.  transformer here is a
    function val -> val turning what's coming back from the database to
    what's expected by the coverage machinery.

    It's conceivable that at some point we'll give multiple intervals,
    and hence this is an iterator (that can very well yield nothing for
    a variety of reasons, e.g., when no columns are found or a limit
    comes back as NULL).
    """
    try:
        limitColumns = columnsDeterminer(td)
        minColName, maxColName, transformer = limitColumns
    except base.NotFoundError:
        return

    query = "SELECT MIN(%s), MAX(%s) FROM %s"%(
        str(minColName), str(maxColName), td.getQName())

    with base.getTableConn() as conn:
        for row in conn.query(query):
            lower, upper = row[0], row[1]
            if lower is None or upper is None:
                # nothing to report when a limit is NULL
                continue
            yield Interval(transformer(lower), transformer(upper))
# the annotation keys shown by printTableInfo, in column order
_PROP_SEQ = ("min", "avg", "max", "hasnulls")


def printTableInfo(td):
    """tries to obtain various information on the properties of the
    database table described by td and prints it to stdout.

    The output is a table with one row per column of td, giving the
    annotations from annotateDBTable in the sequence of _PROP_SEQ;
    values are shortened to 30 characters, and a missing annotation is
    shown as a dash.
    """
    annotateDBTable(td)

    header = ("col",)+_PROP_SEQ
    propTable = [header]
    for col in td:
        cells = [col.name]
        for prop in _PROP_SEQ:
            if prop not in col.annotations:
                cells.append("-")
                continue
            cells.append(utils.makeEllipsis(
                utils.safe_str(col.annotations[prop]), 30))
        propTable.append(tuple(cells))

    print(utils.formatSimpleTable(propTable))
def parseCmdline():
    """returns the parsed command line arguments.

    The result has the attributes tableId (the mandatory positional
    argument) and mocOrder (an int from -m/--moc-order, None if the
    option was not given).
    """
    parser = ArgumentParser(
        description="Displays various stats about the table referred to in"
            " the argument.")

    parser.add_argument("tableId",
        help="Table id (of the form rdId#tableId)")

    mocHelp = ("Also print a MOC giving the coverage at MOC_ORDER (use MOC_ORDER=6"
        " for about 1 deg resolution).")
    parser.add_argument("-m", "--moc-order",
        dest="mocOrder",
        metavar="MOC_ORDER",
        type=int,
        default=None,
        help=mocHelp)

    return parser.parse_args()
def main():
    """prints column statistics and, if requested, a coverage MOC for the
    table named on the command line.

    The table is referenced as rdId#tableId; the MOC is only computed
    when -m/--moc-order was given (see parseCmdline).
    """
    args = parseCmdline()
    td = api.getReferencedElement(args.tableId, api.TableDef)
    printTableInfo(td)

    # compare against None rather than testing truth: 0 is a legal MOC
    # order and would otherwise be silently ignored.
    if args.mocOrder is not None:
        print("Computing MOC at order %s..."%args.mocOrder)
        print(getMOCForStdTable(td, args.mocOrder))