Package gavo :: Module rscdesc
[frames | no frames]

Source Code for Module gavo.rscdesc

  1  """ 
  2  Structure definition of resource descriptors. 
  3   
  4  The stuff they are describing is not a resource in the VO sense (whatever 
  5  that is) or in the Dublin Core sense, but simply stuff held together 
  6  by common metadata.  If it's got the same creator, the same base title, 
  7  the same keywords, etc., it's described by one RD. 
  8   
  9  In the DaCHS, a resource descriptor typically sets up a schema in 
 10  the database. 
 11  """ 
 12   
 13  #c Copyright 2008-2019, the GAVO project 
 14  #c 
 15  #c This program is free software, covered by the GNU GPL.  See the 
 16  #c COPYING file in the source distribution. 
 17   
 18   
 19  import datetime 
 20  import grp 
 21  import os 
 22  import pkg_resources 
 23  import time 
 24  import threading 
 25  import weakref 
 26   
 27  from gavo import base 
 28  from gavo import registry 
 29  from gavo import rscdef 
 30  from gavo import svcs 
 31  from gavo import utils 
 32  from gavo.rscdef import common 
 33  from gavo.rscdef import regtest 
 34  from gavo.rscdef import scripting 
 35  from gavo.rscdef import executing 
class RD(base.Structure, base.ComputedMetaMixin, scripting.ScriptingMixin,
        base.StandardMacroMixin, common.PrivilegesMixin,
        registry.DateUpdatedMixin):
    """A resource descriptor.

    RDs collect all information about how to parse a particular source (like a
    collection of FITS images, a catalogue, or whatever), about the database
    tables the data ends up in, and the services used to access them.

    In DaCHS' RD XML serialisation, they correspond to the root element.
    """
    name_ = "resource"

    # this is set somewhere below once parsing has proceeded far enough
    # such that caching the RD make sense
    cacheable = False

    _resdir = base.FunctionRelativePathAttribute("resdir",
        default=None,
        baseFunction=lambda instance: base.getConfig("inputsDir"),
        description="Base directory for source files and everything else"
            " belonging to the resource.",
        copyable=True)

    _schema = base.UnicodeAttribute("schema",
        default=base.Undefined,
        description="Database schema for tables defined here. Follow the rule"
            " 'one schema, one RD' if at all possible. If two RDs share the same"
            " schema, the must generate exactly the same permissions for that"
            " schema; this means, in particular, that if one has an ADQL-published"
            " table, so must the other. In a nutshell: one schema, one RD.",
        copyable=True,
        callbacks=["_inferResdir"])

    _dds = base.StructListAttribute("dds",
        childFactory=rscdef.DataDescriptor,
        description="Descriptors for the data generated and/or published"
            " within this resource.",
        copyable=True,
        before="outputTables")

    _tables = base.StructListAttribute("tables",
        childFactory=rscdef.TableDef,
        description="A table used or created by this resource",
        copyable=True,
        before="dds")

    _outputTables = base.StructListAttribute("outputTables",
        childFactory=svcs.OutputTableDef,
        description="Canned output tables for later reference.",
        copyable=True)

    _rowmakers = base.StructListAttribute("rowmakers",
        childFactory=rscdef.RowmakerDef,
        description="Transformations for going from grammars to tables."
            " If specified in the RD, they must be referenced from make"
            " elements to become active.",
        copyable=True,
        before="dds")

    _procDefs = base.StructListAttribute("procDefs",
        childFactory=rscdef.ProcDef,
        description="Procedure definintions (rowgens, rowmaker applys)",
        copyable=True, before="rowmakers")

    _condDescs = base.StructListAttribute("condDescs",
        childFactory=svcs.CondDesc,
        description="Global condition descriptors for later reference",
        copyable=True,
        before="cores")

    _resRecs = base.StructListAttribute("resRecs",
        childFactory=registry.ResRec,
        description="Non-service resources for the IVOA registry. They will"
            " be published when gavo publish is run on the RD.")

    _services = base.StructListAttribute("services",
        childFactory=svcs.Service,
        description="Services exposing data from this resource.",
        copyable=True)

    _macDefs = base.MacDefAttribute(before="tables",
        description="User-defined macros available on this RD")

    _mixinDefs = base.StructListAttribute("mixdefs",
        childFactory=rscdef.MixinDef,
        description="Mixin definitions (usually not for users)")

    _require = base.ActionAttribute("require",
        methodName="importModule",
        description="Import the named gavo module (for when you need something"
            " registred)")

    _cores = base.MultiStructListAttribute("cores",
        childFactory=svcs.getCore,
        childNames=svcs.CORE_REGISTRY.keys(),
        description="Cores available in this resource.", copyable=True,
        before="services")

    _jobs = base.StructListAttribute("jobs",
        childFactory=executing.Execute,
        description="Jobs to be run while this RD is active.")

    _tests = base.StructListAttribute("tests",
        childFactory=regtest.RegTestSuite,
        description="Suites of regression tests connected to this RD.")

    _coverage = base.StructAttribute("coverage",
        childFactory=rscdef.Coverage,
        default=None,
        description="STC coverage of this resource.", copyable=True)

    _properties = base.PropertyAttribute()

    def __init__(self, srcId, **kwargs):
        """sets up an empty RD for the source id srcId.

        Further keyword arguments are handed on to base.Structure.
        """
        # RDs never have parents, so contrary to all other structures they
        # are constructed with a srcId instead of a parent.  You
        # *can* have that None, but such RDs cannot be used to create
        # non-temporary tables, services, etc, since the srcId is used
        # in the construction of identifiers and such.
        self.sourceId = srcId
        base.Structure.__init__(self, None, **kwargs)
        # The rd attribute is a weakref on self.  Always.  So, this is the class
        # that roots common.RDAttributes
        self.rd = weakref.proxy(self)
        # real dateUpdated is set by getRD, this is just for RDs created
        # on the fly.
        self.dateUpdated = datetime.datetime.utcnow()
        # if an RD is parsed from a disk file, this gets set to its path
        # by getRD below
        self.srcPath = None
        # this is for modified-since and friends.
        self.loadedAt = time.time()
        # keep track of RDs depending on us for the registry code
        # (only read this)
        self.rdDependencies = set()

    def __iter__(self):
        """iterates over the data descriptors of this RD.
        """
        return iter(self.dds)

    def __repr__(self):
        return "<resource descriptor for %s>"%self.sourceId

    def validate(self):
        """raises a StructureError if the schema attribute does not match
        utils.identifierPattern.
        """
        if not utils.identifierPattern.match(self.schema):
            raise base.StructureError("DaCHS schema attributes must be valid"
                " python identifiers")

    def isDirty(self):
        """returns true if the RD on disk has a timestamp newer than
        loadedAt.
        """
        if isinstance(self.srcPath, PkgResourcePath):
            # stuff from the resource package should not change underneath us.
            return False

        try:
            if self.srcPath is not None:
                return os.path.getmtime(self.srcPath)>self.loadedAt
        except os.error:
            # this will usually mean the file went away
            return True
        return False

    def importModule(self, ctx):
        # this is a callback for the require attribute
        utils.loadInternalObject(self.require, "__doc__")

    def onElementComplete(self):
        """wires up tables, services and dds with this RD once parsing is
        done, and warns if resdir does not exist.
        """
        for table in self.tables:
            self.readProfiles = self.readProfiles | table.readProfiles
            table.setMetaParent(self)

        self.serviceIndex = {}
        for svc in self.services:
            self.serviceIndex[svc.id] = svc
            svc.setMetaParent(self)

        for dd in self.dds:
            dd.setMetaParent(self)

        if self.resdir and not os.path.isdir(self.resdir):
            base.ui.notifyWarning("RD %s: resource directory '%s' does not exist"%(
                self.sourceId, self.resdir))

        self._onElementCompleteNext(RD)

    def _inferResdir(self, value):
        # callback for the schema attribute: unless resdir has been set,
        # feed the schema name into the resdir attribute.
        if self.resdir is None:
            self._resdir.feedObject(self, value)

    def iterDDs(self):
        """returns an iterator over the data descriptors of this RD.
        """
        return iter(self.dds)

    def getService(self, id):
        """returns the service with id, or None if there is no such service.
        """
        return self.serviceIndex.get(id, None)

    def getTableDefById(self, id):
        """returns the element with id, making sure it is a TableDef.
        """
        return self.getById(id, rscdef.TableDef)

    def getDataDescById(self, id):
        """returns the element with id, making sure it is a DataDescriptor.
        """
        return self.getById(id, rscdef.DataDescriptor)

    def getById(self, id, forceType=None):
        """returns the element registered as id in this RD's idmap.

        A NotFoundError is raised for unknown ids.  If forceType is given
        and the element is not an instance of it, a StructureError is raised.
        """
        try:
            res = self.idmap[id]
        except KeyError:
            raise base.NotFoundError(
                id, "Element with id", "RD %s"%(self.sourceId))
        if forceType:
            if not isinstance(res, forceType):
                raise base.StructureError("Element with id '%s' is not a %s"%(
                    id, forceType.__name__))
        return res

    def getAbsPath(self, relPath):
        """returns the absolute path for a resdir-relative relPath.
        """
        return os.path.join(self.resdir, relPath)

    def openRes(self, relPath, mode="r"):
        """returns a file object for relPath within self's resdir.

        Deprecated.  This is going to go away, use getAbsPath and a context
        manager.
        """
        return open(self.getAbsPath(relPath), mode)

    def getTimestampPath(self):
        """returns a path to a file that's accessed by Resource each time
        a bit of the described resource is written to the db.
        """
        return os.path.join(base.getConfig("stateDir"), "updated_"+
            self.sourceId.replace("/", "+"))

    def touchTimestamp(self):
        """updates the timestamp on the rd's state file.

        The file is re-created, made group-writable and, if possible,
        handed over to the configured GavoGroup.  Failures only produce
        a warning.
        """
        fn = self.getTimestampPath()
        try:
            try:
                os.unlink(fn)
            except os.error:
                pass
            # create the (empty) file and close the handle right away
            with open(fn, "w"):
                pass
            os.chmod(fn, 0o664)
            try:
                # the gid must come from the group entry, not from indexing
                # into the configured group *name*.
                os.chown(fn, -1,
                    grp.getgrnam(base.getConfig("GavoGroup")).gr_gid)
            except (KeyError, os.error):
                # unknown group or insufficient privileges: best effort only
                pass
        except (os.error, IOError):
            base.ui.notifyWarning(
                "Could not update timestamp on RD %s"%self.sourceId)

    def _computeIdmap(self):
        # maps the ids of all children having an id attribute to the children.
        res = {}
        for child in self.iterChildren():
            if hasattr(child, "id"):
                res[child.id] = child
        return res

    def addDependency(self, rd, prereq):
        """declares that rd needs the RD prereq to properly work.

        This is used in the generation of resource records to ensure that, e.g.
        registred data have added their served-bys to the service resources.
        """
        if rd.sourceId!=prereq.sourceId:
            self.rdDependencies.add((rd.sourceId, prereq.sourceId))

    def copy(self, parent):
        """returns a copy of this RD with a freshly computed idmap.

        This emits a warning, as copying RDs may not be a good idea.
        """
        base.ui.notifyWarning("Copying an RD -- this may not be a good idea")
        new = base.Structure.copy(self, parent)
        new.idmap = new._computeIdmap()
        new.sourceId = self.sourceId
        return new

    def invalidate(self):
        """make the RD fail on every attribute read.

        See rscdesc._loadRDIntoCache for why we want this.
        """
        errMsg = ("Loading of %s failed in another thread; this RD cannot"
            " be used here")%self.sourceId

        class BrokenClass(object):
            """A class that reacts to all attribute requests with a some exception.
            """
            def __getattribute__(self, attributeName):
                if attributeName=="__class__":
                    return BrokenClass
                raise base.ReportableError(errMsg)

        self.__class__ = BrokenClass

    def macro_RSTccbysa(self, stuffDesignation):
        """expands to a declaration that stuffDesignation is available under
        CC-BY-SA.

        This only works in reStructured text (though it's still almost
        readable as source).
        """
        return ("%s is licensed under the `Creative Commons Attribution"
            " Share-Alike 3.0"
            " License <http://creativecommons.org/licenses/by-sa/3.0/>`_\n\n"
            ".. image:: /static/img/ccbysa.png\n\n"
            )%stuffDesignation

    def macro_RSTccby(self, stuffDesignation):
        """expands to a declaration that stuffDesignation is available under
        CC-BY.

        This only works in reStructured text (though it's still almost
        readable as source).
        """
        return ("%s is licensed under the `Creative Commons Attribution 3.0"
            " License <http://creativecommons.org/licenses/by/3.0/>`_\n\n"
            ".. image:: /static/img/ccby.png\n\n"
            )%stuffDesignation

    def macro_RSTcc0(self, stuffDesignation):
        """expands to a declaration that stuffDesignation is available under
        CC-0.

        This only works in reStructured text (though it's still almost
        readable as source).
        """
        return ("To the extent possible under law, the publisher has"
            " waived all copyright and related or neighboring rights to %s."
            " For details, see the `Creative Commons CC0 1.0"
            " Public Domain dedication"
            " <http://creativecommons.org/publicdomain/zero/1.0/>`_. Of course,"
            " you should still give proper credit when using this data as"
            " required by good scientific practice.\n\n"
            ".. image:: /static/img/cc0.png\n\n"
            )%stuffDesignation
374
class RDParseContext(base.ParseContext):
    """is a parse context for RDs.

    It defines a couple of attributes that structures can ask for.  However,
    it's good practice not to rely on their presence in case someone wants
    to parse XML snippets with a standard parse context, so use
    getattr(ctx, "doQueries", True) or somesuch.
    """
    def __init__(self, doQueries=True, restricted=False, forRD=None):
        # doQueries is set before the base constructor runs.
        self.doQueries = doQueries
        base.ParseContext.__init__(self, restricted, forRD)

    @classmethod
    def fromContext(cls, ctx, forRD=None):
        """returns a new context inheriting doQueries and restricted from
        the RDParseContext ctx; forRD is taken from the argument.
        """
        inherited = {
            "doQueries": ctx.doQueries,
            "restricted": ctx.restricted}
        return cls(forRD=forRD, **inherited)

    @property
    def failuresAreCacheable(self):
        """is true if failures produced with this context should
        be cached.

        Failures from restricted parses are not cacheable.
        """
        if self.restricted:
            return False
        return True
404
class PkgResourcePath(str):
    """A string type marking the source path of an RD as coming from
    pkg_resources rather than from the file system.

    Code in this module tells the two cases apart using isinstance checks.
    """
    def __str__(self):
        # instances already are strings, so they are their own string form.
        return self
411
def canonicalizeRDId(srcId):
    """returns a standard rd id for srcId.

    srcId may be a file system path, or it may be an "id".  The canonical
    basically is "inputs-relative path without .rd extension".  Everything
    that's not within inputs or doesn't end with .rd is handed through.
    // is expanded to __system__/.  The path to built-in RDs,
    /resources/inputs, is treated analoguous to inputsDir.
    """
    if srcId.startswith("//"):
        srcId = "__system__"+srcId[1:]

    for inputsDir in (base.getConfig("inputsDir"), "/resources/inputs"):
        # Only strip inputsDir when it is a complete leading path component;
        # a bare prefix test would also mangle paths in sibling directories
        # (e.g., <inputsDir>2/foo).
        stem = inputsDir.rstrip("/")
        if srcId==stem or srcId.startswith(stem+"/"):
            srcId = srcId[len(stem):].lstrip("/")

    if srcId.endswith(".rd"):
        srcId = srcId[:-3]

    return srcId
433
def _getFilenamesForId(srcId):
    """helps getRDInputStream by iterating over possible files for srcId.

    For absolute srcIds, that's srcId with and without .rd.  Everything
    else is tried below inputsDir first and then below /resources/inputs,
    in each case preferring the name with .rd.
    """
    if srcId.startswith("/"):
        candidates = (srcId+".rd", srcId)
    else:
        inInputs = os.path.join(base.getConfig("inputsDir"), srcId)
        candidates = (
            inInputs+".rd",
            inInputs,
            "/resources/inputs/%s.rd"%srcId,
            "/resources/inputs/%s"%srcId)

    for fName in candidates:
        yield fName
447
def _isBelowDir(path, directory):
    """returns true if the absolute path is directory itself or lies
    within it.

    In contrast to a plain string prefix test, this does not accept
    siblings of directory that merely share its name as a prefix.
    """
    directory = os.path.abspath(directory)
    return path==directory or path.startswith(
        directory.rstrip(os.sep)+os.sep)


def getRDInputStream(srcId):
    """returns a read-open stream for the XML source of the resource
    descriptor with srcId.

    srcId is already normalized; that means that absolute paths must
    point to a file (sans possibly .rd), relative paths are relative
    to inputsDir or pkg_resources(/resources/inputs).

    This function prefers files with .rd to those without, and
    inputsDir to pkg_resources (the latter allowing the user to
    override built-in system RDs).

    What is returned is a pair of source path and stream; for RDs
    from pkg_resources, the path is a PkgResourcePath.  The caller is
    responsible for closing the stream.  An RDNotFound is raised when no
    source can be located, a ReportableError for files outside of inputsDir
    and configDir.
    """
    for fName in _getFilenamesForId(srcId):
        if os.path.isfile(fName):
            # We don't want RDs from outside of inputs and config, as
            # these make referencing really messy.
            filePath = os.path.abspath(fName)
            if not (
                    _isBelowDir(filePath, base.getConfig("inputsDir"))
                    or _isBelowDir(filePath, base.getConfig("configDir"))):
                raise base.ReportableError("%s: Only RDs below inputsDir (%s) are"
                    " allowed."%(fName, base.getConfig("inputsDir")))

            return fName, open(fName)

        if (pkg_resources.resource_exists('gavo', fName)
                and not pkg_resources.resource_isdir('gavo', fName)):
            return (PkgResourcePath(fName),
                pkg_resources.resource_stream('gavo', fName))
    raise base.RDNotFound(srcId)
479
def setRDDateTime(rd, inputFile):
    """guesses a date the resource was updated.

    The newer one of inputFile's modification time and the modification
    time of the rd's timestamp file is stored in rd.timestampUpdated and,
    as a UTC datetime, in rd.dateUpdated.
    """
    # this would look better as a method on RD, and maybe it would be cool
    # to just try to infer the inputFile from the ID?
    rdTimestamp = utils.fgetmtime(inputFile)
    newest = rdTimestamp
    try:
        newest = max(os.path.getmtime(rd.getTimestampPath()), rdTimestamp)
    except os.error:
        # no timestamp file yet; the RD's own timestamp is all we have.
        pass

    rd.timestampUpdated = newest
    rd.dateUpdated = datetime.datetime.utcfromtimestamp(
        rd.timestampUpdated)


# The source id of the userconfig RD: "userconfig" (without the .rd
# extension) below configDir.  _UserConfigFakeRD passes this to
# base.caches.getRD.
USERCONFIG_RD_PATH = os.path.join(base.getConfig("configDir"), "userconfig")
class _UserConfigFakeRD(object):
    """A fake object that's in the RD cache as "%".

    This is used by the id resolvers in parsecontext; this certainly is
    of no use as an RD otherwise.
    """
    def __init__(self):
        pass

    def getRealRD(self):
        """returns the actual userconfig RD from the RD cache.
        """
        # NOTE(review): the body of this method was missing from the listing;
        # it was restored to match what getMeta does -- confirm against
        # upstream.
        return base.caches.getRD(USERCONFIG_RD_PATH)

    def getMeta(self, *args, **kwargs):
        """returns metadata from the userconfig RD; all arguments are passed
        on to its getMeta.
        """
        return base.caches.getRD(USERCONFIG_RD_PATH).getMeta(*args, **kwargs)

    def getById(self, id, forceType=None):
        """returns an item from userconfig.

        This first tries to resolve id in gavo/etc/userconfig.rd, then in the
        fallback //userconfig.rd.

        If neither has the id, a NotFoundError is raised.  Any other problem
        with the user's userconfig.rd is reported through base.ui and then
        ignored in favour of the fallback.
        """
        try:
            try:
                return base.caches.getRD(
                    os.path.join(base.getConfig("configDir"), "userconfig.rd")
                    ).getById(id, forceType=forceType)
            except base.NotFoundError:
                # not in the user's file: try the built-in one below
                pass
            except Exception as msg:
                base.ui.notifyError("Bad userconfig: (%s), ignoring it. Run"
                    " 'gavo val %%' to see actual errors."%repr(msg))

            return base.caches.getRD("//userconfig"
                ).getById(id, forceType=forceType)
        except base.NotFoundError:
            raise base.NotFoundError(id, "Element with id",
                "etc/userconfig.rd")
539
def getRD(srcId, doQueries=True, restricted=False, useRD=None):
    """returns a ResourceDescriptor for srcId.

    srcId is something like an input-relative path; you'll generally
    omit the extension (unless it's not the standard .rd).

    getRD furnishes the resulting RD with an idmap attribute containing
    the mapping from id to object collected by the parse context.

    The useRD parameter is for _loadRDIntoCache exclusively and is
    used by it internally.  It is strictly an ugly implementation detail.

    Exceptions raised while parsing are re-raised with inFile and cacheable
    attributes added.  The source stream is closed in any case.
    """
    if srcId=='%':
        return _UserConfigFakeRD()

    if useRD is None:
        rd = RD(canonicalizeRDId(srcId))
    else:
        rd = useRD

    srcPath, inputFile = getRDInputStream(rd.sourceId)
    try:
        # look for a context upstack and get the default parameters from there,
        # overriding the parameters.
        # CAREFUL: the local must be called getRD_context, since that's
        # the name utils.stealVar is asked for in nested getRD calls.
        try:
            getRD_context = RDParseContext.fromContext(
                utils.stealVar("getRD_context"), forRD=rd.sourceId)
        except ValueError:
            # no getRD_context variable in the stack
            getRD_context = RDParseContext(doQueries=doQueries,
                restricted=restricted, forRD=rd.sourceId)

        if not isinstance(srcPath, PkgResourcePath):
            srcPath = os.path.abspath(srcPath)
        rd.srcPath = getRD_context.srcPath = srcPath
        rd.idmap = getRD_context.idmap

        try:
            rd = base.parseFromStream(rd, inputFile, context=getRD_context)
        except Exception as ex:
            ex.inFile = srcPath
            ex.cacheable = getRD_context.failuresAreCacheable
            raise
        setRDDateTime(rd, inputFile)
    finally:
        # don't leak the source stream, whether or not parsing worked
        inputFile.close()
    return rd


# in _currentlyParsing, getRD keeps track of what RDs are currently being
# parsed.  The keys are the canonical sourceIds, the values are pairs of
# an RLock and the unfinished RD it protects (in that order, see
# _loadRDIntoCache).  Access to the dictionary itself is serialised
# through _currentlyParsingLock.
_currentlyParsingLock = threading.Lock()
_currentlyParsing = {}
# NOTE(review): redundant, threading is already imported at the top of
# the module.
import threading
class CachedException(object):
    """An exception that occurred while parsing an RD.

    This will remain in the cache until the underlying RD is changed.

    exception is the exception object to re-raise, sourcePath the path
    of the file that caused it (or None if there is no such file).
    """
    def __init__(self, exception, sourcePath):
        self.exception = exception
        self.sourcePath = sourcePath
        # this can race a bit in that we won't catch saves done between
        # we started parsing and we came up with the exception, but
        # these are easy to fix by saving again, so we won't bother.
        try:
            self.timestamp = os.path.getmtime(self.sourcePath)
        except (TypeError, os.error):
            # If the file doesn't exist (or sourcePath is None), that state
            # is "as of now"
            self.timestamp = time.time()

    def isDirty(self):
        """returns true if the source file has been removed or modified since
        the exception was recorded.

        Exceptions without a sourcePath never become dirty.
        """
        if self.sourcePath is None:
            # there's no file we could watch
            return False
        try:
            return os.path.getmtime(self.sourcePath)>self.timestamp
        except os.error:
            # someone has removed the file (possibly just now), kill cache.
            # Asking for the mtime directly rather than testing for existence
            # first avoids failing when the file vanishes in between.
            return True

    def raiseAgain(self):
        """raises the exception originally recorded.
        """
        # XXX TODO: do we want to fix the traceback here?
        raise self.exception
625
def _loadRDIntoCache(canonicalRDId, cacheDict):
    """helps _makeRDCache.

    This function contains the locking logic that makes sure multiple
    threads can load RDs.

    canonicalRDId is the (already canonicalized) id of the RD to load,
    cacheDict the dictionary the result is stored in.  If parsing fails
    and the exception is marked cacheable, a CachedException is stored in
    cacheDict instead, and the exception is re-raised in any case.

    If the RD is already being parsed, the RD object under construction
    is returned after its lock could be acquired; for the parsing thread
    itself, that object is still unfinished.
    """
    # Decide under the global lock whether we are the ones who parse or
    # whether someone has already started parsing this RD.
    with _currentlyParsingLock:
        if canonicalRDId in _currentlyParsing:
            lock, rd = _currentlyParsing[canonicalRDId]
            justWait = True
        else:
            lock, rd = threading.RLock(), RD(canonicalRDId)
            _currentlyParsing[canonicalRDId] = lock, rd
            # hold the per-RD lock for as long as we are parsing
            lock.acquire()
            justWait = False

    if justWait:
        # Someone else is already parsing.  If it's the current thread,
        # go on (lock is an RLock!) so we can resolve circular references
        # (as long as they are backward references).  All other threads
        # just wait for the parsing thread to finish
        lock.acquire()
        lock.release()
        return rd

    try:
        try:
            cacheDict[canonicalRDId] = getRD(canonicalRDId, useRD=rd)
        except Exception as ex:
            # Importing failed, invalidate the RD (in case other threads still
            # see it from _currentlyParsing)
            if getattr(ex, "cacheable", False):
                cacheDict[canonicalRDId] = CachedException(ex,
                    getattr(rd, "srcPath", None))
            rd.invalidate()
            raise
    finally:
        # whatever happened, parsing is over: drop the bookkeeping entry
        # and let waiting threads proceed.
        del _currentlyParsing[canonicalRDId]
        lock.release()
    return cacheDict[canonicalRDId]
667
def _makeRDCache():
    """installs the cache for RDs.

    One trick here is to handle "aliasing", i.e. making sure that
    you get identical objects regardless of whether you request
    __system__/adql.rd, __system__/adql, or //adql.

    Then, we're checking for "dirty" RDs (i.e., those that should
    be reloaded).

    The messiest part is the support for getting RDs in the presence of
    threads while still supporting recursive references, though.
    """
    # TODO: Maybe unify this again with caches._makeCache?  That stuff could
    # do with a facility to invalidate cached entries, too.
    # But care is necessary to not cache any RD parsed in a nonstandard
    # fashion (e.g., in restricted mode).  CAREFUL: since getRD indulges
    # in variable stealing, explicit checks are necessary.
    rdCache = {}

    def getRDCached(srcId, **kwargs):
        # anything requested with special parameters bypasses the cache
        if kwargs:
            return getRD(srcId, **kwargs)

        rdId = canonicalizeRDId(srcId)

        # throw away cached items that say they are out of date
        if rdId in rdCache:
            dirtyCheck = getattr(rdCache[rdId], "isDirty", None)
            if dirtyCheck is not None and dirtyCheck():
                base.caches.clearForName(rdId)

        if rdId not in rdCache:
            return _loadRDIntoCache(rdId, rdCache)

        cachedOb = rdCache[rdId]
        if isinstance(cachedOb, CachedException):
            cachedOb.raiseAgain()
        else:
            return cachedOb

    getRDCached.cacheCopy = rdCache
    base.caches.registerCache("getRD", rdCache, getRDCached)


_makeRDCache()