1 """
2 Structure definition of resource descriptors.
3
4 The stuff they are describing is not a resource in the VO sense (whatever
5 that is) or in the Dublin Core sense, but simply stuff held together
6 by common metadata. If it's got the same creator, the same base title,
7 the same keywords, etc., it's described by one RD.
8
9 In the DaCHS, a resource descriptor typically sets up a schema in
10 the database.
11 """
12
13
14
15
16
17
18
19 import datetime
20 import grp
21 import os
22 import pkg_resources
23 import time
24 import threading
25 import weakref
26
27 from gavo import base
28 from gavo import registry
29 from gavo import rscdef
30 from gavo import svcs
31 from gavo import utils
32 from gavo.rscdef import common
33 from gavo.rscdef import regtest
34 from gavo.rscdef import scripting
35 from gavo.rscdef import executing
36
37
38 -class RD(base.Structure, base.ComputedMetaMixin, scripting.ScriptingMixin,
39 base.StandardMacroMixin, common.PrivilegesMixin, registry.DateUpdatedMixin):
40 """A resource descriptor.
41
42 RDs collect all information about how to parse a particular source (like a
43 collection of FITS images, a catalogue, or whatever), about the database
44 tables the data ends up in, and the services used to access them.
45
46 In DaCHS' RD XML serialisation, they correspond to the root element.
47 """
48 name_ = "resource"
49
50
51
52 cacheable = False
53
54 _resdir = base.FunctionRelativePathAttribute("resdir",
55 default=None,
56 baseFunction=lambda instance: base.getConfig("inputsDir"),
57 description="Base directory for source files and everything else"
58 " belonging to the resource.",
59 copyable=True)
60
61 _schema = base.UnicodeAttribute("schema",
62 default=base.Undefined,
63 description="Database schema for tables defined here. Follow the rule"
64 " 'one schema, one RD' if at all possible. If two RDs share the same"
65 " schema, the must generate exactly the same permissions for that"
66 " schema; this means, in particular, that if one has an ADQL-published"
67 " table, so must the other. In a nutshell: one schema, one RD.",
68 copyable=True,
69 callbacks=["_inferResdir"])
70
71 _dds = base.StructListAttribute("dds",
72 childFactory=rscdef.DataDescriptor,
73 description="Descriptors for the data generated and/or published"
74 " within this resource.",
75 copyable=True,
76 before="outputTables")
77
78 _tables = base.StructListAttribute("tables",
79 childFactory=rscdef.TableDef,
80 description="A table used or created by this resource",
81 copyable=True,
82 before="dds")
83
84 _outputTables = base.StructListAttribute("outputTables",
85 childFactory=svcs.OutputTableDef,
86 description="Canned output tables for later reference.",
87 copyable=True)
88
89 _rowmakers = base.StructListAttribute("rowmakers",
90 childFactory=rscdef.RowmakerDef,
91 description="Transformations for going from grammars to tables."
92 " If specified in the RD, they must be referenced from make"
93 " elements to become active.",
94 copyable=True,
95 before="dds")
96
97 _procDefs = base.StructListAttribute("procDefs",
98 childFactory=rscdef.ProcDef,
99 description="Procedure definintions (rowgens, rowmaker applys)",
100 copyable=True, before="rowmakers")
101
102 _condDescs = base.StructListAttribute("condDescs",
103 childFactory=svcs.CondDesc,
104 description="Global condition descriptors for later reference",
105 copyable=True,
106 before="cores")
107
108 _resRecs = base.StructListAttribute("resRecs",
109 childFactory=registry.ResRec,
110 description="Non-service resources for the IVOA registry. They will"
111 " be published when gavo publish is run on the RD.")
112
113 _services = base.StructListAttribute("services",
114 childFactory=svcs.Service,
115 description="Services exposing data from this resource.",
116 copyable=True)
117
118 _macDefs = base.MacDefAttribute(before="tables",
119 description="User-defined macros available on this RD")
120
121 _mixinDefs = base.StructListAttribute("mixdefs",
122 childFactory=rscdef.MixinDef,
123 description="Mixin definitions (usually not for users)")
124
125 _require = base.ActionAttribute("require",
126 methodName="importModule",
127 description="Import the named gavo module (for when you need something"
128 " registred)")
129
130 _cores = base.MultiStructListAttribute("cores",
131 childFactory=svcs.getCore,
132 childNames=svcs.CORE_REGISTRY.keys(),
133 description="Cores available in this resource.", copyable=True,
134 before="services")
135
136 _jobs = base.StructListAttribute("jobs",
137 childFactory=executing.Execute,
138 description="Jobs to be run while this RD is active.")
139
140 _tests = base.StructListAttribute("tests",
141 childFactory=regtest.RegTestSuite,
142 description="Suites of regression tests connected to this RD.")
143
144 _coverage = base.StructAttribute("coverage",
145 childFactory=rscdef.Coverage,
146 default=None,
147 description="STC coverage of this resource.", copyable=True)
148
149 _properties = base.PropertyAttribute()
150
152
153
154
155
156
def __init__(self, srcId, **kwargs):
    """sets up an RD identified by srcId.

    srcId is stored as sourceId; all keyword arguments are handed
    through to base.Structure.__init__.  None is passed in the second
    position of that call (presumably where other structures pass their
    parent -- confirm against base.Structure).
    """
    self.sourceId = srcId
    base.Structure.__init__(self, None, **kwargs)

    # rd is a weak proxy to the RD itself, so holding it does not
    # create a strong reference cycle.
    self.rd = weakref.proxy(self)

    # Provisional value; getRD replaces it through setRDDateTime once
    # the RD has been parsed from a stream.
    self.dateUpdated = datetime.datetime.utcnow()

    # getRD sets this to where the RD was read from.
    self.srcPath = None

    # isDirty compares the source's mtime against this.
    self.loadedAt = time.time()

    # pairs of (dependent sourceId, prerequisite sourceId)
    self.rdDependencies = set()
173
175 return iter(self.dds)
176
178 return "<resource descriptor for %s>"%self.sourceId
179
184
def isDirty(self):
    """returns true if the RD's source file looks newer than loadedAt.

    RDs whose srcPath is a PkgResourcePath and RDs without a srcPath
    are never dirty.  If the source's modification time cannot be
    determined (os.error), the RD is considered dirty.
    """
    source = self.srcPath
    if isinstance(source, PkgResourcePath):
        # marked as coming from pkg_resources: never reloaded
        return False
    if source is None:
        return False

    try:
        return os.path.getmtime(source)>self.loadedAt
    except os.error:
        # cannot stat the source any more: have the caller reload
        return True
200
204
206 for table in self.tables:
207 self.readProfiles = self.readProfiles | table.readProfiles
208 table.setMetaParent(self)
209
210 self.serviceIndex = {}
211 for svc in self.services:
212 self.serviceIndex[svc.id] = svc
213 svc.setMetaParent(self)
214
215 for dd in self.dds:
216 dd.setMetaParent(self)
217
218 if self.resdir and not os.path.isdir(self.resdir):
219 base.ui.notifyWarning("RD %s: resource directory '%s' does not exist"%(
220 self.sourceId, self.resdir))
221
222 self._onElementCompleteNext(RD)
223
227
229 return iter(self.dds)
230
232 return self.serviceIndex.get(id, None)
233
236
239
def getById(self, id, forceType=None):
    """returns the element stored under id in this RD's idmap.

    A base.NotFoundError is raised if idmap has no such key.  If
    forceType is given and the element found is not an instance of it,
    a base.StructureError is raised.
    """
    try:
        found = self.idmap[id]
    except KeyError:
        raise base.NotFoundError(
            id, "Element with id", "RD %s"%(self.sourceId))

    if forceType and not isinstance(found, forceType):
        raise base.StructureError("Element with id '%s' is not a %s"%(
            id, forceType.__name__))
    return found
251
def getAbsPath(self, relPath):
    """returns relPath joined to this RD's resdir.
    """
    resourceDir = self.resdir
    return os.path.join(resourceDir, relPath)
256
def openRes(self, relPath, mode="r"):
    """returns an open file for the resdir-relative relPath.

    The file is opened with mode; closing it is up to the caller.

    Deprecated.  This is going to go away; use getAbsPath and a context
    manager instead.
    """
    absPath = self.getAbsPath(relPath)
    return open(absPath, mode)
264
def getTimestampPath(self):
    """returns the path of this RD's state ("timestamp") file.

    The file lives in the configured stateDir and is called updated_
    plus the sourceId with slashes replaced by plus signs.  Its
    modification time is what setRDDateTime reads as the time of
    the last data update.
    """
    stateDir = base.getConfig("stateDir")
    stampName = "updated_"+self.sourceId.replace("/", "+")
    return os.path.join(stateDir, stampName)
271
def touchTimestamp(self):
    """updates the timestamp on the rd's state file.

    The state file (see getTimestampPath) is removed and re-created
    empty, its mode is set to 0664, and, where possible, its group is
    changed to the configured GavoGroup.

    This never raises for file system trouble: if the file cannot be
    re-created or chmod-ed, a warning is emitted through base.ui;
    a failing group change is ignored silently.
    """
    fn = self.getTimestampPath()
    try:
        try:
            os.unlink(fn)
        except os.error:
            # Usually there just is no previous file; if something else
            # is wrong, re-creating the file below will fail and warn.
            pass

        with open(fn, "w"):
            pass
        os.chmod(fn, 0o664)

        try:
            # -1 leaves the owner untouched; item 2 of the group
            # database entry is the numeric group id.  The subscript
            # must apply to getgrnam's result, not to the group name.
            gid = grp.getgrnam(base.getConfig("GavoGroup"))[2]
            os.chown(fn, -1, gid)
        except (KeyError, os.error):
            # unknown group or no permission to chown: best effort only
            pass
    except (os.error, IOError):
        base.ui.notifyWarning(
            "Could not update timestamp on RD %s"%self.sourceId)
291
298
300 """declares that rd needs the RD prereq to properly work.
301
302 This is used in the generation of resource records to ensure that, e.g.
303 registred data have added their served-bys to the service resources.
304 """
305 if rd.sourceId!=prereq.sourceId:
306 self.rdDependencies.add((rd.sourceId, prereq.sourceId))
307
308 - def copy(self, parent):
314
def invalidate(self):
    """makes this RD raise an error on every attribute read.

    This works by replacing the instance's class with a local class
    answering all attribute requests except __class__ with a
    base.ReportableError.  _loadRDIntoCache calls this when parsing
    the RD has failed.
    """
    errMsg = ("Loading of %s failed in another thread; this RD cannot"
        " be used here")%self.sourceId

    class BrokenClass(object):
        """A class that reacts to all attribute requests with an exception.
        """
        def __getattribute__(self, attributeName):
            # __class__ must stay readable so the object can still
            # be inspected; everything else fails.
            if attributeName!="__class__":
                raise base.ReportableError(errMsg)
            return BrokenClass

    self.__class__ = BrokenClass
332
334 """expands to a declaration that stuffDesignation is available under
335 CC-BY-SA.
336
337 This only works in reStructured text (though it's still almost
338 readable as source).
339 """
340 return ("%s is licensed under the `Creative Commons Attribution"
341 " Share-Alike 3.0"
342 " License <http://creativecommons.org/licenses/by-sa/3.0/>`_\n\n"
343 ".. image:: /static/img/ccbysa.png\n\n"
344 )%stuffDesignation
345
347 """expands to a declaration that stuffDesignation is available under
348 CC-BY.
349
350 This only works in reStructured text (though it's still almost
351 readable as source).
352 """
353 return ("%s is licensed under the `Creative Commons Attribution 3.0"
354 " License <http://creativecommons.org/licenses/by/3.0/>`_\n\n"
355 ".. image:: /static/img/ccby.png\n\n"
356 )%stuffDesignation
357
359 """expands to a declaration that stuffDesignation is available under
360 CC-0.
361
362 This only works in reStructured text (though it's still almost
363 readable as source).
364 """
365 return ("To the extent possible under law, the publisher has"
366 " waived all copyright and related or neighboring rights to %s."
367 " For details, see the `Creative Commons CC0 1.0"
368 " Public Domain dedication"
369 " <http://creativecommons.org/publicdomain/zero/1.0/>`_. Of course,"
370 " you should still give proper credit when using this data as"
371 " required by good scientific practice.\n\n"
372 ".. image:: /static/img/cc0.png\n\n"
373 )%stuffDesignation
374
375
class RDParseContext(base.ParseContext):
    """is a parse context for RDs.

    On top of what base.ParseContext provides, it has a doQueries
    attribute that structures can ask for.  It is good practice not to
    rely on its presence, though, in case someone wants to parse XML
    snippets with a standard parse context; so, use
    getattr(ctx, "doQueries", True) or somesuch.
    """
    def __init__(self, doQueries=True, restricted=False, forRD=None):
        # doQueries is set before the base class constructor runs
        self.doQueries = doQueries
        base.ParseContext.__init__(self, restricted, forRD)

    @classmethod
    def fromContext(cls, ctx, forRD=None):
        """a constructor that makes a context with doQueries and
        restricted taken from the RDParseContext ctx.

        forRD is not inherited from ctx; it is whatever is passed in here.
        """
        return cls(
            doQueries=ctx.doQueries,
            restricted=ctx.restricted,
            forRD=forRD)

    @property
    def failuresAreCacheable(self):
        """returns true if failures produced with this context should
        be cached.

        This is not the case with restricted parses.
        """
        if self.restricted:
            return False
        return True
404
407 """A sentinel class used to mark an RD as coming from pkg_resources.
408 """
411
def canonicalizeRDId(srcId):
    """returns a standard rd id for srcId.

    srcId may be a file system path, or it may be an "id".  The
    canonical form basically is "inputs-relative path without .rd
    extension".  Everything that's not within inputs or doesn't end
    with .rd is handed through.  A leading // is expanded to
    __system__/.  The path to built-in RDs, /resources/inputs, is
    treated analogously to inputsDir.
    """
    if srcId[:2]=="//":
        srcId = "__system__"+srcId[1:]

    knownRoots = (base.getConfig("inputsDir"), "/resources/inputs")
    for root in knownRoots:
        if srcId.startswith(root):
            # make the id relative to the root it was found under
            srcId = srcId[len(root):].lstrip("/")

    if srcId[-3:]==".rd":
        srcId = srcId[:-3]

    return srcId
433
436 """helps getRDInputStream by iterating over possible files for srcId.
437 """
438 if srcId.startswith("/"):
439 yield srcId+".rd"
440 yield srcId
441 else:
442 inputsDir = base.getConfig("inputsDir")
443 yield os.path.join(inputsDir, srcId)+".rd"
444 yield os.path.join(inputsDir, srcId)
445 yield "/resources/inputs/%s.rd"%srcId
446 yield "/resources/inputs/%s"%srcId
447
479
def setRDDateTime(rd, inputFile):
    """guesses a date the resource was updated.

    This uses either the modification time of inputFile or the one of
    the rd's state file (see getTimestampPath), whatever is newer.  If
    the state file cannot be examined, inputFile's time is used.

    The result is stored on rd as timestampUpdated (seconds since the
    epoch) and as dateUpdated (a naive UTC datetime).
    """
    sourceStamp = utils.fgetmtime(inputFile)
    try:
        stateStamp = os.path.getmtime(rd.getTimestampPath())
    except os.error:
        # no usable state file: the RD source is all we have
        stateStamp = sourceStamp

    rd.timestampUpdated = max(stateStamp, sourceStamp)
    rd.dateUpdated = datetime.datetime.utcfromtimestamp(
        rd.timestampUpdated)
497
498
499 USERCONFIG_RD_PATH = os.path.join(base.getConfig("configDir"), "userconfig")
503 """A fake object that's in the RD cache as "%".
504
505 This is used by the id resolvers in parsecontext; this certainly is
506 of no use as an RD otherwise.
507 """
510
513
516
def getById(self, id, forceType=None):
    """returns an item from userconfig.

    This first tries to resolve id in userconfig.rd within the
    configured configDir, then in the fallback //userconfig.

    If the user's file does not have id, the fallback is consulted
    silently.  If anything else goes wrong with the user's file, an
    error is reported through base.ui and the fallback is used as
    well.  A base.NotFoundError is raised when the fallback does not
    know id either.
    """
    try:
        try:
            userConfigPath = os.path.join(
                base.getConfig("configDir"), "userconfig.rd")
            userRD = base.caches.getRD(userConfigPath)
            return userRD.getById(id, forceType=forceType)
        except base.NotFoundError:
            # not in the user's configuration: go on to the fallback
            pass
        except Exception as msg:
            base.ui.notifyError("Bad userconfig: (%s), ignoring it. Run"
                " 'gavo val %%' to see actual errors."%repr(msg))

        fallbackRD = base.caches.getRD("//userconfig")
        return fallbackRD.getById(id, forceType=forceType)
    except base.NotFoundError:
        # re-raise with a location hint users can make sense of
        raise base.NotFoundError(id, "Element with id",
            "etc/userconfig.rd")
539
540
def getRD(srcId, doQueries=True, restricted=False, useRD=None):
    """returns a ResourceDescriptor for srcId.

    srcId is something like an input-relative path; you'll generally
    omit the extension (unless it's not the standard .rd).  The special
    srcId % yields a _UserConfigFakeRD.

    getRD furnishes the resulting RD with an idmap attribute containing
    the mapping from id to object collected by the parse context, and
    with srcPath set to where the RD was read from.

    Exceptions raised while parsing are re-raised with two attributes
    added: inFile (the source path) and cacheable (whether the failure
    may be cached).

    The useRD parameter is for _loadRDIntoCache exclusively and is
    used by it internally.  It is strictly an ugly implementation detail.
    """
    if srcId=='%':
        return _UserConfigFakeRD()

    rd = RD(canonicalizeRDId(srcId)) if useRD is None else useRD
    srcPath, inputFile = getRDInputStream(rd.sourceId)

    # NOTE(review): the local name getRD_context must not be changed;
    # utils.stealVar is asked for exactly this name, presumably to
    # pick up the context of an enclosing getRD call -- confirm
    # against utils.stealVar.
    try:
        getRD_context = RDParseContext.fromContext(
            utils.stealVar("getRD_context"), forRD=rd.sourceId)
    except ValueError:
        # nothing to inherit from: make a context from our arguments
        getRD_context = RDParseContext(doQueries=doQueries,
            restricted=restricted, forRD=rd.sourceId)

    if not isinstance(srcPath, PkgResourcePath):
        srcPath = os.path.abspath(srcPath)
    rd.srcPath = srcPath
    getRD_context.srcPath = srcPath
    rd.idmap = getRD_context.idmap

    try:
        rd = base.parseFromStream(rd, inputFile, context=getRD_context)
    except Exception as ex:
        # annotate the failure for whoever handles (or caches) it
        ex.inFile = srcPath
        ex.cacheable = getRD_context.failuresAreCacheable
        raise

    setRDDateTime(rd, inputFile)
    return rd
586
587
588
589
590
591 _currentlyParsingLock = threading.Lock()
592 _currentlyParsing = {}
593 import threading
class CachedException(object):
    """An exception that occurred while parsing an RD.

    This will remain in the cache until the underlying RD is changed.

    exception is the exception to replay, sourcePath the path of the
    RD source that caused it (or None if there is none).
    """
    def __init__(self, exception, sourcePath):
        self.exception = exception
        self.sourcePath = sourcePath
        # Remember the source's modification time so isDirty can tell
        # when it has been edited; without a usable path (None or
        # not stat-able), fall back to the current time.
        try:
            self.timestamp = os.path.getmtime(self.sourcePath)
        except (TypeError, os.error):
            self.timestamp = time.time()

    def isDirty(self):
        """returns true if the failed RD should be tried again.

        That is the case if the source has been modified since the
        failure was recorded or if it cannot be examined any more.
        Without a source path, the failure is never dirty.
        """
        if self.sourcePath is None:
            return False
        # Ask for the mtime directly rather than checking for existence
        # first; that way, a source vanishing in between cannot make
        # this raise.
        try:
            return os.path.getmtime(self.sourcePath)>self.timestamp
        except os.error:
            return True

    def raiseAgain(self):
        """raises the exception recorded at construction.
        """
        raise self.exception
625
def _loadRDIntoCache(canonicalRDId, cacheDict):
    """helps _makeRDCache.

    This function contains the locking logic that makes sure multiple
    threads can load RDs.

    The first caller for canonicalRDId registers a pair of a reentrant
    lock and a fresh RD in _currentlyParsing and parses the RD through
    getRD while holding that lock.  Callers arriving while that parse
    is running just wait for the lock and return the RD object
    registered by the first caller.

    On success, the parsed RD is stored in cacheDict and returned.  On
    failure, the exception is stored in cacheDict wrapped into a
    CachedException if it is marked as cacheable, the RD object is
    invalidated, and the exception is re-raised.
    """
    with _currentlyParsingLock:
        justWait = canonicalRDId in _currentlyParsing
        if justWait:
            lock, rd = _currentlyParsing[canonicalRDId]
        else:
            lock, rd = threading.RLock(), RD(canonicalRDId)
            _currentlyParsing[canonicalRDId] = lock, rd
            lock.acquire()

    if justWait:
        # Somebody is already parsing this RD.  Other threads block
        # here until the parse is over.  The lock is reentrant, so the
        # parsing thread itself (on a recursive request) gets through
        # immediately and receives the RD under construction.
        with lock:
            pass
        return rd

    try:
        cacheDict[canonicalRDId] = getRD(canonicalRDId, useRD=rd)
    except Exception as ex:
        # srcPath must be fetched before invalidate makes rd
        # unreadable.
        if getattr(ex, "cacheable", False):
            cacheDict[canonicalRDId] = CachedException(ex,
                getattr(rd, "srcPath", None))
        rd.invalidate()
        raise
    finally:
        del _currentlyParsing[canonicalRDId]
        lock.release()
    return cacheDict[canonicalRDId]
667
def _makeRDCache():
    """installs the cache for RDs.

    One trick here is to handle "aliasing", i.e. making sure that
    you get identical objects regardless of whether you request
    __system__/adql.rd, __system__/adql, or //adql; this is done by
    canonicalizing the ids before using them as cache keys.

    Then, we're checking for "dirty" RDs (i.e., those that should
    be reloaded); cached objects are asked through their isDirty
    method if they have one.

    The messiest part is the support for getting RDs in the presence of
    threads while still supporting recursive references, though; that
    is delegated to _loadRDIntoCache.

    The cache is registered with base.caches under the name getRD.
    """
    rdCache = {}

    def getRDCached(srcId, **kwargs):
        # Requests with special parse options bypass the cache
        # altogether.
        if kwargs:
            return getRD(srcId, **kwargs)

        srcId = canonicalizeRDId(srcId)

        if srcId in rdCache:
            checkDirty = getattr(rdCache[srcId], "isDirty", lambda: False)
            if checkDirty():
                base.caches.clearForName(srcId)

        # the entry may be gone by now if it was dirty
        if srcId not in rdCache:
            return _loadRDIntoCache(srcId, rdCache)

        cachedOb = rdCache[srcId]
        if isinstance(cachedOb, CachedException):
            # replay the failure of the original parse
            cachedOb.raiseAgain()
        else:
            return cachedOb

    getRDCached.cacheCopy = rdCache
    base.caches.registerCache("getRD", rdCache, getRDCached)
710
711 _makeRDCache()
712