1 """
2 Functions dealing with compilation and introspection of python and
3 external code.
4 """
5
6
7
8
9
10
11
12 from __future__ import print_function
13
14 import compiler
15 import compiler.ast
16 import contextlib
17 import imp
18 import itertools
19 import inspect
20 import functools
21 import linecache
22 import os
23 import re
24 import shutil
25 import string
26 import sys
27 import tempfile
28 import threading
29 import weakref
30 from cStringIO import StringIO
31
32 from gavo.utils import algotricks
33 from gavo.utils import misctricks
34 from gavo.utils import excs
38 """is a decorator that adds a "buildDocsForThis" attribute to its argument.
39
40 This attribute is evaluated by documentation generators.
41 """
42 origFun.buildDocsForThis = True
43 return origFun
44
47 """A cache for a callable.
48
49 This is basically memoization, except that these are supposed
50 to be singletons; CachedGetters should be used where the
51 construction of a resource (e.g., a grammar) should be deferred
52 until it is actually needed to save on startup times.
53
54 The resource is created on the first call, all further calls
55 just return references to the original object.
56
57 You can also leave out the getter argument and add an argumentless
58 method impl computing the value to cache.
59
60 Using a CachedGetter also serializes generation, so you can also
61 use it when getter isn't thread-safe.
62
63 At construction, you can pass a f(thing) -> bool in an isAlive
64 keyword argument. If you do, the function will be called with the
65 cache before the cache is being returned. If it returns false,
66 the resource is re-made (no concurrency control is enforced here).
67 """
def __init__(self, getter, *args, **kwargs):
    """sets up an empty cache around getter.

    When getter is None, the instance's own impl attribute is used
    instead.  An isAlive keyword argument is taken out of kwargs and kept
    as self.isAlive (None if it was not passed); the remaining positional
    and keyword arguments are stored on the instance together with a
    fresh lock.
    """
    # Fall back to the subclass-provided impl when no getter is given.
    self.getter = self.impl if getter is None else getter
    self.cache = None

    # isAlive is our own option and must not stay in the stored kwargs.
    self.isAlive = kwargs.pop("isAlive", None)
    self.args = args
    self.kwargs = kwargs
    self.lock = threading.Lock()
76
78 if (self.isAlive is not None
79 and self.cache is not None
80 and not self.isAlive(self.cache)):
81 self.cache = None
82
83 if self.cache is None:
84 with self.lock:
85
86 if self.cache is not None:
87 return self.cache
88 self.cache = self.getter(*self.args, **self.kwargs)
89
90
91
92 if self.isAlive is None:
93 del self.args
94 del self.kwargs
95 del self.lock
96
97 return self.cache
98
101 """is like CachedGetter but with a built-in getter.
102
103 Here, you define your class and have a class method impl returning
104 what you want.
105 """
106 cache = None
107
108 @classmethod
113
116 """A trivial deferred module loader.
117
118 Use this to delay the actual import of a module until it's actually
119 needed.
120
121 It is constructed with a module name (that will be inserted into the
122 calling module's globals() as a side effect) and some literal code
123 that, when executed in the caller's global namespace, actually
124 imports the module, for instance::
125
126 utils.DeferredImport("wcs", "from astropy import wcs")
127
128 As a service for static code checking, you'll usually want to repeat
129 the module name, though:
130
131
132 wcs = utils.DeferredImport("wcs", "from astropy import wcs")
133 """
134 loadedModule = None
135
def __init__(self, moduleName, loadingCode):
    """remembers how to load moduleName and registers self under that
    name in the caller's globals.

    loadingCode is compiled right away (so syntax errors show up here)
    but not executed.
    """
    # f_back is whoever instantiated us; that frame's globals are the
    # namespace this object is entered into.
    callerFrame = inspect.currentframe().f_back
    self.moduleName = moduleName
    self.parentGlobals = callerFrame.f_globals
    self.loader = compile(loadingCode, "<modloader-%s>"%id(self), 'exec')
    self.parentGlobals[moduleName] = self
141
143 exec self.loader in self.parentGlobals
144 return getattr(self.parentGlobals[self.moduleName], name)
145
148 """
149 A mixin for objects requiring unique IDs.
150
151 The primary use case is XML generation, where you want stable IDs
152 for objects, but IDs must be unique over an entire XML file.
153
154 The IdManagerMixin provides some methods for doing that:
155
156 - makeIdFor(object) -- returns an id for object, or None if makeIdFor has
157 already been called for that object (i.e., it presumably already is
158 in the document).
159
160 - getIdFor(object) -- returns an id for object if makeIdFor has already
161 been called before. Otherwise, a NotFoundError is raised
162
163 - getOrMakeIdFor(object) -- returns an id for object; if object has
164 been seen before, it's the same id as before. Identity is by equality
165 for purposes of dictionaries.
166
167 - getForId(id) -- returns the object belonging to an id that has
168 been handed out before. Raises a NotFoundError for unknown ids.
169
170 - cloneFrom(other) -- overwrites the self's id management dictionaries
171 with those from other. You want this if two id managers must work
172 on the same document.
173 """
174 __cleanupPat = re.compile("[^A-Za-z0-9_]+")
175
176
178 try:
179 return self.__objectToId, self.__idsToObject
180 except AttributeError:
181 self.__objectToId, self.__idsToObject = {}, {}
182 return self.__objectToId, self.__idsToObject
183
185 for i in itertools.count():
186 newId = suggestion+str(i)
187 if newId not in invMap:
188 return newId
189
191 """takes the id management dictionaries from other.
192 """
193 self.__objectToId, self.__idsToObject = other.__getIdMaps()
194
196 map, invMap = self.__getIdMaps()
197 if suggestion:
198 suggestion = self.__cleanupPat.sub("", suggestion)
199 if id(ob) in map:
200 return None
201
202 if suggestion is not None:
203 if suggestion in invMap:
204 newId = self._fixSuggestion(suggestion, invMap)
205 else:
206 newId = suggestion
207 else:
208 newId = intToFunnyWord(id(ob))
209
210
211 map[id(ob)] = newId
212 try:
213 invMap[newId] = weakref.proxy(ob)
214 except TypeError:
215 invMap[newId] = ob
216 return newId
217
219 try:
220 return self.__getIdMaps()[0][id(ob)]
221 except KeyError:
222 raise excs.NotFoundError(repr(ob), what="object",
223 within="id manager %r"%(self,), hint="Someone asked for the"
224 " id of an object not managed by the id manager. This usually"
225 " is a software bug.")
226
232
234 try:
235 return self.__getIdMaps()[1][id]
236 except KeyError:
237 raise excs.NotFoundError(id, what="id", within="id manager %r"%(self,),
238 hint="Someone asked for the object belonging to an id that has"
239 " been generated externally (i.e., not by this id manager). This"
240 " usually is an internal error of the software.")
241
244 """A Null object, i.e. one that accepts any method call whatsoever.
245
246 This is mainly here for use in scaffolding.
247 """
250
253
256 """is a metaclass for *classes* that always compare in one way.
257 """
258
261
268
271 """is a *class* smaller than anything.
272
273 This will only work as the first operand.
274
275 >>> Infimum<-2333
276 True
277 >>> Infimum<""
278 True
279 >>> Infimum<None
280 True
281 >>> Infimum<Infimum
282 True
283 """
284 cmpRes = -1
285
288 """is a *class* larger than anything.
289
290 This will only work as the first operand.
291
292 >>> Supremum>1e300
293 True
294 >>> Supremum>""
295 True
296 >>> Supremum>None
297 True
298 >>> Supremum>Supremum
299 True
300 """
301 cmpRes = 1
302
305 """a set that contains everything.
306
307 Ok, so this doesn't exist. Yes, I've read my Russell. You see, this
308 is a restricted hack for a reason. And even the docstring is
309 contradictory.
310
311 Sort-of. This now works for intersection and containing.
312 Should this reject union? Also, unfortunately this only works as a
313 left operand; I don't see how to override whatever set does with
314 this as a right operand.
315
316 >>> s = AllEncompassingSet()
317 >>> s & set([1,2])
318 set([1, 2])
319 >>> "gooble" in s
320 True
321 >>> s in s
322 True
323 >>> s not in s
324 False
325 """
328
331
334
335 intersection = __and__
336
339
342 """iterates over all subclasses of baseClass in the sequence objects.
343 """
344 for cand in objects:
345 try:
346 if issubclass(cand, baseClass) and cand is not baseClass:
347 yield cand
348 except TypeError:
349 pass
350
353 """iterates over all instances of baseClass in the sequence objects.
354 """
355 for cand in objects:
356 if isinstance(cand, baseClass):
357 yield cand
358
359
def buildClassResolver(baseClass, objects, instances=False,
        key=lambda obj: getattr(obj, "name", None), default=None):
    """returns a function mapping names to the classes derived from
    baseClass found in the sequence objects.

    This is used to build registries of Macros and RowProcessors.  With
    the default key function, the classes in question need a name
    attribute.

    objects would usually be something like globals().values().

    If instances is True, the registry is an algotricks.DeferringDict
    rather than a plain dictionary, so that the resolver hands out
    instances instead of classes.

    key is a function taking an object and returning the key under which
    you will later access it.  Objects for which key returns None are
    left out of the registry.

    The returned function raises a KeyError for unknown names unless
    default is not None, in which case default is returned.  The registry
    is available as the registry attribute of the returned function.
    """
    registry = algotricks.DeferringDict() if instances else {}

    for candidate in iterDerivedClasses(baseClass, objects):
        registryKey = key(candidate)
        if registryKey is None:
            # a None key means "do not register this one"
            continue
        registry[registryKey] = candidate

    def resolve(name, registry=registry):
        try:
            found = registry[name]
        except KeyError:
            if default is None:
                raise
            return default
        return found

    resolve.registry = registry
    return resolve
394
419
420 docLines = []
421 for title, body in docItems:
422 docLines.extend([title, underliner*len(title), "", "::", "",
423 formatDocstring(body), ""])
424 docLines.append("\n.. END AUTO\n")
425 return "\n".join(docLines)
426
429 """prints hopefully RST-formatted docs for all subclasses
430 of baseClass in objects.
431
432 The function returns True if it finds arguments it expects ("docs"
433 and optionally a char to build underlines from) in the command line,
434 False if not (and it doesn't print anything in this case).
435
436 Thus, you'll usually use it like this::
437
438 if __name__=="__main__":
439 if not makeClassDocs(Macro, globals().values()):
440 _test()
441 """
442 if len(sys.argv) in [2,3] and sys.argv[1]=="docs":
443 if len(sys.argv)==3:
444 underliner = sys.argv[2][0]
445 else:
446 underliner = "."
447 else:
448 return False
449 docs = []
450 for cls in iterDerivedClasses(baseClass, objects):
451 try:
452 title = cls.name
453 except AttributeError:
454 title = cls.__name__
455 docs.append((title, cls.__doc__))
456 docs.sort()
457 print(formatDocs(docs, underliner))
458 return True
459
# sys.stdout and sys.stderr are process-wide, so only one silence() block
# may manipulate them at a time.  This is an RLock so that nested use
# within one thread does not deadlock.
_SILENCE_LOCK = threading.RLock()

@contextlib.contextmanager
def silence(errToo=False):
    """a context manager to temporarily redirect stdout to the null device.

    This is used to shut up some versions of pyparsing and pyfits that
    insist on spewing stuff to stdout from deep within in relatively
    normal situations.

    If errToo is true, stderr is redirected as well.

    Note that this will acquire a lock while things are silenced; this
    means that silenced things cannot run concurrently.

    On exit, the streams that were in place on entry are restored, and
    only the null device handle opened here is closed; a stream the
    controlled block may have put into sys.stdout itself is left alone.
    """
    with _SILENCE_LOCK:
        realstdout, realstderr = sys.stdout, sys.stderr
        # os.devnull rather than a literal "/dev/null" so this also works
        # on platforms with a differently named null device.
        devnull = open(os.devnull, "w")
        sys.stdout = devnull
        if errToo:
            sys.stderr = devnull

        try:
            yield
        finally:
            # Restore first, so the streams are sane even if close() fails.
            sys.stdout = realstdout
            if errToo:
                sys.stderr = realstderr
            devnull.close()
487
488
@contextlib.contextmanager
def in_dir(destDir):
    """runs the controlled block with destDir as the working directory.

    The directory that was current on entry is what the with statement
    binds, and it is made current again when the block is left, whether
    normally or through an exception.

    Think "within dir".  Haha.
    """
    previousDir = os.getcwd()
    os.chdir(destDir)
    try:
        yield previousDir
    finally:
        os.chdir(previousDir)
502
503
@contextlib.contextmanager
def sandbox(tmpdir=None, debug=False, extractfunc=None):
    """sets up and tears down a sandbox directory within tmpdir.

    This is a context manager.  The object returned is the original
    path (which allows you to copy stuff from there).  The working
    directory is the sandbox created while in the controlled block.

    If tmpdir is None, the *system* default is used (usually /tmp),
    rather than dachs' tmpdir.  So, you will usually want to call
    this as sandbox(base.getConfig("tempDir"))

    This is obviously not thread-safe -- you'll not usually want
    to run this in the main server process.  Better fork before
    running this.

    You can pass in a function extractfunc(owd) that is executed in
    the sandbox just before teardown.  It receives the original working
    directory and can, e.g., move files there from the sandbox.

    If debug is true, the sandbox directory is left in place on exit.

    The original working directory is restored and the sandbox removed
    even if the controlled block or extractfunc raise.
    """
    owd = os.getcwd()
    wd = tempfile.mkdtemp("sandbox", dir=tmpdir)
    try:
        os.chdir(wd)
        try:
            yield owd
        finally:
            # Still inside the sandbox here; a failure of extractfunc
            # must not keep the outer cleanup from running.
            if extractfunc:
                extractfunc(owd)
    finally:
        os.chdir(owd)
        if not debug:
            shutil.rmtree(wd)
535
536
def runInSandbox(setUp, func, tearDown, *args, **kwargs):
    """runs func in a temporary ("sandbox") directory.

    func is called with args and kwargs.  setUp and tearDown are
    two functions also called with args and kwargs; in addition, they
    are passed the path of the tempdir (setUp) or the path of the
    original directory (tearDown) in the first argument.  Either of them
    may be None, in which case that step is skipped.

    setUp is called after the directory has been created,
    but the process is still in the current WD.

    tearDown is called before the temp dir is deleted and in this directory.
    Its return value is the return value of runInSandbox, which is the
    preferred way of getting data out of the sandbox.  Without a tearDown,
    None is returned; func's own return value is always discarded.

    If any of the handlers raise exceptions, the following handlers will not
    be called.  The sandbox will be torn down, though.

    This is only present for legacy code.  Use the sandbox context manager
    now.
    """
    owd = os.getcwd()
    wd = tempfile.mkdtemp("sandbox")
    result = None
    try:
        if setUp:
            setUp(wd, *args, **kwargs)
        os.chdir(wd)
        func(*args, **kwargs)
        # tearDown gets the same None-tolerance as setUp.
        if tearDown:
            result = tearDown(owd, *args, **kwargs)
    finally:
        os.chdir(owd)
        shutil.rmtree(wd)
    return result
572
575 """A singleton to keep compileFunction's state somewhat localised.
576
577 The state currently is a counter used to build unique ids for
578 stuff compiled.
579 """
580 compiledCount = 0
581
@classmethod
def _compile(cls, src, funcName, useGlobals=None, debug=False):
    """runs src through exec and returns the item funcName from the resulting
    namespace.

    This is typically used to define functions, like this:

    >>> resFunc = compileFunction("def f(x): print(x)", "f")
    >>> resFunc(1); resFunc("abc")
    1
    abc

    src may be a byte string or a unicode string; unicode is encoded to
    utf-8 before compilation.  useGlobals is the global namespace the
    code is executed in; it defaults to this module's globals.

    Exceptions during compilation or execution of src are reported as a
    UI warning containing the source and re-raised as an excs.BadCode
    passed through misctricks.logOldExc.

    With debug=True, the function is returned wrapped such that any
    exception other than excs.ExecutiveAction escaping from a call
    produces a UI warning with the source before it is re-raised.
    """
    if isinstance(src, unicode):
        src = src.encode("utf-8")
    src = src+"\n"

    namespace = {}
    if useGlobals is None:
        useGlobals = globals()

    # Each compiled snippet gets its own pseudo file name so its source
    # can be told apart in tracebacks.
    uniqueName = "<generated code %s>"%cls.compiledCount
    cls.compiledCount += 1

    try:
        code = compile(src, uniqueName, 'exec')
        # The tuple form of exec is equivalent to "exec code in ..."
        exec(code, useGlobals, namespace)
    except Exception as ex:
        misctricks.sendUIEvent("Warning", "The code that failed to compile was:"
            "\n%s"%src)
        raise misctricks.logOldExc(excs.BadCode(src, "function", ex))
    func = namespace[funcName]

    # Enter the source into linecache under the pseudo file name so that
    # tracebacks can show source lines.  The entry is dropped again by a
    # weakref callback once func is garbage collected; the
    # "linecache and" guards against the module global already being gone
    # at interpreter shutdown.
    linecache.cache[uniqueName] = len(src), None, src.split("\n"), uniqueName
    func._cleanup = weakref.ref(func,
        lambda _, key=uniqueName: linecache and linecache.cache.pop(key, None))

    if not debug:
        return func

    # A plain closure instead of exec-ing string-built wrapper source:
    # same behaviour, but nothing that can break on whitespace.
    def compileFunctionDebugWrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except excs.ExecutiveAction:
            raise
        except:
            # deliberately catch-all; the exception is always re-raised
            misctricks.sendUIEvent("Warning", "Failing source:\n%s"%src)
            raise

    return compileFunctionDebugWrapper
641
# Module-level alias for the _FunctionCompiler._compile classmethod.
compileFunction = _FunctionCompiler._compile
646 """raises a LiteralParserError if expr is not a parseable python expression.
647 """
648
649
650 if expr.startswith("'") or expr.startswith('"'):
651 expr = "''+"+expr
652 try:
653 ast = compiler.parse(expr)
654 except SyntaxError as msg:
655 raise misctricks.logOldExc(excs.BadCode(expr, "expression", msg))
656
657
658 try:
659 exprNodes = ast.node.nodes
660 if len(exprNodes)!=1:
661 raise ValueError("Not a single statement")
662 if not isinstance(exprNodes[0], compiler.ast.Discard):
663 raise ValueError("Not an expression")
664 except (ValueError, AttributeError) as ex:
665 raise misctricks.logOldExc(excs.BadCode(expr, "expression", ex))
666
669 """imports a module from the module path.
670
671 Use this to programmatically import "normal" modules, e.g., dc-internal
672 ones. It uses python's standard import mechanism and returns the
673 module object.
674
675 We're using exec and python's normal import, so the semantics
676 should be identical to saying import modName except that the
677 caller's namespace is not changed.
678
679 The function returns the imported module.
680 """
681
682 if not re.match("([A-Za-z_]+)(\.[A-Za-z_]+)*", modName):
683 raise excs.Error("Invalid name in internal import: %s"%modName)
684 parts = modName.split(".")
685 vars = {}
686 if len(parts)==1:
687 exec "import %s"%modName in vars
688 else:
689 exec "from %s import %s"%(".".join(parts[:-1]), parts[-1]) in vars
690 return vars[parts[-1]]
691
695 """imports fqName and returns the module with a module description.
696
697 The module description is what what find_module returns; you may
698 need this for reloading and similar.
699
700 Do not use this function to import DC-internal modules; this may
701 mess up singletons since you could bypass python's mechanisms
702 to prevent multiple imports of the same module.
703
704 fqName is a fully qualified path to the module without the .py,
705 unless relativeTo is given, in which case it is interpreted as a
706 relative path. This is for letting modules in resdir/res import each
707 other by saying::
708
709 mod, _ = api.loadPythonModule("foo", relativeTo=__file__)
710
711 The python path is temporarily amended with the path part of the
712 source module.
713
714 If the module is in /var/gavo/inputs/foo/bar/mod.py, Python will know
715 the module as foo_bar_mod (the last two path components are always added).
716 This is to keep Python from using the module when someone writes
717 import mod.
718 """
719 if relativeTo is not None:
720 fqName = os.path.join(os.path.dirname(relativeTo), fqName)
721
722 pythonModuleName = "_".join(fqName.split("/")[-3:])
723 moduleName = os.path.basename(fqName)
724 modpath = os.path.dirname(fqName)
725
726 try:
727 moddesc = imp.find_module(moduleName, [modpath])
728 except ImportError:
729
730
731
732 raise excs.StructureError("Requested module %s not importable."%fqName,
733 hint="If it exists at all, the import might also failed because"
734 ' of syntax errors or similar. Try python -c "import mod" to get'
735 ' a clue in that case.')
736 try:
737 imp.acquire_lock()
738 modNs = imp.load_module(pythonModuleName, *moddesc)
739 finally:
740 imp.release_lock()
741
742 return modNs, moddesc
743
746 """gets a name from an internal module.
747
748 relativeName is the python module path (not including "gavo."),
749 objectName the name of something within the module.
750
751 This is used for "manual" registries (grammars, cores,...).
752 """
753 modName = "gavo."+relativeName
754 module = importModule(modName)
755 return getattr(module, objectName)
756
759 """a trivial memoizing decorator.
760
761 Use this for plain functions; see memoizedMethod for instance methods.
762 No cache expiry, no non-hashable arguments, nothing.
763 """
764 cache = {}
765 def fun(*args):
766 if args not in cache:
767 cache[args] = origFun(*args)
768 return cache[args]
769 fun._cache = cache
770 return functools.update_wrapper(fun, origFun)
771
774 """a trivial memoizing decorator for instance methods.
775
776 See memoized for the same thing for functions. This uses a single
777 persistent cache for all instances, so there's not terribly much
778 the wrapped method is allowed to do with its self.
779 """
781 cache = {}
782 @functools.wraps(meth)
783 def wrapped(obj, *args):
784 try:
785 return cache[args]
786 except KeyError:
787 cache[args] = meth(obj, *args)
788 return cache[args]
789 self.wrapped = wrapped
790
def __get__(self, obj, objtype=None):
    """implements the descriptor protocol for the memoizing wrapper.

    Access through the class (obj is None) yields the bare wrapper;
    access through an instance yields the wrapper with that instance
    already bound as first argument.
    """
    if obj is not None:
        return functools.partial(self.wrapped, obj)
    return self.wrapped
795
798 """yields pairs of consecutive items from sequence.
799
800 If the last item cannot be paired, it is dropped.
801
802 >>> list(iterConsecutivePairs(range(6)))
803 [(0, 1), (2, 3), (4, 5)]
804 >>> list(iterConsecutivePairs(range(5)))
805 [(0, 1), (2, 3)]
806 """
807 iter1, iter2 = iter(sequence), iter(sequence)
808 iter2.next()
809 return list(zip(
810 itertools.islice(iter1, None, None, 2),
811 itertools.islice(iter2, None, None, 2)))
812
815 """yields (left, right) pairs for a sequence of separators.
816
817 >>> list(iterRanges(range(6)))
818 [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]
819 """
820 i = iter(separators)
821 left = i.next()
822 for right in i:
823 yield left, right
824 left = right
825
828 """returns a key of dict matching key case-insensitively.
829
830 This is sometimes useful with protocols that stupidly define keys
831 as case-insensitive.
832
833 If no matching key exists, a KeyError is raised.
834 """
835 for k in dict:
836 if k.lower()==key.lower():
837 return k
838 raise KeyError(key)
839
843
844
# string.maketrans exists on python 2 only; python 3 has the equivalent
# as str.maketrans.  Use whichever is available.
_makeFunnyWordTable = getattr(string, "maketrans", None) or str.maketrans


def intToFunnyWord(anInt, translation=_makeFunnyWordTable(
        "-0123456789abcdef",
        "zaeiousmnthwblpgd")):
    """returns a sometimes funny (but unique) word from an arbitrary integer.

    The integer is written in lowercase hex (with a leading minus for
    negative numbers) and each character is then replaced according to
    translation.  translation is a table as accepted by the translate
    method of strings; the default maps the minus sign and the hex digits
    to lowercase letters.
    """
    # translate already returns a string; no need to join its characters.
    return ("%x"%anInt).translate(translation)
851
854 """adds key-value pairs from defaultDict to dataDict if the key is missing
855 in dataDict.
856 """
857 for key, value in defaultDict.iteritems():
858 if key not in dataDict:
859 dataDict[key] = value
860
861
def memoizeOn(onObject, generatingObject, generatingFunction, *args):
    """caches the result of generatingFunction(*args) in an attribute of
    onObject.

    The attribute name is built from the class name and the id of
    generatingObject, so different generating objects do not share a slot
    on the same onObject.

    This is for caching things that adapt to onObjects; see procdefs
    and rowmakers for examples why this is useful.

    A result of None counts as "not cached"; in that case,
    generatingFunction is called again on the next request.
    """
    slotName = "_cache" + generatingObject.__class__.__name__ + str(
        id(generatingObject))

    cached = getattr(onObject, slotName, None)
    if cached is not None:
        return cached

    setattr(onObject, slotName, generatingFunction(*args))
    return getattr(onObject, slotName)
873
876 """clears things memoizeOn-ed on ob or @utils.memoize-ed.
877
878 This is sometimes necessary to let the garbage collector free
879 ob, e.g., when closures have been memoized.
880 """
881 for n in dir(ob):
882 child = getattr(ob, n)
883
884 if hasattr(child, "_cache"):
885 child._cache.clear()
886
887 if n.startswith("_cache"):
888 delattr(ob, n)
889
892 """returns the first local variable called varName in the frame stack
893 above my caller.
894
895 This is obviously abominable. This is only used within the DC code where
896 the author deemed the specification ugly. Ah. Almost.
897 """
898 frame = inspect.currentframe().f_back.f_back
899 while frame:
900 if varName in frame.f_locals:
901 return frame.f_locals[varName]
902 frame = frame.f_back
903 raise ValueError("No local %s in the stack"%varName)
904
907 """prints a compact list of frames.
908
909 This is an aid for printf debugging.
910 """
911 frame = inspect.currentframe().f_back.f_back
912 if inspect.getframeinfo(frame)[2]=="getJobsTable":
913 return
914 while frame:
915 print("[%s,%s], [%s]"%inspect.getframeinfo(frame)[:3])
916 frame = frame.f_back
917
920 import traceback
921 f = StringIO()
922 traceback.print_exc(file=f)
923 return f.getvalue()
924
927 """A value that compares equal based on RE matches.
928
929 This is a helper mainly for GetHasXPathsTests. Use an instance of
930 this class to check against an RE rather than a plain string.
931 """
934
936 return self.pat.match(other)
937
939 return not self.__eq__(other)
940
942 return "<Pattern %s>"%self.pat.pattern
943
944 __repr__ = __str__
945
952
957
958
# When executed as a script (rather than imported), call _test().
if __name__=="__main__":
    _test()
961