1 """
2 Various helpers that didn't fit into any other xTricks.
3 """
4
5
6
7
8
9
10
11 import collections
12 import contextlib
13 import os
14 import re
15 import struct
16 import threading
17 import urllib2
18 from cStringIO import StringIO
19
20 from gavo.utils import excs
23 """A stub that raises some more or less descriptive error on attribute
24 access.
25
26 This is used in some places to replace non-essential modules.
27 """
29 self.modName = modName
30
32 raise RuntimeError("%s not installed"%self.modName)
33
34
35
# Matches strings starting like a bibcode: four digits (the first one 0, 1,
# or 2), one word character, and then exactly 14 non-blank characters up
# to the end of the string.  This is a raw string so the regex escapes are
# not interpreted as (invalid) string escapes.
BIBCODE_PATTERN = re.compile(r"[012]\d\d\d\w[^ ]{14}$")
39 """returns true if we think that the string s is a bibcode.
40
41 This is based on matching against BIBCODE_PATTERN.
42 """
43 return bool(BIBCODE_PATTERN.match(s))
44
45
46 try:
47 from docutils import core as rstcore
48
49 from docutils import nodes
50 from docutils import utils as rstutils
51 from docutils.parsers.rst import roles
52 from docutils.parsers.rst import directives
53
class RSTExtensions(object):
    """a register for local RST extensions.

    This covers both directives and interpreted text roles.

    We need these as additional markup in examples; these always
    introduce local rst interpreted text roles, which always
    add some class to the node in question (modifications are possible).

    These classes are then changed to properties as the HTML fragments
    from RST translation are processed by the _Example nevow data factory.

    To add a new text role, say::

        RSTExtensions.makeTextRole(roleName, roleFunc=None)

    You can pass in a full role function as discussed in
    /usr/share/doc/python-docutils/docs/howto/rst-roles.html (Debian systems).
    It must, however, add a dachs-ex-<roleName> class to the node.  The
    default function produces a nodes.emphasis item with the proper class.

    In a pinch, you can pass a propertyName argument to makeTextRole if the
    desired property name is distinct from the role name in the RST.
    This is used by tapquery and taprole since we didn't want to change
    our examples when the standard changed.

    To add a directive, say::

        RSTExtensions.addDirective(dirName, dirClass)

    In HTML, these classes become properties named like the role name
    (except you can again use propertyName in a pinch).
    """
    # maps "dachs-ex-<name>" class names to the property name to use for them
    classToProperty = {}

    @classmethod
    def addDirective(cls, name, implementingClass, propertyName=None):
        """registers implementingClass as the RST directive name.

        The class dachs-ex-<name> is mapped to propertyName, or to name
        if no propertyName is given.
        """
        directives.register_directive(name, implementingClass)
        if not propertyName:
            propertyName = name
        cls.classToProperty["dachs-ex-"+name] = propertyName

    @classmethod
    def makeTextRole(cls, roleName, roleFunc=None, propertyName=None):
        """creates a new text role for roleName.

        Without a roleFunc, a default role function producing an emphasis
        node is used.  See class docstring.
        """
        if roleFunc is None:
            roleFunc = cls._makeDefaultRoleFunc(roleName)
        roles.register_local_role(roleName, roleFunc)
        if not propertyName:
            propertyName = roleName
        cls.classToProperty["dachs-ex-"+roleName] = propertyName

    @classmethod
    def _makeDefaultRoleFunc(cls, roleName):
        """returns an RST interpreted text role parser function returning
        an emphasis node with a dachs-ex-roleName class.
        """
        className = "dachs-ex-"+roleName

        def roleFunc(name, rawText, text, lineno, inliner,
                options={}, content=[]):
            emphasized = nodes.emphasis(rawText, text)
            emphasized["classes"] = [className]
            return [emphasized], []

        return roleFunc
117
118
119
def _bibcodeRoleFunc(name, rawText, text, lineno, inliner,
        options={}, content=[]):
    """an RST text role function turning a bibcode into a reference node
    pointing to adsabs.harvard.edu.

    The node gets the class bibcode-link.  A ValueError is raised if text
    does not pass couldBeABibcode.
    """
    if couldBeABibcode(text):
        link = nodes.reference(rawText, text,
            refuri="http://adsabs.harvard.edu/abs/%s"%text)
        link["classes"] = ["bibcode-link"]
        return [link], []
    raise ValueError("Probably not a bibcode: '%s'"%text)
128
129 RSTExtensions.makeTextRole("bibcode", _bibcodeRoleFunc)
130 del _bibcodeRoleFunc
131
132
133
134 _explicitTitleRE = re.compile(r'^(.+?)\s*(?<!\x00)<(.*?)>$', re.DOTALL)
135
def _dachsdocRoleFunc(name, rawText, text, lineno, inliner,
        options={}, content=[]):
    """an RST text role function producing a reference below
    http://docs.g-vo.org/DaCHS/.

    The role content is either "title <path>" (as recognised by
    _explicitTitleRE) or just a path; in the latter case, the last
    path segment serves as the link title.
    """
    text = rstutils.unescape(text)
    explicit = _explicitTitleRE.match(text)
    if explicit is None:
        url = text
        title = text.split("/")[-1]
    else:
        title, url = explicit.groups()
    target = "http://docs.g-vo.org/DaCHS/"+url
    link = nodes.reference(title, title, internal=False, refuri=target)
    return [link], []
148
149 RSTExtensions.makeTextRole("dachsdoc", _dachsdocRoleFunc)
150 del _dachsdocRoleFunc
151
def _dachsrefRoleFunc(name, rawText, text, lineno, inliner,
        options={}, content=[]):
    """an RST text role function producing a reference into
    http://docs.g-vo.org/DaCHS/ref.html.

    The fragment identifier is the lowercased role content with each run
    of characters other than a-z and 0-9 replaced by a single dash.
    """
    label = rstutils.unescape(text)
    anchor = re.sub("[^a-z0-9]+", "-", label.lower())
    target = "http://docs.g-vo.org/DaCHS/ref.html#"+anchor
    link = nodes.reference(label, label, internal=False, refuri=target)
    return [link], []
160
161 RSTExtensions.makeTextRole("dachsref", _dachsrefRoleFunc)
162 del _dachsrefRoleFunc
163
def _samplerdRoleFunc(name, rawText, text, lineno, inliner,
        options={}, content=[]):
    """an RST text role function producing a reference below
    http://svn.ari.uni-heidelberg.de/svn/gavo/hdinputs/.

    The (unescaped) role content is used both as the link title and as
    the path appended to that base URL.
    """
    path = rstutils.unescape(text)
    target = "http://svn.ari.uni-heidelberg.de/svn/gavo/hdinputs/"+path
    link = nodes.reference(path, path, internal=False, refuri=target)
    return [link], []
172
173 RSTExtensions.makeTextRole("samplerd", _samplerdRoleFunc)
174 del _samplerdRoleFunc
175
176 except ImportError:
177 rstcore = NotInstalledModuleStub("docutils")
181 """the metaclass for Undefined.
182
183 Used internally.
184 """
187
188 __unicode__ = __str__
189
192
195
198 """a sentinel for all kinds of undefined values.
199
200 Do not instantiate.
201
202 >>> Undefined()
203 Traceback (most recent call last):
204 TypeError: Undefined cannot be instantiated.
205 >>> bool(Undefined)
206 False
207 >>> repr(Undefined)
208 '<Undefined>'
209 >>> str(Undefined)
210 Traceback (most recent call last):
211 StructureError: Undefined cannot be stringified.
212 """
213 __metaclass__ = _UndefinedType
214
216 raise TypeError("Undefined cannot be instantiated.")
217
220 """A string-like thing basically representing SQL delimited identifiers.
221
222 This has some features that make handling these relatively painless
223 in ADQL code.
224
225 The most horrible feature is that these hash and compare as their embedded
226 names, except to other QuotedNames.
227
228 SQL-92, in 5.2, roughly says:
229
230 delimited identifiers compare literally with each other,
231 delimited identifiers compare with regular identifiers after the
232 latter are all turned to upper case. But since postgres turns everything
233 to lower case, we do so here, too.
234
235 >>> n1, n2, n3 = QuotedName("foo"), QuotedName('foo"l'), QuotedName("foo")
236 >>> n1==n2,n1==n3,hash(n1)==hash("foo")
237 (False, True, True)
238 >>> print n1, n2
239 "foo" "foo""l"
240 """
243
245 return hash(self.name)
246
248 if isinstance(other, QuotedName):
249 return self.name==other.name
250 elif isinstance(other, basestring):
251 return self.name==other.lower()
252 else:
253 return False
254
256 return not self==other
257
260
262 return 'QuotedName(%s)'%repr(self.name)
263
265 return not not re.match("[a-z][a-z0-9_]*$", self.name)
266
268 return self
269
271 return str(self)
272
274 return self.name.capitalize()
275
277 return QuotedName(self.name+other)
278
281 """a buffer that takes data in arbitrary chunks and returns
282 them in chops of chunkSize bytes.
283
284 There's a lock in place so you can access add and get from
285 different threads.
286
287 When everything is written, you must call doneWriting.
288 """
289
290 chunkSize = 50000
291
293 self.buffer = collections.deque()
294 if chunkSize is not None:
295 self.chunkSize = chunkSize
296 self.curSize = 0
297 self.lock = threading.Lock()
298 self.finished = False
299
def add(self, data):
    """appends data to the buffer and adds its length to curSize.

    Both updates happen while self.lock is held.
    """
    self.lock.acquire()
    try:
        self.buffer.append(data)
        self.curSize += len(data)
    finally:
        self.lock.release()
304
def get(self, numBytes=None):
    """returns the next chunk of at most numBytes bytes from the buffer.

    numBytes defaults to self.chunkSize.

    None is returned if the buffer is empty, or if it holds fewer than
    numBytes bytes and self.finished is not set.  Once self.finished
    is set, whatever is left is handed out even if it is shorter than
    numBytes.

    The availability checks are made while holding self.lock, so they
    cannot be invalidated by another thread between the check and the
    actual removal of the data.
    """
    if numBytes is None:
        numBytes = self.chunkSize

    with self.lock:
        if self.curSize<numBytes and not self.finished:
            return None
        if not self.buffer:
            return None

        items, sz = [], 0

        # collect whole items until we have at least numBytes bytes
        while self.buffer:
            item = self.buffer.popleft()
            sz += len(item)
            self.curSize -= len(item)
            items.append(item)
            if sz>=numBytes:
                break

        # the last item may overshoot; push the excess back to the front
        chunk = "".join(items)
        leftOver = chunk[numBytes:]
        if leftOver:
            self.buffer.appendleft(leftOver)
            self.curSize += len(leftOver)
        chunk = chunk[:numBytes]

    return chunk
334
335
336
338 """returns the buffer up to the first occurrence of char.
339
340 If char is not present in the buffer, the function returns None.
341 """
342 with self.lock:
343 items, sz = [], 0
344
345 while self.buffer:
346 item = self.buffer.popleft()
347 sz += len(item)
348 self.curSize -= len(item)
349 items.append(item)
350 if char in item:
351 break
352 else:
353
354
355 self.buffer.clear()
356 self.buffer.append("".join(items))
357 self.curSize = sz
358 return None
359
360
361 items[-1], leftOver = items[-1].split(char, 1)
362 chunk = "".join(items)
363 if leftOver:
364 self.buffer.appendleft(leftOver)
365 self.curSize += len(leftOver)
366 return chunk+char
367
368 raise AssertionError("This cannot happen")
369
371 """returns the entire buffer as far as it is left over.
372 """
373 result = "".join(self.buffer)
374 self.buffer = collections.deque()
375 return result
376
379
382 """yields items of seq in groups of n elements.
383
384 If len(seq)%n!=0, the last elements are discarded.
385
386 >>> list(grouped(2, range(5)))
387 [(0, 1), (2, 3)]
388 >>> list(grouped(3, range(9)))
389 [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
390 """
391 return zip(*([iter(seq)]*n))
392
395 """returns the first value of key in the web argument-like object args.
396
397 args is a dictionary mapping keys to lists of values. If key is present,
398 the first element of the list is returned; else, or if the list is
399 empty, default if given. If not, a Validation error for the requested
400 column is raised.
401
402 Finally, if args[key] is neither list nor tuple (in an isinstance
403 sense), it is returned unchanged.
404
405 >>> getfirst({'x': [1,2,3]}, 'x')
406 1
407 >>> getfirst({'x': []}, 'x')
408 Traceback (most recent call last):
409 ValidationError: Field x: Missing mandatory parameter x
410 >>> getfirst({'x': []}, 'y')
411 Traceback (most recent call last):
412 ValidationError: Field y: Missing mandatory parameter y
413 >>> print(getfirst({'x': []}, 'y', None))
414 None
415 >>> getfirst({'x': 'abc'}, 'x')
416 'abc'
417 """
418 try:
419 val = args[key]
420 if isinstance(val, (list, tuple)):
421 return val[0]
422 else:
423 return val
424 except (KeyError, IndexError):
425 if default is Undefined:
426 raise excs.ValidationError("Missing mandatory parameter %s"%key,
427 colName=key)
428 return default
429
432 """sends an eventName to the DC event dispatcher.
433
434 If no event dispatcher is available, do nothing.
435
436 The base.ui object that DaCHS uses for event dispatching
437 is only available to sub-packages above base. Other code should not
438 use or need it under normal circumstances, but if it does, it can
439 use this.
440
441 All other code should use base.ui.notify<eventName>(*args) directly.
442 """
443 try:
444 from gavo.base import ui
445 getattr(ui, "notify"+eventName)(*args)
446 except ImportError:
447 pass
448
451 """logs the mutation of the currently handled exception to exc.
452
453 This just does a notifyExceptionMutation using sendUIEvent; it should
454 only be used by code at or below base.
455 """
456 sendUIEvent("ExceptionMutation", exc)
457 return exc
458
461 """reads a "fortran record" from f and returns the payload.
462
463 A "fortran record" comes from an unformatted file and has a
464 4-byte payload length before and after the payload. Native endianness
465 is assumed here.
466
467 If the two length specs do not match, a ValueError is raised.
468 """
469 try:
470 startPos = f.tell()
471 except IOError:
472 startPos = "(stdin)"
473 rawLength = f.read(4)
474 if rawLength=='':
475 return None
476 recLen = struct.unpack("i", rawLength)[0]
477 data = f.read(recLen)
478 rawPost = f.read(4)
479 if not rawPost:
480 raise ValueError("Record starting at %s has no postamble"%startPos)
481 postambleLen = struct.unpack("i", rawPost)[0]
482 if recLen!=postambleLen:
483 raise ValueError("Record length at record (%d) and did not match"
484 " postamble declared length (%d) at %s"%(
485 recLen, postambleLen, startPos))
486 return data
487
490 """iterates over the fortran records in f.
491
492 For details, see getFortranRec.
493 """
494 while True:
495 rec = getFortranRec(f)
496 if rec is None:
497 break
498 if skip>0:
499 skip -= 1
500 continue
501 yield rec
502
505 """returns the content of url, from a cache if possible.
506
507 Of course, you only want to use this if there's some external guarantee
508 that the resource behind url doesn't change. No expiry mechanism is
509 present here.
510 """
511 if not os.path.isdir(cacheDir):
512 os.makedirs(cacheDir)
513 cacheName = os.path.join(cacheDir, re.sub("[^\w]+", "", url)+".cache")
514 if os.path.exists(cacheName):
515 with open(cacheName) as f:
516 return f.read()
517 else:
518 f = urllib2.urlopen(url)
519 doc = f.read()
520 f.close()
521 with open(cacheName, "w") as f:
522 f.write(doc)
523 return doc
524
527 """returns HTML and a string with warnings for a piece of ReStructured
528 text.
529
530 source can be a unicode string or a byte string in utf-8.
531
532 userOverrides will be added to the overrides argument of docutils'
533 core.publish_parts.
534 """
535 sourcePath, destinationPath = None, None
536 if not isinstance(source, unicode):
537 source = source.decode("utf-8")
538
539 warnAccum = StringIO()
540 overrides = {'input_encoding': 'unicode',
541 'raw_enabled': True,
542 'doctitle_xform': None,
543 'warning_stream': warnAccum,
544 'initial_header_level': 4}
545 overrides.update(userOverrides)
546
547 parts = rstcore.publish_parts(
548 source=source+"\n", source_path=sourcePath,
549 destination_path=destinationPath,
550 writer_name='html', settings_overrides=overrides)
551 return parts["fragment"], warnAccum.getvalue()
552
555 """returns HTML for a piece of ReStructured text.
556
557 source can be a unicode string or a byte string in utf-8.
558
559 userOverrides will be added to the overrides argument of docutils'
560 core.publish_parts.
561 """
562 return rstxToHTMLWithWarning(source, **userOverrides)[0]
563
566 """A dictionary allowing case-insensitive access to its content.
567
568 This is used for DAL renderers which, unfortunately, are supposed
569 to be case insensitive. Since case insensitivity is at least undesirable
570 for service-specific keys, we go a semi-insensitive approach here:
571 First, we try literal matches, if that does not work, we try matching
572 against an all-uppercase version.
573
574 Name clashes resulting from different names being mapped to the
575 same normalized version are handled in some random way. Don't do this.
576 And don't rely on case normalization if at all possible.
577
578 Only strings are allowed as keys here. This class is not concerned
579 with the values.
580 >>> d = CaseSemisensitiveDict({"a": 1, "A": 2, "b": 3})
581 >>> d["a"], d["A"], d["b"], d["B"]
582 (1, 2, 3, 3)
583 >>> d["B"] = 9; d["b"], d["B"]
584 (3, 9)
585 >>> del d["b"]; d["b"], d["B"]
586 (9, 9)
587 >>> "B" in d, "b" in d, "u" in d
588 (True, True, False)
589 """
591 dict.__init__(self, *args, **kwargs)
592 self._normCasedCache = None
593
595 try:
596 return dict.__getitem__(self, key)
597 except KeyError:
598 pass
599 return self._normCased[key.upper()]
600
604
607
def get(self, key, default=None):
    """returns self[key], or default if that lookup raises a KeyError.

    The lookup goes through item access on self, so whatever matching
    rules __getitem__ applies also apply here.
    """
    try:
        value = self[key]
    except KeyError:
        value = default
    return value
613
614 @property
616 if self._normCasedCache is None:
617 self._normCasedCache = dict((k.upper(), v)
618 for k, v in self.iteritems())
619 return self._normCasedCache
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642 import pyparsing
643 from pyparsing import ParserElement, ParseExpression
644 ParserElement.enablePackrat()
645
646
647 if not hasattr(ParserElement, "addParseAction"):
648 ParserElement.addParseAction = ParserElement.setParseAction
649
650 _PYPARSE_LOCK = threading.RLock()
654 """a context manager that serializes pyparsing grammar compilation
655 and manages its whitespace chars.
656
657 We need different whitespace definitions in some parts of DaCHS.
658 (The default used to be " \\t" for a while, so this is what things
659 get reset to).
660
661 Since whitespace apparently can only be set globally for pyparsing,
662 we provide this c.m. Since it is possible that grammars will be
663 compiled in threads (e.g., as a side effect of getRD), this is
664 protected by a lock. This, in turn, means that this can
665 potentially block for a fairly long time.
666
667 Bottom line: When compiling pyparsing grammars, *always* set
668 the whitespace chars explicitly, and do it through this c.m.
669 """
670 _PYPARSE_LOCK.acquire()
671 ParserElement.setDefaultWhitespaceChars(whiteChars)
672 try:
673 yield
674 finally:
675 ParserElement.setDefaultWhitespaceChars(" \t")
676 _PYPARSE_LOCK.release()
677
679 """parses a string using a pyparsing grammar thread-safely.
680 """
681 with _PYPARSE_LOCK:
682 res = grammar.parseString(string, **kwargs)
683 ParserElement.resetCache()
684 return res
685
691
698 ret = super(ParseExpression,self).copy()
699 ret.exprs = self.exprs[:]
700 return ret
701
# Install the replacement copy method on pyparsing versions newer
# than 1.5.2.  The version is compared numerically; as strings, "1.5.10"
# or "10.0" would sort before "1.5.2" and the replacement would be skipped.
_pyparsingVersion = tuple(int(part)
    for part in re.findall(r"\d+", pyparsing.__version__)[:3])
if _pyparsingVersion>(1, 5, 2):
    ParseExpression.copy = _parse_expression_copy

# neither name is needed once the method is (or is not) installed
del _pyparsingVersion
del _parse_expression_copy
711 from pyparsing import (
712 Word,alphas, QuotedString, Regex, OneOrMore)
713
714 with pyparsingWhitechars(" \t"):
715 keyword = Word(alphas+"_")("key")
716 keyword.setName("Keyword")
717 value = (QuotedString(quoteChar="'", escChar='\\')
718 | Regex("[^'= \t]*"))("value")
719 value.setName("Simple value or quoted string")
720 pair = keyword - "=" - value
721 pair.setParseAction(lambda s,p,t: (t["key"], t["value"]))
722 line = OneOrMore(pair)
723 line.setParseAction(lambda s,p,t: dict(list(t)))
724
725 return line
726
727 _KVL_GRAMMAR = _makeKVLGrammar()
730 """returns a dictionary for a "key-value line".
731
732 key-value lines represent string-valued dictionaries
733 following postgres libpq/dsn (see PQconnectdb docs;
734 it's keyword=value, whitespace-separated, with
735 whitespace allowed in values through single quoting,
736 and backslash-escaping).
737 """
738 return pyparseString(_KVL_GRAMMAR, aString, parseAll=True)[0]
739
740 _IDENTIFIER_PATTERN = re.compile("[A-Za-z_]+$")
757
763
764
765 if __name__=="__main__":
766 _test()
767