1 """
2 Description of columns (and I/O fields).
3 """
4
5
6
7
8
9
10
11 from gavo import adql
12 from gavo import base
13 from gavo import dm
14 from gavo import utils
15 from gavo.base import typesystems
16 from gavo.utils import codetricks
17 from gavo.votable import paramval
18
19 __docformat__ = "restructuredtext en"
20
21
22
23
24
# Names of column types singled out for explicit NULL handling.
# NOTE(review): presumably these are the types that have no natural NULL
# representation when serialised and hence need a nullLiteral -- confirm
# against the code consuming this set.
EXPLICIT_NULL_TYPES = set([
    "smallint",
    "integer",
    "bigint",
    "char",
    "boolean",
    "bytea",
])
30 """An attribute with values constrained to types we understand.
31 """
32 @property
34 return ("a type name; the internal type system is similar to SQL's"
35 " with some restrictions and extensions. The known atomic types"
36 " include: %s"%(", ".join(typesystems.ToPythonConverter.simpleMap)))
37
46
49
52 """An attribute containing a param or column name.
53
54 These, in DaCHS, have to match identifierPat (essentially, like
55 python identifiers).
56 """
57 @property
62
67
70 """An attribute containing a name suitable for SQL table names.
71
72 Column names are special in that you can prefix them with "quoted/"
73 and then get a delimited identifier. This is something you probably
74 shouldn't use.
75
76 Using ADQL/SQL reserved words (without quoting) here yields a warning.
77 """
78 @property
80 return ("a column name within an SQL table. These have to match the"
81 " SQL regular_identifier production."
82 " In a desperate pinch, you can generate delimited identifiers"
83 " (that can contain anything) by prefixing the name with 'quoted/'")
84
95
101
104 """A helper for TableManagedAttribute.
105
106 When a TableManagedAttribute ships off its value into an event
107 it packs its value into an _AttBox. That way, the receiver
108 can tell whether the value comes from another TableManagedAttribute
109 (which is ok) or comes from an XML parser (which is forbidden).
110 """
112 self.payload = payload
113
116 """An attribute not settable from XML for holding information
117 managed by the parent table.
118
119 That's stc and stcUtype here, currently.
120
121 Do not use this in new code. This should go when the stc element
122 can safely be replaced by gavo-dm-based stuff (here: DmRoles).
123 """
124 typeDesc_ = "non-settable internally used value"
125
126 - def feed(self, ctx, instance, value):
133
136
138 val = getattr(instance, self.name_)
139 if val!=self.default_:
140 yield ("value", self.name_, _AttBox(val))
141
def getCopy(self, instance, newParent, ctx):
    """returns the value a copy of instance should carry for this attribute.

    No actual copy is made: whatever object instance holds under this
    attribute's name is handed out as-is, so original and copy share it.
    newParent and ctx are accepted for interface compatibility but are
    not used.
    """
    managedValue = getattr(instance, self.name_)
    return managedValue
146
149 """A sentinel class for Table to signal non-adapted DM roles on a column
150 or param.
151 """
153 if isinstance(oldRoles, OldRoles):
154 self.oldRoles = oldRoles.oldRoles
155 else:
156 self.oldRoles = oldRoles
157
159 return bool(self.oldRoles)
160
163 """An attribute managing DM roles.
164
165 It is not set directly from XML but filled when a table parses
166 DM annotation. When copying around columns between tables, this
167 is used to build the new annotation; the value is an oldAnnotations
168 instance rather than a list as usual until the new parent table has
169 started constructing its own DM annotations.
170 """
171 typeDesc_ = "read-only list of roles played by this column in DMs"
172
173 - def __init__(self, name, description="Undocumented"):
177
178 @property
181
184
186
187 if False:
188 yield None
189
def getCopy(self, instance, newParent, ctx):
    """returns the roles value to put into a copy of instance.

    If instance has no roles (the attribute value is falsy), a fresh empty
    list is returned.  Otherwise, the current value is wrapped into an
    OldRoles sentinel rather than being handed over directly.

    newParent and ctx are accepted for interface compatibility but are
    not used.
    """
    currentRoles = getattr(instance, self.name_)
    if not currentRoles:
        return []
    return OldRoles(currentRoles)
199
202 """is a read-only standin for a dict.
203
204 It's hashable, though, since it's always empty... This is used here
205 for a default for displayHint.
206 """
208 raise TypeError("RoEmptyDicts are immutable")
209
210 _roEmptyDict = RoEmptyDict()
214 """is a display hint.
215
216 Display hint literals are comma-separated key=value sequences.
217 Keys are up to the application and evaluated by htmltable, votable, etc.
218
219 The parsed values are simply dictionaries mapping strings to strings, i.e.,
220 value validation cannot be performed here (yet -- do we want this?
221 A central repository of display hints would be kinda useful...)
222 """
223 typeDesc_ = "Display hint"
224
225 - def __init__(self, name, description, **kwargs):
228
237
239 return ",".join(
240 ["%s=%s"%(k,v) for k,v in value.iteritems()])
241
242
243 -class Option(base.Structure):
244 """A value for enumerated columns.
245
246 For presentation purposes, an option can have a title, defaulting to
247 the option's value.
248 """
249 name_ = "option"
250
251 _title = base.UnicodeAttribute("title", default=base.NotGiven,
252 description="A Label for presentation purposes; defaults to val.",
253 copyable=True)
254 _val = base.DataContent(copyable=True, description="The value of"
255 " the option; this is what is used in, e.g., queries and the like.")
256
258
259 return self.title
260
263
268
271 """returns a list of Option instances with values given in args.
272 """
273 return [base.makeStruct(Option, content_=arg) for arg in args]
274
275
276 -class Values(base.Structure):
277 """Information on a column's values, in particular its domain.
278
279 This is quite like the values element in a VOTable. In particular,
280 to accommodate VOTable usage, we require nullLiteral to be a valid literal
281 for the parent's type.
282
283 Note that DaCHS does not validate for constraints from values on
284 table import. This is mainly because before ``gavo values`` has run,
285 values may not represent the new dataset in semiautomatic values.
286
287 With HTTP parameters, values validation does take place (but again,
288 that's mostly not too helpful because there are query languages
289 sitting in between most of the time).
290
291 Hence, the main utility of values is metadata declaration, both
292 in the form renderer (where they become placeholders) and in datalink
293 (where they are communicated as VOTable values).
294 """
295 name_ = "values"
296
297 _min = base.UnicodeAttribute("min", default=None,
298 description="Minimum acceptable"
299 " value as a datatype literal", copyable=True)
300 _max = base.UnicodeAttribute("max", default=None,
301 description="Maximum acceptable"
302 " value as a datatype literal", copyable=True)
303 _options = base.StructListAttribute("options",
304 childFactory=Option,
305 description="List of acceptable values (if set)", copyable=True)
306 _default = base.UnicodeAttribute("default", default=None,
307 description="A default"
308 " value (currently only used for options).", copyable=True)
309 _nullLiteral = base.UnicodeAttribute("nullLiteral", default=None,
310 description=
311 "An appropriate value representing a NULL for this column in VOTables"
312 " and similar places. You usually should only set it for integer"
313 " types and chars. Note that rowmakers make no use of this nullLiteral,"
314 " i.e., you can and should choose null values independently of"
315 " your source. Again, for reals, floats and (mostly) text you probably"
316 " do not want to do this.", copyable=True)
317 _multiOk = base.BooleanAttribute("multiOk", False, "Deprecated, use"
318 " multiplicity=multiple on input keys instead.", copyable=True)
319 _fromDB = base.ActionAttribute("fromdb", "_evaluateFromDB", description=
320 "A query fragment returning just one column to fill options from (will"
321 " add to options if some are given). Do not write SELECT or anything,"
322 " just the column name and the where clause. Do not do this for"
323 " large tables even if there are reasonably few values, because"
324 " there is no good way to speed up this kind of query using indices.")
325 _caseless = base.BooleanAttribute("caseless",
326 description="When validating, ignore the case of string values."
327 " For non-string types, behaviour is undefined (i.e., DaCHS is"
328 " going to spit on you).",
329 default=False,
330 copyable=True)
331 _original = base.OriginalAttribute()
332
333 validValues = None
334
335 @classmethod
341
344
356
358 """converts min, max, and options from string literals to python
359 objects.
360 """
361 dataField = self.parent
362
363
364
365
366 if self.min:
367 self.min = self.makePythonVal(self.min, dataField.type)
368 if self.max:
369 self.max = self.makePythonVal(self.max, dataField.type)
370
371 if self.options:
372 dbt = dataField.type
373 for opt in self.options:
374 opt.content_ = self.makePythonVal(opt.content_, dbt)
375 self.validValues = set(o.content_ for o in self.options)
376 if self.caseless:
377 self.validValues = set(o and o.lower() for o in self.validValues)
378
379 if self.nullLiteral:
380 try:
381 self.makePythonVal(self.nullLiteral, dataField.type)
382 except ValueError:
383 raise base.LiteralParseError("nullLiteral", self.nullLiteral,
384 hint="If you want to *parse* whatever you gave into a NULL,"
385 " use the parseWithNull function in a rowmaker. The null"
386 " literal gives what value will be used for null values"
387 " when serializing to VOTables and the like.")
388
389 if self.default and isinstance(self.default, basestring):
390 type, arraysize, xtype = dataField._getVOTableType()
391 self.default = paramval.getVOTParser(type, arraysize, xtype)(
392 self.default)
393
395 """returns false if value isn't either in options or doesn't consist of
396 items in options.
397
398 Various null values always validate here; non-null checking is done
399 by the column on its required attribute.
400 """
401 if value=="None":
402 assert False, "Literal 'None' passed as a value to validateOptions"
403
404 if self.validValues is None:
405 return True
406 if self.caseless and value:
407 value = value.lower()
408
409 if isinstance(value, (list, tuple, set)):
410 for val in value:
411 if val and not val in self.validValues:
412 return False
413 else:
414 return value in self.validValues or value is None
415 return True
416
417
418 -class ColumnBase(base.Structure, base.MetaMixin):
419 """A base class for columns, parameters, output fields, etc.
420
421 Actually, right now there's far too much cruft in here that
422 should go into Column proper or still somewhere else. Hence:
423 XXX TODO: Refactor.
424
425 See also Column for a docstring that still applies to all we've in
426 here.
427 """
428 _name = ParamNameAttribute("name", default=base.Undefined,
429 description="Name of the param",
430 copyable=True, before="type")
431 _type = TypeNameAttribute("type", default="real", description=
432 "datatype for the column (SQL-like type system)",
433 copyable=True, before="unit")
434 _unit = base.UnicodeAttribute("unit", default="", description=
435 "Unit of the values", copyable=True, before="ucd",
436 strip=True)
437 _ucd = base.UnicodeAttribute("ucd", default="", description=
438 "UCD of the column", copyable=True, before="description")
439 _description = base.NWUnicodeAttribute("description",
440 default="", copyable=True,
441 description="A short (one-line) description of the values in this column.")
442 _tablehead = base.UnicodeAttribute("tablehead", default=None,
443 description="Terse phrase to put into table headers for this"
444 " column", copyable=True)
445 _utype = base.UnicodeAttribute("utype", default=None, description=
446 "utype for this column", copyable=True)
447 _required = base.BooleanAttribute("required", default=False,
448 description="Record becomes invalid when this column is NULL",
449 copyable=True)
450 _displayHint = DisplayHintAttribute("displayHint",
451 description="Suggested presentation; the format is "
452 " <kw>=<value>{,<kw>=<value>}, where what is interpreted depends"
453 " on the output format. See, e.g., documentation on HTML renderers"
454 " and the formatter child of outputFields.", copyable=True)
455 _verbLevel = base.IntAttribute("verbLevel", default=20,
456 description="Minimal verbosity level at which to include this column",
457 copyable=True)
458 _values = base.StructAttribute("values", default=None,
459 childFactory=Values, description="Specification of legal values",
460 copyable=True)
461 _fixup = base.UnicodeAttribute("fixup", description=
462 "A python expression the value of which will replace this column's"
463 " value on database reads. Write a ___ to access the original"
464 ' value. You can use macros for the embedding table.'
465 ' This is for, e.g., simple URL generation'
466 ' (fixup="\'\\internallink{/this/svc}\'+___").'
467 ' It will *only* kick in when tuples are deserialized from the'
468 " database, i.e., *not* for values taken from tables in memory.",
469 default=None, copyable=True)
470 _note = base.UnicodeAttribute("note", description="Reference to a note meta"
471 " on this table explaining more about this column", default=None,
472 copyable=True)
473 _xtype = base.UnicodeAttribute("xtype", description="VOTable xtype giving"
474 " the serialization form; you usually do *not* want to set this,"
475 " as the xtypes actually used are computed from database type."
476 " DaCHS xtypes are only used for a few unsavoury, hopefully temporary,"
477 " hacks", default=None, copyable=True)
478 _stc = TableManagedAttribute("stc", description="Internally used"
479 " STC information for this column (do not assign to unless instructed"
480 " to do so)",
481 default=None, copyable=True)
482 _stcUtype = TableManagedAttribute("stcUtype", description="Internally used"
483 " STC information for this column (do not assign to)",
484 default=None, copyable=True)
485 _dmRoles = DMRolesAttribute("dmRoles",
486 description="Roles played by this column; cannot be asigned to.")
487 _properties = base.PropertyAttribute(copyable=True)
488 _original = base.OriginalAttribute()
489
490 restrictedMode = False
491
493 return "<Column %s>"%repr(self.name)
494
505
514
528
530 return self.values and self.values.options
531
540
566
568 """returns a guess as to whether this column is part of an index.
569
570 This may return True, False, or None (unknown).
571 """
572 if self.parent and hasattr(self.parent, "indexedColumns"):
573
574 if self.name in self.parent.indexedColumns:
575 return True
576 else:
577 return False
578
580 """returns a guess as to whether this column is a primary key of the
581 embedding table.
582
583 This may return True, False, or None (unknown).
584 """
585 if self.parent and hasattr(self.parent, "primary"):
586
587 if self.name in self.parent.primary:
588 return True
589 else:
590 return False
591
592 _indexedCleartext = {
593 True: "indexed",
594 False: "notIndexed",
595 None: "unknown",
596 }
597
599 """returns a dictionary of certain, "user-interesting" properties
600 of the data field, in a dict of strings.
601 """
602 return {
603 "name": unicode(self.name),
604 "description": self.description or "N/A",
605 "tablehead": self.getLabel(),
606 "unit": self.unit or "N/A",
607 "ucd": self.ucd or "N/A",
608 "verbLevel": self.verbLevel,
609 "indexState": self._indexedCleartext[self.isIndexed()],
610 "note": self.note,
611 }
612
614 """returns an SQL fragment describing this column ready for
615 inclusion in a DDL statement.
616 """
617 type = base.sqltypeToPG(self.type)
618
619
620 items = [str(self.name), type]
621 if self.required:
622 items.append("NOT NULL")
623 return " ".join(items)
624
627
629 """returns a short label for this column.
630
631 The label is either the tablehead or, missing it, the capitalized
632 column name.
633 """
634 if self.tablehead is not None:
635 return self.tablehead
636 return str(self.name).capitalize()
637
648
651 """A database column.
652
653 Columns contain almost all metadata to describe a column in a database
654 table or a VOTable (the exceptions are for column properties that may
655 span several columns, most notably indices).
656
657 Note that the type system adopted by the DC software is a subset
658 of postgres' type system. Thus when defining types, you have to
659 specify basically SQL types. Types for other type systems (like
660 VOTable, XSD, or the software-internal representation in python values)
661 are inferred from them.
662
663 Columns can have delimited identifiers as names. Don't do this, it's
664 no end of trouble. For this reason, however, you should not use name
665 but rather key to programmatically obtain field's values from rows.
666
667 Properties evaluated:
668
669 - std -- set to 1 to tell the tap schema importer to have the column's
670 std column in TAP_SCHEMA 1 (it's 0 otherwise).
671 - statisticsTarget -- an integer to be set as this column's
672 statistics-gathering target. Set this to something between 100 and
673 10000 on postgres if you have large tables and columns with strongly
674 non-uniform distributions. Set to -1 to revert to the system default.
675 gavo imp -m will apply changes here.
676 - targetType -- for a column containing a URL, the media type of the
677 resource pointed at. This is for producing extra annotation for
678 Aladin and friends as per
679 http://mail.ivoa.net/pipermail/dal/2018-May/008017.html
680 - targetTitle -- if you give targetType, use this to set the link
681 title (defaults to "Link").
682 """
683 name_ = "column"
684
685 _name = ColumnNameAttribute("name", default=base.Undefined,
686 description="Name of the column",
687 copyable=True, before="type")
688 _hidden = base.BooleanAttribute("hidden", default=False,
689 description="Hide the column from most of the user interface"
690 " (specifically, you can't use it in TAP queries or results,"
691 " and it won't be in TAP_SCHEMA). You typically want this for"
692 " internal, administrative columns.", copyable=True)
693 _ignored = base.DataContent(description="Columns admit data"
694 " content but ignore it. This is exclusively a convenience"
695 " for building columns from params and should not be used for"
696 " anything else.", copyable=False)
697
717
719 if self.parent==container:
720 return dm.ColumnAnnotation(roleName, self, instance)
721 else:
722 raise base.ReportableError("You cannot use columns from"
723 " other tables in your DM annotations directly.",
724 hint="If you really need something like this, you need to"
725 " define a datatype corresponding to what's in the other table"
726 " and reference a corresponding dm declaration.")
727
730 """returns true if ob is a list consisting of strings exclusively.
731
732 We need this for a hack in param magic. We shouldn't have that
733 hack, and there shouldn't be more of this.
734 """
735 if isinstance(ob, list):
736 return set(isinstance(l, basestring) for l in ob)==set([True])
737 return False
738
741 """A basic parameter.
742
743 This is the base for both Param and InputKey.
744 """
745 _value = base.DataContent(description="The value of parameter."
746 " It is parsed according to the param's type using the default"
747 " parser for the type VOTable tabledata.", default=base.NotGiven,
748 copyable=True, expand=True)
749
750 _valueCache = base.Undefined
751 __contentStore = base.NotGiven
752
753 nullLiteral = ""
754
755 unprocessedTypes = set(["raw", "file"])
756
760
def __set_content(self, val):
    """stores val as the literal backing content_."""
    self.__contentStore = val

def __get_content(self):
    """returns the literal backing content_.

    While no literal has been stored (the store is NotGiven) but a parsed
    value is available in _valueCache, the literal is generated from that
    value through _unparse and remembered for later calls.
    """
    if self.__contentStore is not base.NotGiven:
        return self.__contentStore
    if self._valueCache is not base.Undefined:
        self.__contentStore = self._unparse(self._valueCache)
    return self.__contentStore

# content_ goes through the accessors above so the literal can be
# produced on demand from an already-parsed value.
content_ = property(__get_content, __set_content)
771
773 """hands up macro expansion requests to a parent, if there is one
774 and it can handle expansions.
775 """
776 if hasattr(self.parent, "expand"):
777 return self.parent.expand(value)
778 return value
779
784
792
793 @property
795 """returns a typed value for the parameter.
796
797 Unset items give None here.
798 """
799 if self._valueCache is base.Undefined:
800 if self.content_ is base.NotGiven:
801 self._valueCache = None
802 else:
803 self._valueCache = self._parse(self.content_)
804 return self._valueCache
805
807 """returns a string serialisation of the value.
808
809 This is what would reproduce the value if embedded in an XML
810 serialisation of the param.
811 """
812 if self.type in self.unprocessedTypes:
813 return "(Unrepresentable %s)"%self.type
814 return self.content_
815
def set(self, val):
    """sets this parameter's value.

    val can be a python value or a string literal.  A string literal is
    stored as content_ and is hence preserved in string serialisations of
    this param.  For any other value, content_ is reset to NotGiven.

    The value is then run through _parse, which raises a ValidationError
    for literals that are invalid for this item.  In that case, the
    item's parsed value is left Undefined (rather than keeping whatever
    was parsed before), while content_ already holds the rejected
    literal; accessing the value again will hence re-attempt the parse.
    """
    if isinstance(val, basestring):
        self.content_ = val
    else:
        self.content_ = base.NotGiven

    # Invalidate the cache *before* parsing: if _parse raises, we must not
    # keep serving the previous value while content_ has already changed.
    self._valueCache = base.Undefined
    self._valueCache = self._parse(val)
831
def _parse(self, literal, atom=False):
    """returns a python value for literal, parsed according to this
    param's type.

    For types in unprocessedTypes and for anything that is not a string,
    literal is returned unchanged (and without any validation).

    Otherwise:

    - ``__NULL__`` and the empty string yield None.
    - ``__EMPTY__`` yields an empty string for text and unicode params.
    - a literal equal to the values child's nullLiteral yields None.
    - everything else goes through the VOTable parser selected from
      _getVOTableType; with atom=True, the arraysize is ignored.
      Values that do not compare equal to themselves (NaN) become None.

    A ValueError from the parser is turned into a ValidationError, and
    a ValidationError is also raised if the values child rejects the
    result, except for the pql-int and pql-float types.

    As a side effect, content_ is set to an empty string whenever the
    result is None.

    NOTE(review): this assumes self.values is a Values instance by the
    time parsing happens, although ColumnBase declares it with a default
    of None -- confirm it is filled in before the first parse.
    """
    if self.type in self.unprocessedTypes or not isinstance(
            literal, basestring):
        return literal

    if literal=="__NULL__" or literal=="":
        value = None

    elif ((self.type=="text" or self.type=="unicode")
            and literal=="__EMPTY__"):
        value = ""

    elif literal==self.values.nullLiteral:
        value = None

    else:
        try:
            votType, arraysize, xtype = self._getVOTableType()
            if atom:
                arraysize = None

            # whitespace is only significant for character data
            if votType not in ["char", "unicodeChar"]:
                literal = literal.strip()

            # for intervals, the xtype derived from the type takes
            # precedence; in all other cases, a declared xtype wins.
            if self.xtype=="interval":
                xtype = xtype or self.xtype
            else:
                xtype = self.xtype or xtype

            parseLiteral = paramval.getVOTParser(votType, arraysize, xtype)
            value = parseLiteral(literal)

            # only NaN is unequal to itself; it counts as NULL here
            if value!=value:
                value = None
        except ValueError:
            raise base.ValidationError("%s is not a valid literal for %s"%(
                repr(literal), self.name), self.name)

    # NOTE(review): the PQL types are exempt from the options check;
    # presumably their literals cannot be matched against plain options
    # -- confirm.
    if (not self.values.validateOptions(value)
            and self.type not in ["pql-int", "pql-float"]):
        raise base.ValidationError("%s is not a valid value for %s"%(
            repr(literal), self.name), self.name)

    if value is None:
        self.content_ = ""

    return value
892
894 """returns a string representation of value appropriate for this
895 type.
896
897 Actually, for certain types only handled internally (like file or raw),
898 this is not a string representation at all but just the python stuff.
899
900 Plus, right now, for sequences we're not doing anything. We probably
901 should; but we'll need to be much more careful in ContextGramar then.
902 """
903 if self.type in self.unprocessedTypes:
904 return value
905
906 if value is None:
907 return ""
908 else:
909 type, arraysize, xtype = self._getVOTableType()
910 val = paramval.getVOTSerializer(type, arraysize,
911 self.xtype or xtype)(value)
912 return val
913
914
915 -class Param(ParamBase):
916 """A table parameter.
917
918 This is like a column, except that it conceptually applies to all
919 rows in the table. In VOTables, params will be rendered as
920 PARAMs.
921
922 While we validate the values passed using the DC default parsers,
923 at least the VOTable params will be literal copies of the string
924 passed in.
925
926 You can obtain a parsed value from the value attribute.
927
928 Null value handling is a bit tricky with params. An empty param (like
929 ``<param name="x"/>``) is always NULL (None in python).
930 In order to allow setting NULL even where syntactically something has
931 to stand, we also turn any __NULL__ to None.
932
933 For floats, NaN will also yield NULLs. For integers, you can also
934 use
935
936 <param name="x" type="integer"><values nullLiteral="-1"/>-1</param>
937
938 For arrays, floats, and strings, the interpretation of values is
939 undefined. Following VOTable practice, we do not tell empty strings and
940 NULLs apart; for internal usage, there is a little hack: __EMPTY__ as literal
941 does set an empty string. This is to allow defaulting of empty strings -- in
942 VOTables, these cannot be distinguished from "true" NULLs.
943 """
944 name_ = "param"
945
946 _hidden = base.BooleanAttribute("hidden", default=False,
947 description="Ignored on params, just present for constructor compatibility"
948 " with column")
949
951 self._validateNext(Param)
952 if self.content_ is base.NotGiven:
953 self.set(None)
954
955 if self.required and self.value is None:
956 raise base.StructureError("Required value not given for param"
957 " %s"%self.name)
958
959 try:
960
961 self.value
962 except ValueError as msg:
963 raise base.LiteralParseError(self.name, self.content_,
964 hint="Param content must be parseable by the DC default parsers."
965 " The value you passed caused the error: %s"%msg)
966
def set(self, val):
    """sets the value of the parameter.

    String values containing a backslash are passed through the parent's
    expand method first, provided the parent has one.  All other values
    go unchanged to ParamBase.set, the result of which is returned.
    """
    wantsExpansion = (
        isinstance(val, basestring)
        and "\\" in val
        and hasattr(self.parent, "expand"))
    if wantsExpansion:
        val = self.parent.expand(val)
    return ParamBase.set(self, val)
978
980 """returns a dm annotation for this param (i.e., a paramRef).
981 """
982
983
984
985 return dm.ParamAnnotation(roleName, self, instance)
986