Package gavo :: Package base :: Module parsecontext
[frames | no frames]

Source Code for Module gavo.base.parsecontext

  1  """ 
  2  ParseContexts for parsing into structures. 
  3   
  4  A Context is a scratchpad for struct parsing.  It always provides an idmap, but 
  5  you're free to insert additional attributes. 
  6   
  7  Based on this, we provide some attribute definitions. 
  8  """ 
  9   
 10  #c Copyright 2008-2019, the GAVO project 
 11  #c 
 12  #c This program is free software, covered by the GNU GPL.  See the 
 13  #c COPYING file in the source distribution. 
 14   
 15   
 16  import contextlib 
 17   
 18  from gavo import utils 
 19  from gavo.base import attrdef 
 20  from gavo.base import caches 
 21  from gavo.base import common 
def assertType(id, ob, forceType):
    """Return ``ob`` after checking it against ``forceType``.

    ``id`` is only used to build the error message.  If ``forceType`` is
    false-ish (typically None), no check takes place and ``ob`` is handed
    back unchanged.

    Raises a ``StructureError`` if ``forceType`` is given and ``ob`` is not
    an instance of it.
    """
    typeMismatch = forceType and not isinstance(ob, forceType)
    if typeMismatch:
        raise common.StructureError(
            "Reference to '%s' yielded object of type"
            " %s, expected %s"%(
                id, ob.__class__.__name__, forceType.__name__))
    return ob
34
def resolveCrossId(id, forceType=None, **kwargs):
    """Resolve a cross-RD reference of the form ``rdId#id``.

    ``id`` may also be a plain RD id without a hash sign; in that case,
    the RD itself is the result.

    ``forceType``, if not None, is the type the result must be an instance
    of (e.g., rscdef.Table); a ``StructureError`` is raised when the
    reference resolves to something else.

    ``kwargs`` are passed on to the ``caches.getRD`` call that loads the RD.

    More than one hash sign in ``id`` yields a ``LiteralParseError``.  An
    ``RDNotFound`` coming out of ``getRD`` is re-raised with an explanatory
    hint attached.
    """
    hashCount = id.count("#")
    if hashCount>1:
        raise common.LiteralParseError("id", id, hint="There must be at most"
            " one hash sign ('#') in cross ids, separating the rd identifier"
            " from the rd-internal id")

    if hashCount==1:
        rdId, rest = id.split("#")
    else:
        # a bare RD id: there is nothing to resolve within the RD
        rdId, rest = id, None

    try:
        srcRD = caches.getRD(rdId, **kwargs)
    except common.RDNotFound as ex:
        ex.hint = (
            "I was trying to resolve the reference %s; note that DaCHS only"
            " uses RDs residing below inputsDir and ignores all others."
            " If there's an RD that DaCHS insists isn't there, that's"
            " probably the reason."%id)
        raise

    if rest is not None:
        return resolveId(srcRD, rest, forceType=forceType)
    return assertType(id, srcRD, forceType)
71
def resolveNameBased(container, id, forceType=None):
    """Tries to find a thing with name id within container.

    If container defines a method getElementForName, it will be called; it
    must either return some element with this name or raise a NotFoundError.

    If no such method exists, the function iterates over container until
    it finds an element with el.name==id; that element is checked against
    forceType (see assertType) and returned.

    The function raises a NotFoundError when no such thing exists or
    when container cannot be iterated over.
    """
    if hasattr(container, "getElementForName"):
        # NOTE(review): forceType is not enforced on this path; the result
        # of getElementForName is handed through as is.
        return container.getElementForName(id)

    ob = None
    try:
        for ob in container:
            if hasattr(ob, "name") and ob.name==id:
                return assertType(id, ob, forceType)
    except TypeError:
        if ob is None:
            # we never got a single element out of container
            raise utils.logOldExc(common.NotFoundError(id, "Element with name",
                "container %s"%repr(container),
                hint="The container, %s, is not iterable"%repr(container)))
        else:
            # ob is whatever was last pulled from container; it need not
            # have a name, so fall back to its repr in the message.
            raise utils.logOldExc(common.NotFoundError(id, "Element with name",
                "container %s"%repr(container),
                hint="Element %s is of type %s and thus unsuitable"
                " for name path"%(getattr(ob, "name", repr(ob)), type(ob))))

    # Plain sequences have no id attribute; do not let building the
    # message mask the NotFoundError with an AttributeError.
    raise common.NotFoundError(id, "Element with name",
        "container %s"%getattr(container, "id", repr(container)))
105
def resolveComplexId(ctx, id, forceType=None):
    """Resolve a dotted reference of the form ``parentId.name``.

    The part before the dot is looked up through ``ctx.getById``; the part
    after it is then searched within that object by resolveNameBased,
    which also receives ``forceType``.

    An ``id`` that does not split into exactly two parts at dots raises a
    ``LiteralParseError``.
    """
    try:
        parentId, elName = id.split(".")
    except ValueError:
        raise utils.logOldExc(common.LiteralParseError("id", id,
            hint="A complex reference (parent.name) is expected here"))
    return resolveNameBased(ctx.getById(parentId), elName, forceType)
119
def _resolveOnNamepath(ctx, id, instance):
    """Resolve ``id`` through the ``resolveName`` method of ``instance`` or,
    failing that, of ``instance.parent``.

    Raises a ``NotFoundError`` if neither of them has a ``resolveName``
    method.
    """
    resolver = None
    if hasattr(instance, "resolveName"):
        resolver = instance
    elif (instance and instance.parent
            and hasattr(instance.parent, "resolveName")):
        resolver = instance.parent

    if resolver is None:
        raise common.NotFoundError(id, "Element with id or name", "name path")
    return resolver.resolveName(ctx, id)
128
def resolveId(ctx, id, instance=None, forceType=None):
    """tries to resolve id in context.

    ctx is some object having a getById method; this could be an RD
    or a parse context.

    The rules for id are as follows:

    (#) if id has a # in it, it is handed to resolveCrossId, which takes
    the part before the hash as an RD id and resolves the rest within
    that RD according to the rest of this spec.

    (#) if id has a dot in it, it is handed to resolveComplexId, which
    expects a pair parent-id.name, fetches the element with the id and
    looks inside of it for something with a "name" attribute valued name.

    (#) if instance is true-ish and it or its parent has a resolveName
    method, pass id to it.  If it does not raise a NotFoundError, use
    the result.  This is for parents with a rscdef.NamePathAttribute.

    (#) ask ctx's getById method to resolve id, not catching the
    NotFoundError this will raise if the id is not known.

    Whatever is found is finally checked against forceType.  Resolving
    anything but a cross id with ctx=None raises a StructureError.
    """
    if "#" in id:
        return resolveCrossId(id, forceType)
    if ctx is None:
        raise common.StructureError(
            "Cannot intra-reference (%s) when parsing without a context"%id)
    if "." in id:
        return resolveComplexId(ctx, id, forceType)

    found = None
    if instance:
        try:
            found = _resolveOnNamepath(ctx, id, instance)
        except common.NotFoundError:
            # no such named element, try element with id
            pass

    # ctx cannot be None here; that case was rejected above.
    if found is None:
        found = ctx.getById(id, forceType)
    return assertType(id, found, forceType)
172
class IdAttribute(attrdef.UnicodeAttribute):
    """An attribute that, in addition to setting its value on the parent,
    registers the parent in the parse context's id map.
    """

    def feed(self, ctx, parent, literal):
        """Set the attribute and, if there is a context, register parent
        under its id and store the qualified id on it.
        """
        attrdef.UnicodeAttribute.feed(self, ctx, parent, literal)
        if ctx is None:
            return
        # overwrites are expected (and thus kept silent) while replaying
        ctx.registerId(parent.id, parent, ctx.replayLevel>0)
        parent.qualifiedId = ctx.getQualifiedId(literal)

    def getCopy(self, parent, newParent, ctx):
        # ids may not be copied
        return None

    def makeUserDoc(self):
        # don't mention it in docs -- all structures have it
        return None
189
class OriginalAttribute(attrdef.AtomicAttribute):
    """An attribute that resolves a reference and copies the managed
    attributes of the referenced item over to the current one.

    The references may be anything resolveId can cope with.

    You can pass a forceType argument to make sure only references to
    specific types are allowable.  In general, this will be the class
    itself or a base class.  If you don't do this, you'll probably get
    weird AttributeErrors for certain inputs.

    To work reliably, these attributes have to be known to the XML
    parser so it makes sure they are processed first.  This currently
    works by name, and "original" is reserved for this purpose.  Other
    names will raise an AssertionError right now.

    As a safety mechanism, OriginalAttribute checks if it is replacing
    a "pristine" object, i.e. one that has not had events fed to it.
    """
    computed_ = True
    typeDesc_ = "id reference"

    def __init__(self, name="original", description="An id of an element"
            " to base the current one on. This provides a simple inheritance"
            " method. The general rules for advanced referencing in RDs apply.",
            forceType=None, **kwargs):
        # the parser recognises this attribute by name (see class docstring)
        assert name=='original'
        attrdef.AtomicAttribute.__init__(self, name, None, description,
            **kwargs)
        self.forceType = forceType

    def feedObject(self, instance, original, ctx=None):
        """Make instance a copy of original.

        Raises a StructureError if instance is no longer pristine.
        """
        if not instance._pristine:
            raise common.StructureError(
                "Original must be applied before modifying"
                " the destination structure.",
                hint="You should normally use"
                " original only as attribute. If you insist on having it as"
                " an element, it must be the first one and all other structure"
                " members must be set through elements, too")
        instance._originalObject = original
        instance.feedFrom(original, ctx)

    def feed(self, ctx, instance, literal):
        """Resolve literal using resolveId and feed the result to instance."""
        referenced = resolveId(ctx, literal, instance, self.forceType)
        self.feedObject(instance, referenced, ctx)
235
class _ReferenceParser(common.Parser):
    """A helper class for the ReferenceAttribute.

    self.child is common.NotGiven until it is known what the element
    holds.  It then becomes an immediate child structure, or None once
    the element turned out to contain a reference.
    """

    def __init__(self, refAttr, parent, baseName):
        self.refAttr = refAttr
        self.parent = parent
        self.baseName = baseName
        self.child = common.NotGiven

    def _ensureChild(self, ctx):
        """creates an instance of the new, immediate child to be filled and
        leaves it in self.child.

        Nothing happens if self.child has already been decided on.
        """
        if self.child is not common.NotGiven:
            return
        self.child = self.refAttr._makeChild(self.baseName, self.parent)
        ctx.setPositionOn(self.child)

    def start_(self, ctx, name, value):
        # start event: we have an immediate child.  Create it and feed this
        # event to the newly created child.
        self._ensureChild(ctx)
        return self.child.feedEvent(ctx, "start", name, value)

    def end_(self, ctx, name, value):
        if self.child is common.NotGiven:
            # empty element; make a child
            self._ensureChild(ctx)
        immediate = self.child
        if immediate is not None:
            # immediate child was given: finish it and hand it to the parent
            immediate.finishElement(ctx)
            self.parent.feedObject(name, immediate)
        return self.parent

    def value_(self, ctx, name, value):
        # value event: If it's a content_, it's a reference, else it's an
        # attribute on a child of ours.
        if name!="content_":
            self._ensureChild(ctx)
            return self.child.feedEvent(ctx, "value", name, value)

        if self.child:
            raise common.StructureError("Content received on ReferenceParser"
                " although a child is already there.", hint="You should"
                " not see this. Complain fiercely.")
        self.refAttr.feed(ctx, self.parent, value)
        # None marks "this was a reference" for end_
        self.child = None
        return self
281
class ReferenceAttribute(attrdef.AtomicAttribute):
    """An attribute keeping a reference to some other structure

    This is a bit messy since the value referred to keeps its original
    parent, so self.attr.parent!=self for these attributes.  This is
    ok for many applications, but it will certainly not work for, e.g.
    tables (roughly, it's always trouble when an attribute value's
    implementation refers to self.parent; this is particularly true
    for structures having an RDAttribute).

    So, before adding a reference attribute, think first whether
    it wouldn't be wiser to have the real thing and use original to copy
    things over.

    forceType, if given, restricts what references may resolve to and
    is also the type instantiated for immediate child elements;
    attrdef.Recursive stands for "the type of the embedding instance".
    """
    typeDesc_ = "id reference"

    def __init__(self, name="ref", default=attrdef.Undefined,
            description="Undocumented", forceType=None, **kwargs):
        attrdef.AtomicAttribute.__init__(self, name, default,
            description, **kwargs)
        self.forceType = forceType

    def _getForceType(self, instance):
        """returns self.forceType unless it is Recursive, in which case
        instance's type is returned.
        """
        if self.forceType is attrdef.Recursive:
            return instance.__class__
        return self.forceType

    def unparse(self, value):
        """returns something feed will accept again for value.

        That is None for None, the qualified id for values having one,
        and the value itself for strings.  Anything else is marked as
        an approved anonymous structure and returned as is.
        """
        if value is None:  # ref attribute was empty
            return None
        if hasattr(value, "qualifiedId"):
            return value.qualifiedId
        if isinstance(value, basestring):
            return value
        # See HACK notice in feed
        setattr(value, "unparse-approved-anonymous", True)
        return value

    # Since ReferenceAttributes can now contain immediate elements,
    # just returning an id (as happens by default) may not be enough
    # for serialization -- the immediate object is nowhere else.
    # We could fix that using something like this, at the expense
    # of unrolling all the elements.  We don't do much DC structure
    # serialization, and thus I believe it's just not worth it.
    # def iterEvents(self, instance):
    #     # This needs a special iterEvents to actually return embedded
    #     # structures if necessary
    #     val = getattr(instance, self.name_)
    #     if val==self.default_:
    #         return
    #
    #     if hasattr(val, "_RefAttrImmediate"):
    #         yield ("start", self.name_, None)
    #         for ev in val.iterEvents():
    #             yield ev
    #         yield ("end", self.name_, None)
    #     else:
    #         yield ("value", self.name_, self.unparse(val))

    def feed(self, ctx, instance, literal):
        """resolves literal and feeds the result to instance.

        A literal of None is ignored; objects previously marked by
        unparse are fed without resolution.
        """
        if literal is None:  # ref attribute empty during a copy
            return  # do nothing, since nothing was ref'd in original

        # HACK: when copying around structures, it's possible that anonymous
        # structures can be fed in here.  We *really* don't want to make
        # up ids for them.  Thus, we allow them out in unparse and in here
        # again.
        if hasattr(literal, "unparse-approved-anonymous"):
            self.feedObject(instance, literal)
        else:
            self.feedObject(instance,
                resolveId(ctx, literal, instance, self._getForceType(instance)))

    def _makeChild(self, name, parent):
        """returns a new element of the appropriate type.

        This method raises a StructureError if that type is not known.
        Within ReferenceAttribute, the type is given by forceType.
        """
        if self.forceType is None:
            raise common.StructureError("Only references allowed for %s, but"
                " an immediate object was found"%self.name_,
                hint="This means that"
                " you tried to replace a reference to an element with"
                " the element itself. This is only allowed if the reference"
                " forces a type, which is not the case here.")
        child = self._getForceType(parent)(parent)
        # leave a sentinel in the child that will later let us
        # iterEvents not the id but the struct itself.
        child._RefAttrImmediate = True
        return child

    def create(self, structure, ctx, name):
        # we don't know at this point whether or not the next event will be
        # an open (-> create a new instance of self.forceType) or a
        # value (-> resolve).  Thus, create an intermediate parser that
        # does the right thing.
        return _ReferenceParser(self, structure, name)
385
class ReferenceListAttribute(ReferenceAttribute):
    """A list of references.

    These can come as distinct elements -- <ref>a</ref><ref>b</ref> -- or
    as a comma-joined string -- ref="a, //services#b, x.y" --, or in a
    mixture between the two.  Whitespace around the items of a
    comma-joined string is ignored.
    """
    typeDesc_ = "list of id references (comma separated or in distinct elements)"

    def __init__(self, name, **kwargs):
        if kwargs.get("default") is not None:
            raise common.StructureError("ReferenceListAttributes cannot have"
                " defaults")
        kwargs["default"] = attrdef.Computed
        ReferenceAttribute.__init__(self, name, **kwargs)

    @property
    def default_(self):
        # a fresh list on each access
        return []

    def feedObject(self, instance, value):
        """appends value to the list kept on instance and runs the
        callbacks for it; lists are fed item by item.
        """
        if not isinstance(value, list):
            getattr(instance, self.name_).append(value)
            self.doCallbacks(instance, value)
            return
        for member in value:
            self.feedObject(instance, member)

    def feed(self, ctx, instance, literal):
        if literal is None:  # see ReferenceAttribute.feed
            return
        # split literal up if there's commas
        if "," in literal:
            pieces = [piece.strip() for piece in literal.split(",")]
        else:
            pieces = [literal]
        for piece in pieces:
            ReferenceAttribute.feed(self, ctx, instance, piece)

    def unparse(self, value):
        # Hack to avoid to have to figure out globally ok ids; see
        # comments in ReferenceAttribute
        BlessedList = type("BlessedList", (list,),
            {"unparse-approved-anonymous": True})
        return BlessedList(value)
432
class ParseContext(object):
    """A scratchpad for any kind of data parsers want to pass to feed
    methods.

    These objects are available to the feed methods as their
    first arguments.

    If restricted is True, embedded code must raise an error.

    You should set an eventSource using the setter provided.  This is
    the iterparse instance the events are coming from (or something else
    that has a pos attribute returning the current position).

    You can register exit functions to do some "global" cleanup.  Parsers
    should call runExitFuncs right before they return the results; this
    arranges for these functions to be called.  The signature of an exit
    function is exitfunc(rootStruct, parseContext) -> whatever.
    """

    def __init__(self, restricted=False, forRD=None):
        self.restricted = restricted
        self.forRD = forRD
        self.idmap = {}
        self.exitFuncs = []
        self.eventSource = None
        # if non-0, we're replaying events
        self.replayLevel = 0

    def setEventSource(self, evSource):
        self.eventSource = evSource

    def addExitFunc(self, callable):
        self.exitFuncs.append(callable)

    @contextlib.contextmanager
    def replaying(self):
        """A context manager used by active tags to indicate they're
        replaying events.

        The main effect right now is to suppress diagnostics for overwritten
        ids.  But let's see what else we might want to use it for.

        The API to see if we're replaying: replayLevel>0.
        """
        self.replayLevel += 1
        try:
            yield
        finally:
            self.replayLevel -= 1

    @property
    def pos(self):
        """returns a token stringifying into a position guess.
        """
        if self.eventSource is not None:
            return self.eventSource.pos
        return "(while parsing sourceless)"

    def setPositionOn(self, struct):
        """calls a struct's setPosition method to tell it where it came from.

        String positions are passed on with a line number of -1; anything
        else must have fName and line attributes.
        """
        where = self.pos
        if isinstance(where, basestring):
            struct.setPosition(where, -1)
        else:
            struct.setPosition(where.fName, where.line)

    def getQualifiedId(self, id):
        """returns an id including the current RD's id, if known, otherwise id
        itself.
        """
        if not self.forRD:
            return id
        return "%s#%s"%(self.forRD, id)

    def registerId(self, elId, value, silentOverwrite=False):
        """enters a value in the id map.

        We allow overriding an id.  That should not happen while parsing
        an XML document because of their uniqueness requirement, but
        might come in handy for programmatic manipulations.

        Unless silentOverwrite is true, an Info event is emitted when an
        existing id is replaced.
        """
        overwriting = elId in self.idmap
        if overwriting and not silentOverwrite:
            utils.sendUIEvent("Info", "Element with id %s overwritten."%elId)
        self.idmap[elId] = value

    def getById(self, id, forceType=None):
        """returns the object last registered for id.

        You probably want to use resolveId; getById does no namePath or
        resource descriptor resolution.

        A NotFoundError is raised for unknown ids; the result is checked
        against forceType.
        """
        if id in self.idmap:
            return assertType(id, self.idmap[id], forceType)
        raise common.NotFoundError(id, "Element with id", "parse context",
            hint="Elements referenced must occur lexically (i.e., within the"
            " input file) before the reference. If this actually gives"
            " you trouble, contact the authors. Usually, though, this"
            " error just means you mistyped a name.")

    def resolveId(self, id, instance=None, forceType=None):
        """returns the object referred to by the complex id.

        See the resolveId function.
        """
        return resolveId(self, id, instance, forceType)

    def runExitFuncs(self, root):
        """calls all registered exit functions, in the order of their
        registration, with root and this context.
        """
        for exitFunc in self.exitFuncs:
            exitFunc(root, self)
548
def getTableDefForTable(connection, tableName):
    """returns a TableDef object for a SQL table name.

    connection needs to be TableConnection or higher.

    Unqualified table names are looked up in the public schema.  The
    defining RD is taken from dc.tablemeta; a NotFoundError is raised
    if the table is not listed there, a ReportableError if it is listed
    more than once.

    This really has little to do with resolving identifiers, but
    this module already has getRDs and similar, so it seemed the least
    unnatural place.
    """
    if "." not in tableName:
        tableName = "public."+tableName

    matches = list(connection.queryToDicts(
        "select sourcerd, tablename from dc.tablemeta where"
        " lower(tableName)=%(tableName)s",
        {"tableName": tableName.lower()}))

    if not matches:
        raise common.NotFoundError(tableName, "table", "published tables")
    if len(matches)>1:
        raise common.ReportableError("More than one entry for table %s in"
            " dc.tablemeta!"%tableName,
            hint="This is a severe internal error and really should not happen."
            " Please report this bug.")

    row = matches[0]
    srcRD = caches.getRD(row["sourcerd"])
    # within the RD, tables are identified without the schema prefix
    return srcRD.getById(row["tablename"].split(".")[-1])
577