Package gavo :: Package registry :: Module oaiinter
[frames] | no frames]

Source Code for Module gavo.registry.oaiinter

  1  """ 
  2  The standard OAI interface. 
  3   
  4  In this module the core handling the OAI requests and the top-level handlers 
  5  for the verbs are defined. 
  6   
  7  The top-level handlers are all called run_<verb> -- any such function 
  8  is web-callable. 
  9  """ 
 10   
 11  #c Copyright 2008-2019, the GAVO project 
 12  #c 
 13  #c This program is free software, covered by the GNU GPL.  See the 
 14  #c COPYING file in the source distribution. 
 15   
 16   
 17  import datetime 
 18  import time 
 19  import urllib 
 20  import urlparse 
 21   
 22  from gavo import base 
 23  from gavo import svcs 
 24  from gavo import utils 
 25  from gavo.registry import builders 
 26  from gavo.registry import common 
 27  from gavo.registry import identifiers 
 28  from gavo.registry.model import OAI 
 29   
 30  from gavo.registry.common import ( #noflake: exported names 
 31          OAIError, BadArgument, BadResumptionToken, BadVerb, 
 32          CannotDisseminateFormat, IdDoesNotExist, NoMetadataFormats, 
 33          NoSetHierarchy, NoRecordsMatch) 
 34   
 35   
 36  ########################### Generic Processing of PMH requests 
 37   
 38  # a mapping of OAI verbs to optional and required argument names 
 39  _ARGUMENTS = { 
 40          "GetRecord": (["identifier", "metadataPrefix"], []), 
 41          "ListRecords": (["metadataPrefix"],  
 42                  ["from", "until", "set", "resumptionToken"]), 
 43          "ListIdentifiers": (["metadataPrefix"],  
 44                  ["from", "until", "set", "resumptionToken"]), 
 45          "ListSets": ([], []), 
 46          "Identify": ([], []), 
 47          "ListMetadataFormats": ([], ["identifier"]),} 
 48   
 49   
def runPMH(pars, builders):
    """runs the OAI-PMH handling function for the request described by pars.

    pars is the OAI-PMH parameter dictionary; it must contain a verb
    that is a key of _ARGUMENTS.

    builders is a mapping of verbs to pairs of (content maker,
    argument-building function).  The content maker is either a
    callable or a dictionary mapping metadata prefixes to callables; in
    the latter case, the callable is chosen through dispatchOnPrefix.

    The argument-building function takes the OAI-PMH parameter dictionary
    (that's already validated for mandatory and optional arguments) and
    returns a tuple that is then passed on, as positional arguments, to
    the content maker.

    What the content maker returns is put into an OAI.PMH element, after
    the response header, which is generated by this function.

    A missing verb raises common.BadArgument, an unknown one
    common.BadVerb; exceptions from checkPars, dispatchOnPrefix and the
    builders are passed on.
    """
    if "verb" not in pars:
        raise common.BadArgument("verb")
    verb = pars["verb"]

    try:
        requiredArgs, optionalArgs = _ARGUMENTS[verb]
    except KeyError:
        raise common.BadVerb("'%s' is an unsupported operation."%verb)
    checkPars(pars, requiredArgs, optionalArgs)

    contentMaker, getArgs = builders[verb]
    if isinstance(contentMaker, dict):
        # verbs taking a metadataPrefix have one content maker per prefix
        contentMaker = dispatchOnPrefix(pars, contentMaker)

    header = getResponseHeaders(pars)
    content = contentMaker(*getArgs(pars))
    return OAI.PMH[header, content]



########################### parsing and generating resumption tokens
# In our implementation, the resumptionToken is a hex-encoded
# zlibbed query string made from the parameters, plus information
# on the offset and the time the resumption token was issued.

89 -def makeResumptionToken(pars, nextOffset):
90 """return a resumptionToken element for resuming the query in 91 pars at nextOffset. 92 """ 93 toEncode = pars.copy() 94 toEncode["nextOffset"] = str(nextOffset) 95 toEncode["queryDate"] = time.time() 96 return urllib.urlencode(toEncode).encode("zlib").encode("hex" 97 ).replace("\n", "")
98 99
def parseResumptionToken(pars):
    """returns the parameters of the query continued by the
    resumptionToken in the OAI-PMH parameter dictionary pars.

    The token is expected to be a hex-encoded, zlib-compressed query
    string containing at least queryDate and nextOffset.

    The returned dictionary contains resumptionToken again, but now as
    the integer offset into the query to be executed.

    A BadResumptionToken exception is raised if

    - the token cannot be decoded or lacks queryDate or nextOffset,
    - the verb in the token differs from the verb in pars,
    - pars contains a non-empty metadataPrefix,
    - the //services RD was loaded after the token was issued; we take
      that as a sign that the registry has changed.  This is based on
      gavo pub reloading the //services RD after publication.  Not
      perfect, but probably adequate.
    """
    try:
        rawQuery = pars["resumptionToken"].decode("hex").decode("zlib")
        newPars = dict(urlparse.parse_qsl(rawQuery))
        queryDate = float(newPars.pop("queryDate"))
        offset = int(newPars.pop("nextOffset"))
    except KeyError:
        raise base.ui.logOldExc(
            common.BadResumptionToken("Incomplete resumption token"))
    except Exception as exc:
        raise base.ui.logOldExc(common.BadResumptionToken(str(exc)))

    # the differing defaults make sure a verb missing on both sides
    # counts as a mismatch, too.
    if newPars.get("verb", 1)!=pars.get("verb", 2):
        raise common.BadResumptionToken(
            "Trying to resume with a different verb")

    # NOTE(review): any metadataPrefix coming with the token is rejected,
    # not only one differing from the token's; presumably that is because
    # OAI-PMH has resumptionToken as an exclusive argument -- confirm.
    if pars.get("metadataPrefix"):
        raise common.BadResumptionToken("Trying to resume with a"
            " different metadata prefix")

    servicesLoaded = base.caches.getRD("//services").loadedAt
    if int(queryDate)<int(servicesLoaded):
        raise common.BadResumptionToken("Service table has changed")

    newPars["resumptionToken"] = offset
    return newPars


########################### Helpers for OAI handlers

def checkPars(pars, required, optional=(),
        ignored=frozenset(["verb", "maxRecords"])):
    """raises exceptions for missing or illegal parameters.

    pars is the OAI-PMH parameter dictionary, required and optional are
    iterables of the names of the mandatory and the admissible
    parameters, respectively.  Names in ignored are accepted without any
    check.

    A common.BadArgument is raised with the offending name as argument
    for the first parameter in pars that is neither required, optional
    nor ignored; if there is none, it is raised for the first name in
    required (in the order given) missing from pars.

    The function returns None when pars is ok.
    """
    # the defaults are immutable on purpose, as they are shared between
    # all calls.
    required = list(required)
    allowed = set(required).union(optional)

    for name in pars:
        if name not in ignored and name not in allowed:
            raise common.BadArgument(name)

    for name in required:
        if name not in pars:
            raise common.BadArgument(name)
151 152
def getResponseHeaders(pars):
    """returns the OAI response header for a query with pars.

    This is a list of an OAI.responseDate element containing the current
    UTC time formatted with utils.isoTimestampFmt, and an OAI.request
    element built from the verb and (if present, else None) the
    metadataPrefix in pars.
    """
    now = datetime.datetime.utcnow()
    responseDate = OAI.responseDate[now.strftime(utils.isoTimestampFmt)]
    request = OAI.request(
        verb=pars["verb"],
        metadataPrefix=pars.get("metadataPrefix"))
    return [responseDate, request]
161 162
def dispatchOnPrefix(pars, contentMakers):
    """returns the content maker for the metadataPrefix in pars.

    contentMakers is a dictionary mapping metadata prefixes to content
    makers, i.e., one of the dictionaries found in the first items of
    the values of RegistryCore.builders.

    A missing metadataPrefix raises a common.BadArgument, one that is
    not a key in contentMakers a common.CannotDisseminateFormat.
    """
    if "metadataPrefix" not in pars:
        raise common.BadArgument("metadataPrefix missing")

    prefix = pars["metadataPrefix"]
    try:
        return contentMakers[prefix]
    except KeyError:
        raise common.CannotDisseminateFormat(
            "%s metadata are not supported"%prefix)
179 180
def _getSetNames(pars):
    """returns a set of requested set names from pars.

    The result has exactly one member: the value of set in pars or,
    if no set is specified there, ivo_managed.
    """
    requested = pars.get("set", "ivo_managed")
    return set((requested,))
187 188
def _makeArgsForListMetadataFormats(pars):
    """returns the (empty) argument tuple for the ListMetadataFormats
    content maker.

    An identifier in pars is not ignored but handed to
    identifiers.getResobFromIdentifier, since crooks may be trying to
    verify the existence of a resource in this way and we want to let
    them do this.  The result of the lookup is discarded; of course, we
    support both kinds of metadata on all records.
    """
    try:
        identifier = pars["identifier"]
    except KeyError:
        return ()

    # only called so a failing lookup can propagate to the caller
    identifiers.getResobFromIdentifier(identifier)
    return ()
197 198
def _addTemporalCondition(parVal, operator, sqlFrags, sqlPars, parName):
    """adds a condition comparing recTimestamp to parVal using operator
    to sqlFrags.

    This is a helper to handle both the from and the until parameter.

    parVal is the literal from the request; it has to match
    utils.datetimeRE or utils.dateRE, or a common.BadArgument for
    parName is raised.  Nothing is generated if parVal is None.

    The condition is appended to the list sqlFrags, the parsed value is
    entered into the dictionary sqlPars through base.getSQLKey.

    Since the Validator insists that servers enforce identical
    granularity on both arguments, this is more involved; the function
    returns None, "date", or "datetime" depending on what the argument
    was.
    """
    if parVal is None:
        return None

    timestamp, granularity = None, None

    if utils.datetimeRE.match(parVal):
        timestamp = base.parseDefaultDatetime(parVal)
        granularity = "datetime"

    if not timestamp and utils.dateRE.match(parVal):
        timestamp = base.parseDefaultDate(parVal)
        # postgres compares dates against timestamps as if the dates were
        # for midnight.  Therefore, on the upper limit, we need to fix
        # things: include the whole day by comparing against the
        # following midnight.
        if operator=='<=':
            timestamp = timestamp+datetime.timedelta(days=1)
            operator = '<'
        granularity = "date"

    if not timestamp:
        raise common.BadArgument(parName)

    sqlKey = base.getSQLKey("temporal", timestamp, sqlPars)
    sqlFrags.append("recTimestamp %s %%(%s)s"%(operator, sqlKey))
    return granularity
237 238
def _parseOAIPars(pars):
    """returns a pair of queryFragment, parameters for a query of
    services#services according to OAI.

    Only the from and until items of pars are evaluated here.  If both
    are given but differ in granularity (date vs. datetime), a
    common.BadArgument for from is raised.
    """
    sqlPars, sqlFrags = {}, []

    fromType = _addTemporalCondition(
        pars.get("from"), ">=", sqlFrags, sqlPars, "from")
    untilType = _addTemporalCondition(
        pars.get("until"), "<=", sqlFrags, sqlPars, "until")

    if fromType and untilType and fromType!=untilType:
        raise common.BadArgument("from")

    return " AND ".join(sqlFrags), sqlPars
254 255
def _getSetCondition(pars, sqlPars):
    """returns an SQL condition selecting the resources in the set
    requested in pars (ivo_managed if pars has no set).

    The set name is entered into the dictionary sqlPars through
    base.getSQLKey.

    This helps getMatchingRestups.
    """
    setName = pars.get("set", "ivo_managed")
    setKey = base.getSQLKey("set", setName, sqlPars)
    # we should join for this, but we'd need more careful query
    # construction then...
    return ("EXISTS (SELECT setName from dc.sets WHERE"
        " sets.resId=resources.resId"
        " AND sets.sourceRD=resources.sourceRD"
        " AND setname=%%(%s)s)"%setKey)
269 270
def getMatchingRows(pars, rscTableDef, getSetFilter):
    """returns rows in rscTableDef matching the OAI parameters pars.

    The last element of the list could be an OAI.resumptionToken element.
    pars is a dictionary mapping any of the following keys to values:

    - from
    - until -- these give a range for which changed records are being
      returned
    - set -- the name of the set to be matched; how it is evaluated is
      up to getSetFilter
    - resumptionToken -- the offset into the result at which to start
      (as left by parseResumptionToken), defaulting to 0
    - maxRecords -- an integer literal that specifies the maximum number
      of records returned, defaulting to the config item
      [ivoa]oaipmhPagesize

    maxRecords is not part of OAI-PMH; it is used internally to
    turn paging on when we think it's a good idea, and for testing.

    rscTableDef has to be a table with a column recTimestamp giving the
    resource record's updated time.

    getSetFilter(pars, fillers) is a function receiving the PMH parameters
    dictionary and a dictionary of query fillers and returning, as
    appropriate, a condition that implements any conditions on sets
    within pars.

    A common.BadArgument is raised when the database rejects the query,
    a common.NoRecordsMatch when nothing comes back from it.
    """
    temporalCondition, fillers = _parseOAIPars(pars)
    conditions = [cond
        for cond in (getSetFilter(pars, fillers), temporalCondition)
        if cond]
    frag = " AND ".join(conditions)

    maxRecords = int(pars.get("maxRecords",
        base.getConfig("ivoa", "oaipmhPagesize")))
    offset = pars.get("resumptionToken", 0)
    fillers["maxRecords"] = maxRecords
    fillers["offset"] = offset

    try:
        res = rscTableDef.doSimpleQuery(
            fragments=frag,
            postfix="LIMIT %(maxRecords)s OFFSET %(offset)s",
            params=fillers)

        if len(res)==maxRecords:
            # a full page: there's probably more data, so hand out a
            # resumption token
            nextOffset = offset+len(res)
            token = OAI.resumptionToken[
                makeResumptionToken(pars, nextOffset)]
            res.append(token)
            # NOTE(review): addChild of the token is replaced by a no-op
            # here; presumably that is to keep later processing from
            # adding children to it -- confirm.
            res[-1].addChild = lambda: 0

    except base.DBError:
        raise base.ui.logOldExc(
            common.BadArgument("Bad syntax in some parameter value"))
    except KeyError as msg:
        raise base.ui.logOldExc(
            base.Error("Internal error, missing key: %s"%msg))

    if not res:
        raise common.NoRecordsMatch(
            "No resource records match your criteria.")
    return res
324 325
def getMatchingRestups(pars):
    """returns a list of res tuples matching the OAI query arguments pars.

    The rows are taken from the table with the id resources in the
    services RD, with sets being evaluated by _getSetCondition.

    See getMatchingRows for details on pars.
    """
    resourcesTD = common.getServicesRD().getById("resources")
    return getMatchingRows(
        pars,
        resourcesTD,
        _getSetCondition)
333 334
def getMatchingResobs(pars):
    """returns a list of res objects matching the OAI-PMH pars.

    OAI elements within the result of getMatchingRestups (i.e., a
    resumption token) are passed through unchanged.  Res tuples for
    which no resource object can be made are reported through
    base.ui.notifyError and left out of the result.

    See getMatchingRestups for details.
    """
    resobs = []
    for restup in getMatchingRestups(pars):
        if isinstance(restup, OAI.OAIElement):
            resobs.append(restup)
            continue

        try:
            resob = identifiers.getResobFromRestup(restup)
        except Exception as exc:
            # best effort: one broken record should not spoil the
            # whole response.
            base.ui.notifyError("Could not create resource for %s: %s"%(
                repr(restup),
                exc))
        else:
            resobs.append(resob)
    return resobs



########################### The registry core

class RegistryCore(svcs.Core, base.RestrictionMixin):
    """is a core processing OAI requests.

    Its signature requires a single input key containing the complete
    args from the incoming request.  This is necessary to satisfy the
    requirement of raising errors on duplicate arguments.

    It returns what runPMH returns, i.e., an OAI.PMH element.

    This core is intended to work with the RegistryRenderer.
    """
    name_ = "registryCore"

    inputTableXML = """
        <inputTable id="_pubregInput">
            <inputKey name="args" type="raw"
                multiplicity="single"
                description="The raw dictionary of input parameters"/>
        </inputTable>
    """

    outputTableXML = """<outputTable/>"""

    # builders maps verbs to pairs of (thing, function(pars) -> tuple),
    # where function accepts the oai-pmh parameters and returns a tuple
    # of the internal representation(s) of the response.  thing is either
    # a function accepting the items of that tuple as arguments (for
    # verbs not requiring a metadata prefix), or it is a mapping of
    # metadata prefixes to such functions otherwise.
    #
    # Note that on the right hand side, builders still is the module
    # gavo.registry.builders.
    builders = {
        "GetRecord": ({
                "ivo_vor": builders.getVOGetRecordElement,
                "oai_dc": builders.getDCGetRecordElement},
            lambda pars: (
                identifiers.getResobFromIdentifier(pars["identifier"]),)),
        "ListRecords": ({
                "ivo_vor": builders.getVOListRecordsElement,
                "oai_dc": builders.getDCListRecordsElement},
            lambda pars: (getMatchingResobs(pars), _getSetNames(pars))),
        "ListIdentifiers": ({
                "ivo_vor": builders.getListIdentifiersElement,
                "oai_dc": builders.getListIdentifiersElement},
            lambda pars: (getMatchingRestups(pars),)),
        "ListSets": (builders.getListSetsElement,
            lambda pars: ()),
        "Identify": (builders.getIdentifyElement,
            lambda pars: (
                base.caches.getRD("//services").getById("registry"),)),
        "ListMetadataFormats": (builders.getListMetadataFormatsElement,
            _makeArgsForListMetadataFormats),
    }

    def runWithPMHDict(self, args):
        """returns the OAI-PMH response for the raw request arguments args.

        args maps argument names to sequences of values.  Any argument
        not having exactly one value raises a common.BadArgument.

        If there is a resumptionToken, the parameters of the query are
        taken from it (see parseResumptionToken).
        """
        pars = {}
        for argName, values in args.iteritems():
            if len(values)==1:
                pars[argName] = values[0]
            else:
                raise common.BadArgument(argName)

        if "resumptionToken" in pars:
            pars = parseResumptionToken(pars)

        return runPMH(pars, self.builders)

    def run(self, service, inputTable, queryMeta):
        """returns the OAI-PMH response for the query described by the
        args parameter of inputTable.

        service and queryMeta are not used here.
        """
        return self.runWithPMHDict(inputTable.getParam("args"))
425