Source code for gavo.web.caching

"""
A simple caching system for nevow pages.

The basic idea is to monkeypatch the request object in order to
snarf content and headers.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import time

from twisted.web import http
from twisted.web import resource

from gavo import base
from gavo import utils


# The content size (in bytes) up to which each per-RD page cache accepts
# new entries
DEFAULT_CACHE_LIMIT = 10000000


# A cache for RD-specific page caches.  Each of these maps segments
# (tuples) to a finished caching.CachedPage.  The argument is the id of the
# RD responsible for generating that data.  This ensures that pre-computed
# data is cleared when the RD is reloaded.
base.caches.makeCache("getPageCache", lambda rdId: PageCache())
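

# Illustrative sketch (not part of the original module, never called by it):
# how the per-RD cache registered above is obtained; the RD id "myres/q" is
# a made-up example.  Because the cache is managed by base.caches, it is
# dropped -- and hence re-created empty -- when the corresponding RD is
# reloaded.
def _examplePageCacheLookup():
	cache = base.caches.getPageCache("myres/q")
	return ("static", "index.html") in cache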


class PageCache:
	"""a cache for resources already served out.

	This is kept per-RD, and it is removed when an RD is removed.

	It is allowed to grow to a certain size determined at construction
	time; it will stop adding new items when it's full (this is under the
	assumption that something has gone wrong and something tries to cache
	things we shouldn't cache).

	When you add things here, make sure the key actually gives everything
	that might influence a response.  Below, we make sure we don't cache
	anything that has parameters or depends on the user.  Compression and
	content negotiation would have to be dealt with separately.

	The key actually used is determined in getFromRDCache -- and we should
	try hard to keep the key-making logic local to that function.
	"""
	def __init__(self, maxSize=DEFAULT_CACHE_LIMIT):
		self.maxSize, self.curSize = maxSize, 0
		self.cacheDict = {}
		self.get = self.cacheDict.get
	def add(self, key, cachedPage):
		"""enters content into the cache if it still fits.

		cachedPage is assumed to have a content attribute that will be
		taken as a proxy for its size.
		"""
		if self.curSize+len(cachedPage.content)>self.maxSize:
			return
		self.cacheDict[key] = cachedPage
		self.curSize += len(cachedPage.content)
	def __contains__(self, key):
		return key in self.cacheDict

	def __getitem__(self, key):
		return self.cacheDict[key]
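

# Illustrative sketch (not part of the original module, never called by it):
# direct use of PageCache.  _FakePage and the keys are made up; PageCache.add
# only relies on a content attribute, which it uses to account for cache size.
def _examplePageCacheAdd():
	class _FakePage:
		content = b"<html>cached body</html>"

	cache = PageCache(maxSize=100)
	cache.add(("q", "form"), _FakePage())
	assert ("q", "form") in cache

	# an entry that would push the cache beyond maxSize is silently dropped
	cache.add(("huge",), type("_BigPage", (), {"content": b"x"*1000})())
	assert ("huge",) not in cache
	return cache[("q", "form")].content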
def instrumentRequestForCaching(request, finishAction):
	"""changes request such that finishAction is called with the request
	and the content written for a successful page render.
	"""
	builder = CacheItemBuilder(finishAction)
	origWrite, origFinish = request.write, request.finish

	def write(content):
		builder.addContent(content)
		return origWrite(content)

	def finish():
		builder.finish(request)
		return origFinish()

	request.write = write
	request.finish = finish
class CacheItemBuilder(object):
	"""an aggregator for web pages as they are written.

	On successful page generation, a function is called with the request
	and the content written as arguments.
	"""
	def __init__(self, finishAction):
		self.finishAction = finishAction
		self.contentBuffer = []
	def addContent(self, data):
		self.contentBuffer.append(data)
	def finish(self, request):
		try:
			if request.code==200:
				self.finishAction(request, b"".join(self.contentBuffer))
		except Exception:
			base.ui.notifyError("Exception while building cache item.")
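

# Illustrative sketch (not part of the original module, never called by it):
# how instrumentRequestForCaching and CacheItemBuilder cooperate.  _MiniRequest
# is a hypothetical stand-in exposing just the attributes used here.
def _exampleInstrumentation():
	collected = []

	class _MiniRequest:
		code = 200
		def write(self, data):
			pass
		def finish(self):
			pass

	request = _MiniRequest()
	instrumentRequestForCaching(request,
		lambda req, content: collected.append(content))
	request.write(b"part one, ")
	request.write(b"part two")
	request.finish()
	# collected now holds the complete page: [b"part one, part two"]
	return collected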
class CachedPage(resource.Resource):
	"""A piece of cached content.

	This is built with the content to return, the headers, and a unix
	timestamp for the last modification time (as applicable).

	This is enough for it to work as a nevow resource, i.e., it has a
	render method.  For cache management, it has a lastUsed unix timestamp
	that is bumped on each render call, and a size attribute that gives
	the length of the content.
	"""
	def __init__(self, content, headers, lastModified):
		self.content = content
		self.size = len(content)
		self.creationStamp = time.time()
		headers.setRawHeaders("x-cache-creation", [str(self.creationStamp)])
		self.changeStamp = self.lastModified = lastModified
		if headers.hasHeader("last-modified"):
			headers.removeHeader("last-modified")
		self.headers = headers
		self.lastUsed = None
	def render(self, request):
		self.lastUsed = time.time()

		if self.lastModified:
			if request.setLastModified(self.lastModified)==http.CACHED:
				return b""

		for key, values in self.headers.getAllRawHeaders():
			request.responseHeaders.setRawHeaders(key, values)
		request.responseHeaders.setRawHeaders(
			'date', [utils.formatRFC2616Date()])

		return self.content
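

# Illustrative sketch (not part of the original module, never called by it):
# building a CachedPage by hand.  Twisted's Headers class provides the header
# interface the constructor expects (setRawHeaders, hasHeader, removeHeader).
def _exampleCachedPage():
	from twisted.web.http_headers import Headers

	headers = Headers()
	headers.setRawHeaders(b"content-type", [b"text/html;charset=utf-8"])
	headers.setRawHeaders(b"last-modified", [b"Thu, 01 Jan 1970 00:00:00 GMT"])

	page = CachedPage(b"<html>hello</html>", headers, time.time())
	# last-modified is dropped (render sets it through setLastModified
	# instead), and x-cache-creation records when the entry was built.
	assert not page.headers.hasHeader(b"last-modified")
	assert page.headers.hasHeader(b"x-cache-creation")
	return page.size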
def enterIntoCacheAs(destKey, destDict):
	"""returns a finishAction that enters a page into destDict under destKey.
	"""
	def finishAction(request, content):
		destDict.add(destKey, CachedPage(
			content, request.responseHeaders.copy(), request.lastModified))
	return finishAction
def getFromRDCache(request, rd, segments):
	"""returns a cached resource for segments on rd.

	This will also instrument the request to enter the result into the
	cache.  Hence, do not use this unless you are certain that the request
	is cacheable.  Otherwise, use getFromServiceCache, which contains some
	sanity checks.
	"""
	if request.method!=b"GET" or request.strargs or request.getUser():
		return None

	cache = base.caches.getPageCache(rd.sourceId)
	segments = tuple(segments)
	if segments in cache:
		return cache[segments]

	instrumentRequestForCaching(request,
		enterIntoCacheAs(segments, cache))
	return None
def getFromServiceCache(request, service, rendC, segments):
	"""returns a cached resource for a service request if applicable,
	None otherwise.

	Requests with arguments or user info are never cacheable; we don't
	look at content negotiation, though, so make sure whatever is
	content-negotiated isn't marked cacheable by the renderer class rendC.

	For cacheable requests for resources not in the cache, the function
	also instruments the request such that the rendered page is cached.

	Cacheable pages also cause request's lastModified to be set.
	"""
	if not rendC.isCacheable(segments, request):
		return None

	return getFromRDCache(request, service.rd, segments)
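

# Illustrative sketch (not part of the original module, never called by it):
# the calling pattern getFromServiceCache is written for.  renderPage stands
# for whatever fallback produces the page; service, rendC, and segments come
# from the caller's context.
def _exampleServeWithCache(request, service, rendC, segments, renderPage):
	cached = getFromServiceCache(request, service, rendC, segments)
	if cached is not None:
		# cache hit: replay the stored headers and content
		return cached.render(request)
	# cache miss: render normally; if the request was deemed cacheable, it
	# has been instrumented so the rendered page ends up in the cache.
	return renderPage(request)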