# Source code for gavo.web.ifpages

"""
Infrastructure pages.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import datetime
import io
import os
import time

import pkg_resources
from twisted.internet import reactor
from twisted.web import client
from twisted.web import http
from twisted.web import resource
from twisted.web import server
from twisted.web import static
from twisted.web import template
from twisted.web.template import tags as T

from gavo import base
from gavo import registry
from gavo import svcs
from gavo import utils
from gavo.base import meta
from gavo.formal import nevowc
from gavo.protocols import vocabularies
from gavo.utils import stanxml
from gavo.votable import V
from gavo.web import common
from gavo.web import caching
from gavo.web import grend
from gavo.web import weberrors

# prefer globally installed version to what we deliver
try:
	import rjsmin
except ImportError:
	from gavo.imp import rjsmin

class LoginPage(nevowc.TemplatedPage, grend.GavoRenderMixin):
	"""a page that logs people in or out.

	You should usually give a nextURL parameter in the context, the page
	the user is returned to after login.

	If the user is already authenticated, this will do a logout (by
	sending a 403).
	"""
	@template.renderer
	def nextURL(self, request, tag):
		"""sets tag's href to the nextURL request argument ("/" if missing).

		This reads request.strargs rather than request.args: render below
		addresses request.args with bytes keys, so looking up the str key
		"nextURL" there would never match.
		"""
		return tag(href=utils.getfirst(request.strargs, "nextURL", default="/"))

	@template.renderer
	def iflogged(self, request, tag):
		"""keeps tag only if request has a user.
		"""
		if request.getUser():
			return tag
		return ""

	@template.renderer
	def ifnotlogged(self, request, tag):
		"""keeps tag only if request has no user.
		"""
		if not request.getUser():
			return tag
		return ""

	def data_loggedUser(self, request, tag):
		"""returns whatever request.getUser() returns.
		"""
		return request.getUser()

	def render(self, request):
		"""always raises: either svcs.Authenticate or svcs.SeeOther.

		The decision depends on whether request has a user and on the
		boolean relog request argument (default False).
		"""
		relogging = base.parseBooleanLiteral(utils.getfirst(
			request.strargs, "relog", default="False"))
		loggedIn = bool(request.getUser())

		if loggedIn==bool(relogging):
			# Either the user is logged in and wants to log out, or the user
			# is not logged in and wants to log in: show the login dialog.
			raise svcs.Authenticate()

		# Either the user has just logged in, or was logged in and has
		# just logged out: forward to the destination.
		raise svcs.SeeOther(str(request.URLPath().click(
			request.args.get(b"nextURL", [b"/"])[0])))

	loader = svcs.loadSystemTemplate("loginout.html")
class TemplatedPage(grend.CustomTemplateMixin, grend.ServiceBasedPage):
	"""a "server-wide" template.

	For now, all of these are rendered on the service with the id root
	in the RD registry.SERVICELIST_ID.
	"""
	checkedRenderer = False

	def __init__(self, request, fName):
		"""sets up the page for the template fName.

		If fName is a str, the file's modification time is put into the
		_dataUpdated meta of self.metaCarrier.
		"""
		self.customTemplate = fName
		rootService = base.caches.getRD(registry.SERVICELIST_ID).getById("root")
		grend.ServiceBasedPage.__init__(self, request, rootService)

		self.metaCarrier = carrier = meta.MetaMixin()
		carrier.setMetaParent(self.service)
		if isinstance(fName, str):
			lastChange = datetime.datetime.fromtimestamp(
				os.path.getmtime(fName))
			carrier.setMeta("_dataUpdated", lastChange)
def minifyJS(request, path):
	"""returns the content of the javascript file path as bytes, minified
	unless [web] jsSource is set.

	Setting [web] jsSource to True turns off minification, which is
	sometimes convenient while debugging the javascript.

	request is not used here; it is only accepted so this function
	has the same signature as the other processors.
	"""
	with open(path, "rb") as f:
		rawSource = f.read()
		wantSource = base.getConfig("web", "jsSource")

	if wantSource:
		return rawSource
	return rjsmin.jsmin(rawSource.decode("utf-8")).encode("utf-8")
def expandTemplate(request, fName):
	"""returns a TemplatedPage for the template file fName.

	The page is not rendered here; that is left to whoever receives
	the returned resource.
	"""
	page = TemplatedPage(request, fName)
	return page
class StaticFile(static.File):
	"""a file from the file system, served pretty directly.

	Some of these static files can be javascript (that's usually minified
	on the fly) or nevow templates.  These we want to cache.  For everything
	else, we don't win anything by caching, but, when we serve large data,
	we can DoS ourselves.  So, we decide manually whether to cache.

	The caches are bound to an RD passed in as cacheRD at construction time.
	For system resources, that should be getRD(registry.SERVICELIST_ID).

	On-the-fly processing is selected by file extension through the
	processors mapping: .shtml files are expanded as templates, .js files
	are passed through minifyJS.
	"""
	defaultType = "application/octet-stream"
	isLeaf = True

	# maps file extensions to callables(request, path); these return either
	# something with a render method or the content to deliver.
	processors = {
		".shtml": expandTemplate,
		".js": minifyJS,
	}

	def __init__(self, fName, cacheRD, defaultType="text/html"):
		self.cacheRD = cacheRD
		# when a processor returns plain content, it is kept here and
		# served instead of the file's content.
		self.overriddenContent = None
		static.File.__init__(self, fName, defaultType=defaultType)

	def openForReading(self):
		"""returns a binary file-like object for what should be delivered.
		"""
		if self.overriddenContent is not None:
			return io.BytesIO(self.overriddenContent)
		return self.open()

	def getFileSize(self):
		"""returns the number of bytes that will be delivered.
		"""
		if self.overriddenContent is not None:
			return len(self.overriddenContent)
		return static.File.getFileSize(self)

	def render(self, request):
		"""delivers the file, running it through a processor if there is
		one for its extension.

		Things that are not plain files raise a ForbiddenURI.
		"""
		modStamp = max(self.cacheRD.loadedAt, os.path.getmtime(self.path))
		if request.setLastModified(modStamp) is http.CACHED:
			return b""

		if not os.path.isfile(self.path):
			raise svcs.ForbiddenURI("Only plain files are served here")

		_, ext = os.path.splitext(self.path)
		if ext in self.processors:
			fromCache = caching.getFromRDCache(
				request, self.cacheRD, self.path.split("/"))
			# only use cache entries made after the last modification
			if fromCache is not None and fromCache.creationStamp>modStamp:
				return fromCache.render(request)

			result = self.processors[ext](request, self.path)
			if hasattr(result, "render"):
				return result.render(request)
			self.overriddenContent = utils.bytify(result)

		return static.File.render(self, request)
class DirectoryLister(static.DirectoryLister):
	"""a directory listing.

	This only exists because some versions of twisted.web returned
	unicode strings from render; such results are utf-8 encoded here.
	"""
	def render(self, request):
		"""returns the parent's listing as bytes and declares it as
		utf-8 encoded HTML.
		"""
		listing = static.DirectoryLister.render(self, request)
		if isinstance(listing, str):  # pragma: no cover
			listing = listing.encode("utf-8")
		request.setHeader("content-type", "text/html;charset=utf-8")
		return listing
########### Begin Debian Fallbacks: If no local jquery and friends are
# present (as happens when the official package is used),
# several javascript libraries are taken from where Debian packages
# put them.  The URI path parts DaCHS sees them under are mapped to
# the absolute path names or resource instances in DEBIAN_FALLBACKS.
class GavoJquery(resource.Resource):
	"""synthesises a jquery-gavo.js from Debian packages.

	The script is concatenated from the items in PARTS: strings are
	file names relative to INSTALL_ROOT, things with a read method
	contain javascript glue code inline.  The result is computed on
	the first render and then kept for the lifetime of the instance.
	"""
	INSTALL_ROOT = "/usr/share/javascript/"
	PARTS = [
		"jquery/jquery.js",
		io.StringIO("""
(function( factory ) {
	if ( typeof define === "function" && define.amd ) {

		// AMD. Register as an anonymous module.
		define([ "jquery" ], factory );
	} else {

		// Browser globals
		factory( jQuery );
	}
}(function( $ ) {

$.ui = $.ui || {};

var version = $.ui.version = "1.12.1";
"""),
		"jquery-ui/ui/widget.js",
		"jquery-ui/ui/position.js",
		"jquery-ui/ui/data.js",
		"jquery-ui/ui/disable-selection.js",
		"jquery-ui/ui/focusable.js",
		"jquery-ui/ui/form-reset-mixin.js",
		"jquery-ui/ui/jquery-1-7.js",
		"jquery-ui/ui/keycode.js",
		"jquery-ui/ui/labels.js",
		"jquery-ui/ui/scroll-parent.js",
		"jquery-ui/ui/tabbable.js",
		"jquery-ui/ui/unique-id.js",
		"jquery-ui/ui/widgets/mouse.js",
		io.StringIO("""
var plugin = $.ui.plugin = {
	add: function( module, option, set ) {
		var i,
			proto = $.ui[ module ].prototype;
		for ( i in set ) {
			proto.plugins[ i ] = proto.plugins[ i ] || [];
			proto.plugins[ i ].push( [ option, set[ i ] ] );
		}
	},
	call: function( instance, name, args, allowDisconnected ) {
		var i,
			set = instance.plugins[ name ];

		if ( !set ) {
			return;
		}

		if ( !allowDisconnected && ( !instance.element[ 0 ].parentNode ||
				instance.element[ 0 ].parentNode.nodeType === 11 ) ) {
			return;
		}

		for ( i = 0; i < set.length; i++ ) {
			if ( instance.options[ set[ i ][ 0 ] ] ) {
				set[ i ][ 1 ].apply( instance.element, args );
			}
		}
	}
};
"""),
		"jquery-ui/ui/widgets/draggable.js",
		"jquery-ui/ui/widgets/resizable.js",
		"jquery-ui/ui/safe-active-element.js",
		"jquery-ui/ui/safe-blur.js",
		io.StringIO("}));"),
	]

	def __init__(self):
		self.content = None
		resource.Resource.__init__(self)

	def _getContent(self):
		"""returns the assembled javascript as a string.

		The result is minified unless [web] jsSource is set.  As a side
		effect, this sets self.modStamp to the current time.
		"""
		parts = []
		for part in self.PARTS:
			if hasattr(part, "read"):
				# The inline parts are shared on the class; rewind them so
				# each call sees their full content rather than an empty
				# string after the first read.
				part.seek(0)
				parts.append(part.read())
			else:
				try:
					with open(os.path.join(self.INSTALL_ROOT, part),
							"r", encoding="utf-8") as f:
						parts.append(f.read())
				except IOError:
					# We don't worry about missing modules here; this lets
					# us accept multiple versions of the Debian packages
					# of jquery and jquery-ui.
					pass

		stuff = "\n".join(parts)
		self.modStamp = time.time()
		if base.getConfig("web", "jsSource"):
			return stuff
		else:
			return rjsmin.jsmin(stuff)

	def render(self, request):
		"""delivers the (lazily built) script as utf-8 encoded javascript.
		"""
		if self.content is None:
			self.content = self._getContent()
		if request.setLastModified(self.modStamp) is http.CACHED:
			return b''
		request.setHeader("content-type", "text/javascript;charset=utf-8")
		return utils.bytify(self.content)
# Maps URI paths relative to the static root to what is served for them
# when neither the user nor the system static directory has the file:
# str values are absolute file system paths, anything else is used as a
# resource directly.
DEBIAN_FALLBACKS = {
	'js/jquery-gavo.js': GavoJquery(),
	'js/jquery.flot.js': "/usr/share/javascript/jquery-flot/jquery.flot.js",
}
########### End Debian Fallbacks
class StaticServer(resource.Resource):
	"""is a server for various static files.

	This is basically like static.File, except

	- we don't do directory listings
	- we don't bother with ranges
	- we look for each file in a user area and then in the system area.

	If a file is in neither area, DEBIAN_FALLBACKS is consulted.
	"""
	def __init__(self):
		resource.Resource.__init__(self)
		self.userPath = utils.ensureOneSlash(
			os.path.abspath(os.path.join(base.getConfig("webDir"), "nv_static")))
		self.systemPath = utils.ensureOneSlash(
			os.path.abspath(pkg_resources.resource_filename('gavo',
				"resources/web")))

	def render(self, request):
		"""always raises an UnknownURI: the static root itself has no
		representation.
		"""
		raise svcs.UnknownURI("What did you expect here?")

	def getChild(self, name, request):
		"""returns a resource for the file addressed by the remaining
		path segments.

		This raises a ForbiddenURI if an existing file resolves to a
		location outside of the static directory being tried, and an
		UnknownURI if nothing matches.
		"""
		relPath = "/".join(request.popSegments(name))
		associatedRD = base.caches.getRD(registry.SERVICELIST_ID)

		for basePath in [self.userPath, self.systemPath]:
			# abspath normalises any ".." segments, so the prefix test
			# below catches attempts to leave basePath.
			path = os.path.abspath(basePath+relPath)
			if os.path.exists(path):
				if not path.startswith(basePath):
					raise svcs.ForbiddenURI(
						"%s is not located in a published static directory"%path)
				return common.compwrap(StaticFile(path, associatedRD))

		# (A second existence check on the system path used to follow here;
		# it could never succeed, since the loop above already returns or
		# raises for an existing system file.)

		if relPath in DEBIAN_FALLBACKS:
			res = DEBIAN_FALLBACKS[relPath]
			if isinstance(res, str):
				return common.compwrap(StaticFile(res, associatedRD))
			else:
				return common.compwrap(res)

		raise svcs.UnknownURI("No matching file,"
			" neither built-in nor user-provided")
class SchemaServer(nevowc.TemplatedPage, common.CommonRenderers):
	"""a resource returning our built-in schema files.

	This exposes stanxml's NSRegistry, i.e., everything that's been
	introduced to DaCHS using registryPrefix at any point.  Children
	are resolved both by prefix and by the last segment of a schema
	location.

	This needs to look a bit deeper into stanxml.NSRegistry.  As long as it's
	not operationally relevant, I'll accept using underscored names here.

	TODO: We actually have essentially all of these schema locally
	(except we don't necessarily know their canonical prefixes), but the
	NSRegistry so far only records locations on alnilam.  We should
	probably return them from the local server.
	"""
	def data_schemadata(self, request, tag):
		"""returns a sorted list of the items of NSRegistry._schemaLocations
		in which both key and value are non-empty.
		"""
		return sorted(
			(key, loc)
			for key, loc in stanxml.NSRegistry._schemaLocations.items()
			if key and loc)

	def getChild(self, name, request):
		"""redirects (svcs.Found) to the schema location for name.

		name is tried as a prefix first and then as the last path
		segment of a known schema location; if neither works, an
		UnknownURI is raised.
		"""
		name = utils.debytify(name)
		try:
			_, prefixLoc = stanxml.NSRegistry.getPrefixInfo(name)
		except KeyError:
			# it's not a prefix, try to get a schema for it
			for loc in stanxml.NSRegistry._schemaLocations.values():
				if loc and loc.split("/")[-1]==name:
					raise svcs.Found(loc)
		else:
			raise svcs.Found(prefixLoc)

		raise svcs.UnknownURI(f"'{name}' is neither a prefix nor a"
			" schema name known here")

	loader = common.doctypedStan(
		T.html[
			T.head(render="commonhead")[
				T.title["Schema files known here"]],
			T.body() [
				T.h1["XML Schema Namespaces and Schemata known to DaCHS"],
				nevowc.addNevowAttributes(
					T.ul(render="sequence"), data="schemadata") [
					nevowc.addNevowAttributes(T.li, pattern="item")[
						T.a(render="schemalink")]]]])
class RobotsTxt(resource.Resource):
	"""A page combining some built-in robots.txt material with
	the robots.txt in webDir if that can be read.
	"""
	# NOTE(review): twisted.web resources are marked as leaves through
	# an attribute called isLeaf; this spelling presumably has no
	# effect -- confirm what is intended before changing it.
	is_leaf = True

	builtin = utils.fixIndentation("""
		Disallow: /login
		Disallow: /seffe
		""", "")

	def _getContent(self):
		"""returns the built-in rules, followed by the content of
		webDir/robots.txt if that file can be read.
		"""
		try:
			with open(os.path.join(base.getConfig("webDir"), "robots.txt"),
					encoding="utf-8") as f:
				return self.builtin+"\n"+f.read()
		except IOError:
			# no (readable) local additions: built-in rules only
			return self.builtin

	def render(self, request):
		"""delivers the combined rules as utf-8 encoded plain text.
		"""
		request.setHeader("content-type", "text/plain; charset=utf-8")
		payload = self._getContent()
		return utils.bytify(payload)
class ServiceUnavailable(resource.Resource):
	"""A page to be rendered in emergencies.

	Essentially, this is a 503 with a text taken from stateDir/MAINT.

	Root checks for the presence of that file before returning this page.
	If the file has gone or cannot be read or decoded by the time this
	renders, a fixed fallback text is delivered instead.
	"""
	def render(self, request):
		"""returns a VOTable error document containing the maintenance text.
		"""
		request.setResponseCode(503)
		request.setHeader("retry-after", "3600")

		maintPath = os.path.join(base.getConfig("stateDir"), "MAINT")
		try:
			# open() is inside the try block, too, so unreadable or
			# vanished files end up with the fallback text rather than
			# with a failure of the emergency page itself.
			with open(maintPath, "r", encoding="utf-8") as f:
				maintText = f.read()
		except Exception:
			maintText = "Botched MAINT file (utf-8 encoded? readable?)"

		vot = V.VOTABLE[
			V.RESOURCE(type="results")[
				V.INFO(name="QUERY_STATUS", value="ERROR")[
					maintText]
			]]
		request.setHeader("content-type", "text/xml")
		return (b"<?xml-stylesheet href='/static"
			b"/xsl/mainterror-to-html.xsl' type='text/xsl'?>"
			+vot.render())
class ACMEChallenge(resource.Resource):
	"""A resource for letsencrypt and friends.

	This serves plain files from the acme-challenge subdirectory
	of statedir and nothing else.
	"""
	acmeChallengeDir = os.path.join(base.getConfig("statedir"),
		"acme-challenge")

	def render(self, request):
		# nobody has a reason to request a directory here
		raise svcs.ForbiddenURI("No listing of challenges")

	def getChild(self, name, request):
		"""returns a resource for the challenge file name.

		name comes straight from the request URL.  To keep requests from
		leaving acmeChallengeDir, only names without directory parts
		are accepted, and only regular files are served.  Everything
		else raises an UnknownURI.
		"""
		name = utils.debytify(name)

		# A segment may be empty, "..", or (through percent-encoded slashes)
		# even an absolute path; none of that can name a challenge.
		if (not name
				or name in (".", "..")
				or os.path.basename(name)!=name):
			raise svcs.UnknownURI("No such challenge known here")

		fullPath = os.path.join(self.acmeChallengeDir, name)
		if os.path.isfile(fullPath):
			return static.File(fullPath)
		raise svcs.UnknownURI("No such challenge known here")
class WellKnown(resource.Resource):
	"""A handler for the .well-known hierarchy.

	The only child available at this point is acme-challenge.
	"""
	def getChild(self, name, request):
		"""returns an ACMEChallenge for acme-challenge and raises
		an UnknownURI for everything else.
		"""
		if name!=b"acme-challenge":
			raise svcs.UnknownURI("Only ACME supported in .well-known")
		return ACMEChallenge()
class CurReaders(resource.Resource):
	"""A page returning an approximate number of clients served currently.
	"""
	def render(self, request):
		"""returns, as a line of plain text, the number of reactor readers
		having a getHost method and a host different from 0.0.0.0.
		"""
		request.setHeader("content-type", "text/plain")
		# TODO: the following must be amended for IPv6
		hostsActive = sum(1
			for reader in reactor.getReaders()
			if hasattr(reader, "getHost")
				and reader.getHost().host!='0.0.0.0')
		return utils.bytify(f"{hostsActive}\n")
class Teapot(resource.Resource):
	"""A resource answering with a 418 status, whatever is requested
	below it.
	"""
	# I simply wanted to have something in DaCHS that returns a 418.

	def render(self, request):
		"""returns the tea pot's message with status 418.
		"""
		request.setResponseCode(418)
		request.setHeader("content-type", "text/plain")
		return b"I'm a tea pot. Of course.\r\n"

	def getChild(self, name, request):
		"""returns the tea pot itself for any child.
		"""
		return self
class _ThirdPartyCacher(resource.Resource):
	"""A page used by ThirdPartyCachePage in case of cache misses.

	It is constructed with the key to ThirdPartyCachePage.urlMapping
	and the parent third party cache.
	"""
	def __init__(self, keyToGet, parent):
		# without the parent's constructor, the resource has no children
		# mapping, and child lookups on it fail with an AttributeError.
		resource.Resource.__init__(self)
		self.keyToGet, self.parent = keyToGet, parent

	def render(self, request):
		"""starts fetching the remote resource; the response is written
		to request from the callbacks.
		"""
		retriever = client.Agent(reactor)
		d = retriever.request(b"GET", self.parent.urlMapping[self.keyToGet])
		d.addCallback(self.startReadingRemote, request)
		d.addErrback(self.stopAndForget, request)
		return server.NOT_DONE_YET

	def startReadingRemote(self, response, request):
		"""arranges for the agent response to be cached and then served
		to request.

		Responses with a status other than 200 raise an UnknownURI (which
		ends up in stopAndForget) rather than being written to the cache,
		where an upstream error document would otherwise stay around
		and be served in place of the resource.
		"""
		if response.code!=200:
			raise svcs.UnknownURI("Upstream server returned status"
				f" {response.code} for {self.keyToGet!r}")

		reading = client.readBody(response)
		reading.addErrback(self.acceptPartialDownloads, request)
		reading.addCallback(self.saveAndServe, request)
		reading.addErrback(self.stopAndForget, request)
		return server.NOT_DONE_YET

	def acceptPartialDownloads(self, failure, request):
		"""An errback that lets us accept twisted's PartialDownloadError
		as a successful download.

		This needs to be added before saveAndServe, as its return value
		is the data to be saved.
		"""
		failure.trap(client.PartialDownloadError)
		if failure.value.status==b'200':
			return failure.value.response
		return failure

	def saveAndServe(self, data, request):
		"""writes data to the cache for the current document and then
		serves that cache.
		"""
		with open(self.parent.getCacheName(self.keyToGet), "wb") as f:
			f.write(data)
		return self.serveFromCache(self.keyToGet, request)

	def stopAndForget(self, failure, request):
		"""renders an error page for failure to request.
		"""
		return weberrors.renderDCErrorPage(failure, request)

	def serveFromCache(self, name, request):
		"""writes the cached resource name to the request and finishes it.

		If name isn't cached yet, this will raise a KeyError.
		"""
		res = self.parent.getFromCache(name)
		return res.render(request)
class ThirdPartyCachePage(resource.Resource):
	"""A page returning third-party resources I don't want to distribute
	with DaCHS for a variety of reasons.

	I still want them to come from the service itself, if only
	to keep the number of third-party requests down.

	These things are retrieved and cached on first access.  They are
	removed on upgrades.
	"""
	# NOTE: this maps bytes to bytes!
	urlMapping = {
		b"aladin.min.js":
			b"https://aladin.u-strasbg.fr/AladinLite/api/v2/latest/aladin.min.js",
		b"aladin.min.css":
			b"https://aladin.u-strasbg.fr/AladinLite/api/v2/latest/aladin.min.css",
	}
	cacheDir = os.path.join(base.getConfig("cacheDir"), "3rdparty")

	def render(self, request):
		"""always raises an UnknownURI; only the children have content.
		"""
		raise svcs.UnknownURI("No representation here")

	def getCacheName(self, name):
		"""returns the name of the file the resource name would be cached
		under.
		"""
		localName = utils.debytify(name)
		return os.path.join(self.cacheDir, localName)

	def getFromCache(self, name):
		"""returns a resource serving name if it is already in the cache,
		raises a KeyError if not.

		On a miss, the cache directory is created if necessary.
		"""
		cachedPath = self.getCacheName(name)
		if not os.path.exists(cachedPath):
			os.makedirs(self.cacheDir, exist_ok=True)
			raise KeyError(name)
		return static.File(cachedPath)

	def getChild(self, name, request):
		"""returns a resource delivering name from the cache or, if it is
		not cached yet, one that retrieves it first.

		Names not in urlMapping raise an UnknownURI.
		"""
		if name not in self.urlMapping:
			raise svcs.UnknownURI(f"No cache rule for {name}")

		try:
			cached = self.getFromCache(name)
		except KeyError:
			# We'll have to retrieve the stuff ourselves.  That'll be done
			# from the cacher's render method.
			return _ThirdPartyCacher(name, self)
		return cached
############### resources for the javascript-enabled root page
# These once had a module of their own, which I didn't like.
# Really, we'd like them in some RD, as they're really linked to
# *some* resource.  Aw.  Never mind.

# these are the fields necessary for formatting resource headers;
# the string is spliced into the select lists of the queries below.
RESMETA_FIELDS = (
	"title, accessurl, referenceurl, sourcerd, resid, owner, browseable")
class Titles(common.JSONQuery):
	"""a query for the header metadata of all non-deleted resources in
	the local set, ordered by title.
	"""
	query = (
		"SELECT "+RESMETA_FIELDS
		+" FROM dc.resources"
		" NATURAL JOIN dc.interfaces"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND NOT deleted"
		" ORDER BY title")
class Subjects(common.JSONQuery):
	"""a query for the subjects used in the local set, with the number
	of matches for each.

	Each result record additionally gets a subject_label taken from
	the uat vocabulary.
	"""
	query = (
		"SELECT subject, count(*) as numMatch"
		" FROM dc.subjects"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND NOT deleted"
		" GROUP BY subject"
		" ORDER BY subject")

	def doQuery(self, queryArgs):
		"""returns the parent's query result with subject_label added to
		each record.
		"""
		records = common.JSONQuery.doQuery(self, queryArgs)
		uat = vocabularies.get_vocabulary("uat")
		for record in records:
			label = vocabularies.get_label(uat, record["subject"])
			record["subject_label"] = label
		return records
class Authors(common.JSONQuery):
	"""a query for the authors occurring in the local set, with the number
	of matches for each, ordered by author.
	"""
	query = (
		"SELECT author, count(*) as numMatch"
		" FROM dc.authors"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND NOT deleted"
		" GROUP BY author"
		" ORDER BY author")
class ByFulltext(common.JSONQuery):
	"""a query for the header metadata of local, non-deleted resources
	matching the parameter q in a full text search over description,
	subject, title, and authors.
	"""
	query = (
		"SELECT DISTINCT "+RESMETA_FIELDS
		+" FROM dc.resources"
		" NATURAL JOIN dc.interfaces"
		" NATURAL JOIN dc.subjects"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND NOT deleted"
		" AND (to_tsvector('english', description) || to_tsvector(subject) "
		" || to_tsvector('english', title) || to_tsvector(authors))"
		" @@ plainto_tsquery(%(q)s)"
		" ORDER BY title")
class BySubject(common.JSONQuery):
	"""a query for the header metadata of local, non-deleted resources
	having the subject given in the parameter subject.
	"""
	query = (
		"SELECT "+RESMETA_FIELDS
		+" FROM dc.resources"
		" NATURAL JOIN dc.interfaces"
		" NATURAL JOIN dc.subjects"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND subject=%(subject)s"
		" AND NOT deleted"
		" ORDER BY title")
class ByAuthor(common.JSONQuery):
	"""a query for the header metadata of local, non-deleted resources
	having the author given in the parameter author.
	"""
	query = (
		"SELECT "+RESMETA_FIELDS
		+" FROM dc.resources"
		" NATURAL JOIN dc.interfaces"
		" NATURAL JOIN dc.authors"
		" NATURAL JOIN dc.sets"
		" WHERE setname='local'"
		" AND author=%(author)s"
		" AND NOT deleted"
		" ORDER BY title")
class ServiceInfo(common.JSONQuery):
	"""a query for details on the resource identified by the parameters
	resId and sourceRD.

	The description of each result record is converted from
	ReStructuredText to HTML where that works.
	"""
	query = (
		"SELECT title, description, authors,"
		" case when dateUpdated is null then 'N/A'"
		" else to_char(dateUpdated, 'YYYY-MM-DD') end as lastupdate,"
		" referenceURL, accessURL"
		" FROM dc.interfaces"
		" NATURAL JOIN dc.sets"
		" RIGHT OUTER JOIN dc.resources USING (sourcerd, resid)"
		" WHERE setname='local'"
		" AND resId=%(resId)s and sourceRd=%(sourceRD)s")

	def doQuery(self, queryArgs):
		"""returns the parent's query result with the descriptions turned
		into HTML.
		"""
		records = super().doQuery(queryArgs)
		for record in records:
			try:
				rawDescription = (record["description"] or "").strip()
				record["description"] = utils.rstxToHTML(rawDescription)
			except Exception:
				# well, perhaps it's no RST.  Whatever it is, don't fail
				# but show what you have.
				pass
		return records
# The resource the JSON queries for the root page are children of.
ROOT_PAGE_HELPERS = resource.Resource()
ROOT_PAGE_HELPERS.putChild(b"titles", Titles())
ROOT_PAGE_HELPERS.putChild(b"subjects", Subjects())
ROOT_PAGE_HELPERS.putChild(b"authors", Authors())
ROOT_PAGE_HELPERS.putChild(b"bySubject", BySubject())
ROOT_PAGE_HELPERS.putChild(b"byAuthor", ByAuthor())
ROOT_PAGE_HELPERS.putChild(b"byFulltext", ByFulltext())
ROOT_PAGE_HELPERS.putChild(b"serviceInfo", ServiceInfo())


if __name__=="__main__":
	# you can call this module to update jquery-gavo.js and jquery.flot.js
	# as is distributed in the tarball to what Debian gives upstream.
	# See develNotes.rstx in our docs.
	jsPrefix = "js/"
	for uriPath, source in DEBIAN_FALLBACKS.items():
		if uriPath.startswith(jsPrefix):
			if isinstance(source, str):
				# a file system path: copy the file as it is
				with open(source, "rb") as src:
					payload = src.read()
			else:
				# a synthesising resource: have it build unminified content
				base.setConfig("web", "jsSource", "True")
				payload = source._getContent().encode("utf-8")

			# the result goes to the current directory, sans the js/ prefix
			with open(uriPath[len(jsPrefix):], "wb") as dest:
				dest.write(payload)