1 """
2 Products and a core turning accrefs into lists of products.
3
4 There is a substantial overlap between what's going on here and datalink
5 (and datalink uses some of the products mentioned here). The cutouts
6 and scale things here shouldn't be developed on, all this should
7 move towards datalink. Meanwhile, we still have siapCutoutCore and
8 friends that rely on the mess here, so all this is going to remain
9 for the foreseeable future. Just don't extend it.
10
11 The "user-visible" part are just accrefs, as modelled by the RAccref
12 -- they can contain instructions for cutouts or scaling, hence the additional
13 structure.
14
15 Using the product table and some logic in this module, such accrefs
16 are turned into subclasses of ProductBase.
17
18 These have mime types and know how to generate their data through their
19 synchronous iterData methods. They must also work as nevow resources and thus
20 have to implement asynchronous renderHTTP(ctx) methods. It's a bit unfortunate
21 that we thus depend on nevow here, but we'd have to reimplement quite a bit of
22 it if we don't, and for now it doesn't seem we'll support a different framework
23 in the foreseeable future.
24 """
25
26
27
28
29
30
31
32 import datetime
33 import gzip
34 import functools
35 import re
36 import os
37 import urllib
38 import urlparse
39 from io import BytesIO
40
41 import numpy
42
43 from PIL import Image
44
45 from nevow import inevow
46 from nevow import static
47 from twisted.internet import defer
48 from twisted.internet import threads
49 from zope.interface import implements
50
51 from gavo import base
52 from gavo import rscdef
53 from gavo import svcs
54 from gavo import utils
55 from gavo.base import coords
56 from gavo.protocols import creds
57 from gavo.svcs import streaming
58 from gavo.utils import imgtools
59 from gavo.utils import fitstools
60 from gavo.utils import pyfits
61
62
63
# Target size in pixels for generated previews (passed as destSize when
# scaling FITS rows, and used as the length of the longer edge when
# scaling with PIL).
PREVIEW_SIZE = 200

# Cross-id of the products table definition; RAccref.productsRow resolves
# it through base.resolveCrossId to query the products table.
PRODUCTS_TDID = "//products#products"

# Pattern for the scheme prefix of http, https, and ftp URLs.
REMOTE_URL_PATTERN = re.compile("(https?|ftp)://")

# Shorthand for base.makeStruct.
MS = base.makeStruct
74 """returns image/jpeg bytes for a preview of a product spitting out a
75 2D FITS.
76 """
77 if hasattr(product, "getFile"):
78
79 if product.rAccref.accref.endswith(".gz"):
80 inFile = gzip.GzipFile(fileobj=product.getFile())
81 else:
82 inFile = product.getFile()
83
84 with utils.fitsLock():
85 pixels = numpy.array([row
86 for row in fitstools.iterScaledRows(inFile,
87 destSize=PREVIEW_SIZE)])
88 else:
89 raise NotImplementedError("TODO: Fix fitstools.iterScaledRows"
90 " to be more accomodating to weird things")
91 return imgtools.jpegFromNumpyArray(pixels)
92
95 """returns image/jpeg bytes for a preview of the PIL-readable product.
96 """
97
98
99 fullsize = BytesIO(product.read(1000000000))
100 im = Image.open(fullsize)
101 scale = max(im.size)/float(PREVIEW_SIZE)
102 resized = im.resize((
103 int(im.size[0]/scale),
104 int(im.size[1]/scale)))
105 f = BytesIO()
106 resized.save(f, format="jpeg")
107 return f.getvalue()
108
109
110 _PIL_COMPATIBLE_MIMES = frozenset(['image/jpeg', 'image/png'])
113 """returns image/jpeg bytes containing a preview of product.
114
115 This only works for a select subset of products. You're usually
116 better off using static previews.
117 """
118 if hasattr(product, "makePreview"):
119 return product.makePreview()
120
121 sourceMime = product.pr["mime"]
122 if sourceMime=='image/fits':
123 return makePreviewFromFITS(product)
124 elif sourceMime in _PIL_COMPATIBLE_MIMES:
125 return makePreviewWithPIL(product)
126 else:
127 raise base.DataError("Cannot make automatic preview for %s"%
128 sourceMime)
129
132 """is a class that manages the preview cache.
133
134 It's really the class that manages it, so don't bother creating instances.
135
136 The normal operation is that you pass the product you want a preview to
137 getPreviewFor. If a cached preview already exists, you get back its content
138 (the mime type must be taken from the products table).
139
140 If the file does not exist yet, some internal magic tries to come up with
141 a preview and determines whether it should be cached, in which case it does
142 so provided a preview has been generated successfully.
143
144 A cache file is touched when it is used, so you can clean up rarely used
145 cache files by deleting all files in the preview cache older than some
146 limit.
147 """
148 cachePath = base.getConfig("web", "previewCache")
149
150 @classmethod
152 """returns the full path a preview for accref is stored under.
153 """
154 return os.path.join(cls.cachePath, rscdef.getFlatName(accref))
155
156 @classmethod
158 """returns the path to a cached preview if it exists, None otherwise.
159 """
160 cacheName = cls.getCacheName(accref)
161 if os.path.exists(cacheName):
162 return cacheName
163 return None
164
165 @classmethod
167 try:
168 with open(cacheName, "w") as f:
169 f.write(data)
170 except IOError:
171 pass
172 return data
173
174 @classmethod
176 """returns a deferred firing the data for a preview.
177 """
178 if not product.rAccref.previewIsCacheable():
179 return threads.deferToThread(computePreviewFor, product)
180
181 accref = product.rAccref.accref
182 cacheName = cls.getCacheName(accref)
183 if os.path.exists(cacheName):
184
185 try:
186 os.utime(cacheName, None)
187 except os.error:
188 pass
189
190 with open(cacheName) as f:
191 return defer.succeed(f.read())
192
193 else:
194
195 return threads.deferToThread(computePreviewFor, product
196 ).addCallback(cls.saveToCache, cacheName)
197
200 """A base class for products returned by the product core.
201
202 See the module docstring for the big picture.
203
204 The constructor arguments of RAccrefs depend on what they are.
205 The common interface is the class method
206 fromRAccref(rAccref, authGroups=None).
207 It returns None if the RAccref is not for a product of the
208 respective sort, the product otherwise.
209
210 authGroups is a set of groups authorised for the user when
211 controlling access to embargoed products. This is the main
212 reason you should never hand out products yourself but always
213 expose them to the user through the product core.
214
215 The actual constructor requires a RAccref, which is exposed as the
216 rAccref attribute. Do not use the productsRow attribute from rAccref,
217 though, as constructors may want to manipulate the content of the
218 product row (e.g., in NonExistingProduct). Access the product
219 row as self.pr in product classes.
220
221 In addition to those, all Products have a name attribute,
222 which must be something suitable as a file name; the default
223 constructor calls a _makeName method to come up with one, and
224 you should simply override it.
225
226 The iterData method has to yield reasonably-sized chunks of
227 data (self.chunkSize should be a good choice). It must be
228 synchronous.
229
230 Products usually are used as nevow resources. Therefore, they
231 must have a renderHTTP method. This must be asynchronous,
232 i.e., it should not block for extended periods of time.
233
234 Products also work as rudimentary files via read and close
235 methods; by default, these are implemented on top of iterData.
236 Clients must never mix calls to the file interface and to
237 iterData. Derived classes that are based on actual files should
238 set up optimized read and close methods using the setupRealFile
239 class method (look for the getFile method on the instance to see
240 if there's a real file). Again, the assumption is made there that clients
241 use either iterData or read, but never both.
242
243 If a product knows how to (quickly) generate a preview for itself,
244 it can define a makePreview() method. This must return content
245 for a mime type conventional for that kind of product (which is laid
246 down in the products table).
247 """
248 implements(inevow.IResource)
249
250 chunkSize = 2**16
251 _curIterator = None
252
254
255
256 self.rAccref = rAccref
257 self.pr = self.rAccref.productsRow
258 self._makeName()
259
261 self.name = "invalid product"
262
264 return "<%s %s (%s)>"%(self.__class__.__name__,
265 self.name,
266 self.pr["mime"])
267
270
272 return (isinstance(other, self.__class__)
273 and self.rAccref==other.rAccref)
274
276 return not self==other
277
278 @classmethod
281
282 @classmethod
"""changes cls such that read and close work on an actual file-like
object rather than the inefficient iterData.

openMethod has to be an instance method of the class returning
an opened input file.

This installs three methods on cls: getFile (returns a freshly opened
file), read (reads from a lazily opened file kept on the instance),
and close (closes that file again if it was opened).
"""
cls._openedInputFile = None

def getFileMethod(self):
    return openMethod(self)

def readMethod(self, size=None):
    if self._openedInputFile is None:
        self._openedInputFile = openMethod(self)
    if size is None:
        # not all file objects take None for "everything" (Python 2's
        # built-in file raises a TypeError), so leave the argument out
        return self._openedInputFile.read()
    return self._openedInputFile.read(size)

def closeMethod(self):
    if self._openedInputFile is not None:
        self._openedInputFile.close()
        self._openedInputFile = None

cls.read = readMethod
cls.close = closeMethod
cls.getFile = getFileMethod
308
312
316
317 - def read(self, size=None):
341
346
349 """A product corresponding to a local file.
350
351 As long as the accessPath in the RAccref's productsRow corresponds
352 to a real file and no params are in the RAccref, this will return
353 a product.
354 """
355 forSaving = True
356
357 @classmethod
359 if set(rAccref.params)-set(["preview"]):
360 return None
361 if os.path.exists(rAccref.localpath):
362 return cls(rAccref)
363
366
368
369
370 return open(self.rAccref.localpath, "rb")
371
378
380 request = inevow.IRequest(ctx)
381 if self.forSaving:
382 request.setHeader("content-disposition", 'attachment; filename="%s"'%
383 str(self.name))
384 request.setLastModified(os.path.getmtime(self.rAccref.localpath))
385 res = static.File(self.rAccref.localpath)
386
387
388
389 res.type = str(self.pr["mime"])
390 res.encoding = None
391 return res
392
393 FileProduct.setupRealFile(FileProduct._openUnderlyingFile)
397 """A product that's a cached or pre-computed preview.
398 """
399 forSaving = False
400
401 @classmethod
403 if not rAccref.params.get("preview"):
404 return None
405
406 if rAccref.params.get("sra"):
407 return None
408
409 previewPath = rAccref.productsRow["preview"]
410 localName = None
411
412 if previewPath is None:
413 return None
414
415 elif previewPath=="AUTO":
416 localName = PreviewCacheManager.getCachedPreviewPath(rAccref.accref)
417
418 else:
419
420
421 localName = os.path.join(base.getConfig("inputsDir"), previewPath)
422
423 if localName is None:
424 return None
425 elif os.path.exists(localName):
426 rAccref.productsRow["accessPath"] = localName
427 rAccref.productsRow["mime"] = rAccref.productsRow["preview_mime"
428 ] or "image/jpeg"
429 return cls(rAccref)
430
433 """A class for products at remote sites, given by their URL.
434 """
436 self.name = urlparse.urlparse(self.pr["accessPath"]
437 ).path.split("/")[-1] or "file"
438
440 return "<Remote %s at %s>"%(self.pr["mime"], self.pr["accessPath"])
441
442 @classmethod
446
448 f = urllib.urlopen(self.pr["accessPath"])
449 while True:
450 data = f.read(self.chunkSize)
451 if data=="":
452 break
453 yield data
454
457
460 """A preview that's on a remote server.
461 """
462 @classmethod
474
477 """A local file that is not delivered to the current client.
478
479 iterData returns the data for the benefit of preview making.
480 However, there is a renderHTTP method, so the product renderer will
481 not use it; it will, instead, raise an Authenticate exception.
482 """
483 forSaving = False
484
485 @classmethod
487 dbRow = rAccref.productsRow
488 if (dbRow["embargo"] is None
489 or dbRow["embargo"]<datetime.date.today()):
490 return None
491 if authGroups is None or dbRow["owner"] not in authGroups:
492 return cls(rAccref)
493
495 return "<Protected product %s, access denied>"%self.name
496
498 return self.__class__==other.__class__
499
502
505 """A local file that went away.
506
507 iterData here raises an IOError, renderHTTP an UnknownURI.
508
509 These should normally yield 404s.
510
511 We don't immediately raise some error here as archive generation
512 shouldn't fail just because a single part of it is missing.
513 """
515
516 self.rAccref = rAccref
517 self.pr = {
518 'accessPath': None, 'accref': None,
519 'embargo': None, 'owner': None,
520 'mime': 'text/html', 'sourceTable': None,
521 'datalink': None, 'preview': None}
522
524 return "<Non-existing product %s>"%self.rAccref.accref
525
527 return self.__class__==other.__class__
528
529 @classmethod
535
537 self.name = "missing.html"
538
540 raise IOError("%s does not exist"%self.rAccref.accref)
541
544
547 """An invalid file.
548
549 This is returned by getProductForRAccref if all else fails. This
550 usually happens when a file known to the products table is deleted,
551 but it could also be an attempt to use unsupported combinations
552 of files and parameters.
553
554 Since any situation leading here is a bit weird, we probably
555 should be doing something else but just return a 404. Hm...
556
557 This class always returns an instance from fromRAccref; this means
558 any resolution chain ends with it. But it shouldn't be in
559 PRODUCT_CLASSES in the first place since the fallback is
560 hardcoded into getProductForRAccref.
561 """
563 return "<Invalid product %s>"%self.rAccref
564
565 @classmethod
568
570 self.name = "invalid.html"
571
573 raise IOError("%s is invalid"%self.rAccref)
574
577 """A class representing cutouts from FITS files.
578
579 This only works for local FITS files with two axes. For everything
580 else, use datalink.
581
582 We assume the cutouts are smallish -- they are, right now, not
583 streamed, but accumulated in memory.
584 """
# Product names must be usable as file names, so there is no leading
# angle bracket here (__str__ adds its own decoration around the name).
self.name = "cutout-"+os.path.basename(self.pr["accessPath"])
587
589 return "<cutout-%s %s>"%(self.name, self.rAccref.params)
590
591 _myKeys = ["ra", "dec", "sra", "sdec"]
592 _myKeySet = frozenset(_myKeys)
593
594 @classmethod
599
601 ra, dec, sra, sdec = [self.rAccref.params[k] for k in self._myKeys]
602 with utils.fitsLock():
603 hdus = pyfits.open(self.rAccref.localpath,
604 do_not_scale_image_data=True,
605 memmap=True)
606 try:
607 skyWCS = coords.getWCS(hdus[0].header)
608 pixelFootprint = numpy.asarray(
609 numpy.round(skyWCS.wcs_world2pix([
610 (ra-sra/2., dec-sdec/2.),
611 (ra+sra/2., dec+sdec/2.)], 1)), numpy.int32)
612 res = fitstools.cutoutFITS(hdus[0],
613 (skyWCS.longAxis, min(pixelFootprint[:,0]), max(pixelFootprint[:,0])),
614 (skyWCS.latAxis, min(pixelFootprint[:,1]), max(pixelFootprint[:,1])))
615 finally:
616 hdus.close()
617
618 return res
619
621
622
623 res = self._getCutoutHDU()
624 bytes = BytesIO()
625 res.writeto(bytes)
626
627 bytes.seek(0)
628 while True:
629 res = bytes.read(self.chunkSize)
630 if not res:
631 break
632 yield res
633
637
642
646
649 """A class representing a scaled FITS file.
650
651 Right now, this only works for local FITS files. Still, the
652 class is constructed with a full rAccref.
653 """
658
660 return "<%s scaled by %s>"%(self.name, self.scale)
661
662 @classmethod
664 if ("scale" in rAccref.params
665 and rAccref.productsRow["mime"]=="image/fits"):
666 return cls(rAccref)
667
669 self.name = "scaled-"+os.path.basename(self.pr["accref"])
670
681
685
690
691
692
693
694
695 PRODUCT_CLASSES = [
696 RemotePreview,
697 StaticPreview,
698 NonExistingProduct,
699 UnauthorizedProduct,
700 RemoteProduct,
701 CutoutProduct,
702 ScaledFITSProduct,
703 FileProduct,
704 ]
707 """returns a product for a RAccref.
708
709 This tries, in sequence, to make a product using each element
710 of PRODUCT_CLASSES' fromRAccref method. If nothing succeeds,
711 it will return an InvalidProduct.
712
713 If rAccref is a string, the function makes a real RAccref through
714 RAccref's fromString method from it.
715 """
716 if not isinstance(rAccref, RAccref):
717 rAccref = RAccref.fromString(rAccref)
718 for prodClass in PRODUCT_CLASSES:
719 res = prodClass.fromRAccref(rAccref, authGroups)
720 if res is not None:
721 return res
722 return InvalidProduct.fromRAccref(rAccref, authGroups)
723
726 """A core retrieving paths and/or data from the product table.
727
728 You will not usually mention this core in your RDs. It is mainly
729 used internally to serve /getproduct queries.
730
731 It is instantiated from within //products.rd and relies on
732 tables within that RD.
733
734 The input data consists of accrefs; you can use the string form
735 of RAccrefs, and if your renderer wants, it can pass in ready-made
736 RAccrefs. You can pass accrefs in through both an accref
737 param and table rows.
738
739 The accref param is the normal way if you just want to retrieve a single
740 image, the table case is for building tar files and such. There is one core
741 instance in //products for each case.
742
743 The core returns a list of instances of a subclass of ProductBase above.
744
745 This core and its supporting machinery handle all the fancy product
746 functionality (user authorisation, cutouts, ...).
747 """
748 name_ = "productCore"
749
751 """returns a list of RAccref requested within inputTable.
752 """
753 keys = []
754 args = inputTable.args
755 if args["accref"]:
756 keys.extend(RAccref.fromString(a) for a in args["accref"])
757
758 if args.get("pattern"):
759 try:
760 tablepat, filepat = args["pattern"].split("#")
761 except ValueError:
762 raise base.ValidationError(
763 "Must be of the form tablepattern#filepattern", "pattern")
764 with base.getTableConn() as conn:
765 for row in conn.queryToDicts(
766 "SELECT accref FROM dc.products"
767 " WHERE"
768 " accref LIKE %(filepat)s"
769 " AND sourceTable LIKE %(tablepat)s",
770 {"filepat": filepat, "tablepat": tablepat}):
771 keys.append(RAccref.fromString(row["accref"]))
772
773 return keys
774
780
def run(self, service, inputTable, queryMeta):
    """returns a list of products matching the inputTable.

    The items are whatever getProductForRAccref returns for each of
    the RAccrefs found in inputTable.

    The user and password items from queryMeta are passed to _getGroups;
    its result is handed on to getProductForRAccref as authGroups.
    """
    authGroups = self._getGroups(queryMeta["user"], queryMeta["password"])

    return [getProductForRAccref(r, authGroups)
        for r in self._getRAccrefs(inputTable)]
789
792 """A product key including possible modifiers.
793
794 The product key is in the accref attribute.
795
796 The modifiers come in the params dictionary. It contains (typed)
797 values, the possible keys of which are given in _buildKeys. The
798 values passed in through the inputDict constructor argument are parsed,
799 anything not in _buildKeys is discarded.
800
801 In principle, these modifiers are just the query part of a URL,
802 and they generally come from the arguments of a web request. However,
803 we don't want to carry around all request args, just those meant
804 for product generation.
805
806 One major reason for having this class is serialization into URL-parts.
807 Basically, stringifying a RAccref yields something that can be pasted
808 to <server root>/getproduct to yield the product URL. For the
809 path part, this means just percent-escaping blanks, plusses and percents
810 in the file name. The parameters are urlencoded and appended with
811 a question mark. This representation can be parsed by the fromString
812 function.
813
814 RAccrefs have a (read only) productsRow property -- that's
815 a dictionary containing the row for accref from //products#products
816 if that exists. If it doesn't, accessing the property will raise
817 a NotFoundError.
818 """
819 _buildKeys = dict((
820 ("dm", str),
821 ("ra", float),
822 ("dec", float),
823 ("sra", float),
824 ("sdec", float),
825 ("scale", int),
826 ("preview", base.parseBooleanLiteral),
827 ))
828
def __init__(self, accref, inputDict=None):
    """creates an RAccref for accref with modifiers taken from inputDict.

    inputDict maps parameter names to unparsed values; it is handed to
    _parseInputDict, the result of which becomes self.params.  Leaving
    it out (or passing None) is the same as passing an empty dictionary.
    """
    # None rather than {} as the default: a dict in the signature would
    # be a single object shared between all calls
    if inputDict is None:
        inputDict = {}
    self.accref = accref
    self.params = self._parseInputDict(inputDict)
832
833 @classmethod
835 """returns a rich accref from a path and a parse_qs-dictionary args.
836
837 (it's mainly a helper for fromRequest and fromString).
838 """
839 inputDict = {}
840 for key, value in args.iteritems():
841 if len(value)>0:
842 inputDict[key] = value[-1]
843
844
845
846
847 if not path.strip("/").strip():
848 if "key" in inputDict:
849 path = inputDict["key"]
850 else:
851 raise base.ValidationError(
852 "Must give key when constructing RAccref",
853 "accref")
854
855 return cls(path, inputDict)
856
857 @classmethod
859 """returns a rich accref from a nevow request.
860
861 Basically, it raises an error if there's no key at all, it will return
862 a (string) accref if no processing is desired, and it will return
863 a RAccref if any processing is requested.
864 """
865 return cls.fromPathAndArgs(path, request.args)
866
867 @classmethod
869 """returns a fat product key from a string representation.
870
871 As a convenience, if keyString already is a RAccref,
872 it is returned unchanged.
873 """
874 if isinstance(keyString, RAccref):
875 return keyString
876
877 qSep = keyString.rfind("?")
878 if qSep!=-1:
879 return cls.fromPathAndArgs(
880 unquoteProductKey(keyString[:qSep]),
881 urlparse.parse_qs(keyString[qSep+1:]))
882
883 return cls(unquoteProductKey(keyString))
884
885 @property
887 """returns the row in dc.products corresponding to this RAccref's
888 accref, or raises a NotFoundError.
889 """
890 try:
891 return self._productsRowCache
892 except AttributeError:
893 res = base.resolveCrossId(PRODUCTS_TDID).doSimpleQuery(
894 fragments="accref=%(accref)s", params={"accref": self.accref})
895 if not res:
896 raise base.NotFoundError(self.accref, "accref", "product table",
897 hint="Product URLs may disappear, though in general they should"
898 " not. If you have an IVOID (pubDID) for the file you are trying to"
899 " locate, you may still find it by querying the ivoa.obscore table"
900 " using TAP and ADQL.")
901 self._productsRowCache = res[0]
902
903
904
905 for key in ["mime", "accessPath", "accref"]:
906 self._productsRowCache[key] = str(self._productsRowCache[key])
907
908 return self._productsRowCache
909
911
912 res = quoteProductKey(self.accref)
913 args = urllib.urlencode(dict(
914 (k,str(v)) for k, v in self.params.iteritems()))
915 if args:
916 res = res+"?"+args
917 return res
918
921
923 return (isinstance(other, RAccref)
924 and self.accref==other.accref
925 and self.params==other.params)
926
928 return not self.__eq__(other)
929
941
942 @property
944 try:
945 return self._localpathCache
946 except AttributeError:
947 self._localpathCache = os.path.join(base.getConfig("inputsDir"),
948 self.productsRow["accessPath"])
949 return self._localpathCache
950
952 """returns True if the a preview generated for this rAccref
953 is representative for all representative rAccrefs.
954
955 Basically, scaled versions all have the same preview, cutouts do not.
956 """
957 if "ra" in self.params:
958 return False
959 return True
960
963 """reverses quoteProductKey.
964 """
965 return urllib.unquote(key)
966
969 """returns the columns within colSeq that contain product links of some
970 sort.
971 """
972 return [col for col in colSeq if col.displayHint.get("type")=="product"]
973
977 """returns key as getproduct URL-part.
978
979 If ``key`` is a string, it is quoted as a naked accref so it's usable
980 as the path part of an URL. If it's an ``RAccref``, it is just stringified.
981 The result is something that can be used after getproduct in URLs
982 in any case.
983 """
984 if isinstance(key, RAccref):
985 return str(key)
986 return urllib.quote(key)
987 rscdef.addProcDefObject("quoteProductKey", quoteProductKey)
992 """returns the URL at which a product can be retrieved.
993
994 key can be an accref string or an RAccref
995 """
996 url = base.makeSitePath("/getproduct/%s"%RAccref.fromString(key))
997 if withHost:
998 if useHost is None:
999 useHost = base.getCurrentServerURL()
1000 url = urlparse.urljoin(useHost, url)
1001 return url
1002 rscdef.addProcDefObject("makeProductLink", makeProductLink)
1019
1022 """A value mapper factory for product links.
1023
1024 Within the DC, any column called accref, with a display hint of
1025 type=product, a UCD of VOX:Image_AccessReference, or a utype
1026 of Access.Reference may contain a key into the product table.
1027 Here, we map those to links to the get renderer unless they look
1028 like a URL to begin with.
1029 """
1030 if not (
1031 colDesc["name"]=="accref"
1032 or colDesc["utype"]=="ssa:Access.Reference"
1033 or colDesc["ucd"]=="VOX:Image_AccessReference"
1034 or colDesc["displayHint"].get("type")=="product"):
1035 return
1036
1037 return functools.partial(
1038 formatProductLink, useHost=base.getCurrentServerURL())
1039
1040 utils.registerDefaultMF(_productMapperFactory)
1041