Package gavo :: Package web :: Module producttar
[frames] | no frames]

Source Code for Module gavo.web.producttar

  1  """ 
  2  Helper functions for producing tar files from tables containing 
  3  a product column. 
  4   
  5  Everything in this module expects the product interface, i.e., tables 
  6  must at least contain accref, owner, embargo, and accsize fields. 
  7  """ 
  8   
  9  #c Copyright 2008-2019, the GAVO project 
 10  #c 
 11  #c This program is free software, covered by the GNU GPL.  See the 
 12  #c COPYING file in the source distribution. 
 13   
 14   
 15  # XXX TODO: this should eventually become a renderer on the product core, 
 16  # redirected to from the current TarResponse. 
 17   
 18  from cStringIO import StringIO 
 19  import os 
 20  import tarfile 
 21  import time 
 22   
 23  from gavo import base 
 24  from gavo import grammars 
 25  from gavo import svcs 
 26  from gavo import utils 
 27  from gavo.protocols import products 
 28  from gavo.svcs import streaming 
 29   
 30   
 31  MS = base.makeStruct 
class UniqueNameGenerator(object):
    """A factory handing out archive member names that never clash.

    All names are placed below ``dc_data/``.  The first time a name is
    requested (names are compared in lower case), ``dc_data/<name>`` is
    returned unchanged.  Later requests that would clash are disambiguated
    by inserting ``-001``, ``-002``, ... in front of the extension.
    """

    def __init__(self):
        # lower-cased forms of all names handed out so far
        self.knownNames = set()

    def _buildNames(self, baseName):
        """yields an endless sequence of candidate names for baseName:
        first the plain one, then the numbered variants.
        """
        stem, suffix = os.path.splitext(baseName)
        yield "dc_data/%s%s"%(stem, suffix)
        serial = 0
        while True:
            serial += 1
            yield "dc_data/%s-%03d%s"%(stem, serial, suffix)

    def makeName(self, baseName):
        """returns a name for baseName not handed out before by this
        instance and remembers it as taken.
        """
        candidates = self._buildNames(baseName)
        candidate = next(candidates)
        # the candidate sequence is infinite, so this always terminates
        # with a fresh name
        while candidate.lower() in self.knownNames:
            candidate = next(candidates)
        self.knownNames.add(candidate.lower())
        return str(candidate)
57
class ColToRowIterator(grammars.RowIterator):
    """A RowIterator turning selected column values of a row into rows
    of their own.

    For each input row, the columns named in the grammar's sourceKeys
    are inspected, and every non-empty value is yielded as a one-item
    dictionary keyed with the grammar's targetKey.

    As a hacky convenience for TarMakers, a value (compared by its string
    form) is only yielded once per iterator; this keeps files that are
    mentioned more than once in a result table from being tarred in
    several times.
    """

    def __init__(self, *args, **kwargs):
        grammars.RowIterator.__init__(self, *args, **kwargs)
        # string forms of all values yielded so far
        self.seenKeys = set()

    def _iterRows(self):
        for inputRow in self.sourceToken:
            for sourceKey in self.grammar.sourceKeys:
                if not inputRow.get(sourceKey):
                    # column missing, NULL, or empty: nothing to deliver
                    continue

                value = inputRow[sourceKey]
                # service for "rich" product displays selecting more than
                # one row: for a list (SQL array), the first element
                # serves as access key
                if isinstance(value, list):
                    value = value[0]

                # str so product.RAccrefs can be compared, too
                dedupKey = str(value)
                if dedupKey in self.seenKeys:
                    continue

                yield {self.grammar.targetKey: value}
                self.seenKeys.add(dedupKey)
85
class ColToRowGrammar(grammars.Grammar):
    """is a grammar that selects some columns and returns each of them
    as a row with a specified key.

    This is useful to extract all products from tables that can have
    multiple columns carrying products.

    The input is a sequence of dictionaries (i.e., Table rows).
    """

    # the iterator class producing the rows for this grammar
    rowIterator = ColToRowIterator

    # targetKey: the key ColToRowIterator uses in the one-item rows it yields.
    # NOTE(review): default=base.Undefined presumably makes the attribute
    # mandatory -- confirm against gavo.base.
    _targetKey = base.UnicodeAttribute("targetKey", default=base.Undefined,
        description="Name of the target column")
    # sourceKeys: the keys ColToRowIterator looks up in each input row.
    _sourceKeys = base.ListOfAtomsAttribute("sourceKeys",
        description="Names of the source columns.",
        itemAttD=base.UnicodeAttribute("sourceKey"))
104
class ProductTarMaker(object):
    """A factory for tar files.

    You probably don't want to instantiate it directly but instead get an
    instance through the getTarMaker function below.

    The main entry point to this class is deliverProductTar.
    """

    def __init__(self):
        self.rd = base.caches.getRD("__system__/products")
        # the core turning accrefs into product objects
        self.core = self.rd.getById("forTar")

    def _getEmbargoedFile(self, name):
        """returns a (TarInfo, file object) pair for a placeholder member
        called name telling the user that the actual data is embargoed.
        """
        stuff = StringIO("This file is embargoed. Sorry.\n")
        b = tarfile.TarInfo(name)
        b.size = len(stuff.getvalue())
        b.mtime = time.time()
        return b, stuff

    def _getTarInfoFromProduct(self, prod, name):
        """returns a (TarInfo, file object) pair from a general
        products.PlainProduct instance prod; the member is called name.

        This is relatively inefficient for data that's actually on disk
        (all of prod's data is pulled into memory), so you should only use
        it when data is being computed on the fly.
        """
        assert not isinstance(prod, products.UnauthorizedProduct)
        data = "".join(prod.iterData())
        b = tarfile.TarInfo(name)
        b.size = len(data)
        b.mtime = time.time()
        return b, StringIO(data)

    def _getDestName(self, productsTable):
        """returns a filename for a tar with the stuff in productsTable.

        For now, we just distinguish overflowed and non-overflowed tars
        based on the table's _queryStatus meta.
        """
        qs = base.getMetaText(productsTable, "_queryStatus")
        if qs=="OVERFLOW":
            return "truncated_data.tar"
        return "data.tar"

    def _productsToTar(self, productList, destination):
        """actually writes the tar for the products in productList
        to the file-like object destination.

        Returns an empty string.
        """
        # SvcResult madness; clean up SvcResult and remove this.
        productList = getattr(productList, "original", productList)

        nameGen = UniqueNameGenerator()
        # "w|" is tarfile's non-seekable stream mode; with a file object
        # given, the name only serves as a label.
        outputTar = tarfile.TarFile.open("data.tar", "w|", destination)
        for src in productList:
            if isinstance(src, products.NonExistingProduct):
                continue # just skip files that somehow don't exist any more

            elif isinstance(src, products.UnauthorizedProduct):
                # This test must come before the FileProduct one.
                # NOTE(review): the placeholder keeps src.name as is, i.e.,
                # it bypasses nameGen and does not end up below dc_data/;
                # confirm that is intended.
                outputTar.addfile(*self._getEmbargoedFile(src.name))

            elif isinstance(src, products.FileProduct):
                # actual file in the file system
                targetName = nameGen.makeName(src.name)
                outputTar.add(str(src.rAccref.localpath), targetName)

            else: # anything else is read from the src
                outputTar.addfile(*self._getTarInfoFromProduct(src,
                    nameGen.makeName(src.name)))
        # Deliberately not in a finally clause: after a failure, the
        # stream should not be terminated like a complete archive.
        outputTar.close()
        return ""  # finish off request if necessary.

    def _streamOutTar(self, productData, request, queryMeta, destName):
        """sets the response headers on request and streams out a tar of
        productData as an attachment called destName.

        queryMeta is currently not used here.
        """
        request.setHeader('content-disposition',
            'attachment; filename=%s'%destName)
        request.setHeader("content-type", "application/x-tar")

        def writeTar(dest):
            self._productsToTar(productData, dest)

        return streaming.streamOut(writeTar, request)

    def _collectAccrefs(self, table, productColumns):
        """returns a list of the access references in productColumns of
        the rows in table.

        Rows are visited in order, within a row the columns in the order
        of productColumns.  NULL or empty values are skipped, of list-valued
        (SQL array) cells only the first element is used, and an access
        reference mentioned several times is only returned once so identical
        files are not tarred in more than once.  This is the filtering
        ColToRowIterator applies, too.
        """
        accrefs, seenKeys = [], set()
        for row in table:
            for col in productColumns:
                accref = row[col.name]
                if isinstance(accref, list):
                    accref = accref[0] if accref else None
                if not accref:
                    continue
                # compare string forms, as accrefs need not be plain strings
                key = str(accref)
                if key not in seenKeys:
                    seenKeys.add(key)
                    accrefs.append(accref)
        return accrefs

    def deliverProductTar(self, coreResult, request, queryMeta):
        """causes a tar containing all accrefs mentioned in coreResult
        to be streamed out via request.

        A base.ValidationError is raised if coreResult's primary table
        has no product columns.
        """
        table = coreResult.original.getPrimaryTable()
        productColumns = products.getProductColumns(table.tableDef.columns)
        if not productColumns:
            raise base.ValidationError("This query does not select any"
                " columns with access references", "_OUTPUT")

        inputTable = svcs.CoreArgs.fromRawArgs(
            self.rd.getById("forTar").inputTable,
            {"accref": self._collectAccrefs(table, productColumns)})

        prods = self.core.run(coreResult.service, inputTable, queryMeta)
        return self._streamOutTar(prods, request, queryMeta,
            self._getDestName(table))
209
@utils.memoized
def getTarMaker():
    """returns a ProductTarMaker instance.

    Creating a ProductTarMaker fetches the __system__/products RD.
    NOTE(review): utils.memoized presumably caches the result so all
    callers share one instance -- confirm against gavo.utils.
    """
    return ProductTarMaker()
214