Package gavo :: Package formats :: Module common
[frames] | no frames]

Source Code for Module gavo.formats.common

  1  """ 
  2  Common code for generation of various data formats. 
  3   
  4  The main function here is formatData.  It receives a string format id, 
  5  a data instance and a destination file.  It dispatches this to formatters  
  6  previously registered using registerDataWriter. 
  7   
  8  The data writers must take a data instance and a file instance; their 
  9  effect must be that a serialized representation of data, or, if the format 
 10  does not support this, the data's primary table is written to the file  
 11  instance. 
 12  """ 
 13   
 14  #c Copyright 2008-2019, the GAVO project 
 15  #c 
 16  #c This program is free software, covered by the GNU GPL.  See the 
 17  #c COPYING file in the source distribution. 
 18   
 19   
 20  import cgi 
 21  import os 
 22  import mimetypes 
 23  from cStringIO import StringIO 
 24   
 25  from gavo import base 
 26   
 27   
 28  PRESERVED_MIMES = set([ # TAP Spec, 2.7.1, similar in DALI 
 29          "text/xml", "application/x-votable+xml", "text/plain"]) 
 30   
 31  # used in guessMediaType 
 32  EXTENSION_FALLBACKS = { 
 33          ".vot": base.votableType, 
 34          ".fits": "application/fits", 
 35          ".fz": "image/fits", 
 36  } 
class CannotSerializeIn(base.Error):
    """is raised when no writer can be found for a requested format.

    The offending format designation is kept in the ``format`` attribute
    and is the sole element of ``args``.  The hint passed on to base.Error
    lists the format keys present in FORMATS_REGISTRY.writerRegistry at
    the time the exception is constructed.
    """

    def __init__(self, format):
        self.format = format
        hintTemplate = (
            "Either you gave an invalid format id or a known format"
            " did not get registred for some reason. Format codes"
            " known at this point: %s. You can also try common MIME types")
        knownCodes = ", ".join(FORMATS_REGISTRY.writerRegistry)
        base.Error.__init__(self, format, hint=hintTemplate % knownCodes)
        # set after the base constructor ran so args only has the format
        self.args = [format]

    def __str__(self):
        return "Cannot serialize in '%s'." % self.format
51
def getMIMEKey(contentType):
    """makes a DaCHS mime key from a content-type string.

    This is used for retrieving matching mime types and is a triple
    of major and minor mime type and a frozenset of (name, value)
    parameter pairs.

    contentType is a string-serialized mime type, possibly with
    parameters (as in ``major/minor;name=value``).

    This raises a CannotSerializeIn if the media type part of contentType
    does not consist of exactly two slash-separated parts.
    """
    media_type, paramdict = cgi.parse_header(contentType)
    try:
        major, minor = media_type.split("/")
    except (ValueError, TypeError):
        raise CannotSerializeIn(contentType)
    # items() rather than iteritems(): the resulting frozenset is the
    # same, but this way the function also works where dicts have no
    # iteritems method.
    return (major, minor, frozenset(paramdict.items()))
68
class FORMATS_REGISTRY(object):
    """a registry for data formats that can be produced by DaCHS.

    Formats end up here by self-registration of the modules implementing
    them; hence, if you rely on some entry, make sure the module in
    question is imported somewhere.

    All state lives in class attributes, and all methods are class
    methods; the class is not meant to be instantiated.
    """
    # format key -> writer function
    writerRegistry = {}
    # format key -> mime type
    formatToMIME = {}
    # format key -> human-readable label
    formatToLabel = {}
    # (major, minor, param pair set) -> format key
    mimeToKey = {}
    # extension -> format key (first registration wins)
    extensionToKey = {}
    # format key -> extension
    keyToExtension = {}

    @classmethod
    def registerDataWriter(cls,
            key, writer, mainMime, label, extension, *aliases):
        """adds a writer to the formats registry.

        Arguments:

        * key -- a short, unique handle for the format
        * writer -- a function(data, outputFile) -> None (where data can
          be an rsc.Data or an rsc.Table instance)
        * mainMime -- the preferred media type
        * label -- a human-readable designation for the format (shown
          in selection widgets and the like)
        * extension -- a suggested extension for the format (lower-case
          only)
        * aliases -- other strings that can be used to select the format
          in DALI FORMAT or similar.

        Where keys, mainMime, and aliases clash, previous entries are
        silently overwritten.  For extensions, the first registered format
        wins.
        """
        cls.writerRegistry[key] = writer
        cls.formatToMIME[key] = mainMime
        cls.formatToLabel[key] = label

        # mainMime goes first so aliases are processed in the same
        # order as if they had been registered one by one after it.
        for mediaType in (mainMime,) + aliases:
            cls.mimeToKey[getMIMEKey(mediaType)] = key

        if extension not in cls.extensionToKey:
            cls.extensionToKey[extension] = key
        # the reverse mapping is per key, so it is always recorded.
        cls.keyToExtension[key] = extension

    @classmethod
    def getMIMEFor(cls, formatName, orderedFormat=None):
        """returns a simple MIME type for our formatName (some incoming MIME
        or an alias).

        If orderedFormat is one of PRESERVED_MIMES, it is returned
        unchanged.  This is for TAP and related DALI hacks.

        For format names not in the registry, formatName itself is returned
        if it contains a slash; everything else yields
        application/octet-stream.
        """
        if orderedFormat in PRESERVED_MIMES:
            return orderedFormat

        try:
            return cls.formatToMIME[formatName]
        except KeyError:
            # not a registered key; see if it at least looks like a
            # media type, else assume an unimported format.
            pass

        return formatName if "/" in formatName else "application/octet-stream"

    @classmethod
    def getWriterFor(cls, formatName):
        """returns a writer for formatName.

        writers are what's registered via registerDataWriter; formatName is
        a MIME type or a format alias.  This raises CannotSerializeIn
        if no writer is available.
        """
        formatKey = cls.getKeyFor(formatName)
        return cls.writerRegistry[formatKey]

    @classmethod
    def getLabelFor(cls, formatName):
        """returns a label for formatName (DaCHS key or MIME type).
        """
        formatKey = cls.getKeyFor(formatName)
        return cls.formatToLabel[formatKey]

    @classmethod
    def getKeyFor(cls, formatName):
        """returns a DaCHS format key for formatName (DaCHS key or MIME).

        If formatName is a mime type with parameters, we'll also try
        to get a format with the parameters stripped and silently succeed
        if that works.

        A CannotSerializeIn is raised when nothing matches.
        """
        if formatName in cls.writerRegistry:
            return formatName

        major, minor, params = getMIMEKey(formatName)
        # exact match first, then the same type without parameters
        for candidate in ((major, minor, params), (major, minor, frozenset())):
            if candidate in cls.mimeToKey:
                return cls.mimeToKey[candidate]

        raise CannotSerializeIn(formatName)

    @classmethod
    def getTypeForExtension(cls, extension):
        """returns the media type first registered for extension.

        extension must begin with a dot; it is lowercased before the
        lookup.  None is returned for extensions no format has (yet)
        claimed.
        """
        claimedBy = cls.extensionToKey.get(extension.lower())
        if claimedBy is not None:
            return cls.formatToMIME[claimedBy]
        return None

    @classmethod
    def iterFormats(cls):
        """iterates over the short names of the available formats.
        """
        return iter(cls.writerRegistry)
# Module-level shortcuts to the registry's class methods, so callers
# can use them as plain functions of this module.
registerDataWriter = FORMATS_REGISTRY.registerDataWriter
getMIMEFor = FORMATS_REGISTRY.getMIMEFor
getWriterFor = FORMATS_REGISTRY.getWriterFor
getLabelFor = FORMATS_REGISTRY.getLabelFor
iterFormats = FORMATS_REGISTRY.iterFormats
def formatData(
        formatName,
        table,
        outputFile,
        acquireSamples=True,
        **moreFormatterArgs):
    """writes a table to outputFile in the format given by formatName.

    Table may be a table or a ``Data`` instance.  ``formatName`` is a format
    shortcut (``formats.iterFormats()`` gives keys available) or a media type.
    If you pass None, ``base.votableType`` will be selected.

    This raises a ``CannotSerializeIn`` exception if ``formatName`` is
    not recognized.  Note that you have to import the serialising modules
    from the format package to make the formats available (fitstable,
    csvtable, geojson, jsontable, texttable, votable; api itself already
    imports the more popular of these).

    Any further keyword arguments are handed through to the writer
    unchanged.  Only use this if you know the writer selected understands
    them; a writer that does not accept them will raise an error.
    """
    writer = getWriterFor(
        base.votableType if formatName is None else formatName)
    writer(
        table,
        outputFile,
        acquireSamples=acquireSamples,
        **moreFormatterArgs)
229
def getFormatted(formatName, table, acquireSamples=False):
    """returns a string containing a representation of table in the
    format given by formatName.

    This is just wrapping the `function formatData`_; see there for
    formatName.  Since the whole serialisation is collected in an
    in-memory buffer, this function will use large amounts of memory for
    large data.
    """
    collector = StringIO()
    formatData(formatName, table, collector, acquireSamples)
    return collector.getvalue()
241
def guessMediaType(fName):
    """returns a media type plausible for a file named fName.

    The sources are tried in this order, and the first one yielding
    something wins:

    1. the extension map of our formats registry,
    2. EXTENSION_FALLBACKS (a safety catch in case the formatters haven't
       been imported),
    3. python's built-in mimetypes.guess_type.

    If nothing matches, application/octet-stream is returned.

    Extensions are used case-insensitively.  We don't do any encoding
    inference (yet).  We may, though, so by all means shout if you're using
    this in DaCHS-external code.
    """
    extension = os.path.splitext(fName)[-1].lower()

    fromRegistry = FORMATS_REGISTRY.getTypeForExtension(extension)
    if fromRegistry is not None:
        return fromRegistry

    fromFallbacks = EXTENSION_FALLBACKS.get(extension)
    if fromFallbacks is not None:
        return fromFallbacks

    # guess_type returns a (type, encoding) pair; the encoding is ignored.
    fromStdlib, _ = mimetypes.guess_type(fName)
    if fromStdlib is not None:
        return fromStdlib

    return "application/octet-stream"
269
def getExtensionFor(mediaType):
    """returns a suggested extension for files of mediaType.

    mediaType can be an RFC 2045 media type, or one of DaCHS' internal format
    codes.

    If the formats registry cannot resolve mediaType or has no extension
    for it, python's mimetypes.guess_extension is asked; as a last
    fallback, .dat will be returned.
    """
    try:
        formatKey = FORMATS_REGISTRY.getKeyFor(mediaType)
        suggested = FORMATS_REGISTRY.keyToExtension[formatKey]
    except (CannotSerializeIn, KeyError):
        return mimetypes.guess_extension(mediaType) or ".dat"
    return suggested
284