| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 Common code for generation of various data formats.
3
4 The main function here is formatData. It receives a string format id,
5 a data instance and a destination file. It dispatches this to formatters
6 previously registered using registerDataWriter.
7
8 The data writers must take a data instance and a file instance; their
9 effect must be that a serialized representation of data, or, if the format
10 does not support this, the data's primary table is written to the file
11 instance.
12 """
13
14 #c Copyright 2008-2019, the GAVO project
15 #c
16 #c This program is free software, covered by the GNU GPL. See the
17 #c COPYING file in the source distribution.
18
19
20 import cgi
21 import os
22 import mimetypes
23 from cStringIO import StringIO
24
25 from gavo import base
26
27
# Media types that getMIMEFor hands back exactly as the client ordered
# them (TAP Spec, 2.7.1, similar in DALI).
PRESERVED_MIMES = {
    "text/xml",
    "application/x-votable+xml",
    "text/plain",
}
30
# Extension (lower case, with leading dot) -> media type.  guessMediaType
# consults this when the formats registry has no entry for an extension,
# e.g., because the formatter modules have not been imported.
EXTENSION_FALLBACKS = {
    ".vot": base.votableType,
    ".fits": "application/fits",
    ".fz": "image/fits",
}
41 self.format = format
42 base.Error.__init__(self, format,
43 hint="Either you gave an invalid format id or a known format"
44 " did not get registred for some reason. Format codes"
45 " known at this point: %s. You can also try common MIME types"%(
46 ", ".join(FORMATS_REGISTRY.writerRegistry)))
47 self.args = [format]
48
50 return "Cannot serialize in '%s'."%self.format
51
54 """makes a DaCHS mime key from a content-type string.
55
56 This is used for retrieving matching mime types and is a triple
57 of major and minor mime type and a set of parameter pairs.
58
59 contentType is a string-serialized mime type.
60 """
61 media_type, paramdict = cgi.parse_header(contentType)
62 try:
63 major, minor = media_type.split("/")
64 except (ValueError, TypeError):
65 raise CannotSerializeIn(contentType)
66 return (major, minor,
67 frozenset(paramdict.iteritems()))
68
71 """a registry for data formats that can be produced by DaCHS.
72
73 This works by self-registration of the respective modules on their
74 input; hence, if you want to rely on some entry here, be sure
75 there's an import somewhere.
76 """
77 # format key -> writer function
78 writerRegistry = {}
79 # format key -> mime type
80 formatToMIME = {}
81 # format key -> human-readable label
82 formatToLabel = {}
83 # (major, minor, param pair set) -> format key
84 mimeToKey = {}
85 extensionToKey = {}
86 keyToExtension = {}
87
88 @classmethod
91 """adds a writer to the formats registry.
92
93 Key is a short, unique handle for the format, writer is a writer
94 function(data, outputFile) -> None (where data can be an rsc.Data
95 or an rsc.Table instance), mainMime is the preferred media type,
96 label is a human-readable designation for the format (shown in
97 selection widgets and the like), extension is a suggested extension
98 for the format (lower-case only), and aliases are other strings
99 that can be used to select the format in DALI FORMAT or similar.
100
101 Where keys, mainMime, and aliases clash, previous entries are
102 silently overwritten. For extensions, the first registered format
103 wins.
104 """
105 cls.writerRegistry[key] = writer
106 cls.formatToMIME[key] = mainMime
107 cls.formatToLabel[key] = label
108
109 cls.mimeToKey[getMIMEKey(mainMime)] = key
110 for mime in aliases:
111 cls.mimeToKey[getMIMEKey(mime)] = key
112 if extension not in cls.extensionToKey:
113 cls.extensionToKey[extension] = key
114 cls.keyToExtension[key] = extension
115
116 @classmethod
118 """returns a simple MIME type for our formatName (some incoming MIME
119 or an alias).
120
121 Some magic, reserved mimes that need to be preserved from
122 the input are recognised and returned in orderedFormat. This
123 is for TAP and related DALI hacks.
124 """
125 if orderedFormat in PRESERVED_MIMES:
126 return orderedFormat
127
128 if formatName in cls.formatToMIME:
129 return cls.formatToMIME[formatName]
130
131 # if it looks like a mime type, return it, otherwise assume it's
132 # an unimported format and return a generic mime
133 if "/" in formatName:
134 return formatName
135 else:
136 return "application/octet-stream"
137
138 @classmethod
140 """returns a writer for formatName.
141
142 writers are what's registered via registerDataWriter; formatName is
143 a MIME type or a format alias. This raises CannotSerializeIn
144 if no writer is available.
145 """
146 return cls.writerRegistry[cls.getKeyFor(formatName)]
147
148 @classmethod
150 """returns a label for formatName (DaCHS key or MIME type).
151 """
152 return cls.formatToLabel[cls.getKeyFor(formatName)]
153
154 @classmethod
156 """returns a DaCHS format key for formatName (DaCHS key or MIME).
157
158 If formatName is a mime type with parameters, we'll also try
159 to get a format with the parameters stripped and silently succeed
160 if that works.
161 """
162 if formatName in cls.writerRegistry:
163 return formatName
164
165 parsed = getMIMEKey(formatName)
166 if parsed in cls.mimeToKey:
167 return cls.mimeToKey[parsed]
168 parsed = (parsed[0], parsed[1], frozenset())
169 if parsed in cls.mimeToKey:
170 return cls.mimeToKey[parsed]
171
172 raise CannotSerializeIn(formatName)
173
174 @classmethod
176 """returns the media type first registered for extension.
177
178 extension must begin with a dot. None is returned for extensions
179 no format has (yet) claimed.
180 """
181 key = cls.extensionToKey.get(extension.lower())
182 if key is None:
183 return None
184 return cls.formatToMIME[key]
185
186 @classmethod
188 """iterates over the short names of the available formats.
189 """
190 return iter(cls.writerRegistry)
191
192
# Module-level shortcuts to the classmethods of FORMATS_REGISTRY, so
# clients can call them as plain functions of this module.
registerDataWriter = FORMATS_REGISTRY.registerDataWriter
getMIMEFor = FORMATS_REGISTRY.getMIMEFor
getWriterFor = FORMATS_REGISTRY.getWriterFor
getLabelFor = FORMATS_REGISTRY.getLabelFor
iterFormats = FORMATS_REGISTRY.iterFormats
198
199
def formatData(
        formatName,
        table,
        outputFile,
        acquireSamples=True,
        **moreFormatterArgs):
    """serializes table to outputFile in the format designated by formatName.

    ``table`` can be either a table or a ``Data`` instance.  ``formatName``
    is one of the short format keys (``formats.iterFormats()`` enumerates
    the ones available) or a media type.  Passing None selects the default
    VOTable format.

    A ``CannotSerializeIn`` exception is raised when ``formatName`` is not
    recognized.  Formats only become available once their serialising
    modules from the format package have been imported (fitstable,
    csvtable, geojson, jsontable, texttable, votable; api itself already
    imports the more popular of these).

    Any additional keyword arguments are handed through to the selected
    writer unchanged.  Only pass them if you know the writer understands
    them; a writer that does not will raise an error.
    """
    # None is shorthand for the default VOTable media type.
    targetFormat = base.votableType if formatName is None else formatName

    # Resolve the writer first (this is where CannotSerializeIn comes
    # from), then let it do the actual serialization.
    writeTable = getWriterFor(targetFormat)
    writeTable(
        table,
        outputFile,
        acquireSamples=acquireSamples,
        **moreFormatterArgs)
229
232 """returns a string containing a representation of table in the
233 format given by formatName.
234
235 This is just wrapping the `function formatData`_; see there for formatName.
236 This function will use large amounts of memory for large data.
237 """
238 buffer = StringIO()
239 formatData(formatName, table, buffer, acquireSamples)
240 return buffer.getvalue()
241
244 """returns a media type plausible for a file named fName.
245
246 This first uses the extension map inferred by our formats registry,
247 has some built-in safety catches in case the formatters haven't
248 been imported, and then falls back to built-in python
249 mimetypes.guess_type. If nothing matches, it returns
250 application/octet-stream.
251
252 Extensions are used case-insensitively. We don't do any encoding
253 inference (yet). We may, though, so by all means shout if you're using
254 this in DaCHS-external code.
255 """
256 extension = os.path.splitext(fName)[-1].lower()
257 res = FORMATS_REGISTRY.getTypeForExtension(extension)
258
259 if res is None:
260 res = EXTENSION_FALLBACKS.get(extension)
261
262 if res is None:
263 res, _ = mimetypes.guess_type(fName)
264
265 if res is None:
266 res = "application/octet-stream"
267
268 return res
269
272 """returns a suggested extension for files of mediaType.
273
274 mediaType can be an RFC 2045 media type, or one of DaCHS' internal format
275 codes.
276
277 As a fallback, .dat will be returned.
278 """
279 try:
280 return FORMATS_REGISTRY.keyToExtension[
281 FORMATS_REGISTRY.getKeyFor(mediaType)]
282 except (CannotSerializeIn, KeyError):
283 return mimetypes.guess_extension(mediaType) or ".dat"
284
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Thu May 2 07:29:09 2019 | http://epydoc.sourceforge.net |