Source code for gavo.protocols.sdm

"""
Code dealing with spectra (the actual data), in particular in the spectral
data model (sdm).

This assumes Spectral Data Model Version 1, but doesn't use very much of it.
"""

#c Copyright 2008-2023, the GAVO project <gavo@ari.uni-heidelberg.de>
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.


import datetime
import io
import os

from gavo import base
from gavo import formats
from gavo import rsc
from gavo import svcs
from gavo import utils
from gavo.formats import fitstable
from gavo.protocols import products


# MIME types we can generate *from* SDM-compliant data; the values are
# either keys for formats.formatData, or None if we have special
# handling below.
GETDATA_FORMATS = {
	base.votableType: "votable",
	"application/x-votable+xml;serialization=tabledata": "votabletd",
	"text/plain": "tsv",
	"text/csv": "csv",
	"application/fits": None,
	"application/x-votable+xml;serialization=TABLEDATA;version=1.5": "vodml"}


_SDM1_IRREGULARS = {
	"dataset.datamodel": "Spectrum.DataModel",
	"dataset.type": "Spectrum.Type",
	"dataset.length": "Spectrum.Length",
	"dataset.timesi": "Spectrum.TimeSI",
	"dataset.spectralsi": "Spectrum.SpectralSI",
	"dataset.fluxsi": "Spectrum.FluxSI",
	"access.format": None,
	"access.reference": None,
	"access.size": None,
	"astrocoords.position2d.value2":
		"Spectrum.Char.SpatialAxis.Coverage.Location.Value",
	"target.pos.spoint": "Spectrum.Target.pos",
}

[docs]def getSDM1UtypeForSSA(utype): """returns a utype from the spectrum data model for a utype of the ssa data model. For most utypes, this just removes a prefix and adds spec:Spectrum. Heaven knows why these are two different data models anyway. There are some (apparently random) differences, though. For convenience, utype=None is allowed and returned as such. """ if utype is None: return None localName = utype.split(":")[-1] specLocal = _SDM1_IRREGULARS.get(localName.lower(), "Spectrum."+localName) if specLocal: return "spec:"+specLocal else: return None
_SDM_TO_SED_UTYPES = { "spec:Spectrum.Data.SpectralAxis.Value": "sed:Segment.Points.SpectralCoord.Value", "spec:Data.SpectralAxis.Value": "sed:Segment.Points.SpectralCoord.Value", "spec:Spectrum.Data.FluxAxis.Value": "sed:Segment.Points.Flux.Value", "spec:Data.FluxAxis.Value": "sed:Segment.Points.Flux.Value", } ################### Making SDM compliant tables (from SSA rows and ################### data descriptors making spectral data)
[docs]def makeSDMDataForSSARow(ssaRow, spectrumData, sdmVersion=base.getConfig("ivoa", "sdmVersion")): """returns a rsc.Data instance containing an SDM compliant spectrum for the spectrum described by ssaRow. spectrumData is a data element making a primary table containing the spectrum data from an SSA row (typically, this is going to be the tablesource property of an SSA service). You'll usually use this via //soda#sdm_genData """ with base.getTableConn() as conn: resData = rsc.makeData(spectrumData, forceSource=ssaRow, connection=conn) resData.setMeta("utype", "spec:Spectrum") resTable = resData.getPrimaryTable() # fudge accref into a full URL resTable.setParam("accref", products.makeProductLink(resTable.getParam("accref"))) resData.DACHS_SDM_VERSION = sdmVersion # fudge spoint params into 2-arrays for param in resTable.iterParams(): # Bad, bad: In-place changes; change this to creating new params # one of these days. if param.type=="spoint": val = param.value param.type = "double precision(2)" param.xtype = None param.unit = "deg" if val: param.set([val.x/utils.DEG, val.y/utils.DEG]) # we want to include service metadata for data origin, but getting to # the service object from in here is really hard (since services may # share cores). So... let's steal. Nothing much breaks if this doesn't # work out. try: resData.contributingMetaCarriers.append(utils.stealVar("service")) except ValueError: # no service local in stack pass return resData
[docs]def makeSDMDataForPUBDID(pubDID, ssaTD, spectrumData, sdmVersion=base.getConfig("ivoa", "sdmVersion")): """returns a rsc.Data instance containing an SDM compliant spectrum for pubDID from ssaTable. ssaTD is the definition of a table containing the SSA metadata, spectrumData is a data element making a primary table containing the spectrum data from an SSA row (typically, this is going to be the tablesource property of an SSA service). """ matchingRows = ssaTD.doSimpleQuery(fragments="ssa_pubdid=%(pubDID)s", params={"pubDID": pubDID}) if not matchingRows: raise svcs.UnknownURI("No spectrum with pubdid %s known here"% pubDID) return makeSDMDataForSSARow( matchingRows[0], spectrumData, sdmVersion=sdmVersion)
################## Special FITS hacks for SDM serialization def _add_target_pos_cards(header, par): """_SDM_HEADER_MAPPING for target.pos. """ header.set("RA_TARG", par.value.x/utils.DEG) header.set("DEC_TARG", par.value.y/utils.DEG) def _add_location_cards(header, par): """_SDM_HEADER_MAPPING for target.pos. """ header.set("RA", par.value[0]) header.set("DEC", par.value[1])
[docs]def getColIndForUtype(header, colUtype): """returns the fits field index that has colUtype as its utype within header. If no such field exists, this raises a KeyError """ nCols = header.get("TFIELDS", 0) for colInd in range(1, nCols+1): key = "TUTYP%d"%colInd if header.get(key, None)==colUtype: return colInd else: raise KeyError(colUtype)
def _make_limits_func(colUtype, keyBase): """returns a _SDM_HEADER_MAPPING for declaring TDMIN/MAXn and friends. colUtype determines the n here. """ def func(header, par): try: key = keyBase+str(getColIndForUtype(header, colUtype)) header.set(key, par.value, comment="[%s]"%par.unit) except KeyError: # no field for utype pass return func # A mapping from utypes to the corresponding FITS keywords # There are some more complex cases, for which a function is a value # here; the function is called with the FITS header and the parameter # in question. _SDM_HEADER_MAPPING = { "datamodel": "VOCLASS", "length": "DATALEN", "type": "VOSEGT", "coordsys.id": "VOCSID", "coordsys.spaceframe.name": "RADECSYS", "coordsys.spaceframe.equinox": "EQUINOX", "coordsys.spaceframe.ucd": "SKY_UCD", "coordsys.spaceframe.refpos": "SKY_REF", "coordsys.timeframe.name": "TIMESYS", "coordsys.timeframe.ucd": None, "coordsys.timeframe.zero": "MJDREF", "coordsys.timeframe.refpos": None, "coordsys.spectralframe.refpos": "SPECSYS", "coordsys.spectralframe.redshift": "REST_Z", "coordsys.spectralframe.name": "SPECNAME", "coordsys.redshiftframe.name": "ZNAME", "coordsys.redshiftframe.refpos": "SPECSYSZ", "curation.publisher": "VOPUB", "curation.reference": "VOREF", "curation.publisherid": "VOPUBID", "curation.version": "VOVER", "curation.contactname": "CONTACT", "curation.contactemail": "EMAIL", "curation.rights": "VORIGHTS", "curation.date": "VODATE", "curation.publisherdid": "DS_IDPUB", "target.name": "OBJECT", "target.description": "OBJDESC", "target.class": "SRCCLASS", "target.spectralclass": "SPECTYPE", "target.redshift": "REDSHIFT", "target.varampl": "TARGVAR", "dataid.title": "TITLE", "dataid.creator": "AUTHOR", "dataid.datasetid": "DS_IDENT", "dataid.creatordid": "CR_IDENT", "dataid.date": "DATE", "dataid.version": "VERSION", "dataid.instrument": "INSTRUME", "dataid.creationtype": "CRETYPE", "dataid.logo": "VOLOGO", # collection will need work when we properly implement it "dataid.collection": "COLLECT1", "dataid.contributor": "CONTRIB1", "dataid.datasource": "DSSOURCE", "dataid.bandpass": "SPECBAND", "derived.snr": "DER_SNR", "derived.redshift.value": "DER_Z", "derived.redshift.staterror": "DER_ZERR", "derived.redshift.confidence": "DER_ZCNF", "derived.varampl": "DER_VAR", "timesi": "TIMESDIM", "spectralsi": "SPECSDIM", "fluxsi": "FLUXSDIM", "char.fluxaxis.name": None, "char.fluxaxis.unit": None, "char.fluxaxis.ucd": None, "char.spectralaxis.name": None, "char.spectralaxis.unit": None, "char.spectralaxis.ucd": None, "char.timeaxis.name": None, "char.timeaxis.ucd": None, "char.spatialaxis.name": None, "char.spatialaxis.unit": None, "char.fluxaxis.accuracy.staterror": "STAT_ERR", "char.fluxaxis.accuracy.syserror": "SYS_ERR", "char.timeaxis.accuracy.staterror": "TIME_ERR", "char.timeaxis.accuracy.syserror": "TIME_SYE", "char.timeaxis.resolution": "TIME_RES", "char.fluxaxis.calibration": "FLUX_CAL", "char.spectralaxis.calibration": "SPEC_CAL", "char.spectralaxis.coverage.location.value": "SPEC_VAL", "char.spectralaxis.coverage.bounds.extent": "SPEC_BW", "char.spectralaxis.coverage.bounds.start": _make_limits_func("spec:spectrum.data.spectralaxis.value", "TDMIN"), "char.spectralaxis.coverage.bounds.stop": _make_limits_func("spec:spectrum.data.spectralaxis.value", "TDMAX"), "char.spectralaxis.samplingprecision.": None, "samplingprecisionrefval.fillfactor": "SPEC_FIL", "char.spectralaxis.samplingprecision.SampleExtent": "SPEC BIN", "char.spectralaxis.accuracy.binsize": "SPEC_BIN", "char.spectralaxis.accuracy.staterror": "SPEC_ERR", "char.spectralaxis.accuracy.syserror": "SPEC_SYE", "char.spectralaxis.resolution": "SPEC_RES", "char.spectralaxis.respower": "SPEC_RP", "char.spectralaxis.coverage.support.extent": "SPECWID", "char.timeaxis.unit": "TIMEUNIT", "char.timeaxis.accuracy.binsize": "TIMEDEL", "char.timeaxis.calibration": "TIME_CAL", "char.timeaxis.coverage.location.value": "TMID", "char.timeaxis.coverage.bounds.extent": "TELAPSE", "char.timeaxis.coverage.bounds.start": "TSTART", "char.timeaxis.coverage.bounds.stop": "TSTOP", "char.timeaxis.coverage.support.extent": "EXPOSURE", "char.timeaxis.samplingprecision.samplingprecisionrefval.fillfactor": "DTCOR", "char.timeaxis.samplingprecision.sampleextent": "TIMEDEL", "char.spatialaxis.ucd": "SKY_UCD", "char.spatialaxis.accuracy.staterr": "SKY_ERR", "char.spatialaxis.accuracy.syserror": "SKY_SYE", "char.spatialaxis.calibration": "SKY_CAL", "char.spatialaxis.resolution": "SKY_RES", "char.spatialaxis.coverage.bounds.extent": "APERTURE", "char.spatialaxis.coverage.support.area": "REGION", "char.spatialaxis.coverage.support.extent": "AREA", "char.spatialaxis.samplingprecision.samplingprecisionrefval.fillfactor": "SKY_FILL", # special handling through functions "target.pos": _add_target_pos_cards, "char.spatialaxis.coverage.location.value": _add_location_cards, }
[docs]def makeBasicSDMHeader(sdmData, header): """updates the fits header header with the params from sdmData. """ for par in sdmData.getPrimaryTable().iterParams(): if par.value is None or par.utype is None: continue mapKey = par.utype.lower().split(":")[-1] if mapKey.startswith("spectrum."): # WTF? mapKey = mapKey[9:] destKey = _SDM_HEADER_MAPPING.get(mapKey, None) if destKey is None: pass elif callable(destKey): destKey(header, par) else: comment = "" if par.unit: comment = str("[%s]"%par.unit) # TODO: use our serialising infrastructure here value = par.value if isinstance(value, bytes): value = value.decode("ascii") elif isinstance(value, datetime.datetime): value = value.isoformat() header.set(destKey, value, comment)
[docs]def makeSDMFITS(sdmData): """returns sdmData in an SDM-compliant FITS. This only works for SDM version 1. Behaviour with version 2 SDM data is undefined. """ sdmData.getPrimaryTable().IgnoreTableParams = None hdus = fitstable.makeFITSTable(sdmData) sdmHdr = hdus[1].header makeBasicSDMHeader(sdmData, sdmHdr) srcName = fitstable.writeFITSTableFile(hdus) with open(srcName, "rb") as f: data = f.read() os.unlink(srcName) return data
################## Serializing SDM compliant tables
[docs]def formatSDMData(sdmData, format, queryMeta=svcs.emptyQueryMeta): """returns a pair of mime-type and payload for a rendering of the SDM Data instance sdmData in format. (you'll usually use this via //soda#sdm_format) """ destMime = str(format or base.votableType) if queryMeta["tdEnc"] and destMime==base.votableType: destMime = "application/x-votable+xml;serialization=tabledata" formatId = GETDATA_FORMATS.get(destMime, None) if formatId is None: # special or unknown format if destMime=="application/fits": return destMime, makeSDMFITS(sdmData) else: raise base.ValidationError("Cannot format table to %s"%destMime, "FORMAT") # target version is usually set by makeSDMDataForSSARow; but if # people made sdmData in some other way, fall back to default. sdmVersion = getattr(sdmData, "DACHS_SDM_VERSION", base.getConfig("ivoa", "sdmVersion")) if sdmVersion=="1": sdmData.addMeta("_votableRootAttributes", 'xmlns:spec="http://www.ivoa.net/xml/SpectrumModel/v1.01"') resF = io.BytesIO() formats.formatData(formatId, sdmData, resF, acquireSamples=False) return destMime, resF.getvalue()
################## Manipulation of SDM compliant tables # The idea here is that you can push in a table, the function does some # magic, and it returns that table. The getData implementation (see ssap.py) # and some SODA data functions (//soda) # use these functions to provide some spectrum transformations. We # may want to provide some plugin system so people can add their own # transformations, but let's first see someone request that.
[docs]def getFluxColumn(sdmTable): """returns the column containing the flux in sdmTable. """ return sdmTable.tableDef.getByUtypes("spec:Spectrum.Data.FluxAxis.Value")
[docs]def getSpectralColumn(sdmTable): """returns the column containing the spectral coordinate in sdmTable. """ return sdmTable.tableDef.getByUtypes( "spec:Spectrum.Data.SpectralAxis.Value")
[docs]def mangle_cutout(sdmTable, low, high): """returns only those rows from sdmTable for which the spectral coordinate is between low and high. Both low and high must be given. If you actually want half-open intervals, do it in interface code (low=-1 and high=1e308 should do fine). """ spectralColumn = getSpectralColumn(sdmTable) # we may swap limits later; let's memorise whether we have an empty # interval. returnEmpty = low>high spectralUnit = spectralColumn.unit # convert low and high from meters to the unit on the # spectrum's spectral axis converter = base.getSpecConverter("m", spectralUnit) low, high = converter(low), converter(high) # swap limits in case of (freq, energy) -> wavelength low, high = min(low, high), max(low, high) # Whoa! we should have an API that allows replacing table rows safely # (this stuff will blow up when we have indices): spectralName = spectralColumn.name if returnEmpty: sdmTable.rows=[] else: sdmTable.rows=[ row for row in sdmTable.rows if low<=row[spectralName]<=high] specVals = [r[spectralName] for r in sdmTable.rows] if specVals: invConverter = base.getSpecConverter(spectralUnit, "m") lim1, lim2 = invConverter(min(specVals)), invConverter(max(specVals)) specstart, specend = min(lim1, lim2), max(lim1, lim2) sdmTable.setParam("ssa_specext", specend-specstart) sdmTable.setParam("ssa_specstart", specstart) sdmTable.setParam("ssa_specend", specend) sdmTable.setParam("ssa_specmid", (specstart+specend)/2) return sdmTable
[docs]def mangle_fluxcalib(sdmTable, newCalib): """returns sdmTable with a new calibration. Currently, we can only normalize the spectrum to the maximum value. """ newCalib = newCalib.lower() if newCalib==sdmTable.getParam("ssa_fluxcalib").lower(): return sdmTable fluxName = getFluxColumn(sdmTable).name try: # TODO: parameterize this errorName = sdmTable.tableDef.getColumnByUCD( "stat.error;phot.flux;em.opt").name except base.StructureError: # no (known) error column errorName = None if newCalib=="relative": # whoa! we're changing this in place; I guess that should be # legalized for tables in general. normalizer = float(max(row[fluxName] for row in sdmTable.rows)) for row in sdmTable.rows: row[fluxName] = row[fluxName]/normalizer if errorName: for row in sdmTable.rows: row[errorName] = row[errorName]/normalizer sdmTable.setParam("ssa_fluxcalib", "RELATIVE") sdmTable.tableDef = sdmTable.tableDef.copy(sdmTable.tableDef.parent) sdmTable.tableDef.getColumnByName("flux").unit = "" return sdmTable raise base.ValidationError("Do not know how to turn a %s spectrum" " into a %s one."%(sdmTable.getParam("ssa_fluxcalib"), newCalib), "FLUXCALIB")