Hi Krys,
> The error now is:
>
> ...
> File "/usr/progs/ccpnmr/ccpnmr1.0/python/ccp/format/ansig/projectIO.py",
> line 232, in read
> specInfo['dimInfo'][cols[i]]['linewidth'] = returnFloat(cols[i+1])
> KeyError: 'F2'
Ah, I assumed that the nucleus for each dimension is always explicitly
listed for a spectrum, but apparently it isn't... so if it's not there,
I'm now assuming it's 1H. There was also a small problem in another file,
which is fixed now as well... attached are the two corrected files:
python/ccp/format/ansig/projectIO.py
python/ccpnmr/format/converters/AnsigFormat.py
Replace the existing files, and your .ctr and .spd files should be read in
correctly.
Wim
#!/usr/bin/python
"""
======================COPYRIGHT/LICENSE START==========================
projectIO.py: I/O for Ansig project file
Copyright (C) 2004-2005 Wim Vranken (European Bioinformatics Institute)
=======================================================================
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
A copy of this license can be found in ../../../../license/LGPL.license
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
======================COPYRIGHT/LICENSE END============================
for further information, please contact :
- CCPN website (http://www.ccpn.ac.uk/)
- MSD website (http://www.ebi.ac.uk/msd/)
- contact Wim Vranken ([log in to unmask])
=======================================================================
If you are using this software for academic purposes, we suggest
quoting the following references:
===========================REFERENCE START=============================
R. Fogh, J. Ionides, E. Ulrich, W. Boucher, W. Vranken, J.P. Linge, M.
Habeck, W. Rieping, T.N. Bhat, J. Westbrook, K. Henrick, G. Gilliland,
H. Berman, J. Thornton, M. Nilges, J. Markley and E. Laue (2002). The
CCPN project: An interim report on a data model for the NMR community
(Progress report). Nature Struct. Biol. 9, 416-418.
Wim F. Vranken, Wayne Boucher, Tim J. Stevens, Rasmus
H. Fogh, Anne Pajon, Miguel Llinas, Eldon L. Ulrich, John L. Markley, John
Ionides and Ernest D. Laue. The CCPN Data Model for NMR Spectroscopy:
Development of a Software Pipeline. Accepted by Proteins (2004).
===========================REFERENCE END===============================
"""
import os, string
# Import general functions
from memops.universal.Io import getTopDirectory
from memops.universal.Util import returnFloat, returnInt
from memops.universal.Constants import False, True
from ccp.format.ansig.generalIO import AnsigGenericFile
#####################
# Class definitions #
#####################
class AnsigProjectFile(AnsigGenericFile):
    """
    Reader for an Ansig project (control) file.

    After read():
      self.directories  maps a directory alias (the text captured by the
                        '<format>CurlyBrace' pattern) to its value.
      self.files        maps every other control entry name to its value,
                        joined onto the directory of the project file.
      self.spectraFile  AnsigSpectraFile for the 'spectra' entry, or None
                        when there is no such entry or readSpectra is false.

    NOTE(review): self.name, self.format and self.patt are not set in this
    class; presumably they are provided by AnsigGenericFile - confirm.
    """

    # Information on file level

    def initialize(self):
        """Split self.name into directory and file name; reset parsed state."""

        (self.directory, self.fileName) = os.path.split(self.name)

        self.directories = {}
        self.files = {}
        self.spectraFile = None

    def read(self, readSpectra=True, verbose=0):
        """
        Parse the project file self.name.

        readSpectra: when true and a 'spectra' entry was found, also create
                     and read the corresponding AnsigSpectraFile.
        verbose:     1 prints a progress line (and is passed on to the
                     spectra file reader).

        Only lines between 'control' and 'end_control' are interpreted;
        lines matching the 'emptyline' or 'exclamation' patterns are skipped.
        """

        if verbose == 1:
            print("Reading %s project file %s" % (self.format, self.name))

        #
        # Initialize
        #

        inControlBlock = 0

        #
        # Read file... try/finally makes sure the handle is released even
        # when a line cannot be parsed.
        #

        fin = open(self.name)

        try:
            for line in fin.readlines():

                # Ignore empty lines and comments
                if self.patt['emptyline'].search(line) or self.patt['exclamation'].search(line):
                    continue

                cols = line.split()

                # Nothing left after splitting: nothing to interpret
                if not cols:
                    continue

                if cols[0] == 'control':
                    inControlBlock = 1

                elif cols[0] == 'end_control':
                    inControlBlock = 0

                elif inControlBlock:

                    # An entry needs a name and a value; skip it instead of
                    # failing with an IndexError when the value is missing.
                    if len(cols) < 2:
                        print("Warning: ignoring control entry without value %s" % line)
                        continue

                    name = cols[0]
                    value = cols[1]

                    # Substitute directories in value
                    searchObj = self.patt[self.format + 'CurlyBrace'].search(value)
                    if searchObj:
                        directoryName = searchObj.group(1)
                        value = value.replace(searchObj.group(), self.directories[directoryName])

                    # Check if directory...
                    searchObj = self.patt[self.format + 'CurlyBrace'].search(name)
                    if searchObj:
                        self.directories[searchObj.group(1)] = value
                    else:
                        self.files[name] = os.path.join(self.directory, value)

        finally:
            fin.close()

        if readSpectra and 'spectra' in self.files:
            self.spectraFile = AnsigSpectraFile(self.files['spectra'], self)
            self.spectraFile.read(verbose=verbose)

        return
class AnsigSpectraFile(AnsigGenericFile):
    """
    Reader for the Ansig spectra file referenced from a project file.

    After read():
      self.spectra   list with one dictionary per 'spectrum' ... 'end_spectrum'
                     section. Keys set here: 'name', 'numDim', 'dims' (list of
                     dimension names), 'dimInfo' (dimension name -> dictionary
                     with 'nucleus' and optionally 'aliased', 'linewidth'),
                     and when present in the file 'type', 'solvent',
                     'temperature', 'pH', 'equivalent', 'parFile', 'specFile'.
      self.ppmRange  maps the second column of a 'ppm_range' line to a
                     (float, float) tuple of its third and fourth columns.

    NOTE(review): self.name, self.format and self.patt are not set in this
    class; presumably they are provided by AnsigGenericFile - confirm.
    """

    # Information on file level

    def initialize(self, parentFile):
        """Reset parsed state and remember the project file that owns this one."""

        self.spectra = []
        self.ppmRange = {}

        self.parentFile = parentFile

    def read(self, verbose=0):
        """
        Parse the spectra file self.name into self.spectra and self.ppmRange.

        verbose: 1 prints a progress line.

        Lines that are not recognised produce a warning on stdout.
        """

        if verbose == 1:
            print("Reading %s spectra file %s" % (self.format, self.name))

        #
        # Initialize
        #

        inSpectrum = 0
        inMatrix = 0
        specInfo = None

        #
        # Read file... try/finally makes sure the handle is released even
        # when a line cannot be parsed.
        #

        fin = open(self.name)

        try:
            for line in fin.readlines():

                # Ignore empty lines and comments
                if self.patt['emptyline'].search(line) or self.patt['exclamation'].search(line):
                    continue

                cols = line.split()

                # Nothing left after splitting: nothing to interpret
                if not cols:
                    continue

                if cols[0] == 'spectrum':
                    inSpectrum = 1
                    # Second column is the number of dimensions followed by
                    # one extra character, which is stripped off.
                    specInfo = {'name': cols[2], 'numDim': returnInt(cols[1][:-1]), 'dims': [], 'dimInfo': {}}

                    if len(cols) > 3 and cols[3] == 'expnucleus':
                        specInfo['dims'].append(cols[4])
                        specInfo['dimInfo'][cols[4]] = {'nucleus': cols[5]}

                elif cols[0] == 'end_spectrum':
                    inSpectrum = 0
                    # Guard against an end tag without a matching start tag
                    # (and against storing the same spectrum twice).
                    if specInfo is not None:
                        self.spectra.append(specInfo)
                        specInfo = None

                elif cols[0] == 'ppm_range':
                    self.ppmRange[cols[1]] = (returnFloat(cols[2]), returnFloat(cols[3]))

                elif inSpectrum:

                    if cols[0] == 'expnucleus':
                        specInfo['dims'].append(cols[1])
                        specInfo['dimInfo'][cols[1]] = {'nucleus': cols[2]}

                    elif cols[0] == 'experiment':
                        specInfo['type'] = cols[1]

                    elif cols[0] == 'solvent':
                        specInfo['solvent'] = cols[1]
                        specInfo['temperature'] = cols[3]
                        specInfo['pH'] = cols[5]

                    elif cols[0] == 'equivalent':
                        specInfo['equivalent'] = cols[1]

                    elif cols[0] == 'matrix_file':
                        inMatrix = 1
                        self._setDefaultNuclei(specInfo)

                    elif cols[0] == 'end_matrix_file':
                        inMatrix = 0

                    elif inMatrix and cols[0] == 'parameter_file':
                        specInfo['parFile'] = self.getFullFile(cols[1])

                    elif inMatrix and cols[0] == 'file':
                        specInfo['specFile'] = self.getFullFile(cols[1])

                    elif inMatrix and cols[0] == 'aliased':
                        specInfo['dimInfo'][cols[1]]['aliased'] = 1

                    elif inMatrix and cols[0] == 'linewidth':
                        # Columns come in (dimension name, value) pairs; stop
                        # one short so a trailing name without a value is
                        # ignored rather than raising an IndexError.
                        for i in range(1, len(cols) - 1, 2):
                            specInfo['dimInfo'][cols[i]]['linewidth'] = returnFloat(cols[i + 1])

                    elif inMatrix and cols[0] in ['noise_level', 'contour_base']:
                        pass

                    elif cols[0] in ['contour_file', 'scale'] or cols[0].count('{contours}') > 0:
                        pass

                    else:
                        print("Warning: did not handle inside tag %s" % line)

                else:
                    print("Warning: did not handle %s" % line)

        finally:
            fin.close()

        return

    def _setDefaultNuclei(self, specInfo):
        """
        Give every dimension F1..F<numDim> without nucleus information a 1H entry.

        The dimension is added to both 'dimInfo' and 'dims', so that code
        which walks 'dims' by index sees all dimensions. A missing F<n> is
        inserted at list position n-1.
        """

        # Assuming it's 1H if no info was given...
        if len(specInfo['dimInfo']) != specInfo['numDim']:
            for dim in range(1, specInfo['numDim'] + 1):
                dimKey = "F%d" % dim
                if dimKey not in specInfo['dimInfo']:
                    specInfo['dimInfo'][dimKey] = {'nucleus': '1H'}
                    if dimKey not in specInfo['dims']:
                        specInfo['dims'].insert(dim - 1, dimKey)

    def getFullFile(self, fileName):
        """
        Return fileName with a directory alias replaced by its value.

        The alias is whatever the '<format>CurlyBrace' pattern matches; its
        value is looked up in the directories of the parent project file.
        A fileName without alias is returned unchanged.
        """

        # Substitute directories in value
        searchObj = self.patt[self.format + 'CurlyBrace'].search(fileName)
        if searchObj:
            directoryName = searchObj.group(1)
            fileName = fileName.replace(searchObj.group(), self.parentFile.directories[directoryName])

        return fileName
###################
# Main of program #
###################
if __name__ == "__main__":

    # Manual check: read a few reference projects and dump what was parsed.
    files = ['../../reference/ccpNmr/katCam/oxime/oxime.ctr',
             '../../reference/ccpNmr/tim/forWim/g1.ctr',
             '../../reference/ccpNmr/krysEdinburgh/050121/T1_ole_ABA-1A.ctr']

    for inFile in files:

        projectFile = AnsigProjectFile(os.path.join(getTopDirectory(), inFile))
        projectFile.read(verbose=1)

        print(projectFile.directories)
        print(projectFile.files)

        # spectraFile stays None when the project lists no 'spectra' entry
        if projectFile.spectraFile:
            print(projectFile.spectraFile.spectra)
        else:
            print("No spectra file listed in %s" % inFile)
#!/usr/bin/python
"""
======================COPYRIGHT/LICENSE START==========================
AnsigFormat.py: Contains functions specific to Ansig conversions.
Copyright (C) 2004-2005 Wim Vranken (European Bioinformatics Institute)
=======================================================================
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
A copy of this license can be found in ../../../../license/LGPL.license
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
======================COPYRIGHT/LICENSE END============================
for further information, please contact :
- CCPN website (http://www.ccpn.ac.uk/)
- MSD website (http://www.ebi.ac.uk/msd/)
- contact Wim Vranken ([log in to unmask])
=======================================================================
If you are using this software for academic purposes, we suggest
quoting the following references:
===========================REFERENCE START=============================
R. Fogh, J. Ionides, E. Ulrich, W. Boucher, W. Vranken, J.P. Linge, M.
Habeck, W. Rieping, T.N. Bhat, J. Westbrook, K. Henrick, G. Gilliland,
H. Berman, J. Thornton, M. Nilges, J. Markley and E. Laue (2002). The
CCPN project: An interim report on a data model for the NMR community
(Progress report). Nature Struct. Biol. 9, 416-418.
Wim F. Vranken, Wayne Boucher, Tim J. Stevens, Rasmus
H. Fogh, Anne Pajon, Miguel Llinas, Eldon L. Ulrich, John L. Markley, John
Ionides and Ernest D. Laue. The CCPN Data Model for NMR Spectroscopy:
Development of a Software Pipeline. Accepted by Proteins (2004).
===========================REFERENCE END===============================
"""
from ccpnmr.format.converters.DataFormat import DataFormat, IOkeywords
from ccpnmr.format.general.Util import getResName
from ccpnmr.format.general.Util import getResNameText
from ccpnmr.format.general.Constants import volume_kw
from memops.universal.Constants import False, True
import ccp.api.Nmr as Nmr
from ccp.format.ansig import sequenceIO, peaksIO, projectIO
import string, copy
# This is necessary for handling the parameter files!
from ccpnmr.format.converters.AzaraFormat import AzaraFormat
#
# Format specific keywords...
#
# Work on a private copy so the keyword table imported from DataFormat
# is not modified for other formats.
IOkeywords = copy.deepcopy(IOkeywords)

# Extra keyword accepted when reading an Ansig project
IOkeywords['readProject']['peakFile'] = (
    None,
    False,
    'The peak file in export or storage format associated with this project.'
)
class AnsigFormat(DataFormat):
    """
    Ansig-specific hooks for the generic DataFormat converter.

    The methods below set or read attributes on self (self.fileName,
    self.verbose, self.peak, self.rawPeak, ...) that are not created in this
    class. NOTE(review): presumably DataFormat sets these up and calls the
    hooks in the right order - confirm against DataFormat.
    """

    def setFormat(self):
        """Set the format identifier for this converter."""

        self.format = 'ansig'

    def getSequence(self):
        """Read the Ansig sequence file self.fileName into self.sequenceFile."""

        # TODO HERE: have to figure out what to do if project file read...

        self.sequenceFile = sequenceIO.AnsigSequenceFile(self.fileName)
        self.sequenceFile.read()

        if self.verbose == 1:
            print("Reading sequence from %s file %s" % (self.formatLabel, self.fileName))

        #
        # TODO: also have to handle 'details' info: can put in resonanceGroups?
        #

    def createSequenceFile(self):
        """Create an empty Ansig sequence file object for self.fileName."""

        if self.verbose == 1:
            print("Writing sequence to %s file %s" % (self.formatLabel, self.fileName))

        self.sequenceFile = sequenceIO.AnsigSequenceFile(self.fileName)

    def createSequence(self):
        """Append a new sequence for the current chain and make it self.sequence."""

        self.sequenceFile.sequences.append(sequenceIO.AnsigSequence(molName=self.chain.molecule.name, chainCode=self.exportChainCode))

        self.sequence = self.sequenceFile.sequences[-1]

    def setSequenceFileElements(self):
        """Append the current residue (self.seqCode plus its ccpCode) to self.sequence."""

        #
        # TODO: get 'details' information... from resGroup in principle.
        #

        self.sequence.elements.append(sequenceIO.AnsigSequenceElement(self.seqCode, self.residue.molResidue.chemCompHead.ccpCode))

    def getPeaks(self):
        """Read the Ansig peak file self.fileName into self.peakFile."""

        if self.verbose == 1:
            print("Reading peak list from %s file %s" % (self.formatLabel, self.fileName))

        # TODO HERE: have to figure out what to do if project file read...

        # Read in whole file (even if only experiment wanted later on...)
        self.peakFile = peaksIO.AnsigPeakFile(self.fileName)
        self.peakFile.read()

    def createPeakFile(self):
        """Create an empty Ansig peak file object for self.fileName."""

        # Using only use EXPORT format for writing!!!

        if self.verbose == 1:
            print("Writing peak list(s) to %s file %s" % (self.formatLabel, self.fileName))

        self.peakFile = peaksIO.AnsigPeakFile(self.fileName)
        self.peakFile.setSpectrumInfo('Export', 0, self.numPeakDim)

    def getFullProject(self, fileName, peakFile=None, peakKeyWdList=None, sequenceKeyWds=None):
        """
        Read an Ansig project file and import what it refers to.

        fileName:       the Ansig project file.
        peakFile:       peak file to read for each spectrum. When not given
                        and self.minimalPrompts is false, the user is asked.
        peakKeyWdList:  accepted but not used in this method.
        sequenceKeyWds: optional dictionary of extra keywords passed on to
                        self.readSequence.

        The sequence is read when the project lists a 'sequence' file. For
        every spectrum the parameter file is read via AzaraFormat, and the
        peaks for that spectrum are read when a peak file is available.
        Spectra without a (existing) parameter file are skipped with a
        warning. The experiment type stored for a spectrum is not used here.

        Returns self.project.
        """

        if self.verbose == 1:
            print("Reading %s project from file %s" % (self.formatLabel, fileName))

        self.file = projectIO.AnsigProjectFile(fileName)
        self.file.read()

        #
        # Ask if peakFile has to be read if not given...
        #

        if not peakFile and not self.minimalPrompts:
            getPeakFile = self.messageReporter.showYesNo("Read a peak file", "Do you want to read in an associated peak file (in export or storage format)?")

            if getPeakFile:
                interaction = self.multiDialog.FileName(self.guiParent, component='peaks', format=self.format, selectionText="Select peak file", title='Select peak file')
                peakFile = interaction.file

        #
        # Process elements - call functions 'as if' reading real file, but pass
        # on objects created during project import
        #

        # Not every project lists a sequence file: use get() so a missing
        # entry is skipped instead of raising a KeyError.
        sequenceFileName = self.file.files.get('sequence')

        if sequenceFileName:

            if sequenceKeyWds:
                keywds = sequenceKeyWds
            else:
                keywds = {}

            self.readSequence(fileName=sequenceFileName, minimalPrompts=self.minimalPrompts, **keywds)

        if self.file.spectraFile:

            spectraFile = self.file.spectraFile
            azaraFormat = AzaraFormat(self.project, self.guiParent)

            for spectrumInfo in spectraFile.spectra:

                #
                # TODO: use 'solvent', 'temperature', 'pH' to set up sample info!
                #

                specName = spectrumInfo['name']

                # 'parFile' is only set when the spectra file lists a parameter file
                parFile = spectrumInfo.get('parFile')

                if not parFile:
                    print("  Warning: no parameter file listed for spectrum %s - reading skipped" % specName)
                    continue

                #
                # Use ppmRange if there is an aliased dimension...
                # assuming that dimension order in ANSIG file same as in Azara one.
                #

                aliasing = {}
                for dim in range(0, len(spectrumInfo['dims'])):
                    dimName = spectrumInfo['dims'][dim]
                    dimInfo = spectrumInfo['dimInfo'][dimName]
                    if 'aliased' in dimInfo:
                        nucleus = dimInfo['nucleus']
                        if nucleus in spectraFile.ppmRange:
                            aliasing[dim] = spectraFile.ppmRange[nucleus]

                if not self.fileExists(parFile, verbose=False):
                    print("  Warning: parameter file %s does not exist - reading skipped" % parFile)
                    continue

                dataSource = azaraFormat.readProcPars(fileName=parFile, expName=specName, aliasing=aliasing, minimalPrompts=self.minimalPrompts)

                if dataSource:

                    #
                    # Reset some values. DO NOT USE the mapping information -
                    #

                    azaraFormat.experiment = None

                    #
                    # Now try to read in the relevant peak information
                    #

                    if peakFile:
                        self.readPeaks(fileName=peakFile, dataSource=dataSource, specName=specName, minimalPrompts=self.minimalPrompts)

        #
        # Avoid reading project again
        #

        self.file = None

        return self.project

    #
    # Functions different to default functions in DataFormat
    #

    def thisPeakValid(self):
        """Return 1 when the raw peak belongs to spectrum self.specName, else 0."""

        #
        # Check whether peak from correct spectrum
        #

        if self.specName != self.rawPeak.specName:
            return 0

        return 1

    def setPeakIntensity(self):
        """Create a volume PeakIntensity for self.peak unless the raw volume is 0."""

        # PeakIntensity attributes

        if self.rawPeak.volume != 0:
            peakInt = Nmr.PeakIntensity(self.peak, value=self.rawPeak.volume, method=self.methods[self.format]['Volume'])

            # Assuming always volume for ansig
            peakInt.intensityType = volume_kw

    def setPeakDim(self):
        """Link the peakDim for the current raw dimension and set its ppm value."""

        dataDimRef = self.dataDimRefs[self.rawPeakDimIndex]

        # dataDim.dim is offset by one relative to the index into peakDims
        peakDimIndex = dataDimRef.dataDim.dim - 1

        self.peakDim = self.peak.peakDims[peakDimIndex]

        self.peakDim.dataDimRef = dataDimRef
        self.peakDim.value = self.rawPeak.ppm[self.rawPeakDimIndex]

    def setPeakExtras(self):
        """Flag self.peak when the raw peak is marked as deleted."""

        if self.rawPeak.deleted == 1:
            # TODO: is this the correct way to 'delete' a peak?
            self.peak.delete = True

    def getPeakResNames(self):
        """Set self.resNames from the raw assignment of the current dimension."""

        if self.rawPeak.assignResNum[self.rawPeakDimIndex] != '':
            self.resNames = [getResName(' ', self.rawPeak.assignResNum[self.rawPeakDimIndex], self.rawPeak.assignAtom[self.rawPeakDimIndex])]
        else:
            self.resNames = []

    def selectPeakLists(self):
        """Set self.numPeakDim to the largest dimensionality in self.peakLists."""

        #
        # Use all peaklists for Ansig (can handle multiple per file)
        # Have to find maximum number of dims first
        #

        self.numPeakDim = 0

        for peakList in self.peakLists:
            numPeakDim = len(peakList.dataSource.dataDims)
            if self.numPeakDim < numPeakDim:
                self.numPeakDim = numPeakDim

    def setPeakFileInfo(self):
        """
        Register the current peak list in the peak file.

        Stores the dimensionality (taken from the first peak) and a spectrum
        name of at most 12 characters taken from the peak list name. When
        that name is already used in the file the user is asked for another
        one until the name is not in use.

        NOTE(review): assumes self.peakList has at least one peak - confirm
        that callers never pass an empty peak list.
        """

        self.numDim = len(self.peakList.peaks[0].peakDims)
        self.peakFile.numDims.append(self.numDim)

        #
        # Limit to 12 chars... should really have popup or something
        #

        newSpecName = self.peakList.name[:12]

        # Check against all names in the file, not only the original one,
        # so the user cannot pick a name that clashes with another spectrum.
        while self.peakFile.specNames.count(newSpecName) > 0:
            newSpecName = self.dataEntry.askString("Enter experiment name", "Ansig peak list export: name '%s' exists already. Please enter another name:" % newSpecName, newSpecName, self.guiParent)

        self.peakFile.specNames.append(newSpecName)
        self.specName = newSpecName

    def setPeakFilePeakIntensity(self):
        """Set self.volume from a volume intensity of self.peak (0.0 if none)."""

        self.volume = 0.0

        # Only use obvious volume. Ignore otherwise
        for peakIntensity in self.peak.peakIntensities:
            if peakIntensity.intensityType == volume_kw:
                self.volume = peakIntensity.value

    def setPeakFilePeakExtras(self):
        """Reset the per-peak lists that are filled one entry per dimension."""

        self.assignResNum = []
        self.assignRes = []
        self.assignAtom = []
        self.ppm = []

    def setPeakFilePeakDimInfo(self):
        """
        Append ppm value and assignment of self.peakDim to the per-peak lists.

        Exactly one entry is appended to each of self.ppm, self.assignResNum,
        self.assignRes and self.assignAtom, so the lists stay aligned by
        dimension. Unassigned dimensions, and dimensions whose contribution
        cannot be handled, get an empty residue number and atom name.
        """

        self.ppm.append(self.peakDim.getValue())

        #
        # Transfer assignment
        #

        peakDimContribs = self.peakDim.peakDimContribs

        code3Letter = seqCode = atomName = ''

        if peakDimContribs and isinstance(peakDimContribs[0], Nmr.PeakDimContribN):

            # Do not return here: the empty assignment still has to be
            # appended below, otherwise the assignment lists get shorter
            # than self.ppm and later dimensions shift position.
            print("  Warning: cannot handle multiple resonances for one peakDim contribution. Ignored.")

        elif peakDimContribs:

            atomSetIds = []
            atomIds = []

            for peakDimContrib in peakDimContribs:

                resonance = peakDimContrib.resonance

                if resonance.resonanceSet:
                    code3Letter = resonance.resonanceSet.atomSets[0].atoms[0].residue.molResidue.chemCompHead.ccpCode
                    code3Letter = code3Letter.capitalize()
                else:
                    code3Letter = ""

                if resonance in self.resonanceToAtoms:

                    resonanceToAtoms = self.resonanceToAtoms[resonance]

                    if self.useOriginalResNames:

                        if len(resonanceToAtoms) > 1:
                            print("  Warning: Only first original resName taken for peak %d." % self.peak.serial)

                        resonanceToAtom = resonanceToAtoms[0]
                        atomIds.append([resonanceToAtom.chain.code, resonanceToAtom.seqId, resonanceToAtom.atomName, None, code3Letter])

                    else:

                        for resonanceToAtom in resonanceToAtoms:

                            #
                            # Check atomSet: use this if the same for all...
                            #

                            atomSetId = resonanceToAtom.getAtomSetId()

                            if atomSetId:
                                # Build a new list: += would extend the list
                                # object returned by getAtomSetId() in place.
                                atomSetId = atomSetId + [code3Letter]
                                if atomSetIds.count(atomSetId) == 0:
                                    atomSetIds.append(atomSetId)

                            #
                            # Always keep track of single ones...
                            #

                            atomId = resonanceToAtom.getAtomId() + [code3Letter]

                            if atomIds.count(atomId) == 0:
                                atomIds.append(atomId)

            chain = None

            if len(atomIds) > 1 and len(atomSetIds) == 1:

                #
                # Use atomSet if all the same
                #

                chain = atomSetIds[0][0]
                seqId = atomSetIds[0][1]
                atomName = atomSetIds[0][2]
                code3Letter = atomSetIds[0][4]

            elif atomIds:

                #
                # Else use separate name
                #

                if len(atomIds) > 1:
                    print("  Warning: Multiple assignment possibilities for peak %d - only first used." % self.peak.serial)

                atomId = atomIds[0]

                chain = atomId[0]
                seqId = atomId[1]
                atomName = atomId[2]
                code3Letter = atomId[4]

            else:
                print("  Warning: no atom link for resonance %s" % getResNameText(resonance))

            #
            # Use chain and seqcode mapping
            #

            if chain:
                #chainCode = self.chainDict[chain][0]
                seqCode = self.chainDict[chain][1] + seqId - 1

        self.assignResNum.append(str(seqCode))
        self.assignRes.append(code3Letter)
        self.assignAtom.append(atomName)

    def createPeakFilePeak(self):
        """Append the current peak, built from the per-peak lists, to the peak file."""

        self.peakFile.npeaks += 1

        self.peakFile.peaks.append(peaksIO.AnsigPeak(self.peakNum, 0, self.ppm, self.volume, self.assignResNum, self.assignRes, self.assignAtom, self.numDim, self.specName))

    def setPeakDimOrder(self):
        """Set self.peakDimOrder: for 1 to 7 dimensions, the reversed index order."""

        self.peakDimOrder = {

            1: [0],
            2: [1, 0],
            3: [2, 1, 0],
            4: [3, 2, 1, 0],
            5: [4, 3, 2, 1, 0],
            6: [5, 4, 3, 2, 1, 0],
            7: [6, 5, 4, 3, 2, 1, 0]

        }

    def getSpecificExperimentList(self):
        """Return (all NMR experiments in the project, label text for the prompt)."""

        experimentList = self.project.findAllNmrExperiments()
        interactionText = "Existing experiments:"

        return (experimentList, interactionText)

    def getPresetChainMapping(self, chainList):
        """Return the preset chain mapping used for single-chain formats."""

        return self.getSingleChainFormatPresetChainMapping(chainList)
|