Dear All
I sent this to Wim first, but since I don't have a reply I guess he is
away or busy, so:
I have written some Python code that parses Marvin peak lists from
the xplor-nih Marvin automated assignment protocol. Basically, the output
consists of a list of peak assignments with a figure of merit for each
assignment. How should I go about incorporating the import of this data
via the format converter?
The output is as follows (this is just what my test rig dumps):
peak_id from_h_shift from_heavy_shift to_h_shift \
from_shift_id from_h_residue_id from_h_residue_type(redundant)
from_h_atom_name \
to_shift_id to_h_residue_id to_h_residue_type(redundant)
to_h_atom_name \
figure_of_merit
1000 8.405 118.266 4.267858 70 77 VAL HN 389 59 LYS HA 0.02
1000 8.405 118.266 4.267858 70 77 VAL HN 416 64 ALA HA 0.06
1000 8.405 118.266 4.267858 70 77 VAL HN 569 131 LEU HA 0.0
1000 8.405 118.266 4.267858 86 95 GLN HN 389 59 LYS HA 0.0
1000 8.405 118.266 4.267858 86 95 GLN HN 416 64 ALA HA 0.0
1000 8.405 118.266 4.267858 86 95 GLN HN 569 131 LEU HA 0.0
1001 8.408 118.195 3.945858 70 77 VAL HN 529 106 ALA HA 0.0
1001 8.408 118.195 3.945858 70 77 VAL HN 540 109 THR HA 0.0
1001 8.408 118.195 3.945858 86 95 GLN HN 529 106 ALA HA 0.0
1001 8.408 118.195 3.945858 86 95 GLN HN 540 109 THR HA 0.0
regards
gary
n.b. attached is the test_data and python file. The program is
beautifully small and intelligible; python is such a neat language....
-------------------------8<------------------marvin_reader.py-----------------------------8<-----------------------------
#!/usr/bin/python
import re

try:
    # Python 2
    from UserDict import DictMixin
except ImportError:
    # Python 3: the UserDict module was removed; the Mapping ABC is the
    # documented successor of DictMixin for read-only mappings.
    from collections.abc import Mapping as DictMixin
class Record(DictMixin, object):
    """One node of a parsed Marvin peak file.

    A record has a ``type`` (the first word of its line), ``values`` (the
    rest of the line as a single string) and a list of child records.

    The record also behaves as a read-only mapping from a child type to the
    list of all children of that type, so ``record['peakAssign']`` returns
    every ``peakAssign`` child and ``'peakAssign' in record`` tests whether
    there is at least one.  No ``__setitem__``/``__delitem__`` is defined, so
    item assignment and deletion are not supported.
    """

    def __init__(self, type, values, leaf=False):
        """Create a record.

        type   -- record keyword, e.g. 'peak' or 'likelihood'
        values -- remainder of the record's line, as one string
        leaf   -- True for '-name value' attribute lines, which have no
                  'end' terminator and therefore no children
        """
        self.leaf = leaf
        self.type = type
        self.values = values
        self.children = []

    def dump(self, level=1):
        """Return a multi-line, tab-indented text rendering of the record.

        Every line produced by a child is prefixed with ``level`` tabs, and
        children are dumped with ``level + 1``; lines of deeper descendants
        are therefore prefixed once per enclosing record.
        """
        lines = ['%s : %s' % (self.type, self.values)]
        indent = '\t' * level
        for child in self.children:
            for line in child.dump(level=level + 1).split('\n'):
                lines.append('%s%s' % (indent, line))
        return '\n'.join(lines)

    def as_float(self):
        """Return ``values`` converted to a float (ValueError if it is not one)."""
        return float(self.values)

    def as_string(self, separator=' '):
        """Return the repr() of ``values``.

        ``separator`` is accepted for interface compatibility but is not used.
        """
        return repr(self.values)

    def is_container(self):
        """Return the number of children; truthy when the record has any."""
        return len(self.children)

    def __str__(self):
        return self.dump()

    def __repr__(self):
        return '%s(type=%r, values=%r, children=%d)' % (
            self.__class__.__name__, self.type, self.values,
            len(self.children))

    def keys(self):
        """Return the distinct child types, in order of first appearance."""
        seen = set()
        ordered = []
        for child in self.children:
            if child.type not in seen:
                seen.add(child.type)
                ordered.append(child.type)
        return ordered

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def __getitem__(self, key):
        """Return the list of children whose type is ``key``.

        Raises KeyError when there is no such child.
        """
        matches = [child for child in self.children if child.type == key]
        if not matches:
            raise KeyError(key)
        return matches
class Marvin_reader(object):
    """Parser for Marvin peak files.

    The format is line based: '!' starts a comment, a line starting with
    '-' is a leaf attribute of the enclosing record, 'end' closes the
    enclosing record, and any other non-blank line opens a new record.
    """

    def __init__(self, handler=None):
        """Create a reader.

        handler -- optional object with a handle_peak(record) method, used
                   by dump() when dump() is not given a handler of its own
        """
        self.handler = handler
        self.records = []

    def read(self, file):
        """Parse ``file`` (any iterable of lines) and append its records."""
        self.records.extend(self.pass_data(file))

    def dump(self, handler=None):
        """Report every record that has been read.

        Each record is passed to ``handler.handle_peak``; when no handler is
        given the one supplied to the constructor is used, and when there is
        none at all a plain listing is printed.
        """
        if handler is None:
            handler = self.handler

        if handler is None:
            print('dumping marvin peak records')
            print('---------------------------')
            print('')
            print('%d marvin peak records' % len(self.records))
            print('')
            for record in self.records:
                print(record)
                print('')
        else:
            for record in self.records:
                handler.handle_peak(record)

    def pass_data(self, file):
        """Yield one Record per top-level record found in ``file``."""
        # Nested records are parsed by handing the *same* iterator down, so
        # make sure we hold a one-shot iterator even if given a list.
        lines = iter(file)
        for line in lines:
            bare_line = self.clear_comments(line).strip()
            if not bare_line:
                continue
            tokens = bare_line.split()
            yield self.process_record(type=tokens[0],
                                      fields=' '.join(tokens[1:]),
                                      file=lines)

    def process_record(self, type, fields, file):
        """Consume lines from ``file`` up to the matching 'end' line.

        type   -- keyword that opened the record
        fields -- remainder of the opening line, as one string
        file   -- line iterator positioned just after the opening line

        Returns the Record with its children attached.  Running out of
        lines before an 'end' is seen ends the record silently.
        """
        record = Record(type, fields)
        lines = iter(file)  # no-op for an iterator, see pass_data()
        for line in lines:
            bare_line = self.clear_comments(line).strip()
            if not bare_line:
                continue
            tokens = bare_line.split()
            if bare_line[0] == '-':
                # '-name value ...' is a leaf attribute; drop the dash.
                record.children.append(
                    Record(type=tokens[0][1:],
                           values=' '.join(tokens[1:]),
                           leaf=True))
            elif tokens[0] == 'end':
                break
            else:
                child = self.process_record(type=tokens[0],
                                            fields=' '.join(tokens[1:]),
                                            file=lines)
                record.children.append(child)
        return record

    def clear_comments(self, line):
        """Return ``line`` without its '!' comment and surrounding whitespace.

        Everything from the first '!' onwards is discarded; a line that is
        only a comment yields ''.
        """
        return line.partition('!')[0].strip()
class Peak_handler(object):
    """Base class for objects that receive parsed peak records."""

    def handle_peak(self, record):
        """Process one peak record; the base implementation does nothing."""
        pass
class Dump_peak_handler(Peak_handler):
    """Handler that prints the text dump of every record it receives."""

    def handle_peak(self, record):
        """Print ``record.dump()``."""
        print(record.dump())
class Dump_filter(Peak_handler):
    """Handler that forwards only selected records to another handler.

    Subclasses override check_peak_ok(); the base implementation returns
    None, so an un-subclassed filter forwards nothing.
    """

    def __init__(self, sub_handler):
        """sub_handler -- handler whose handle_peak() receives accepted records."""
        self.sub_handler = sub_handler

    def check_peak_ok(self, record):
        """Return a true value when ``record`` should be forwarded."""
        pass

    def handle_peak(self, record):
        """Forward ``record`` to the sub-handler if check_peak_ok() accepts it."""
        if self.check_peak_ok(record):
            self.sub_handler.handle_peak(record)
class Assigned_peak_filter(Dump_filter):
    """Filter that forwards only records having a 'peakAssign' child."""

    # The constructor is inherited unchanged from Dump_filter.

    def check_peak_ok(self, record):
        """Return True when ``record`` contains at least one 'peakAssign'."""
        return 'peakAssign' in record
class Print_assignments(Peak_handler):
    """Handler that prints one line per peak assignment.

    Each line holds, separated by single spaces: the peak id, the from-proton,
    from-heavy-atom and to-proton shifts of the peak, then shift id, residue
    id, residue type and atom name of the 'from' axis, the same four fields
    for the 'to' axis, and finally the assignment likelihood.
    """

    class Axis_assignment(object):
        """The assignment of one axis of a peak to an atom."""

        def __init__(self, shift_id, residue_id, residue_type, atom_name):
            self.shift_id = shift_id
            self.residue_id = residue_id
            self.residue_type = residue_type
            self.atom_name = atom_name

    class Assignment_pair(object):
        """A 'from' and a 'to' axis assignment belonging together."""

        def __init__(self, from_assignment, to_assignment):
            self.from_assignment = from_assignment
            self.to_assignment = to_assignment

    # Matches e.g. '3dn70_from(77_VAL_HN)' or '3dn389_to(59_LYS_HA)'.
    _AXIS_RE = re.compile(
        r'^(.+?)_(?:from|to)\(([^_()]+)_([^_()]+)_([^()]+)\)$')

    def __init__(self, prefix='3d[c|n]'):
        """prefix -- regular expression removed from peak and shift ids.

        NOTE(review): inside a character class '|' is a literal, so the
        default also matches '3d|'; '3d[cn]' was presumably intended.
        """
        self.prefix_re = re.compile(prefix)

    def decompose_asignment_string(self, string):
        """Parse one axis assignment such as '3dn70_from(77_VAL_HN)'.

        Returns an Axis_assignment whose shift id has the prefix removed and
        whose fields are all strings.  Raises ValueError when ``string`` does
        not have the expected shape.
        """
        match = self._AXIS_RE.match(string)
        if match is None:
            raise ValueError('cannot parse axis assignment %r' % (string,))
        shift_id, residue_id, residue_type, atom_name = match.groups()
        shift_id = self.prefix_re.sub('', shift_id)
        return self.Axis_assignment(shift_id, residue_id, residue_type,
                                    atom_name)

    def values_to_assignments(self, string):
        """Split the values of a 'peakAssign' record into its two axes.

        ``string`` holds three whitespace separated words: the assignment
        id (ignored), the 'from' axis and the 'to' axis.  Returns a
        (from_assignment, to_assignment) tuple of Axis_assignment objects;
        raises ValueError when there are not exactly three words.
        """
        (peak_assignment, from_proton, to_proton) = string.split()
        return (self.decompose_asignment_string(from_proton),
                self.decompose_asignment_string(to_proton))

    def handle_peak(self, record):
        """Print one line for every 'peakAssign' child of ``record``.

        Raises KeyError when the record lacks one of the shift attributes,
        has no 'peakAssign' child, or an assignment has no 'likelihood'.
        """
        peak_id = self.prefix_re.sub('', record.values)
        from_proton_shift = record['fromProtonShift'][0].as_float()
        from_heavy_shift = record['fromHeavyatomShift'][0].as_float()
        to_proton_shift = record['toProtonShift'][0].as_float()

        for assignment in record['peakAssign']:
            merit = assignment['likelihood'][0].as_float()
            (from_assignment, to_assignment) = \
                self.values_to_assignments(assignment.values)
            columns = (
                peak_id,
                from_proton_shift, from_heavy_shift, to_proton_shift,
                from_assignment.shift_id, from_assignment.residue_id,
                from_assignment.residue_type, from_assignment.atom_name,
                to_assignment.shift_id, to_assignment.residue_id,
                to_assignment.residue_type, to_assignment.atom_name,
                merit,
            )
            print(' '.join(str(column) for column in columns))
if __name__ == '__main__':
    import sys

    # Parse a Marvin peak file and print every assignment it contains.
    # The bundled sample file is used unless a path is given as the first
    # command-line argument.
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = '../test_data/simple_peaks.dat'

    reader = Marvin_reader()
    # The with-block closes the file even if parsing fails.
    with open(filename) as peak_file:
        reader.read(peak_file)
    reader.dump(Assigned_peak_filter(Print_assignments()))
-------------------------8<------------------simple_peaks.dat(test
data)-----------------------------8<-----------------------------
!
! Choosing best 50 of 500 total structs, based on num longrange peaks with violations < 0.500000 A
!
! { Num violated longrange peaks in all structures
!
! fraction of
! 500 datapoints
! 1.0 |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! | XX
! | XXXXXX
! | XXXXXXXXXXX
! 0.0 +--------------------
! 437 500
!
! }
!
! { Num violated longrange peaks in converged structures
!
! fraction of
! 50 datapoints
! 1.0 |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! |
! | X
! | XXXX
! | XXXX
! | XXXXX
! | X XXXXX
! 0.0 +--------------------
! 437 462
!
! } {Detailed violation report:
! k7r_pass3_163.pdb has 437 violations
! k7r_pass3_31.pdb has 442 violations
! k7r_pass3_421.pdb has 446 violations
! k7r_pass3_127.pdb has 448 violations
! k7r_pass3_234.pdb has 448 violations
! k7r_pass3_135.pdb has 448 violations
! k7r_pass3_138.pdb has 455 violations
! k7r_pass3_269.pdb has 456 violations
! k7r_pass3_494.pdb has 456 violations
! k7r_pass3_252.pdb has 456 violations
! k7r_pass3_498.pdb has 456 violations
! k7r_pass3_107.pdb has 457 violations
! k7r_pass3_422.pdb has 457 violations
! k7r_pass3_123.pdb has 457 violations
! k7r_pass3_332.pdb has 457 violations
! k7r_pass3_160.pdb has 457 violations
! k7r_pass3_54.pdb has 457 violations
! k7r_pass3_312.pdb has 457 violations
! k7r_pass3_453.pdb has 458 violations
! k7r_pass3_289.pdb has 458 violations
! k7r_pass3_197.pdb has 459 violations
! k7r_pass3_139.pdb has 459 violations
! k7r_pass3_177.pdb has 459 violations
! k7r_pass3_247.pdb has 459 violations
! k7r_pass3_179.pdb has 459 violations
! k7r_pass3_298.pdb has 459 violations
! k7r_pass3_20.pdb has 459 violations
! k7r_pass3_253.pdb has 459 violations
! k7r_pass3_185.pdb has 459 violations
! k7r_pass3_117.pdb has 460 violations
! k7r_pass3_397.pdb has 460 violations
! k7r_pass3_154.pdb has 460 violations
! k7r_pass3_193.pdb has 460 violations
! k7r_pass3_438.pdb has 460 violations
! k7r_pass3_391.pdb has 460 violations
! k7r_pass3_66.pdb has 460 violations
! k7r_pass3_392.pdb has 460 violations
! k7r_pass3_67.pdb has 460 violations
! k7r_pass3_339.pdb has 461 violations
! k7r_pass3_215.pdb has 461 violations
! k7r_pass3_53.pdb has 461 violations
! k7r_pass3_264.pdb has 461 violations
! k7r_pass3_183.pdb has 461 violations
! k7r_pass3_92.pdb has 461 violations
! k7r_pass3_14.pdb has 462 violations
! k7r_pass3_99.pdb has 462 violations
! k7r_pass3_343.pdb has 462 violations
! k7r_pass3_355.pdb has 462 violations
! k7r_pass3_381.pdb has 462 violations
! k7r_pass3_22.pdb has 462 violations}
!
! { Longrange peak likelihoods
!
! fraction of
! 321 datapoints
! 1.0 |
! |
! |
! |
! |
! |
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |X
! |XX
! |XXX X
! 0.0 +--------------------
! 0 1
!
! } {Defined region of structure is (not name h*)
! Number of high-likelihood long range peaks/residue in defined region: 0.020134} {Long-range NOE discrimination: 79.439252 %}
!
peak 3dn1
-bounds 6.00 1.80
-intensity 1086521.000000
-fromProtonShift 9.919000
-toProtonShift 6.903858
-fromHeavyatomShift 130.183000
-note from file k7r_NPeaks_test2.PCK, peak 1
-note No 3d assignments found.
-note No 3d assignments found.
-note No 3d assignments found.
-note degeneracy 0, with previousLikelihood range 0.900000 .. 2.000000
-note degeneracy 0, with previousLikelihood range -1.000000 .. 2.000000
-note degeneracy 0, with previousLikelihood range 0.900000 .. 2.000000
end
peak 3dn10
-bounds 6.00 1.80
-intensity 1093447.000000
-fromProtonShift 9.929000
-toProtonShift 1.299858
-fromHeavyatomShift 130.089000
-note from file k7r_NPeaks_test2.PCK, peak 10
-note No 3d assignments found.
-note No 3d assignments found.
-note No 3d assignments found.
-note degeneracy 0, with previousLikelihood range 0.900000 .. 2.000000
-note degeneracy 0, with previousLikelihood range -1.000000 .. 2.000000
-note degeneracy 0, with previousLikelihood range 0.900000 .. 2.000000
end
peak 3dn1000
-bounds 5.00 1.80
-intensity 2450068.000000
-fromProtonShift 8.405000
-toProtonShift 4.267858
-fromHeavyatomShift 118.266000
-note from file k7r_NPeaks_test2.PCK, peak 1000
-note degeneracy 2, with previousLikelihood range 0.900000 .. 2.000000
-note degeneracy 6, with previousLikelihood range -1.000000 .. 2.000000
-note degeneracy 4, with previousLikelihood range 0.900000 .. 2.000000
peakAssign 3dn1000_0 3dn70_from(77_VAL_HN) 3dn389_to(59_LYS_HA)
-likelihood 0.020000
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 1
end
peakAssign 3dn1000_1 3dn70_from(77_VAL_HN) 3dn416_to(64_ALA_HA)
-likelihood 0.060000
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 0
end
peakAssign 3dn1000_2 3dn70_from(77_VAL_HN) 3dn569_to(131_LEU_HA)
-likelihood 0.000000
-good
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 0
end
peakAssign 3dn1000_3 3dn86_from(95_GLN_HN) 3dn389_to(59_LYS_HA)
-likelihood 0.000000
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 1
end
peakAssign 3dn1000_4 3dn86_from(95_GLN_HN) 3dn416_to(64_ALA_HA)
-likelihood 0.000000
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 0
end
peakAssign 3dn1000_5 3dn86_from(95_GLN_HN) 3dn569_to(131_LEU_HA)
-likelihood 0.000000
-unfoldedFromProtonPeakPosition 8.405000
-unfoldedFromHeavyatomPeakPosition 118.266000
-unfoldedToProtonPeakPosition 4.267858
-numFiltersFailed 0
end
end
peak 3dn1001
-bounds 6.00 1.80
-intensity 1020316.000000
-fromProtonShift 8.408000
-toProtonShift 3.945858
-fromHeavyatomShift 118.195000
-note from file k7r_NPeaks_test2.PCK, peak 1001
-note degeneracy 4, with previousLikelihood range 0.900000 .. 2.000000
-note degeneracy 4, with previousLikelihood range -1.000000 .. 2.000000
-note degeneracy 4, with previousLikelihood range 0.900000 .. 2.000000
peakAssign 3dn1001_0 3dn70_from(77_VAL_HN) 3dn529_to(106_ALA_HA)
-likelihood 0.000000
-good
-unfoldedFromProtonPeakPosition 8.408000
-unfoldedFromHeavyatomPeakPosition 118.195000
-unfoldedToProtonPeakPosition 3.945858
-numFiltersFailed 0
end
peakAssign 3dn1001_1 3dn70_from(77_VAL_HN) 3dn540_to(109_THR_HA)
-likelihood 0.000000
-good
-unfoldedFromProtonPeakPosition 8.408000
-unfoldedFromHeavyatomPeakPosition 118.195000
-unfoldedToProtonPeakPosition 3.945858
-numFiltersFailed 0
end
peakAssign 3dn1001_2 3dn86_from(95_GLN_HN) 3dn529_to(106_ALA_HA)
-likelihood 0.000000
-good
-unfoldedFromProtonPeakPosition 8.408000
-unfoldedFromHeavyatomPeakPosition 118.195000
-unfoldedToProtonPeakPosition 3.945858
-numFiltersFailed 0
end
peakAssign 3dn1001_3 3dn86_from(95_GLN_HN) 3dn540_to(109_THR_HA)
-likelihood 0.000000
-unfoldedFromProtonPeakPosition 8.408000
-unfoldedFromHeavyatomPeakPosition 118.195000
-unfoldedToProtonPeakPosition 3.945858
-numFiltersFailed 0
end
end
--
-------------------------------------------------------------------
Dr Gary Thompson
Astbury Centre for Structural Molecular Biology,
University of Leeds, Astbury Building,
Leeds, LS2 9JT, West-Yorkshire, UK Tel. +44-113-3433024
email: [log in to unmask] Fax +44-113-2331407
-------------------------------------------------------------------
|