From beef185276b812aea97f4e91ff2b43dc6f1f2403 Mon Sep 17 00:00:00 2001
From: Marco Biasini <marco.biasini@unibas.ch>
Date: Mon, 28 Mar 2011 17:53:27 +0200
Subject: [PATCH] improve SEQRES import and AlignToSEQRES

This is based on exhaustive testing on the complete set of PDB files.
There are still a few unresolved issues. However, it is not clear to me
whether the problems are due to errors in the PDB files or to our
misunderstanding.
---
 modules/io/src/mol/pdb_reader.cc  |  9 ++++++---
 modules/seq/alg/pymod/__init__.py | 11 +++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/modules/io/src/mol/pdb_reader.cc b/modules/io/src/mol/pdb_reader.cc
index bf2e50bd8..1ea2c56b8 100644
--- a/modules/io/src/mol/pdb_reader.cc
+++ b/modules/io/src/mol/pdb_reader.cc
@@ -151,9 +151,12 @@ void PDBReader::ParseSeqRes(const StringRef& line, int line_num)
     conop::CompoundPtr compound=comp_lib->FindCompound(trimmed.str(), 
                                                        conop::Compound::PDB);
     if (!compound) {
-      LOG_WARNING("unknown residue '" << trimmed << "' in SEQRES record. "
-                  "Setting one-letter-code to X");
-      curr_seq.Append('X');
+      if (rname!=StringRef("UNK", 3)) {
+     
+        LOG_WARNING("unknown residue '" << trimmed << "' in SEQRES record. "
+                    "Setting one-letter-code to '?'");
+      }
+      curr_seq.Append('?');
       continue;
     }
     curr_seq.Append(compound->GetOneLetterCode());
diff --git a/modules/seq/alg/pymod/__init__.py b/modules/seq/alg/pymod/__init__.py
index bb1ea4e82..1862fc812 100644
--- a/modules/seq/alg/pymod/__init__.py
+++ b/modules/seq/alg/pymod/__init__.py
@@ -17,14 +17,13 @@ def AlignToSEQRES(chain, seqres):
   """
   from ost import seq
   from ost import mol
-  residues=chain.residues
+  view=chain.Select('ligand=false and peptide=true')
+  residues=view.residues
   if len(residues)==0:
-    return None
+    return seq.CreateAlignment()
   fragments=[residues[0].one_letter_code]
   for r1, r2 in zip(residues[:-2], residues[1:]):
-    if not r2.IsPeptideLinking() or r2.IsLigand():
-      continue
-    if not mol.InSequence(r1, r2):
+    if not mol.InSequence(r1.handle, r2.handle):
       fragments.append('')
     fragments[-1]+=r2.one_letter_code
   ss=str(seqres)
@@ -41,4 +40,4 @@ def AlignToSEQRES(chain, seqres):
                              seq.CreateSequence('atoms', aln_seq))
 
     
-  
\ No newline at end of file
+  
-- 
GitLab