diff --git a/modules/conop/doc/cleanup.rst b/modules/conop/doc/cleanup.rst
new file mode 100644
index 0000000000000000000000000000000000000000..35b20e5705b248a995ec61c5e06604c53df0767f
--- /dev/null
+++ b/modules/conop/doc/cleanup.rst
@@ -0,0 +1,8 @@
+:mod:`conop.cleanup <ost.conop.cleanup>` -- Sanitize structures
+================================================================================
+
+.. module:: ost.conop.cleanup
+   :synopsis: Contains functions to sanitize (cleanup) structures by using 
+        information from the compound library.
+
+.. autofunction:: ost.conop.cleanup.Cleanup
\ No newline at end of file
diff --git a/modules/conop/doc/conop.rst b/modules/conop/doc/conop.rst
index 8c3f413469872a1d95f9da98e37f4e21ffb24bfc..fb72db2645a92ea70f3db4f686c0f33a35c7e956 100644
--- a/modules/conop/doc/conop.rst
+++ b/modules/conop/doc/conop.rst
@@ -20,3 +20,4 @@ In this module
  aminoacid
  connectivity
  compoundlib
+ cleanup
\ No newline at end of file
diff --git a/modules/conop/pymod/CMakeLists.txt b/modules/conop/pymod/CMakeLists.txt
index 5affa4b457d780b7368b34a097bec832d2c3f36c..97de72d7a52f1a74233a9782e746bd596620ffc7 100644
--- a/modules/conop/pymod/CMakeLists.txt
+++ b/modules/conop/pymod/CMakeLists.txt
@@ -7,4 +7,4 @@ set(OST_CONOP_PYMOD_SOURCES
   export_ring_finder.cc
 )
 
-pymod(NAME conop CPP ${OST_CONOP_PYMOD_SOURCES} PY __init__.py)
\ No newline at end of file
+pymod(NAME conop CPP ${OST_CONOP_PYMOD_SOURCES} PY __init__.py cleanup.py)
diff --git a/modules/conop/pymod/cleanup.py b/modules/conop/pymod/cleanup.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ecf9c7ef12b8bb4fce889a5fb8062aab2364eda
--- /dev/null
+++ b/modules/conop/pymod/cleanup.py
@@ -0,0 +1,167 @@
+from ost import conop, mol
+  
+def Cleanup(entity, strip_water=True, canonicalize=True, remove_ligands=True):
+  """
+  Return a cleaned-up (simplified) copy of the protein structure *entity*.
+
+  :param entity: The structure to clean; ``entity.Copy()`` is edited, not *entity*
+  :param strip_water: Whether to remove water from the structure
+  :param canonicalize: Whether to strip off modifications of amino acids and map
+     them back to their parent standard amino acid, e.g. selenium methionine to
+     methionine. For more complex amino acids, where the relation between the
+     modified and the standard parent amino acid is not known, sidechain atoms
+     are removed. D-peptide-linking residues are completely removed as well.
+  :param remove_ligands: Whether to remove ligands from the structure
+  :raises RuntimeError: If the default builder has no ``compound_lib`` attribute
+  :return: a cleaned version of the entity
+  """
+  #setup: the compound library of the default builder is required
+  builder = conop.GetBuilder()
+  if not hasattr(builder, "compound_lib") :
+    raise RuntimeError( "Cannot cleanup structure, since the default builder doesn't use the compound library")
+  compound_lib = builder.compound_lib
+  clean_entity = entity.Copy()
+  ed = clean_entity.EditXCS()
+  #remove water residues
+  if strip_water:
+    _StripWater(clean_entity, ed)
+  #replace modified residues before removing ligands to avoid removing MSE and others
+  if canonicalize:
+    _CanonicalizeResidues(clean_entity, ed, compound_lib)
+  #remove non-peptide-linking hetatom residues that are not water
+  if remove_ligands:
+    _RemoveLigands(clean_entity, ed)
+  return clean_entity
+
+
+def _StripWater(clean_entity, ed):
+  """
+  Delete every valid residue of *clean_entity* whose chemical class is water,
+  using the editor *ed*, then call ``ed.UpdateICS()``.
+  """
+  for residue in clean_entity.residues:
+    is_water = residue.IsValid() and residue.chem_class == mol.WATER
+    if is_water:
+      ed.DeleteResidue(residue.handle)
+  ed.UpdateICS()
+  
+def _RemoveLigands(clean_entity, ed):
+  """
+  Delete valid non-peptide-linking, non-water residues whose first atom is a hetatom.
+  """
+  for residue in clean_entity.residues:
+    #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein()
+    is_ligand = (residue.IsValid() and not residue.IsPeptideLinking() and
+                 residue.atoms[0].is_hetatom and residue.chem_class != mol.WATER)
+    if is_ligand:
+      ed.DeleteResidue(residue.handle)
+  ed.UpdateICS()
+  
+def _CanonicalizeResidues(clean_entity, ed, compound_lib) :
+  """
+  Map modified amino acids of *clean_entity* back to their standard parent
+  amino acid (e.g. selenium methionine to methionine), looking the parent up
+  in *compound_lib* and editing through *ed*. Where the relation between the
+  modified and the parent amino acid is not known, sidechain atoms are
+  removed. D-peptide-linking residues are completely removed as well.
+  """
+
+  for res in clean_entity.residues:
+    if res.IsValid() and res.IsPeptideLinking() :
+      parent_olc = res.one_letter_code
+      if parent_olc == "X" :
+        _DeleteSidechain(res, ed)
+        for atom in res.atoms:
+          atom.is_hetatom = False
+      else:
+        parent_tlc = conop.OneLetterCodeToResidueName(parent_olc)
+        parent_res = compound_lib.FindCompound(parent_tlc)
+        if not parent_res:
+          _DeleteSidechain(res, ed)
+          for atom in res.atoms:
+            atom.is_hetatom = False
+          print "Removing sidechain of %s, because it has not been found in the compound library"% parent_tlc
+        else:
+          #collect the names of the non-hydrogen atoms of the residue
+          modif_atom_names = set([atom.name for atom in res.atoms
+                                                  if atom.element != "H" and atom.element != "D" ])
+          #if the res is the first or last take all the atoms from the parent res
+          if res.FindAtom("OXT").IsValid() :
+            parent_atom_names = set([atom.name for atom in parent_res.atom_specs
+                                                  if atom.element != "H" and atom.element != "D" ])
+          else:
+            parent_atom_names = set([atom.name for atom in parent_res.atom_specs
+                                                    if atom.element != "H" and atom.element != "D" and not atom.is_leaving ])
+          additional_parent_atoms = parent_atom_names - modif_atom_names
+          additional_modif_atoms = modif_atom_names - parent_atom_names
+          #WHEN mmCIF WILL BE USED, CHANGE IsPeptideLinking() TO IsProtein(), TO EXCLUDE LIGANDS FROM CANONICALISATION
+          if res.atoms[0].is_hetatom :
+            old_name = res.name
+            ed.RenameResidue(res, parent_tlc)
+            if additional_parent_atoms:
+              if additional_modif_atoms:
+                #replacement
+                _Replacement(res, ed, old_name)
+              else:
+                #deletion
+                _Deletion(res, ed)
+            elif additional_modif_atoms:
+              #addition
+              _Addition(res, ed, additional_modif_atoms)
+            else:
+              #unchanged, later check stereochemistry or H atoms
+              _Unchanged(res, ed)
+          #the res is a peptide but not a ligand (is a protein res)
+          else:
+            if additional_parent_atoms:# if the sidechain is incomplete
+              _DeleteSidechain(res, ed)
+  ed.UpdateICS()
+  return
+  
+def _Replacement(res, ed, old_name):
+  """Turn the SE atom of a former MSE into SD; otherwise strip the sidechain."""
+  #TEMP ONLY MSE
+  if old_name != "MSE":
+    _DeleteSidechain(res, ed)
+    return
+  for atm in res.atoms:
+    atm.is_hetatom = False
+  selenium = res.FindAtom("SE")
+  if not selenium.IsValid():
+    _DeleteSidechain(res, ed)
+    return
+  ed.InsertAtom(res, "SD", selenium.pos, "S", selenium.occupancy, selenium.b_factor) #S radius=~1;SE=~1.2
+  ed.DeleteAtom(selenium)
+  
+def _Deletion(res, ed):
+  """Strip the sidechain of *res*, then clear is_hetatom on the atoms of *res*."""
+  _DeleteSidechain(res, ed)
+  for atm in res.atoms:
+    atm.is_hetatom = False
+  
+def _Addition(res, ed, additional_modif_atoms):
+  """Delete the atoms named in *additional_modif_atoms*, then clear is_hetatom."""
+  for extra_name in additional_modif_atoms:
+    extra_atom = res.FindAtom(extra_name)
+    if extra_atom.IsValid():
+      ed.DeleteAtom(extra_atom)
+  for atm in res.atoms:
+    atm.is_hetatom = False
+  
+def _Unchanged(res, ed):
+  """Delete a D-peptide-linking *res*; else strip sidechain and clear is_hetatom."""
+  if res.chem_class == mol.D_PEPTIDE_LINKING:
+    ed.DeleteResidue(res)
+    return
+  _DeleteSidechain(res, ed)
+  for atm in res.atoms:
+    atm.is_hetatom = False
+  
+def _DeleteSidechain(res, ed):
+  """Delete every atom of *res* that is not named N, CA, C, O or CB."""
+  for atm in res.atoms:
+    if atm.name not in ('CA', 'CB', 'C', 'N', 'O'):
+      ed.DeleteAtom(atm)
+  
+#visible functions; __all__ must list names as strings for "from ... import *"
+__all__ = ['Cleanup']
diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt
index aa06791bf0be4d64640499010893d9db179f3e33..1af92df10be705d46354506d72df5c0c645be732 100644
--- a/modules/conop/tests/CMakeLists.txt
+++ b/modules/conop/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ set(OST_CONOP_UNIT_TESTS
   tests.cc
   test_builder.cc
   test_compound.py
+  test_cleanup.py
 )
 
 ost_unittest(MODULE conop
diff --git a/modules/conop/tests/sample_noligands.pdb b/modules/conop/tests/sample_noligands.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..7fd199276d45124bbe2b2bc6ddf7d47b780d9401
--- /dev/null
+++ b/modules/conop/tests/sample_noligands.pdb
@@ -0,0 +1,65 @@
+HETATM    1  N   MSE A   1      16.152  35.832  19.337  1.00 68.79           N
+ANISOU    1  N   MSE A   1     9680   7396   9061   1038   -716    -25       N
+HETATM    2  CA  MSE A   1      16.961  36.379  20.419  1.00 66.57           C
+ANISOU    2  CA  MSE A   1     9387   7212   8694   1121   -806     40       C
+HETATM    3  C   MSE A   1      18.345  36.796  19.931  1.00 62.52           C
+ANISOU    3  C   MSE A   1     8685   6846   8225   1266   -870   -107       C
+HETATM    4  O   MSE A   1      19.030  36.049  19.227  1.00 60.36           O
+ANISOU    4  O   MSE A   1     8358   6511   8064   1396   -922   -219       O
+HETATM    5  CB  MSE A   1      17.100  35.372  21.563  1.00 69.52           C
+ANISOU    5  CB  MSE A   1     9961   7405   9049   1204   -927    192       C
+HETATM    6  CG  MSE A   1      17.608  35.983  22.861  1.00 69.50           C
+ANISOU    6  CG  MSE A   1     9977   7515   8914   1239  -1006    292       C
+HETATM    7 SE   MSE A   1      16.174  36.321  24.145  0.70149.62          SE
+ANISOU    7 SE   MSE A   1    20306  17658  18883   1013   -917    492      SE
+HETATM    8  CE  MSE A   1      16.439  38.231  24.424  1.00101.32           C
+ANISOU    8  CE  MSE A   1    13992  11855  12651    950   -853    413       C
+ATOM      9  N   GLY A   2      21.960  55.913  14.093  1.00 32.26           N
+ANISOU    9  N   GLY A   2     3786   4717   3755   -417   -110   -697       N
+ATOM     10  CA  GLY A   2      21.067  57.037  14.316  1.00 33.47           C
+ANISOU   10  CA  GLY A   2     4071   4728   3919   -478    -95   -611       C
+ATOM     11  C   GLY A   2      20.632  57.066  15.769  1.00 28.81           C
+ANISOU   11  C   GLY A   2     3536   4062   3349   -382   -126   -599       C
+ATOM     12  O   GLY A   2      19.474  57.360  16.089  1.00 26.62           O
+ANISOU   12  O   GLY A   2     3358   3653   3104   -343   -120   -528       O
+HETATM  694  N   MLY A   3      26.382  48.690   2.460  1.00 30.43           N
+ANISOU  694  N   MLY A   3     2388   5919   3254   -646    317  -1852       N
+HETATM  695  CA  MLY A   3      27.776  48.333   2.142  1.00 32.62           C
+ANISOU  695  CA  MLY A   3     2438   6444   3514   -645    358  -2093       C
+HETATM  696  CB  MLY A   3      28.523  49.535   1.556  1.00 34.25           C
+ANISOU  696  CB  MLY A   3     2578   6874   3563   -918    442  -2107       C
+HETATM  697  CG  MLY A   3      28.053  50.058   0.208  1.00 51.47           C
+ANISOU  697  CG  MLY A   3     4819   9152   5586  -1157    527  -2046       C
+HETATM  698  CD  MLY A   3      29.170  50.911  -0.420  1.00 65.96           C
+ANISOU  698  CD  MLY A   3     6527  11268   7265  -1416    622  -2140       C
+HETATM  699  CE  MLY A   3      28.648  51.881  -1.478  1.00 78.91           C
+ANISOU  699  CE  MLY A   3     8300  12955   8728  -1697    684  -1985       C
+HETATM  700  NZ  MLY A   3      27.996  53.093  -0.885  1.00 83.04           N
+ANISOU  700  NZ  MLY A   3     9040  13267   9244  -1780    633  -1724       N
+HETATM  701  CH1 MLY A   3      27.490  53.898  -2.008  1.00 82.17           C
+ANISOU  701  CH1 MLY A   3     9059  13197   8964  -2028    679  -1578       C
+HETATM  702  CH2 MLY A   3      29.059  53.885  -0.248  1.00 83.95           C
+ANISOU  702  CH2 MLY A   3     9084  13473   9342  -1885    652  -1769       C
+HETATM  703  C   MLY A   3      28.551  47.875   3.386  1.00 35.38           C
+ANISOU  703  C   MLY A   3     2698   6760   3986   -436    275  -2175       C
+HETATM  704  O   MLY A   3      29.262  46.867   3.369  1.00 36.09           O
+ANISOU  704  O   MLY A   3     2632   6928   4151   -274    255  -2373       O
+HETATM   24  N   DHA A   4      26.289  27.329   2.438  1.00 21.02           N
+HETATM   25  CA  DHA A   4      26.295  27.688   3.823  1.00 20.17           C
+HETATM   26  CB  DHA A   4      27.128  28.481   4.578  1.00 25.40           C
+HETATM   27  C   DHA A   4      25.128  27.215   4.536  1.00 14.98           C
+HETATM   28  O   DHA A   4      24.918  27.318   5.770  1.00 15.17           O
+ATOM   1454  N   CYS A   5      35.381  45.298  39.476  1.00 31.23           N
+ATOM   1455  CA  CYS A   5      35.559  43.873  39.703  1.00 26.90           C
+ATOM   1456  C   CYS A   5      34.291  43.354  40.319  1.00 28.31           C
+ATOM   1457  OXT CYS A   5      33.569  44.119  40.933  1.00 32.71           O
+ATOM   1458  CB  CYS A   5      36.760  43.592  40.596  1.00 27.44           C
+ATOM   1460  H   CYS A   5      34.717  45.766  40.024  1.00  0.00           H
+HETATM 1345  N   DAL A   6      16.130  53.915  24.417  1.00  8.63           N
+HETATM 1346  CA  DAL A   6      16.958  55.083  24.235  1.00 24.17           C
+HETATM 1347  CB  DAL A   6      16.321  56.394  24.733  1.00 30.20           C
+HETATM 1348  C   DAL A   6      17.335  55.218  22.790  1.00 32.54           C
+HETATM 1349  O   DAL A   6      16.693  54.552  21.946  1.00 27.41           O
+HETATM 1350  OXT DAL A   6      18.286  55.960  22.546  1.00 18.81           O
+HETATM   36  O   HOH A  19       0.180  48.781   4.764  1.00 23.28           O
+END
diff --git a/modules/conop/tests/sample_nowater.pdb b/modules/conop/tests/sample_nowater.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..0f8c440aa83c10dec2cfffe71cbe524a15d0af15
--- /dev/null
+++ b/modules/conop/tests/sample_nowater.pdb
@@ -0,0 +1,71 @@
+HETATM    1  N   MSE A   1      16.152  35.832  19.337  1.00 68.79           N
+ANISOU    1  N   MSE A   1     9680   7396   9061   1038   -716    -25       N
+HETATM    2  CA  MSE A   1      16.961  36.379  20.419  1.00 66.57           C
+ANISOU    2  CA  MSE A   1     9387   7212   8694   1121   -806     40       C
+HETATM    3  C   MSE A   1      18.345  36.796  19.931  1.00 62.52           C
+ANISOU    3  C   MSE A   1     8685   6846   8225   1266   -870   -107       C
+HETATM    4  O   MSE A   1      19.030  36.049  19.227  1.00 60.36           O
+ANISOU    4  O   MSE A   1     8358   6511   8064   1396   -922   -219       O
+HETATM    5  CB  MSE A   1      17.100  35.372  21.563  1.00 69.52           C
+ANISOU    5  CB  MSE A   1     9961   7405   9049   1204   -927    192       C
+HETATM    6  CG  MSE A   1      17.608  35.983  22.861  1.00 69.50           C
+ANISOU    6  CG  MSE A   1     9977   7515   8914   1239  -1006    292       C
+HETATM    7 SE   MSE A   1      16.174  36.321  24.145  0.70149.62          SE
+ANISOU    7 SE   MSE A   1    20306  17658  18883   1013   -917    492      SE
+HETATM    8  CE  MSE A   1      16.439  38.231  24.424  1.00101.32           C
+ANISOU    8  CE  MSE A   1    13992  11855  12651    950   -853    413       C
+ATOM      9  N   GLY A   2      21.960  55.913  14.093  1.00 32.26           N
+ANISOU    9  N   GLY A   2     3786   4717   3755   -417   -110   -697       N
+ATOM     10  CA  GLY A   2      21.067  57.037  14.316  1.00 33.47           C
+ANISOU   10  CA  GLY A   2     4071   4728   3919   -478    -95   -611       C
+ATOM     11  C   GLY A   2      20.632  57.066  15.769  1.00 28.81           C
+ANISOU   11  C   GLY A   2     3536   4062   3349   -382   -126   -599       C
+ATOM     12  O   GLY A   2      19.474  57.360  16.089  1.00 26.62           O
+ANISOU   12  O   GLY A   2     3358   3653   3104   -343   -120   -528       O
+HETATM  694  N   MLY A   3      26.382  48.690   2.460  1.00 30.43           N
+ANISOU  694  N   MLY A   3     2388   5919   3254   -646    317  -1852       N
+HETATM  695  CA  MLY A   3      27.776  48.333   2.142  1.00 32.62           C
+ANISOU  695  CA  MLY A   3     2438   6444   3514   -645    358  -2093       C
+HETATM  696  CB  MLY A   3      28.523  49.535   1.556  1.00 34.25           C
+ANISOU  696  CB  MLY A   3     2578   6874   3563   -918    442  -2107       C
+HETATM  697  CG  MLY A   3      28.053  50.058   0.208  1.00 51.47           C
+ANISOU  697  CG  MLY A   3     4819   9152   5586  -1157    527  -2046       C
+HETATM  698  CD  MLY A   3      29.170  50.911  -0.420  1.00 65.96           C
+ANISOU  698  CD  MLY A   3     6527  11268   7265  -1416    622  -2140       C
+HETATM  699  CE  MLY A   3      28.648  51.881  -1.478  1.00 78.91           C
+ANISOU  699  CE  MLY A   3     8300  12955   8728  -1697    684  -1985       C
+HETATM  700  NZ  MLY A   3      27.996  53.093  -0.885  1.00 83.04           N
+ANISOU  700  NZ  MLY A   3     9040  13267   9244  -1780    633  -1724       N
+HETATM  701  CH1 MLY A   3      27.490  53.898  -2.008  1.00 82.17           C
+ANISOU  701  CH1 MLY A   3     9059  13197   8964  -2028    679  -1578       C
+HETATM  702  CH2 MLY A   3      29.059  53.885  -0.248  1.00 83.95           C
+ANISOU  702  CH2 MLY A   3     9084  13473   9342  -1885    652  -1769       C
+HETATM  703  C   MLY A   3      28.551  47.875   3.386  1.00 35.38           C
+ANISOU  703  C   MLY A   3     2698   6760   3986   -436    275  -2175       C
+HETATM  704  O   MLY A   3      29.262  46.867   3.369  1.00 36.09           O
+ANISOU  704  O   MLY A   3     2632   6928   4151   -274    255  -2373       O
+HETATM   24  N   DHA A   4      26.289  27.329   2.438  1.00 21.02           N
+HETATM   25  CA  DHA A   4      26.295  27.688   3.823  1.00 20.17           C
+HETATM   26  CB  DHA A   4      27.128  28.481   4.578  1.00 25.40           C
+HETATM   27  C   DHA A   4      25.128  27.215   4.536  1.00 14.98           C
+HETATM   28  O   DHA A   4      24.918  27.318   5.770  1.00 15.17           O
+ATOM   1454  N   CYS A   5      35.381  45.298  39.476  1.00 31.23           N
+ATOM   1455  CA  CYS A   5      35.559  43.873  39.703  1.00 26.90           C
+ATOM   1456  C   CYS A   5      34.291  43.354  40.319  1.00 28.31           C
+ATOM   1457  OXT CYS A   5      33.569  44.119  40.933  1.00 32.71           O
+ATOM   1458  CB  CYS A   5      36.760  43.592  40.596  1.00 27.44           C
+ATOM   1460  H   CYS A   5      34.717  45.766  40.024  1.00  0.00           H
+HETATM 1345  N   DAL A   6      16.130  53.915  24.417  1.00  8.63           N
+HETATM 1346  CA  DAL A   6      16.958  55.083  24.235  1.00 24.17           C
+HETATM 1347  CB  DAL A   6      16.321  56.394  24.733  1.00 30.20           C
+HETATM 1348  C   DAL A   6      17.335  55.218  22.790  1.00 32.54           C
+HETATM 1349  O   DAL A   6      16.693  54.552  21.946  1.00 27.41           O
+HETATM 1350  OXT DAL A   6      18.286  55.960  22.546  1.00 18.81           O
+HETATM   29  C1  GOL A  17       3.793  59.768   8.209  1.00 31.00           C
+HETATM   30  O1  GOL A  17       3.244  58.473   8.337  1.00 27.42           O
+HETATM   31  C2  GOL A  17       4.701  60.020   9.406  1.00 26.81           C
+HETATM   32  O2  GOL A  17       5.573  58.919   9.512  1.00 26.44           O
+HETATM   33  C3  GOL A  17       5.505  61.287   9.156  1.00 24.74           C
+HETATM   34  O3  GOL A  17       6.429  61.468  10.222  1.00 31.06           O
+HETATM   35 CL    CL A  18      11.844  59.221  16.755  0.79 32.84          CL
+END
diff --git a/modules/conop/tests/sample_test_cleanup.pdb b/modules/conop/tests/sample_test_cleanup.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..cce7fe04d4fe753d68c9e73620fea8617ce07059
--- /dev/null
+++ b/modules/conop/tests/sample_test_cleanup.pdb
@@ -0,0 +1,72 @@
+HETATM    1  N   MSE A   1      16.152  35.832  19.337  1.00 68.79           N
+ANISOU    1  N   MSE A   1     9680   7396   9061   1038   -716    -25       N
+HETATM    2  CA  MSE A   1      16.961  36.379  20.419  1.00 66.57           C
+ANISOU    2  CA  MSE A   1     9387   7212   8694   1121   -806     40       C
+HETATM    3  C   MSE A   1      18.345  36.796  19.931  1.00 62.52           C
+ANISOU    3  C   MSE A   1     8685   6846   8225   1266   -870   -107       C
+HETATM    4  O   MSE A   1      19.030  36.049  19.227  1.00 60.36           O
+ANISOU    4  O   MSE A   1     8358   6511   8064   1396   -922   -219       O
+HETATM    5  CB  MSE A   1      17.100  35.372  21.563  1.00 69.52           C
+ANISOU    5  CB  MSE A   1     9961   7405   9049   1204   -927    192       C
+HETATM    6  CG  MSE A   1      17.608  35.983  22.861  1.00 69.50           C
+ANISOU    6  CG  MSE A   1     9977   7515   8914   1239  -1006    292       C
+HETATM    7 SE   MSE A   1      16.174  36.321  24.145  0.70149.62          SE
+ANISOU    7 SE   MSE A   1    20306  17658  18883   1013   -917    492      SE
+HETATM    8  CE  MSE A   1      16.439  38.231  24.424  1.00101.32           C
+ANISOU    8  CE  MSE A   1    13992  11855  12651    950   -853    413       C
+ATOM      9  N   GLY A   2      21.960  55.913  14.093  1.00 32.26           N
+ANISOU    9  N   GLY A   2     3786   4717   3755   -417   -110   -697       N
+ATOM     10  CA  GLY A   2      21.067  57.037  14.316  1.00 33.47           C
+ANISOU   10  CA  GLY A   2     4071   4728   3919   -478    -95   -611       C
+ATOM     11  C   GLY A   2      20.632  57.066  15.769  1.00 28.81           C
+ANISOU   11  C   GLY A   2     3536   4062   3349   -382   -126   -599       C
+ATOM     12  O   GLY A   2      19.474  57.360  16.089  1.00 26.62           O
+ANISOU   12  O   GLY A   2     3358   3653   3104   -343   -120   -528       O
+HETATM  694  N   MLY A   3      26.382  48.690   2.460  1.00 30.43           N
+ANISOU  694  N   MLY A   3     2388   5919   3254   -646    317  -1852       N
+HETATM  695  CA  MLY A   3      27.776  48.333   2.142  1.00 32.62           C
+ANISOU  695  CA  MLY A   3     2438   6444   3514   -645    358  -2093       C
+HETATM  696  CB  MLY A   3      28.523  49.535   1.556  1.00 34.25           C
+ANISOU  696  CB  MLY A   3     2578   6874   3563   -918    442  -2107       C
+HETATM  697  CG  MLY A   3      28.053  50.058   0.208  1.00 51.47           C
+ANISOU  697  CG  MLY A   3     4819   9152   5586  -1157    527  -2046       C
+HETATM  698  CD  MLY A   3      29.170  50.911  -0.420  1.00 65.96           C
+ANISOU  698  CD  MLY A   3     6527  11268   7265  -1416    622  -2140       C
+HETATM  699  CE  MLY A   3      28.648  51.881  -1.478  1.00 78.91           C
+ANISOU  699  CE  MLY A   3     8300  12955   8728  -1697    684  -1985       C
+HETATM  700  NZ  MLY A   3      27.996  53.093  -0.885  1.00 83.04           N
+ANISOU  700  NZ  MLY A   3     9040  13267   9244  -1780    633  -1724       N
+HETATM  701  CH1 MLY A   3      27.490  53.898  -2.008  1.00 82.17           C
+ANISOU  701  CH1 MLY A   3     9059  13197   8964  -2028    679  -1578       C
+HETATM  702  CH2 MLY A   3      29.059  53.885  -0.248  1.00 83.95           C
+ANISOU  702  CH2 MLY A   3     9084  13473   9342  -1885    652  -1769       C
+HETATM  703  C   MLY A   3      28.551  47.875   3.386  1.00 35.38           C
+ANISOU  703  C   MLY A   3     2698   6760   3986   -436    275  -2175       C
+HETATM  704  O   MLY A   3      29.262  46.867   3.369  1.00 36.09           O
+ANISOU  704  O   MLY A   3     2632   6928   4151   -274    255  -2373       O
+HETATM   24  N   DHA A   4      26.289  27.329   2.438  1.00 21.02           N
+HETATM   25  CA  DHA A   4      26.295  27.688   3.823  1.00 20.17           C
+HETATM   26  CB  DHA A   4      27.128  28.481   4.578  1.00 25.40           C
+HETATM   27  C   DHA A   4      25.128  27.215   4.536  1.00 14.98           C
+HETATM   28  O   DHA A   4      24.918  27.318   5.770  1.00 15.17           O
+ATOM   1454  N   CYS A   5      35.381  45.298  39.476  1.00 31.23           N
+ATOM   1455  CA  CYS A   5      35.559  43.873  39.703  1.00 26.90           C
+ATOM   1456  C   CYS A   5      34.291  43.354  40.319  1.00 28.31           C
+ATOM   1457  OXT CYS A   5      33.569  44.119  40.933  1.00 32.71           O
+ATOM   1458  CB  CYS A   5      36.760  43.592  40.596  1.00 27.44           C
+ATOM   1460  H   CYS A   5      34.717  45.766  40.024  1.00  0.00           H
+HETATM 1345  N   DAL A   6      16.130  53.915  24.417  1.00  8.63           N
+HETATM 1346  CA  DAL A   6      16.958  55.083  24.235  1.00 24.17           C
+HETATM 1347  CB  DAL A   6      16.321  56.394  24.733  1.00 30.20           C
+HETATM 1348  C   DAL A   6      17.335  55.218  22.790  1.00 32.54           C
+HETATM 1349  O   DAL A   6      16.693  54.552  21.946  1.00 27.41           O
+HETATM 1350  OXT DAL A   6      18.286  55.960  22.546  1.00 18.81           O
+HETATM   29  C1  GOL A  17       3.793  59.768   8.209  1.00 31.00           C
+HETATM   30  O1  GOL A  17       3.244  58.473   8.337  1.00 27.42           O
+HETATM   31  C2  GOL A  17       4.701  60.020   9.406  1.00 26.81           C
+HETATM   32  O2  GOL A  17       5.573  58.919   9.512  1.00 26.44           O
+HETATM   33  C3  GOL A  17       5.505  61.287   9.156  1.00 24.74           C
+HETATM   34  O3  GOL A  17       6.429  61.468  10.222  1.00 31.06           O
+HETATM   35 CL    CL A  18      11.844  59.221  16.755  0.79 32.84          CL
+HETATM   36  O   HOH A  19       0.180  48.781   4.764  1.00 23.28           O
+END
diff --git a/modules/conop/tests/test_cleanup.py b/modules/conop/tests/test_cleanup.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee6ab57cae413a550e1cc6abdbc7875a1d7d3210
--- /dev/null
+++ b/modules/conop/tests/test_cleanup.py
@@ -0,0 +1,162 @@
+import unittest
+from ost import geom, conop, io
+from ost.conop import cleanup
+
+class TestCleanUp(unittest.TestCase):
+
+  def setUp(self):
+    self.comp_lib=conop.GetBuilder().compound_lib
+    self.ent = io.LoadPDB("sample_test_cleanup.pdb")
+    self.ent_no_wat = io.LoadPDB("sample_nowater.pdb")
+    self.ent_no_lig = io.LoadPDB("sample_noligands.pdb")
+
+  def testStripWater(self):
+    self.new_ent = cleanup.Cleanup(self.ent, strip_water=True, canonicalize=False, remove_ligands=False)
+    self.assertEqual( self.new_ent.residue_count, self.ent_no_wat.residue_count )
+    self.assertTrue( self.new_ent.residues[0].IsValid() )
+    self.assertEqual( self.new_ent.residues[0].qualified_name, self.ent_no_wat.residues[0].qualified_name)
+    self.assertTrue( self.new_ent.residues[1].IsValid() )
+    self.assertEqual( self.new_ent.residues[1].qualified_name, self.ent_no_wat.residues[1].qualified_name)
+    self.assertTrue( self.new_ent.residues[2].IsValid() )
+    self.assertEqual( self.new_ent.residues[2].qualified_name, self.ent_no_wat.residues[2].qualified_name)
+    self.assertTrue( self.new_ent.residues[3].IsValid() )
+    self.assertEqual( self.new_ent.residues[3].qualified_name, self.ent_no_wat.residues[3].qualified_name)
+    self.assertTrue( self.new_ent.residues[4].IsValid() )
+    self.assertEqual( self.new_ent.residues[4].qualified_name, self.ent_no_wat.residues[4].qualified_name)
+    self.assertTrue( self.new_ent.residues[5].IsValid() )
+    self.assertEqual( self.new_ent.residues[5].qualified_name, self.ent_no_wat.residues[5].qualified_name)
+    self.assertTrue( self.new_ent.residues[6].IsValid() )
+    self.assertEqual( self.new_ent.residues[6].qualified_name, self.ent_no_wat.residues[6].qualified_name)
+    self.assertTrue( self.new_ent.residues[7].IsValid() )
+    self.assertEqual( self.new_ent.residues[7].qualified_name, self.ent_no_wat.residues[7].qualified_name)
+
+  def testCanonicalize(self):
+    self.new_ent = cleanup.Cleanup(self.ent, strip_water=False, canonicalize=True, remove_ligands=False)
+    #standard residue must be the same
+    self.gly = self.ent.residues[1]
+    self.new_gly = self.new_ent.residues[1]
+    self.assertTrue(self.new_gly.IsValid())
+    self.assertTrue(self.new_gly.IsPeptideLinking())
+    self.assertEqual(self.gly.atom_count, self.new_gly.atom_count)
+    #TEMP del sidechain of incomplete residue and OXT if present
+    self.new_cys = self.new_ent.residues[4]
+    self.new_cys_atoms = set([atm.name for atm in self.new_cys.atoms])
+    self.assertEqual( len(self.new_cys_atoms), 4, msg = repr(self.new_cys_atoms))
+    self.assertTrue( "CB" in self.new_cys_atoms)
+    self.assertTrue( "CA" in self.new_cys_atoms)
+    self.assertTrue( "C" in self.new_cys_atoms)
+    self.assertFalse( "OXT" in self.new_cys_atoms)
+    self.assertTrue( "N" in self.new_cys_atoms)
+    #test replacement of atoms
+    self.mse = self.ent.residues[0]
+#    self.assertTrue( self.mse.IsValid())
+#    self.assertTrue( self.mse.IsPeptideLinking())
+    self.sel = self.mse.FindAtom("SE")
+#    self.assertTrue( self.sel.IsValid())
+    self.met = self.new_ent.residues[0]
+    self.assertTrue(self.met.IsValid())
+    self.assertEqual(self.mse.atom_count, self.met.atom_count)
+    self.assertEqual(self.met.name, "MET")
+    self.assertEqual(self.met.one_letter_code, "M")
+    self.assertTrue(self.met.IsPeptideLinking())
+    self.sul = self.met.FindAtom("SD")
+    self.assertTrue(self.sul.IsValid())
+    self.assertTrue(geom.Equal(self.sul.pos,self.sel.pos), msg = "sul:%s sel:%s"%(str(self.sul.pos), str(self.sel.pos)) )
+    self.assertEqual(self.sul.element, "S")
+#    self.AssertTrue( sul.mass == conop.Conopology.Instance().GetDefaultAtomMass("S"))
+#    self.AssertTrue( sul.radius == conop.Conopology.Instance().GetDefaultAtomRadius("S"))
+    for atm in self.met.atoms:
+      self.assertFalse( atm.is_hetatom)
+    #test addition
+    self.mly = self.ent.residues[2]
+#    self.assertTrue( self.mly.IsValid())
+#    self.assertTrue( self.mly.IsPeptideLinking())
+    self.new_lys = self.new_ent.residues[2]
+    self.assertTrue(self.new_lys.IsValid())
+    self.assertTrue(self.new_lys.IsPeptideLinking())
+    self.assertEqual(self.new_lys.name, "LYS")
+    self.assertEqual(self.new_lys.one_letter_code, "K")
+    self.new_lys_atoms = set([atm.name for atm in self.new_lys.atoms])
+    self.canon_lys = self.comp_lib.FindCompound("LYS")
+    self.canon_lys_atoms = set([atom.name for atom in self.canon_lys.atom_specs
+                                             if atom.element != "H" and atom.element != "D"  and not atom.is_leaving ])
+    self.assertEqual(self.canon_lys_atoms, self.new_lys_atoms)
+    self.assertFalse(self.canon_lys_atoms - self.new_lys_atoms)
+    self.assertFalse(self.new_lys_atoms - self.canon_lys_atoms) #test the reverse
+    for atm in self.new_lys.atoms:
+      self.assertFalse( atm.is_hetatom)
+    #deletions
+    self.dha = self.ent.residues[3]
+#    self.assertTrue( self.dha.IsValid())
+#    self.assertTrue( self.dha.IsPeptideLinking())
+    self.new_ser = self.new_ent.residues[3]
+    self.assertTrue(self.new_ser.IsValid())
+    self.assertTrue(self.new_ser.IsPeptideLinking())
+    self.assertEqual(self.new_ser.name, "SER")
+    self.assertEqual(self.new_ser.one_letter_code, "S")
+    self.new_ser_atoms = set([atm.name for atm in self.new_ser.atoms])
+    self.canon_ser = self.comp_lib.FindCompound("SER")
+    self.canon_ser_atoms = set([atom.name for atom in self.canon_ser.atom_specs
+                                             if atom.element != "H" and atom.element != "D"  and not atom.is_leaving ])
+    #TEMP
+    self.assertEqual( len(self.new_ser_atoms), 5)
+    self.assertTrue( "CB" in self.new_ser_atoms)
+    self.assertTrue( "CA" in self.new_ser_atoms)
+    self.assertTrue( "C" in self.new_ser_atoms)
+    self.assertTrue( "O" in self.new_ser_atoms)
+    self.assertTrue( "N" in self.new_ser_atoms)
+    #AFTER TEMP
+    #self.assertEqual( self.canon_ser_atoms, self.new_ser_atoms)
+    #self.assertFalse(self.canon_ser_atoms - self.new_ser_atoms)
+    #self.assertFalse(self.new_ser_atoms - self.canon_ser_atoms) #test the reverse
+    for atm in self.new_ser.atoms:
+      self.assertFalse( atm.is_hetatom)
+    #test deletion of whole residue
+    self.assertEqual(self.ent.residues[5].chem_class, "D_PEPTIDE_LINKING")
+    self.assertNotEqual(self.new_ent.residues[5].name, "DAL")
+    self.assertNotEqual(self.ent.residue_count, self.new_ent.residue_count)
+
+  def testRemoveLigands(self):
+    self.new_ent = cleanup.Cleanup(self.ent, strip_water=False, canonicalize=False, remove_ligands=True)
+    self.assertEqual(self.new_ent.residue_count, self.ent_no_lig.residue_count )
+    #MSE
+    self.assertTrue(self.new_ent.residues[0].IsValid() )
+    self.assertEqual(self.new_ent.residues[0].qualified_name, self.ent_no_lig.residues[0].qualified_name)
+    self.assertTrue(self.new_ent.residues[0].IsPeptideLinking())
+    self.assertTrue(self.new_ent.residues[0].atoms[0].is_hetatom)
+    #GLY
+    self.assertTrue(self.new_ent.residues[1].IsValid() )
+    self.assertEqual(self.new_ent.residues[1].qualified_name, self.ent_no_lig.residues[1].qualified_name)
+    self.assertTrue(self.new_ent.residues[1].IsPeptideLinking())
+    self.assertFalse(self.new_ent.residues[1].atoms[0].is_hetatom)
+    #MLY
+    self.assertTrue(self.new_ent.residues[2].IsValid() )
+    self.assertEqual(self.new_ent.residues[2].qualified_name, self.ent_no_lig.residues[2].qualified_name)
+    self.assertTrue(self.new_ent.residues[2].IsPeptideLinking())
+    self.assertTrue(self.new_ent.residues[2].atoms[0].is_hetatom)
+    #DHA
+    self.assertTrue(self.new_ent.residues[3].IsValid() )
+    self.assertEqual(self.new_ent.residues[3].qualified_name, self.ent_no_lig.residues[3].qualified_name)
+    self.assertTrue(self.new_ent.residues[3].IsPeptideLinking())
+    self.assertTrue(self.new_ent.residues[3].atoms[0].is_hetatom)
+    #CYS
+    self.assertTrue(self.new_ent.residues[4].IsValid() )
+    self.assertEqual(self.new_ent.residues[4].qualified_name, self.ent_no_lig.residues[4].qualified_name)
+    self.assertTrue(self.new_ent.residues[4].IsPeptideLinking())
+    self.assertFalse(self.new_ent.residues[4].atoms[0].is_hetatom)
+    #DAL
+    self.assertTrue(self.new_ent.residues[5].IsValid() )
+    self.assertEqual(self.new_ent.residues[5].qualified_name, self.ent_no_lig.residues[5].qualified_name)
+    self.assertTrue(self.new_ent.residues[5].IsPeptideLinking())
+    self.assertTrue(self.new_ent.residues[5].atoms[0].is_hetatom)
+    #HOH
+    self.assertTrue(self.new_ent.residues[6].IsValid() )
+    self.assertEqual(self.new_ent.residues[6].qualified_name, self.ent_no_lig.residues[6].qualified_name)
+    self.assertFalse(self.new_ent.residues[6].IsPeptideLinking()) # here assertFalse instead of assertTrue
+    self.assertTrue(self.new_ent.residues[6].atoms[0].is_hetatom)
+
+if not hasattr(conop.GetBuilder(), 'compound_lib'):
+  print 'Default builder without compound lib. Ignoring test_cleanup.py tests'
+  raise SystemExit(0)  # sys.exit() would raise NameError: sys is never imported
+suite = unittest.TestLoader().loadTestsFromTestCase(TestCleanUp)
+unittest.TextTestRunner().run(suite)
diff --git a/modules/mol/base/pymod/export_residue.cc b/modules/mol/base/pymod/export_residue.cc
index 1e1b9f74a60d94c224b9aad51153a9e873b2a4a7..75d74bdb577c618094ee18223102dfa56f932abe 100644
--- a/modules/mol/base/pymod/export_residue.cc
+++ b/modules/mol/base/pymod/export_residue.cc
@@ -64,6 +64,13 @@ namespace {
 
 void export_Residue()
 {
+  class_<ChemClass>("ChemClass", init<char>(args("chem_class")))
+    .def(self!=self)
+    .def(self==self)
+    .def("IsPeptideLinking", &ChemClass::IsPeptideLinking)
+    .def("IsNucleotideLinking", &ChemClass::IsNucleotideLinking)
+  ;
+  implicitly_convertible<char, ChemClass>();
   
   class_<ResNum>("ResNum", init<int>(args("num")))
     .def(init<int,char>(args("num", "ins_code")))
@@ -149,8 +156,8 @@ void export_Residue()
     .def("GetNumber", &ResidueBase::GetNumber,
          return_value_policy<copy_const_reference>())
     .def("GetChemClass", &ResidueBase::GetChemClass)
+    .add_property("chem_class", &ResidueBase::GetChemClass, set_chemclass)
     .def("SetChemClass", set_chemclass)
-    .add_property("chem_class",&ResidueBase::GetChemClass,set_chemclass)
     .add_property("is_ligand", &ResidueBase::IsLigand, &ResidueBase::SetIsLigand)
     .def("IsLigand", &ResidueBase::IsLigand)
     .def("SetIsLigand", &ResidueBase::SetIsLigand)