diff --git a/modules/conop/doc/aminoacid.rst b/modules/conop/doc/aminoacid.rst new file mode 100644 index 0000000000000000000000000000000000000000..2dd39c057b33a3ea04c83c82052e9e552b99eab2 --- /dev/null +++ b/modules/conop/doc/aminoacid.rst @@ -0,0 +1,70 @@ +Functions and classes for standard amino acids +================================================================================ + +.. currentmodule:: ost.conop + +This document describes functions and classes to work with the 20 standard amino acids. The functions convert between different representations, e.g. one-letter-code, three-letter-code or the AminoAcid *enum* + + +The AminoAcid enum +-------------------------------------------------------------------------------- + +The amino acid enum enumerates all 20 standard amino acid and the special value +XXX, to signify an unknown amino acid. The amino acid enum supports the +following values: + + ALA, ARG, ASN, ASP, GLN, GLU, LYS, SER, CYS, MET + TRP, TYR, THR, VAL, ILE, LEU, GLY, PRO, HIS, PHE + XXX + +Converter functions +-------------------------------------------------------------------------------- +.. function:: ResidueToAminoAcid(residue) + OneLetterCodeToAminoAcid(olc) + + Get amino acid from residue or one-letter-code. For non-standard amino acids + or one-letter-codes, XXX is returned. + +.. function:: OneLetterCodeToResidueName(olc) + AminoAcidToResidueName(amino_acid) + + Get residue name from one-letter-code or amino_acid. For invalid + one-letter-codes or XXX, 'UNK' is returned. + + + + +.. function:: ResidueNameToOneLetterCode(rname) + + Get one-letter-code for the given residue name. Returns 'X' if residue name is + not one of the 20 standard amino acids. + +.. class:: AminoAcidSet + + A set of amino acids, with constant-time access + + .. staticmethod:: CreatePolarSet() + CreateAromaticSet() + CreateApolarSet() + + Returns a set containing all polar, aromatic or apolar amino acids, + respectively. + + + + .. 
method:: Add(amino_acid) + + Add amino acid to the set. + + .. method:: Remove(amino_acid) + + Remove amino acid from the set. + + .. method:: Contains(amino_acid) + + Whether the set contains the given amino acid. + + .. method:: Empty() + + Whether the set is empty, i.e. doesn't contain any amino acids. + diff --git a/modules/conop/doc/compoundlib.rst b/modules/conop/doc/compoundlib.rst new file mode 100644 index 0000000000000000000000000000000000000000..0380f14ad0a585a5d338d87357a8901a7b918e12 --- /dev/null +++ b/modules/conop/doc/compoundlib.rst @@ -0,0 +1,202 @@ +.. currentmodule:: ost.conop + +The compound library +================================================================================ + +Compound libraries contain information on chemical compounds, such as their +connectivity, chemical class and one-letter-code. The compound library has +several uses, but the most important one is to provide the connectivity +information for the :class:`rule-based builder <RuleBasedBuilder>`. + +The compound definitions for standard PDB files are taken from the +components.cif dictionary provided by the PDB. The dictionary is updated with +every PDB release and augmented with the compound definitions of newly +crystallized compounds. + +If you downloaded the bundle, a recent version of the compound library is +already included. If you are compiling from source or want to incorporate the +latest compound definitions, follow :ref:`these instructions <mmcif-convert>` to +build the compound library manually. + + +.. class:: CompoundLib + + .. staticmethod:: Load(database, readonly=True) + + Load the compound lib from database with the given name. + + :param readonly: Whether the library should be opened in read-only mode. It + is important to note that only one program at a time has write access to + the compound library. If multiple programs try to open the compound library in + write mode, the programs can deadlock.
+ :type readonly: :class:`bool` + + :returns: The loaded compound lib + + .. staticmethod:: Create(database) + + Create a new compound library + + .. method:: FindCompound(tlc, dialect='PDB') + + Lookup compound by its three-letter-code, e.g ALA. If no compound with that + name exists, the function returns None. Compounds are cached after they have + been loaded with FindCompound. To delete the compound cache, use + :meth:`ClearCache`. + + :returns: The found compound + :rtype: :class:`Compound` + + .. method:: Copy(dst_filename) + + Copy database to dst_filename. The new library will be an exact copy of the + database. The special name `:memory:` will create an in-memory version of + the database. At the expense of memory, database lookups will become much + faster. + + :returns: The copied compound library + + :rtype: :class:`CompoundLib` + + .. method:: ClearCache() + + Clear the compound cache. + +.. class:: Compound + + Holds the description of a chemical compound, such as three-letter-code, and + chemical class. + + .. attribute:: id + + Alias for :attr:`three_letter_code` + + .. attribute:: three_letter_code + + Three-letter code of the residue, e.g. ALA for alanine. The three-letter + code is unique for each compound, always in uppercase letters and is between + 1 and 3 characters long. + + code is always uppercase. + + .. attribute:: one_letter_code + + The one letter code of the residue, e.g. 'G' for glycine. If undefined, the + one letter code of the residue is set to '?' + + .. attribute:: formula + + The chemical composition, e.g. 'H2 O' for water. The elements are listed in + alphabetical order. + + .. attribute:: dialect + + The dialect of the compound. + + .. attribute:: atom_specs + + The atom definitions of this compound. Read-only + + :type: list of :class:`AtomSpec` + + .. attribute:: bond_specs + + The bond definitions of this compound. Read-only + + :type: list of :class:`BondSpec` + + +.. class:: AtomSpec + + Definition of an atom + + .. 
attribute:: element + + The element of the atom + + .. attribute:: name + + The primary name of the atom + + .. attribute:: alt_name + + Alternative atom name. If the atom has only one name, this is identical to + :attr:`name` + + .. attribute:: is_leaving + + Whether this atom is a leaving atom, i.e. not required for a residue to be + complete. The best example of a leaving atom is the *OXT* atom of amino acids + that gets lost when a peptide bond is formed. + +.. class:: BondSpec + + Definition of a bond + + .. attribute:: atom_one + + The first atom of the bond, encoded as index into the + :attr:`Compound.atom_specs` array. + + .. attribute:: atom_two + + The second atom of the bond, encoded as index into the + :attr:`Compound.atom_specs` array. + + .. attribute:: order + + The bond order, 1 for single bonds, 2 for double-bonds and 3 for + triple-bonds + + +Example: Translating SEQRES entries +-------------------------------------------------------------------------------- + +In this example we will translate the three-letter-codes given in the SEQRES record to one-letter-codes. Note that this automatically takes care of modified amino acids such as selenomethionine. + + +.. code-block:: python + + compound_lib=conop.CompoundLib.Load('compounds.chemlib') + seqres='ALA GLY MSE VAL PHE' + sequence='' + for tlc in seqres.split(): + compound=compound_lib.FindCompound(tlc) + if compound: + sequence+=compound.one_letter_code + print sequence # prints 'AGMVF' + +.. _mmcif-convert: + +Creating a compound library +-------------------------------------------------------------------------------- + +The simplest way to create a compound library is to use the :program:`chemdict_tool`. The program allows you to import the chemical +description of the compounds from a MMCIF dictionary, e.g. the components.cif dictionary provided by the PDB. The latest dictionary can be downloaded from the `wwPDB site <http://www.wwpdb.org/ccd.html>`_.
The files are rather large; it is therefore recommended to download the gzipped version. + +After downloading the file use :program:`chemdict_tool` to convert the MMCIF dictionary into our internal format. + +.. code-block:: bash + + chemdict_tool create <components.cif> <compounds.chemlib> + +Note that the :program:`chemdict_tool` only understands `.cif` and `.cif.gz` +files. If you would like to use other sources for the compound definitions, consider writing a script by using the :doc:`compound library <compoundlib>` API. + +If you are working with CHARMM trajectory files, you will also have to add the +definitions for CHARMM. Assuming you are in the top-level source directory of +OpenStructure, this can be achieved by: + +.. code-block:: bash + + chemdict_tool update modules/conop/data/charmm.cif <compounds.chemlib> charmm + + +Once your library has been created, you need to tell cmake where to find it and +make sure it gets staged. + + +.. code-block:: bash + + cmake -DCOMPOUND_LIB=compounds.chemlib + make diff --git a/modules/conop/doc/connectivity.rst b/modules/conop/doc/connectivity.rst new file mode 100644 index 0000000000000000000000000000000000000000..1b06107701568178e3810f1fab5341c8fba620f8 --- /dev/null +++ b/modules/conop/doc/connectivity.rst @@ -0,0 +1,253 @@ +Connectivity +================================================================================ + +.. currentmodule:: ost.conop + + +Motivation +-------------------------------------------------------------------------------- +Traditionally the connectivity between atoms has not been reliably described in +a PDB file. Different programs adopted various ways of finding out if two atoms +are connected. One way chosen is to rely on proper naming of the atoms. For +example, the backbone atoms of the standard amino acids are named as N, CA, C +and O and if atoms with these names appear in the same residue they are shown +connected.
Another way is to apply additional heuristics to find out if a +peptide bond between two consecutive residues is formed. Breaks in the backbone +are indicated, e.g., by introducing a discontinuity in the numbering of the residue. + +Loader heuristics are great if you are the one that implemented them but are +problematic if you are just the user of a software that has them. As time goes +on, these heuristics become buried in thousands of lines of code and they are +often hard yet impossible to trace back. + +Different clients of the framework have different requirements. A visualisation +software wants to read in a PDB files as is without making any changes. A +script in an automated pipeline, however, does want to either strictly reject +files that are incomplete or fill-in missing structural features. All these +aspects are implemented in the conop module, separated from the loading of the +PDB file, giving clients a fine grained control over the loading process. + +The conop module defines a :class:`Builder` interface, to run connectivity +algorithms, that is to connect the atoms with bonds and perform basic clean up +of erroneous structures. The clients of the conop module can specify how the +Builder should treat unknown amino acids, missing atoms and chemically +infeasible bonds. + +The high-level interface +-------------------------------------------------------------------------------- + + +.. autofunction:: ConnectAll() + + + +A call to :func:`ConnectAll` is sufficient to assign residue and atoms +properties as well as to connect atoms with bonds. + + +.. code-block:: python + + # Suppose that BuildRawModel is a function that returns a protein structure + # with no atom properties assigned and no bonds formed. + ent=BuildRawModel(...) 
+ print ent.bonds # will return an empty list + # Call ConnectAll() to assign properties/connect atoms + conop.ConnectAll(ent) + print ent.bonds # will print a list containing many bonds + +For a more fine-grained control, consider using the :class:`Builder` interface. + +The builder interface +-------------------------------------------------------------------------------- + +The exact behaviour for a builder is implementation-specific. So far, two +classes implement the Builder interface: A heuristic and a rule-based builder. The builders mainly differ in the source of their connectivity information. The +HeuristicBuilder uses a hard-coded heuristic connectivity table for the 20 +standard amino acids as well as nucleotides.For other compounds such as ligands +the HeuristicBuilder runs a distance-based connectivity algorithm that connects +two atoms if they are closer than a certain threshold. The RuleBasedBuilder +uses a connectivity library containing all molecular components present in the +PDB files on PDB.org. The library can easily be extended with custom +connectivity information, if required. If a :doc:`compound library <compoundlib>` is present, the :class:`RuleBasedBuilder` is enabled by default, otherwise the :class:`HeuristicBuilder` is used as a fallback. + +The following 3 functions give you access to builders known to OpenStructure, +and allow you to set the default builder: + + +.. autofunction:: GetBuilder() + +.. autofunction:: RegisterBuilder() + +.. autofunction:: SetDefaultBuilder() + +The Builder baseclass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. class:: Builder + + .. method:: CompleteAtoms(residue) + + add any missing atoms to the residue based on its key, with coordinates set + to zero. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. 
method:: CheckResidueCompleteness(residue) + + verify that the given residue has all atoms it is supposed to have based on + its key. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: IsResidueComplete(residue) + + Check whether the residue has all atoms it is supposed to have. Hydrogen + atoms are not required for a residue to be complete. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: IdentifyResidue(residue) + + attempt to identify the residue based on its atoms, and return a suggestion + for the proper residue key. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: ConnectAtomsOfResidue(residue) + + Connects atoms of residue based on residue and atom name. This method does + not establish inter-residue bonds. To connect atoms that belong to + different residues, use :meth:`ConnectResidueToPrev`, or + :meth:`ConnectResidueToNext`. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: ConnectResidueToPrev(residue, prev) + + Connect atoms of residue to previous. The order of the parameters is + important. In case of a polypeptide chain, the residues are thought to be + ordered from N- to C- terminus. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + :param prev: valid or invalid residue + :type prev: mol.ResidueHandle + + + .. method:: DoesPeptideBondExist(n, c) + + Check if peptide bond should be formed between the `n` and `c` atom. This + method is called by ConnectResidueWithNext() after making sure that + both residues participating in the peptide bond are peptide linking + components. + + By default, :meth:`IsBondFeasible` is used to check whether the two atoms + form a peptide bond. + + :param n: backbone nitrogen atom (IUPAC name `N`). Must be valid. + :type n: mol.AtomHandle + :param c: backbone C-atom (IUPAC name `C`). Must be valid. 
+ :type c: mol.AtomHandle + + .. method:: IsBondFeasible(atom_a, atom_b) + + Overloadable hook to check if bond between two atoms is feasible. The + default implementation uses a distance-based check to check if the + two atoms should be connected. The atoms are connected if they are in + the range of 0.8 to 1.2 times their van der Waals radius. + + :param atom_a: a valid atom + :type atom_a: mol.AtomHandle + :param atom_b: a valid atom + :type atom_b: mol.AtomHandle + + .. method:: GuessAtomElement(atom_name, hetatm) + + Guess element of atom based on name and hetatm flag + + :param atom_name: IUPAC atom name, e.g. `CA`, `CB` or `N`. + :type atom_name: string + :param hetatm: Whether the atom is a hetatm or not + :type hetatm: bool + + .. method:: AssignBackboneTorsionsToResidue(residue) + + For :meth:`peptide-linking residues <mol.ResidueHandle.IsPeptideLinking>`, + assigns phi, psi and omega torsions to the amino acid. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: GuessChemClass(residue) + + Guesses the chemical class of the residue based on its atoms and + connectivity. + + So far, the method only guesses whether the residue is a peptide. A residue + is a peptide if all the backbone atoms N,CA,C,O are present, have the right + element and are in a suitable orientation to form bonds. + + +The RuleBasedBuilder class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. class:: RuleBasedBuilder(compound_lib) + + :param compound_lib: The compound library + :type compound_lib: :class:`CompoundLib` + + The :class:`RuleBasedBuilder` implements the :class:`Builder` interface. + Refer to its documentation for a basic description of the methods. + + .. method:: CheckResidueCompleteness(residue) + + By using the description of the chemical compound, the completeness of + the residue is verified.
The method distinguishes between required atoms + and atoms that are optional, like `OXT`, which is only present if no + peptide bond is formed. Whenever an unknown atom is encountered, + :meth:`OnUnknownAtom` is invoked. Subclasses of the + :class:`RuleBasedBuilder` may implement some additional logic to deal with + unknown atoms. Likewise, whenever a required atom is missing, + :meth:`OnMissingAtom` is invoked. Hydrogen atoms are not considered as + required by default. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + .. method:: IdentifyResidue(residue) + + Looks up the residue in the database of chemical compounds and returns + the name of the residue or "UNK" if the residue has not been found in the + library. + + :param residue: must be a valid residue + :type residue: mol.ResidueHandle + + + .. method:: OnUnknownAtom(atom) + + Invoked whenever an unknown atom has been encountered during a residue + completeness check. + + The default implementation guesses the atom properties based on the name + and returns false, meaning that it should be treated as an unknown atom. + + Custom implementations of this method may delete the atom, or modify it. + + :param atom: the unknown atom + :type atom: mol.AtomHandle + + .. method:: OnMissingAtom(atom) + + Invoked whenever an atom is missing. It is up to the overloaded method + to deal with the missing atom, either by ignoring it or by inserting a + dummy atom. + + :param atom: The missing atom's name + :type atom: string + diff --git a/modules/conop/doc/conop.rst b/modules/conop/doc/conop.rst index b8dc369dc4663a13593a6c2ac406aed1f3e58d95..8c3f413469872a1d95f9da98e37f4e21ffb24bfc 100644 --- a/modules/conop/doc/conop.rst +++ b/modules/conop/doc/conop.rst @@ -5,267 +5,18 @@ :synopsis: The conop modules implement different strategies to derive connectivity information of molecules. -The main task of the conop module is to connect atoms with bonds.
While the -bond class is also part of the base module, the conop module deals with setting -up the correct bonds between atoms. -Motivation --------------------------------------------------------------------------------- -Traditionally the connectivity between atoms has not been reliably described in -a PDB file. Different programs adopted various ways of finding out if two atoms -are connected. One way chosen is to rely on proper naming of the atoms. For -example, the backbone atoms of the standard amino acids are named as N, CA, C -and O and if atoms with these name appear in the same residue they are shown -connected. Another way is to apply additional heuristics to find out if a -peptide bond between two consecutive residues is formed. Breaks in the backbone -are indicated, e.g., by introducing a discontinuity in the numbering of the residue. - -Loader heuristics are great if you are the one that implemented them but are -problematic if you are just the user of a software that has them. As time goes -on, these heuristics become buried in thousands of lines of code and they are -often hard yet impossible to trace back. - -Different clients of the framework have different requirements. A visualisation -software wants to read in a PDB files as is without making any changes. A -script in an automated pipeline, however, does want to either strictly reject -files that are incomplete or fill-in missing structural features. All these -aspects are implemented in the conop module, separated from the loading of the -PDB file, giving clients a fine grained control over the loading process. - -The Builder interface --------------------------------------------------------------------------------- - -The conop module defines a :class:`Builder` interface, to run connectivity -algorithms, that is to connect the atoms with bonds and perform basic clean up -of errorneous structures. 
The clients of the conop module can specify how the -Builder should treat unknown amino acids, missing atoms and chemically -infeasible bonds. - -The exact behaviour for a builder is implementation-specific. So far, two -classes implement the Builder interface: A heuristic and a rule-based builder. The builders mainly differ in the source of their connectivity information. The -HeuristicBuilder uses a hard-coded heuristic connectivity table for the 20 -standard amino acids as well as nucleotides.For other compounds such as ligands -the HeuristicBuilder runs a distance-based connectivity algorithm that connects -two atoms if they are closer than a certain threshold. The RuleBasedBuilder -uses a connectivity library containing all molecular components present in the -PDB files on PDB.org. The library can easily be extended with custom -connectivity information, if required. By default the heuristic builder is used, -however the builder may be switched by setting the !RuleBasedBuilder as the -default. To do so, one has first to create a new instance of a RuleBasedBuilder -and register it in the builder registry of the conop module. In Python, this can -be achieved with - -.. code-block:: python - - from ost import conop - compound_lib=conop.CompoundLib.Load('...') - rbb=conop.RuleBasedBuilder(compound_lib) - conop.Conopology.Instance().RegisterBuilder(rbb,'rbb') - conop.Conopology.Instance().SetDefaultBuilder('rbb') - -All subsequent calls to :func:`ost.io.LoadEntity` will make use of the -RuleBasedBuilder instead of the heuristic builder. See -:ref:`here <mmcif-convert>` for more information on how to create the necessary -files to use the rule-based builder. - - -.. class:: Builder - - .. method:: CompleteAtoms(residue) - - add any missing atoms to the residue based on its key, with coordinates set - to zero. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. 
method:: CheckResidueCompleteness(residue) - - verify that the given residue has all atoms it is supposed to have based on - its key. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: IsResidueComplete(residue) - - Check whether the residue has all atoms it is supposed to have. Hydrogen - atoms are not required for a residue to be complete. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: IdentifyResidue(residue) - - attempt to identify the residue based on its atoms, and return a suggestion - for the proper residue key. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: ConnectAtomsOfResidue(residue) - - Connects atoms of residue based on residue and atom name. This method does - not establish inter-residue bonds. To connect atoms that belong to - different residues, use :meth:`ConnectResidueToPrev`, or - :meth:`ConnectResidueToNext`. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: ConnectResidueToPrev(residue, prev) - - Connect atoms of residue to previous. The order of the parameters is - important. In case of a polypeptide chain, the residues are thought to be - ordered from N- to C- terminus. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - :param prev: valid or invalid residue - :type prev: mol.ResidueHandle - - - .. method:: DoesPeptideBondExist(n, c) - - Check if peptide bond should be formed between the `n` and `c` atom. This - method is called by ConnectResidueWithNext() after making sure that - both residues participating in the peptide bond are peptide linking - components. - - By default, :meth:`IsBondFeasible` is used to check whether the two atoms - form a peptide bond. - - :param n: backbone nitrogen atom (IUPAC name `N`). Must be valid. - :type n: mol.AtomHandle - :param c: backbone C-atom (IUPAC name `C`). Must be valid. 
- :type c: mol.AtomHandle - - .. method:: IsBondFeasible(atom_a, atom_b) - - Overloadable hook to check if bond between to atoms is feasible. The - default implementation uses a distance-based check to check if the - two atoms should be connected. The atoms are connected if they are in - the range of 0.8 to 1.2 times their van-der-WAALS radius. - - :param atom_a: a valid atom - :type atom_b: mol.AtomHandle - :param atom_a: a valid atom - :type atom_b: mol.AtomHandle - - .. method:: GuessAtomElement(atom_name, hetatm) - - guess element of atom based on name and hetatm flag - - :param atom_name: IUPAC atom name, e.g. `CA`, `CB` or `N`. - :type atom_name: string - :param hetatm: Whether the atom is a hetatm or not - :type hetatm: bool - - .. method:: AssignBackboneTorsionsToResidue(residue) - - For :meth:`peptide-linking residues <mol.ResidueHandle.IsPeptideLinking>`, - residues, assigns phi, psi and omega torsions to amino acid. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: GuessChemClass(residue) +The main task of the :mod:`~ost.conop` module is to connect atoms with bonds. +While the bond class is also part of the base module, the conop module deals +with setting up the correct bonds between atoms. - Guesses the chemical class of the residue based on its atom and - connectivity. - So far, the method only guesses whether the residue is a peptide. A residue - is a peptide if all the backbone atoms N,CA,C,O are present, have the right - element and are in a suitable orientation to form bonds. - -.. class:: RuleBasedBuilder - - The :class:`RuleBasedBuilder` implements the :class:`Builder` interface. - Refer to its documentation for a basic description of the methods. - - .. method:: CheckResidueCompleteness(residue) - - By using the description of the chemical compound, the completeness of - the residue is verified. 
The method distinguishes between required atoms - and atoms that are optional, like `OXT` that is only present, if not - peptide bond is formed. Whenever an unknown atom is encountered, - :meth:`OnUnknownAtom` is invoked. Subclasses of the - :class:`RuleBasedBuilder` may implement some additional logic to deal with - unknown atom. Likewise, whenever a required atom is missing, - :meth:`OnMissingAtom` is invoked. Hydrogen atoms are not considered as - required by default. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - .. method:: IdentifyResidue(residue) - - Looks-up the residue in the database of chemical compounds and returns - the name of the residue or "UNK" if the residue has not been found in the - library. - - :param residue: must be a valid residue - :type residue: mol.ResidueHandle - - - .. method:: OnUnknownAtom(atom) - - Invoked whenever an unkknown atom has been encountered during a residue - completeness check. - - The default implementation guesses the atom properties based on the name - and returns false, meaning that it should be treated as an unknown atom. - - Custom implementations of this method may delete the atom, or modify it. - - :param atom: the unknown atom - :type atom: mol.AtomHandle - - .. method:: OnMissingAtom(atom) - - Invoked whenever an atom is missing. It is up to the overloaded method - to deal with the missing atom, either by ignoring it or by inserting a - dummy atom. - - :param atom: The missing atom's name - :type atom: string - -Connecting atoms +In this module -------------------------------------------------------------------------------- -A single function call to :func:`ConnectAll` is sufficient to assign residue and atoms properties as well as to connect atoms with bonds. - - -.. code-block:: python - - # Suppose that BuildRawModel is a function that returns a protein structure - # with no atom properties assigned and no bonds formed. - ent=BuildRawModel(...) 
- print ent.bonds # will return an empty list - # Call ConnectAll() to assign properties/connect atoms - conop.ConnectAll(ent) - print ent.bonds # will print a list containing many bonds - -For fine grained control, the :class:`Builder` interface may be used directly. - - -.. _mmcif-convert: - -Convert MM CIF dictionary --------------------------------------------------------------------------------- - -The CompoundLib may be created from a MM CIF dictionary. The latest dictionary -can be found on the `wwPDB site <http://www.wwpdb.org/ccd.html>`_. - -After downloading the file in MM CIF use the :program:`chemdict_tool` to convert -the MM CIF dictionary into our internal format. - -.. code-block:: bash - - chemdict_tool create <components.cif> <compounds.chemlib> - -If you are working with CHARMM trajectory files, you will also have to add the definitions for CHARMM. Assuming your are in the top-level source directory of OpenStructure, this can be achieved by: - -.. code-block:: bash +.. toctree:: + :maxdepth: 2 - chemdict_tool update modules/conop/data/charmm.cif <compounds.chemlib> charmm + aminoacid + connectivity + compoundlib diff --git a/modules/conop/pymod/__init__.py b/modules/conop/pymod/__init__.py index a40cfa86ab16c49ff8e021ea735be8774aecfaa9..6bccc9ee81feab85f3bad2c9f875c9b251ac9b6e 100644 --- a/modules/conop/pymod/__init__.py +++ b/modules/conop/pymod/__init__.py @@ -19,5 +19,41 @@ from _conop import * def ConnectAll(ent): + ''' + Uses the current default builder to connect the atoms of the entity, assign + torsions, and fill in missing or correct erroneous information such as the + chemical class of the residues and the atom's element.
+ + :param ent: A valid entity + :type ent: :class:`~ost.mol.EntityHandle` + ''' conop_inst=Conopology.Instance() - conop_inst.ConnectAll(conop_inst.GetBuilder("DEFAULT"), ent, 0) \ No newline at end of file + conop_inst.ConnectAll(conop_inst.GetBuilder("DEFAULT"), ent, 0) + +def GetBuilder(name='DEFAULT'): + ''' + Get a registered builder by name. + + :param name: The name of the builder; 'DEFAULT' selects the default builder + + :returns: The builder or None, if the builder doesn't exist + ''' + return Conopology.Instance().GetBuilder(name) + +def RegisterBuilder(builder, name): + ''' + Register a builder with OpenStructure under the given name. + + :param builder: An instance of :class:`Builder` + + :param name: The name under which the builder is registered + ''' + conop_inst=Conopology.Instance() + conop_inst.RegisterBuilder(builder, name) + +def SetDefaultBuilder(builder_name): + ''' + Set the builder registered under builder_name as the default builder. + ''' + conop_inst=Conopology.Instance() + conop_inst.SetDefaultBuilder(builder_name) diff --git a/modules/conop/pymod/export_amino_acids.cc b/modules/conop/pymod/export_amino_acids.cc index cf06f17324f53667243573edc8b26f6011f9b3c7..923c74555dded939b0526771db983515776873d6 100644 --- a/modules/conop/pymod/export_amino_acids.cc +++ b/modules/conop/pymod/export_amino_acids.cc @@ -47,6 +47,7 @@ void export_AminoAcids() .value("HIS", HIS) .value("PHE", PHE) .value("TRP", TRP) + .value("XXX", XXX) .export_values() ; diff --git a/modules/conop/pymod/export_builder.cc b/modules/conop/pymod/export_builder.cc index 291f62c1a5128898e0b36b641662a491b3902cbd..871329a47a14bce969e952e5710581b274902946 100644 --- a/modules/conop/pymod/export_builder.cc +++ b/modules/conop/pymod/export_builder.cc @@ -54,5 +54,6 @@ void export_Builder() { ; class_<RuleBasedBuilder, bases<Builder> >("RuleBasedBuilder", init<const CompoundLibPtr&>()) + .add_property("compound_lib", &RuleBasedBuilder::GetCompoundLib) ; } diff --git a/modules/conop/pymod/export_compound.cc b/modules/conop/pymod/export_compound.cc index
6e95a0c246ebf854506e478cff94eaac91832bcb..d39a05a840570f31e466fee3757ab9ed7e3c8882 100644 --- a/modules/conop/pymod/export_compound.cc +++ b/modules/conop/pymod/export_compound.cc @@ -20,39 +20,105 @@ #include <boost/python/register_ptr_to_python.hpp> #include <boost/python/suite/indexing/vector_indexing_suite.hpp> using namespace boost::python; + #include <ost/conop/compound.hh> #include <ost/conop/compound_lib.hh> - +using namespace ost::mol; using namespace ost::conop; +namespace { + +Compound::Dialect tr_dialect(const String& dialect) +{ + if (dialect=="PDB") { + return Compound::PDB; + } + if (dialect=="CHARMM") { + return Compound::CHARMM; + } + if (dialect=="OPLS") { + return Compound::OPLS; + } + if (dialect=="AMBER") { + return Compound::AMBER; + } + std::stringstream ss; + ss << "unknown compound dialect '" << dialect << "'"; + throw std::runtime_error(ss.str()); +} + +void set_dialect(CompoundPtr compound, const String& dialect) +{ + compound->SetDialect(tr_dialect(dialect)); +} + +char get_chemclass(CompoundPtr compound) +{ + return char(compound->GetChemClass()); +} + +void set_chemclass(CompoundPtr compound, char cc) +{ + compound->SetChemClass(ChemClass(cc)); +} + +CompoundPtr find_compound(CompoundLibPtr comp_lib, + const String& tlc, const String& dialect) +{ + return comp_lib->FindCompound(tlc, tr_dialect(dialect)); +} + +} void export_Compound() { - class_<Compound>("Compound", no_init) + + class_<Compound, CompoundPtr>("Compound", no_init) .def("GetID", &Compound::GetID, return_value_policy<copy_const_reference>()) .def("SetOneLetterCode", &Compound::SetOneLetterCode) .def("GetOneLetterCode", &Compound::GetOneLetterCode) + + .add_property("three_letter_code", make_function(&Compound::GetID, return_value_policy<copy_const_reference>())) + .add_property("id", make_function(&Compound::GetID, return_value_policy<copy_const_reference>())) .add_property("one_letter_code", &Compound::GetOneLetterCode, - &Compound::SetOneLetterCode) + 
&Compound::SetOneLetterCode) .def("GetAtomSpecs", &Compound::GetAtomSpecs, return_value_policy<copy_const_reference>()) + .def("bond_specs", make_function(&Compound::GetBondSpecs, + return_value_policy<copy_const_reference>())) + .def("atom_specs", make_function(&Compound::GetAtomSpecs, + return_value_policy<copy_const_reference>())) + .def("AddAtom", &Compound::AddAtom) + .def("AddBond", &Compound::AddBond) + .def("IsPeptideLinking", &Compound::IsPeptideLinking) + .add_property("chem_class", &get_chemclass, + &set_chemclass) + .add_property("formula",make_function(&Compound::GetFormula, + return_value_policy<copy_const_reference>()), + &Compound::SetFormula) + .add_property("dialect", &Compound::GetDialectAsString, + &set_dialect) ; class_<AtomSpec>("AtomSpec", no_init) .def_readonly("element", &AtomSpec::element) - .def_readonly("name", &AtomSpec::name) + .def_readonly("name", &AtomSpec::name) .def_readonly("alt_name", &AtomSpec::alt_name) - .def_readonly("is_leaving", &AtomSpec::is_leaving) + .def_readonly("is_leaving", &AtomSpec::is_leaving) + .def_readonly("is_aromatic", &AtomSpec::is_aromatic) + .def_readonly("ordinal", &AtomSpec::ordinal) ; class_<BondSpec>("BondSpec", no_init) .def_readonly("atom_one", &BondSpec::atom_one) .def_readonly("atom_two", &BondSpec::atom_two) + .def_readonly("border", &BondSpec::order) + ; - register_ptr_to_python<CompoundPtr>(); - + class_<CompoundLib>("CompoundLib", no_init) .def("Load", &CompoundLib::Load, arg("readonly")=true).staticmethod("Load") - .def("FindCompound", &CompoundLib::FindCompound) + .def("FindCompound", &find_compound, + (arg("tlc"), arg("dialect")="PDB")) .def("ClearCache", &CompoundLib::ClearCache) ; diff --git a/modules/conop/pymod/export_conop.cc b/modules/conop/pymod/export_conop.cc index 1a2e689a34fbbe2a2180b8c3075fd22feac5be46..309ce86b3572987be230a6badeb519a252c073db 100644 --- a/modules/conop/pymod/export_conop.cc +++ b/modules/conop/pymod/export_conop.cc @@ -28,7 +28,7 @@ using namespace ost::conop; 
void export_Conop() { class_<Conopology, boost::noncopyable>("Conopology", no_init) - .def("Instance", &Conopology::Instance, return_value_policy<reference_existing_object>()).staticmethod("Instance") + .def("Instance", &Conopology::Instance, return_value_policy<reference_existing_object>()).staticmethod("Instance") .def("ConnectAll", &Conopology::ConnectAll) .def("GetBuilder", &Conopology::GetBuilder) .def("ConnectAll", &Conopology::ConnectAll) diff --git a/modules/conop/pymod/wrap_conop.cc b/modules/conop/pymod/wrap_conop.cc index 00ea6e6a9ffcb01eb32e75b927bd0027a9ba70a8..95cc7c3be92daaa3f6dd023fe6bb8b2638e59e38 100644 --- a/modules/conop/pymod/wrap_conop.cc +++ b/modules/conop/pymod/wrap_conop.cc @@ -24,10 +24,12 @@ void export_Compound(); void export_Sanitizer(); void export_Conop(); void export_RingFinder(); +void export_AminoAcids(); BOOST_PYTHON_MODULE(_conop) { export_Builder(); export_Conop(); export_Compound(); export_RingFinder(); + export_AminoAcids(); } diff --git a/modules/conop/src/builder.cc b/modules/conop/src/builder.cc index 0b71e97b8b0fb2bd88438bec0427117fecb182e2..0a24cf56fa398f19fb61e6a37d181e2fbd5ebb95 100644 --- a/modules/conop/src/builder.cc +++ b/modules/conop/src/builder.cc @@ -176,7 +176,7 @@ void Builder::GuessChemClass(mol::ResidueHandle res) if (!o.IsValid() || o.GetElement()!="O") return; if (this->IsBondFeasible(n, ca) && this->IsBondFeasible(ca, c) && this->IsBondFeasible(c, o)) { - res.SetChemClass(mol::ChemClass(mol::ChemClass::PeptideLinking)); + res.SetChemClass(mol::ChemClass(mol::ChemClass::PEPTIDE_LINKING)); } } diff --git a/modules/conop/src/compound_lib.cc b/modules/conop/src/compound_lib.cc index fd5b76631fa391b57d8291f35e2a81497193a068..03435ff2c6a2b1f740548debc00aa73e1c12d905 100644 --- a/modules/conop/src/compound_lib.cc +++ b/modules/conop/src/compound_lib.cc @@ -308,7 +308,8 @@ CompoundPtr CompoundLib::FindCompound(const String& id, if (i!=compound_cache_.end()) { return i->second; } - String query="SELECT id, tlc, 
olc, chem_class, dialect FROM chem_compounds" + String query="SELECT id, tlc, olc, chem_class, dialect, formula " + " FROM chem_compounds" " WHERE tlc='"+id+"' AND dialect='"+String(1, char(dialect))+"'"; sqlite3_stmt* stmt; int retval=sqlite3_prepare_v2(conn_, query.c_str(), @@ -327,6 +328,8 @@ CompoundPtr CompoundLib::FindCompound(const String& id, compound->SetOneLetterCode((sqlite3_column_text(stmt, 2))[0]); compound->SetChemClass(mol::ChemClass(sqlite3_column_text(stmt, 3)[0])); compound->SetDialect(Compound::Dialect(sqlite3_column_text(stmt, 4)[0])); + const char* f=reinterpret_cast<const char*>(sqlite3_column_text(stmt, 5)); + compound->SetFormula(f); // Load atoms and bonds this->LoadAtomsFromDB(compound, pk); this->LoadBondsFromDB(compound, pk); diff --git a/modules/conop/src/heuristic_builder.cc b/modules/conop/src/heuristic_builder.cc index eda01852e25d0437367738ac21bc77cc60b7311a..89b8ad8e1720754b0941e8b1a7d8977e252c14fc 100644 --- a/modules/conop/src/heuristic_builder.cc +++ b/modules/conop/src/heuristic_builder.cc @@ -451,9 +451,9 @@ void HeuristicBuilder::FillResidueProps(mol::ResidueHandle residue) { } else { if (residue.FindAtom("N") && residue.FindAtom("CA") && residue.FindAtom("C") && residue.FindAtom("O")) { - residue.SetChemClass(mol::ChemClass(mol::ChemClass::LPeptideLinking)); + residue.SetChemClass(mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING)); } else { - residue.SetChemClass(mol::ChemClass(mol::ChemClass::Unknown)); + residue.SetChemClass(mol::ChemClass(mol::ChemClass::UNKNOWN)); } residue.SetOneLetterCode('?'); diff --git a/modules/conop/src/heuristic_builder.hh b/modules/conop/src/heuristic_builder.hh index 05c9fe31109a14488978fe0c317a92e436c6f62b..bc3930bcaf37a29d2cda545eca0cbc40bf2fb5c6 100644 --- a/modules/conop/src/heuristic_builder.hh +++ b/modules/conop/src/heuristic_builder.hh @@ -43,7 +43,7 @@ public: public: ConnResEntry(const String& rname="", char single='\0', - const mol::ChemClass& 
chem_class=mol::ChemClass(mol::ChemClass::Unknown)); + const mol::ChemClass& chem_class=mol::ChemClass(mol::ChemClass::UNKNOWN)); int Check(const String& name1, const String& name2) const; bool HasAtom(const String& name); void AddAtom(const String& atom) { required_atoms_.push_back(atom); } diff --git a/modules/conop/src/heuristic_connect_table.hh b/modules/conop/src/heuristic_connect_table.hh index 69cd0892011f20b66c3e5c365c0857c3eaeeaf0e..aa5c0b651a5d2d13364946146d5a49f603d3528b 100644 --- a/modules/conop/src/heuristic_connect_table.hh +++ b/modules/conop/src/heuristic_connect_table.hh @@ -42,28 +42,28 @@ struct CONN_DEF_ENTRY { CONN_DEF_ENTRY def_entry_table[]={ // the first entry must be this generic one - {"Generic","___",'_', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Generic","___",'_', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C"},3, {{-2,1}, {1,2}, {2,3}, {3,-3}},4, { },0, {0, 0, 0, 0, 0, 0},6 }, - {"Alanine","ALA",'A', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Alanine","ALA",'A', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","OXT"},6, {{-2,1}, {1,2}, {2,3}, {3,4}, {2,5}, {3,-3}, {6, 3}},7, { },0, {0, 0, 0, 0, 0, 0},6 }, - {"Cystein","CYS",'C', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Cystein","CYS",'C', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","SG","OXT"},7, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{3,-3},{8, 3}},8, {{1,2,5,6,"CHI1"} },1, {0, 0, 0, 0, 0, 0, 0},7 }, - {"Aspartate","ASP",'D', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Aspartate","ASP",'D', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","OD1","OD2","OXT"},9, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{3,-3},{9, 3}},10, { @@ -71,7 +71,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0, 0, 0, 0, 0, 0, 0, 0, 0},9 }, - {"Glutamate","GLU",'E', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Glutamate","GLU",'E', 
mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD","OE1","OE2","OXT"},10, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{7,8},{7,9},{3,-3},{10, 3}},11, { @@ -79,7 +79,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },3, {0},1 }, - {"Phenylalanine","PHE",'F', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Phenylalanine","PHE",'F', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD1","CD2","CE1","CE2","CZ","OXT"},12, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{7,9},{8,10},{9,11},{10,11},{3,-3},{12, 3}},14, { @@ -87,14 +87,14 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Glycin","GLY",'G', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Glycin","GLY",'G', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","OXT"},5, {{-2,1},{1,2},{2,3},{3,4},{3,-3},{5, 3}},6, { },0, {0},1 }, - {"Histidine","HIS",'H', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Histidine","HIS",'H', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","ND1","CD2","CE1","NE2","OXT"},11, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{7,9},{8,10},{9,10},{3,-3},{11, 3}},13, { @@ -102,7 +102,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Isoleucine","ILE",'I', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Isoleucine","ILE",'I', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG1","CG2","CD1","OXT"},9, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{5,7},{6,8},{3,-3},{9, 3}},10, { @@ -110,7 +110,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Lysin","LYS",'K', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Lysin","LYS",'K', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD","CE","NZ","OXT"},10, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{7,8},{8,9},{3,-3},{10, 3}},11, { @@ -118,7 +118,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },4, {0},1 }, - {"Leucin","LEU",'L', 
mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Leucin","LEU",'L', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD1","CD2","OXT"},9, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{3,-3},{9, 3}},10, { @@ -126,7 +126,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Methionine","MET",'M', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Methionine","MET",'M', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","SD","CE","OXT"},9, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{7,8},{3,-3},{10, 3}},10, { @@ -134,7 +134,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },3, {0},1 }, - {"Asparagine","ASN",'N', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Asparagine","ASN",'N', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","OD1","ND2","OXT"},9, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{3,-3},{9, 3}},10, { @@ -142,7 +142,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Proline","PRO",'P', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Proline","PRO",'P', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD","OXT"},8, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{3,-3},{1,7},{8, 3}},10, { @@ -150,7 +150,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Glutamine","GLN",'Q', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Glutamine","GLN",'Q', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD","OE1","NE2","OXT"},10, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{7,8},{7,9},{3,-3},{10, 3}},11, { @@ -158,7 +158,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },3, {0},1 }, - {"Arginine","ARG",'R', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Arginine","ARG",'R', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD","NE","CZ","NH1","NH2","OXT"},12, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{7,8},{8,9},{9,10},{9,11},{3,-3},{12, 3}},13, { @@ 
-166,7 +166,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },5, {0},1 }, - {"Serine","SER",'S', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Serine","SER",'S', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","OG","OXT"},7, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{3,-3},{7, 3}},8, { @@ -174,7 +174,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },1, {0},1 }, - {"Threonine","THR",'T', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Threonine","THR",'T', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","OG1","CG2","OXT"},8, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{5,7},{3,-3},{8, 3}},9, { @@ -182,7 +182,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },1, {0},1 }, - {"Valine","VAL",'V', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Valine","VAL",'V', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG1","CG2","OXT"},8, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{5,7},{3,-3},{8, 3}},9, { @@ -190,7 +190,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },1, {0},1 }, - {"Tryptophan","TRP",'W', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Tryptophan","TRP",'W', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD1","CD2","NE1","CE2","CE3","CZ2","CZ3","CH2","OXT"},15, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{7,9},{8,10},{9,10},{8,11},{10,12},{11,13},{12,14},{13,14},{3,-3},{15, 3}},18, { @@ -198,7 +198,7 @@ CONN_DEF_ENTRY def_entry_table[]={ },2, {0},1 }, - {"Tyrosin","TYR",'Y', mol::ChemClass(mol::ChemClass::LPeptideLinking), + {"Tyrosin","TYR",'Y', mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING), {"N","CA","C","O","CB","CG","CD1","CD2","CE1","CE2","CZ","OH","OXT"},13, {{-2,1},{1,2},{2,3},{3,4},{2,5},{5,6},{6,7},{6,8},{7,9},{8,10},{9,11},{10,11},{11,12},{3,-3},{13, 3}},15, { @@ -207,91 +207,91 @@ CONN_DEF_ENTRY def_entry_table[]={ {0},1 }, /* NUCLEIC ACIDS */ - {"Adenosin","A",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Adenosin","A",'?', 
mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,22},{7,-3}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Cytosin","C",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Cytosin","C",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,12},{7,-3}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Guanidin","G",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Guanidin","G",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Thymidin","T",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Thymidin","T",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{8,12},{1,11}},15, {{0,0,0,0,""}},1, {0},1 }, - {"Uracil","U",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Uracil","U",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Adenosin","ADE",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Adenosin","ADE",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,22},{9,12},{7,-3}},15, 
{{0,0,0,0,""}},1, {0},1 }, - {"Cytosin","CYT",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Cytosin","CYT",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,12},{7,-3}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Guanidin","GUA",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Guanidin","GUA",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Thymidin","THY",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Thymidin","THY",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{8,12},{1,11}},15, {{0,0,0,0,""}},1, {0},1 }, - {"Uracil","URI",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Uracil","URI",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Adenosin","A",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Adenosin","A",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,22},{9,12},{7,-3}},15, {{0,0,0,0,""}},1, {0},1 }, - {"Cytosin","C",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Cytosin","C",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, 
{{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{1,10},{1,11},{8,12},{7,-3}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Guanidin","G",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Guanidin","G",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, {0},1 }, - {"Thymidin","T",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Thymidin","T",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{8,12},{1,11}},15, {{0,0,0,0,""}},1, {0},1 }, - {"Uracil","U",'?', mol::ChemClass(mol::ChemClass::DNALinking), + {"Uracil","U",'?', mol::ChemClass(mol::ChemClass::DNA_LINKING), {"P","O5'","C5'","C4'","O4'","C3'","O3'","C2'","C1'","O1P","O2P","O2'"},12, {{-2,1},{1,2},{2,3},{3,4},{4,5},{4,6},{6,7},{6,8},{8,9},{5,9},{7,-3},{1,10},{1,11},{8,12}},14, {{0,0,0,0,""}},1, diff --git a/modules/conop/src/rule_based_builder.cc b/modules/conop/src/rule_based_builder.cc index 0406d372c166e052e1da98cb9db0680f304f8dcf..9cd65b269fe978a6d6c279ecd52cfe7928f578b4 100644 --- a/modules/conop/src/rule_based_builder.cc +++ b/modules/conop/src/rule_based_builder.cc @@ -144,8 +144,8 @@ void RuleBasedBuilder::ReorderAtoms(mol::ResidueHandle residue, unknown_atoms_=this->HasUnknownAtoms(residue); if (unknown_atoms_) { LOG_WARNING("residue " << residue << " doesn't look like a standard " - << residue.GetKey()); - residue.SetChemClass(mol::ChemClass(mol::ChemClass::Unknown)); + << residue.GetKey() << " (" << compound->GetFormula() << ")"); + residue.SetChemClass(mol::ChemClass(mol::ChemClass::UNKNOWN)); residue.SetOneLetterCode('?'); } } diff --git a/modules/conop/tests/CMakeLists.txt b/modules/conop/tests/CMakeLists.txt index 
9fc8af3b0b5ed2b35ab2f965c903c5779c51282a..c7d40239727c5eb35b8ced9a6587150d3e5ae3c2 100644 --- a/modules/conop/tests/CMakeLists.txt +++ b/modules/conop/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(OST_CONOP_UNIT_TESTS test_heuristic_builder.cc tests.cc test_builder.cc + test_compound.py ) ost_unittest(conop "${OST_CONOP_UNIT_TESTS}") diff --git a/modules/conop/tests/test_compound.py b/modules/conop/tests/test_compound.py new file mode 100644 index 0000000000000000000000000000000000000000..2c57bce756e0f26deab7e69467807993e96c51aa --- /dev/null +++ b/modules/conop/tests/test_compound.py @@ -0,0 +1,31 @@ +import unittest +from ost import mol +from ost import conop + +class TestCompound(unittest.TestCase): + + def setUp(self): + self.compound_lib=conop.GetBuilder().compound_lib + + def testFindCompound(self): + compound=self.compound_lib.FindCompound('***') + self.assertEqual(compound, None) + compound=self.compound_lib.FindCompound('ALA') + self.assertNotEqual(compound, None) + self.assertEqual(compound.id, 'ALA') + self.assertEqual(compound.three_letter_code, 'ALA') + self.assertEqual(compound.one_letter_code, 'A') + self.assertTrue(compound.IsPeptideLinking()) + self.assertEqual(compound.dialect, 'PDB') + self.assertEqual(compound.formula, 'C3 H7 N O2') + self.assertEqual(compound.chem_class, mol.L_PEPTIDE_LINKING) + + +if __name__=='__main__': + builder=conop.GetBuilder() + if not hasattr(builder, 'compound_lib'): + print 'default builder does not use compound library. 
ignoring unit tests' + else: + suite = unittest.TestLoader().loadTestsFromTestCase(TestCompound) + unittest.TextTestRunner().run(suite) + diff --git a/modules/conop/tests/test_heuristic_builder.cc b/modules/conop/tests/test_heuristic_builder.cc index 6a01a237d0cc8199affc79eb6d6490a09f305be2..4220f3110ca1af0ecec5acc183dba6d1ad5b5f11 100644 --- a/modules/conop/tests/test_heuristic_builder.cc +++ b/modules/conop/tests/test_heuristic_builder.cc @@ -168,9 +168,9 @@ BOOST_AUTO_TEST_CASE(test_assign_torsions){ ResidueHandle l1=make_leu(c); ResidueHandle a2=make_arg(c); ResidueHandle l3=make_leu(c); - l1.SetChemClass(ChemClass(ChemClass::LPeptideLinking)); - a2.SetChemClass(ChemClass(ChemClass::LPeptideLinking)); - l3.SetChemClass(ChemClass(ChemClass::LPeptideLinking)); + l1.SetChemClass(ChemClass(ChemClass::L_PEPTIDE_LINKING)); + a2.SetChemClass(ChemClass(ChemClass::L_PEPTIDE_LINKING)); + l3.SetChemClass(ChemClass(ChemClass::L_PEPTIDE_LINKING)); HeuristicBuilder heuristic_builder; for (AtomHandleIter i=e.AtomsBegin(),x=e.AtomsEnd(); i!=x; ++i) { heuristic_builder.FillAtomProps(*i); diff --git a/modules/doc/install.rst b/modules/doc/install.rst index ae169274574b92617919c4d042442e71d16b75e8..70028c496dbe1dbc703708dbf9c6704d3d7c8203 100644 --- a/modules/doc/install.rst +++ b/modules/doc/install.rst @@ -197,7 +197,7 @@ or, to start the command-line interpreter: stage/bin/ost -If you repeatedly use OpenStructure, it is recommended to add /path/to/dng/stage/bin to your path. +If you repeatedly use OpenStructure, it is recommended to add /path/to/ost/stage/bin to your path. 
Getting the newest changes -------------------------------------------------------------------------------- diff --git a/modules/index.rst b/modules/index.rst index 307bbce6c93821272ccc07399e87c8256f603ce6..d44b3aaa9c303f3d9818a072afbfeddebe9a614a 100644 --- a/modules/index.rst +++ b/modules/index.rst @@ -17,6 +17,7 @@ OpenStructure documentation img/alg/alg seq/base/seq seq/alg/seqalg + io/io gfx/gfx gui/gui @@ -41,7 +42,7 @@ Molecules **Input/Output**: :ref:`loading and saving molecules <mol-io>` -**Connectivity**: :doc:`the conop module <conop/conop>` +**Connectivity**: :doc:`the conop module <conop/conop>` | :doc:`compound library <conop/compoundlib>` diff --git a/modules/io/src/mol/chemdict_parser.cc b/modules/io/src/mol/chemdict_parser.cc index 4e52b087713971e27ce475f09db65efab63ff8ba..e493b79341767ac2ae4f58be016b177c09238a75 100644 --- a/modules/io/src/mol/chemdict_parser.cc +++ b/modules/io/src/mol/chemdict_parser.cc @@ -82,7 +82,7 @@ void ChemdictParser::OnDataItem(const StarDataItem& item) } // The type of water is set to "?". let's change it to water... 
if (compound_->GetID()=="HOH") { - compound_->SetChemClass(mol::ChemClass(mol::ChemClass::Water)); + compound_->SetChemClass(mol::ChemClass(mol::ChemClass::WATER)); compound_->SetOneLetterCode('.'); } else { std::map<String, mol::ChemClass>::iterator i=tm_.find(type); @@ -137,31 +137,31 @@ void ChemdictParser::InitTypeMap() { if (!tm_.empty()) return; - tm_["L-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["L-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::DPeptideLinking); - tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::LSaccharide); - tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::DSaccharide); - tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::LSaccharide); - tm_["D-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::DSaccharide); - tm_["SACCHARIDE"]=mol::ChemClass(mol::ChemClass::Saccharide); - tm_["D-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::DPeptideLinking); - tm_["L-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["L-PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::LPeptideLinking); - tm_["DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["L-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["L-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["R-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["R-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["DNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::DNALinking); - tm_["PEPTIDE-LIKE"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::PeptideLinking); - tm_["NON-POLYMER"]=mol::ChemClass(mol::ChemClass::NonPolymer); - tm_["RNA OH 3 PRIME 
TERMINUS"]=mol::ChemClass(mol::ChemClass::RNALinking); - tm_["?"]=mol::ChemClass(mol::ChemClass::Unknown); - tm_["WATER"]=mol::ChemClass(mol::ChemClass::Water); + tm_["L-PEPTIDE COOH CARBOXY TERMINUS"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); + tm_["L-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); + tm_["D-PEPTIDE NH3 AMINO TERMINUS"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); + tm_["L-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); + tm_["D-SACCHARIDE 1,4 AND 1,4 LINKING"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); + tm_["L-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::L_SACCHARIDE); + tm_["D-SACCHARIDE"]=mol::ChemClass(mol::ChemClass::D_SACCHARIDE); + tm_["SACCHARIDE"]=mol::ChemClass(mol::ChemClass::SACCHARIDE); + tm_["D-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::D_PEPTIDE_LINKING); + tm_["L-PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); + tm_["L-PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::L_PEPTIDE_LINKING); + tm_["DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNA_LINKING); + tm_["RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNA_LINKING); + tm_["L-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNA_LINKING); + tm_["L-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNA_LINKING); + tm_["R-DNA LINKING"]=mol::ChemClass(mol::ChemClass::DNA_LINKING); + tm_["R-RNA LINKING"]=mol::ChemClass(mol::ChemClass::RNA_LINKING); + tm_["DNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::DNA_LINKING); + tm_["PEPTIDE-LIKE"]=mol::ChemClass(mol::ChemClass::PEPTIDE_LINKING); + tm_["PEPTIDE LINKING"]=mol::ChemClass(mol::ChemClass::PEPTIDE_LINKING); + tm_["PEPTIDE-LINKING"]=mol::ChemClass(mol::ChemClass::PEPTIDE_LINKING); + tm_["NON-POLYMER"]=mol::ChemClass(mol::ChemClass::NON_POLYMER); + tm_["RNA OH 3 PRIME TERMINUS"]=mol::ChemClass(mol::ChemClass::RNA_LINKING); + tm_["?"]=mol::ChemClass(mol::ChemClass::UNKNOWN); + tm_["WATER"]=mol::ChemClass(mol::ChemClass::WATER); } }} \ 
No newline at end of file diff --git a/modules/mol/base/doc/entity.rst b/modules/mol/base/doc/entity.rst index 34edb07aa1b24c7d2b1dfccd72f137db926dee1e..4520df521aa86b3f51c8e61bd59a03864903a76b 100644 --- a/modules/mol/base/doc/entity.rst +++ b/modules/mol/base/doc/entity.rst @@ -495,7 +495,7 @@ The Handle Classes The chemical class of a residue is used to broadly categorize residues based on their chemical properties. For example, peptides belong to the - `LPeptideLinking` or `DPeptideLinking` classes. + `L_PEPTIDE_LINKING` or `D_PEPTIDE_LINKING` classes. .. attribute:: sec_structure diff --git a/modules/mol/base/pymod/export_residue.cc b/modules/mol/base/pymod/export_residue.cc index 2b4f3abad142dcefabece9c405d6c78bc21751b6..e91f76917ed94ffeb9fdf8dda406d903bce18400 100644 --- a/modules/mol/base/pymod/export_residue.cc +++ b/modules/mol/base/pymod/export_residue.cc @@ -20,7 +20,7 @@ #include <boost/python/suite/indexing/vector_indexing_suite.hpp> using namespace boost::python; - +#include <ost/mol/chem_class.hh> #include <ost/mol/mol.hh> #include <ost/export_helper/vector.hh> using namespace ost; @@ -52,6 +52,7 @@ namespace { // ResidueHandle::InsertAtom, 2, 3) void export_Residue() { + class_<ResNum>("ResNum", init<int>(args("num"))) .def(init<int,char>(args("num", "ins_code"))) .def("GetNum", &ResNum::GetNum) @@ -72,6 +73,20 @@ void export_Residue() .def(self-int()) ; implicitly_convertible<int, ResNum>(); + + scope().attr("PEPTIDE_LINKING")=char(ChemClass::PEPTIDE_LINKING); + scope().attr("D_PEPTIDE_LINKING")=char(ChemClass::D_PEPTIDE_LINKING); + scope().attr("L_PEPTIDE_LINKING")=char(ChemClass::L_PEPTIDE_LINKING); + scope().attr("RNA_LINKING")=char(ChemClass::RNA_LINKING); + scope().attr("DNA_LINKING")=char(ChemClass::DNA_LINKING); + scope().attr("NON_POLYMER")=char(ChemClass::NON_POLYMER); + scope().attr("L_SACCHARIDE")=char(ChemClass::L_SACCHARIDE); + scope().attr("D_SACCHARIDE")=char(ChemClass::D_SACCHARIDE); + 
scope().attr("SACCHARIDE")=char(ChemClass::SACCHARIDE); + scope().attr("WATER")=char(ChemClass::WATER); + scope().attr("UNKNOWN")=char(ChemClass::UNKNOWN); + + { scope sec_struct_scope=class_<SecStructure>("SecStructure", init<>()) .def(init<char>()) diff --git a/modules/mol/base/src/chem_class.hh b/modules/mol/base/src/chem_class.hh index 7bbbe963aa4c0b28cf35072be2dec5dba4ace656..73ff48698baf5997d86284f0863be6d2358b8324 100644 --- a/modules/mol/base/src/chem_class.hh +++ b/modules/mol/base/src/chem_class.hh @@ -16,8 +16,8 @@ // along with this library; if not, write to the Free Software Foundation, Inc., // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ -#ifndef OST_BASE_CHEM_CLASS_HI -#define OST_BASE_CHEM_CLASS_HI +#ifndef OST_BASE_CHEM_CLASS_HH +#define OST_BASE_CHEM_CLASS_HH #include <ost/mol/module_config.hh> @@ -25,23 +25,37 @@ namespace ost { namespace mol { struct DLLEXPORT ChemClass { - const static char PeptideLinking ='P'; - const static char DPeptideLinking ='D'; - const static char LPeptideLinking ='L'; - const static char RNALinking ='R'; - const static char DNALinking ='S'; - const static char NonPolymer ='N'; - const static char LSaccharide ='X'; - const static char DSaccharide ='Y'; - const static char Saccharide ='Z'; - const static char Water ='W'; - const static char Unknown ='U'; + + const static char PEPTIDE_LINKING ='P'; + const static char D_PEPTIDE_LINKING ='D'; + const static char L_PEPTIDE_LINKING ='L'; + const static char RNA_LINKING ='R'; + const static char DNA_LINKING ='S'; + const static char NON_POLYMER ='N'; + const static char L_SACCHARIDE ='X'; + const static char D_SACCHARIDE ='Y'; + const static char SACCHARIDE ='Z'; + const static char WATER ='W'; + const static char UNKNOWN ='U'; + + // for backward compatibility to 1.1 and earlier + const static char PeptideLinking =PEPTIDE_LINKING; + const static char DPeptideLinking 
=D_PEPTIDE_LINKING; + const static char LPeptideLinking =L_PEPTIDE_LINKING; + const static char RNALinking =RNA_LINKING; + const static char DNALinking =DNA_LINKING; + const static char NonPolymer =NON_POLYMER; + const static char LSaccharide =L_SACCHARIDE; + const static char DSaccharide =D_SACCHARIDE; + const static char Saccharide =SACCHARIDE; + const static char Water =WATER; + const static char Unknown =UNKNOWN; explicit ChemClass(char chem_class) : chem_class_(chem_class) { } ChemClass() - : chem_class_(Unknown) { + : chem_class_(UNKNOWN) { } bool operator==(const ChemClass& cc) const { return cc.chem_class_==chem_class_; @@ -52,16 +66,16 @@ struct DLLEXPORT ChemClass { } bool IsPeptideLinking() const { - return (chem_class_==ChemClass::PeptideLinking || - chem_class_==ChemClass::DPeptideLinking || - chem_class_==ChemClass::LPeptideLinking); + return (chem_class_==ChemClass::PEPTIDE_LINKING || + chem_class_==ChemClass::D_PEPTIDE_LINKING || + chem_class_==ChemClass::L_PEPTIDE_LINKING); } bool IsNucleotideLinking() const { - return (chem_class_==ChemClass::DNALinking || - chem_class_==ChemClass::RNALinking); + return (chem_class_==ChemClass::DNA_LINKING || + chem_class_==ChemClass::RNA_LINKING); } - bool IsWater() const { return chem_class_==ChemClass::Water; } + bool IsWater() const { return chem_class_==ChemClass::WATER; } operator char() const { return chem_class_; } diff --git a/modules/mol/base/src/residue_base.hh b/modules/mol/base/src/residue_base.hh index 153d6e2d7a6426f72d6b2b5bfd05c14409f8801e..ea5a5f01081787d3fe6e177894f288cfb8699f99 100644 --- a/modules/mol/base/src/residue_base.hh +++ b/modules/mol/base/src/residue_base.hh @@ -49,10 +49,10 @@ namespace ost { namespace mol { /// code is set to \c ?. /// /// Residues have a \ref ChemClass "chemical class". 
For standard amino acids this -/// class is ChemClass::LPeptideLinking, indicating that the residue is capable to +/// class is ChemClass::L_PEPTIDE_LINKING, indicating that the residue is capable to /// participate in a peptide bond. For nucleotides, the chemical class is either -/// ChemClass::RNALinking or ChemClass::DNALinking. For unknown compounds the -/// chemical class is ChemClass::Unknown. +/// ChemClass::RNA_LINKING or ChemClass::DNA_LINKING. For unknown compounds the +/// chemical class is ChemClass::UNKNOWN. /// /// When loading an entity from file, the one-letter and chemical class of a /// residue are assigned by the \ref conop::Builder "default builder". diff --git a/modules/mol/base/tests/test_entity.cc b/modules/mol/base/tests/test_entity.cc index 1bf5a2bba8550420786b9e19db640d0170869c7d..d30d6d183fa41959ae596f8752a5686952846985 100644 --- a/modules/mol/base/tests/test_entity.cc +++ b/modules/mol/base/tests/test_entity.cc @@ -65,8 +65,8 @@ EntityHandle make_test_entity() e.Connect(res2.FindAtom("N"), res2.FindAtom("CA")); e.Connect(res2.FindAtom("CA"), res2.FindAtom("C")); e.Connect(res2.FindAtom("C"), res2.FindAtom("O")); - res1.SetChemClass(ChemClass(ChemClass::LPeptideLinking)); - res2.SetChemClass(ChemClass(ChemClass::LPeptideLinking)); + res1.SetChemClass(ChemClass(ChemClass::L_PEPTIDE_LINKING)); + res2.SetChemClass(ChemClass(ChemClass::L_PEPTIDE_LINKING)); e.AddTorsion("PHI", res1.FindAtom("C"), res2.FindAtom("N"), res2.FindAtom("CA"), res2.FindAtom("C")); return eh; @@ -349,7 +349,7 @@ BOOST_AUTO_TEST_CASE(copy_residue_props) res.SetOneLetterCode('X'); res.SetIsProtein(true); res.SetIsLigand(true); - ChemClass cl(ChemClass::LPeptideLinking); + ChemClass cl(ChemClass::L_PEPTIDE_LINKING); res.SetSecStructure(SecStructure(SecStructure::ALPHA_HELIX)); res.SetChemClass(cl); EntityHandle copy=ent.Copy(); diff --git a/modules/mol/base/tests/test_view_op.cc b/modules/mol/base/tests/test_view_op.cc index 
30cfc8f9fa5cc91a0ba991802a9ea87cb6f62151..7d084e6fbb2816a8e49b73727160e79b4c6e845a 100644 --- a/modules/mol/base/tests/test_view_op.cc +++ b/modules/mol/base/tests/test_view_op.cc @@ -356,7 +356,7 @@ BOOST_AUTO_TEST_CASE(ent_from_view_residue_props) ResidueHandle res=edi.AppendResidue(ch, "DUMMY", mol::ResNum(666, '6')); res.SetOneLetterCode('X'); res.SetIsProtein(true); - ChemClass cl(ChemClass::LPeptideLinking); + ChemClass cl(ChemClass::L_PEPTIDE_LINKING); res.SetSecStructure(SecStructure(SecStructure::ALPHA_HELIX)); res.SetChemClass(cl); EntityHandle copy=mol::CreateEntityFromView(ent.Select(""), false); diff --git a/scripts/init_cl.py b/scripts/init_cl.py index b173207477f1620c5ea7c60c617210223ba5ea75..5c080e95d13a7675f8889202782f9379a5d045d7 100644 --- a/scripts/init_cl.py +++ b/scripts/init_cl.py @@ -40,10 +40,9 @@ ost.SetPrefixPath(os.getenv('DNG_ROOT')) def _InitRuleBasedBuilder(): compound_lib_path=os.path.join(ost.GetSharedDataPath(), 'compounds.chemlib') if os.path.exists(compound_lib_path): - conop_inst=conop.Conopology.Instance() compound_lib=conop.CompoundLib.Load(compound_lib_path) - conop_inst.RegisterBuilder(conop.RuleBasedBuilder(compound_lib), 'RBB') - conop_inst.SetDefaultBuilder('RBB') + conop.RegisterBuilder(conop.RuleBasedBuilder(compound_lib), 'RBB') + conop.SetDefaultBuilder('RBB') # switch to rule-based builder for high fidelity if compounds.chemlib is # available