diff --git a/modules/base/pymod/wrap_base.cc b/modules/base/pymod/wrap_base.cc index 1ba0b15718baec2fbf014e9301583d052a3d2205..1c305685bba1800c9f4b10d0efc42dcd5707b526 100644 --- a/modules/base/pymod/wrap_base.cc +++ b/modules/base/pymod/wrap_base.cc @@ -21,6 +21,7 @@ #include <vector> #include <ost/geom/export_helper/vector.hh> #include <ost/base.hh> +#include <ost/string_ref.hh> #include <ost/platform.hh> #include <ost/message.hh> #include <ost/version.hh> @@ -38,8 +39,48 @@ void translator(const ost::Error& x) { } +struct stringref_to_python_string +{ + static PyObject* convert(ost::StringRef const& s) + { + return boost::python::incref(boost::python::object(s.str()).ptr()); + } +}; + +struct stringref_from_python_string +{ + stringref_from_python_string() + { + boost::python::converter::registry::push_back(&convertible, + &construct, + boost::python::type_id<ost::StringRef>()); + } + + static void* convertible(PyObject* obj_ptr) + { + if (!PyString_Check(obj_ptr)) return 0; + return obj_ptr; + } + + static void construct(PyObject* obj_ptr, + boost::python::converter::rvalue_from_python_stage1_data* data) + { + const char* value = PyString_AsString(obj_ptr); + if (value == 0) boost::python::throw_error_already_set(); + void* storage = ( + (boost::python::converter::rvalue_from_python_storage<ost::StringRef>*) + data)->storage.bytes; + new (storage) ost::StringRef(value, strlen(value)); + data->convertible = storage; + } +}; + + BOOST_PYTHON_MODULE(_ost_base) { + boost::python::to_python_converter<ost::StringRef, + stringref_to_python_string>(); + stringref_from_python_string(); register_exception_translator<ost::Error>(&translator); def("SetPrefixPath", &ost::SetPrefixPath); @@ -67,5 +108,5 @@ BOOST_PYTHON_MODULE(_ost_base) class_<std::vector<int> >("IntList", init<>()) .def(vector_indexing_suite<std::vector<int> >()) .def(geom::VectorAdditions<IntList>()) - ; + ; } diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index 
4746f452cf6e72287c54e2e05012fd775d7be3d3..41759d1987ef149bd53ed942fc21814b22ba43c0 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -5,6 +5,7 @@ :hidden: formats + mmcif profile .. module:: ost.io diff --git a/modules/io/doc/mmcif.rst b/modules/io/doc/mmcif.rst new file mode 100644 index 0000000000000000000000000000000000000000..da840874e29de801134a25a648a83eca6911499a --- /dev/null +++ b/modules/io/doc/mmcif.rst @@ -0,0 +1,237 @@ +MMCif File Format +-------------------------------------------------------------------------------- + +The MMCif file format is an alternate container for structural entities, also +provided by the PDB. Here we describe how to load those files and how to deal +with information provided above the common PDB format. + + +Loading MMCif Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: ost.io.LoadMMCIF + + +Categories Available +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +Info Classes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Information from MMCif files which goes beyond structural data, is kept in a +special container, the :class:`MMCifInfo` class. Here is a detailed description +of the annotation available. + +.. class:: MMCifInfo + + This is the container for all bits of non-molecular data pulled from a MMCif + file. + + .. attribute:: citations + + Stores a list of citations (:class:`MMCifInfoCitation`). + + Also available as :meth:`GetCitations`. + + .. method:: AddCitation(citation) + + Add a citation to the citation list of an info object. + + :param citation: Citation to be added. + :type citation: :class:`MMCifInfoCitation` + + .. method:: AddAuthorsToCitation(id, authors) + + Adds a list of authors to a specific citation. + + :param id: identifier of the citation + :type id: :class:`str` + :param authors: List of authors. + :type authors: :class:`~ost.StringList` + + .. 
method:: GetCitations() + + See :attr:`citations` + +.. class:: MMCifInfoCitation + + This stores citation information from an input file. + + .. attribute:: id + + Stores an internal identifier for a citation. If not provided, resembles an + empty string. + + Also available as :meth:`GetID`. May also be modified by :meth:`SetID`. + + .. attribute:: cas + + Stores a Chemical Abstract Service identifier, if available. If not + provided, resembles an empty string. + + Also available as :meth:`GetCAS`. May also be modified by :meth:`SetCAS`. + + .. attribute:: isbn + + Stores the ISBN code, presumably for cited books. If not + provided, resembles an empty string. + + Also available as :meth:`GetISBN`. May also be modified by :meth:`SetISBN`. + + .. attribute:: published_in + + Stores the book or journal title of a publication. Should take the full + title, no abbreviations. If not provided, resembles an empty string. + + Also available as :meth:`GetPublishedIn`. May also be modified by + :meth:`SetPublishedIn`. + + .. attribute:: volume + + Supposed to store volume information for journals. Since the volume number + is not always a simple integer, it is stored as a string. If not provided, + resembles an empty string. + + Also available as :meth:`GetVolume`. May also be modified by + :meth:`SetVolume`. + + .. attribute:: page_first + + Stores the first page of a publication. Since page numbers are not + always simple integers, it is stored as a string. If not provided, + resembles an empty string. + + Also available as :meth:`GetPageFirst`. May also be modified by + :meth:`SetPageFirst`. + + .. attribute:: page_last + + Stores the last page of a publication. Since page numbers are not + always simple integers, it is stored as a string. If not provided, + resembles an empty string. + + Also available as :meth:`GetPageLast`. May also be modified by + :meth:`SetPageLast`. + + .. 
attribute:: doi + + Stores the Document Object Identifier as used by doi.org for a cited + document. If not provided, resembles an empty string. + + Also available as :meth:`GetDOI`. May also be modified by :meth:`SetDOI`. + + .. attribute:: pubmed + + Stores the PubMed accession number. If not provided, is set to 0. + + Also available as :meth:`GetPubMed`. May also be modified by + :meth:`SetPubMed`. + + .. attribute:: year + + Stores the publication year. If not provided, is set to 0. + + Also available as :meth:`GetYear`. May also be modified by :meth:`SetYear`. + + .. attribute:: title + + Stores a title. If not provided, is set to an empty string. + + Also available as :meth:`GetTitle`. May also be modified by + :meth:`SetTitle`. + + .. attribute:: authors + + Stores a :class:`~ost.StringList` of authors. + + Also available as :meth:`GetAuthorList`. May also be modified by + :meth:`SetAuthorList`. + + .. method:: GetCAS() + + See :attr:`cas` + + .. method:: SetCAS(cas) + + See :attr:`cas` + + .. method:: GetISBN() + + See :attr:`isbn` + + .. method:: SetISBN(isbn) + + See :attr:`isbn` + + .. method:: GetPublishedIn() + + See :attr:`published_in` + + .. method:: SetPublishedIn(title) + + See :attr:`published_in` + + .. method:: GetVolume() + + See :attr:`volume` + + .. method:: SetVolume(volume) + + See :attr:`volume` + + .. method:: GetPageFirst() + + See :attr:`page_first` + + .. method:: SetPageFirst(first) + + See :attr:`page_first` + + .. method:: GetPageLast() + + See :attr:`page_last` + + .. method:: SetPageLast(last) + + See :attr:`page_last` + + .. method:: GetDOI() + + See :attr:`doi` + + .. method:: SetDOI(doi) + + See :attr:`doi` + + .. method:: GetPubMed() + + See :attr:`pubmed` + + .. method:: SetPubMed(no) + + See :attr:`pubmed` + + .. method:: GetYear() + + See :attr:`year` + + .. method:: SetYear(year) + + See :attr:`year` + + .. method:: GetTitle() + + See :attr:`title` + + .. method:: SetTitle(title) + + See :attr:`title` + + .. 
method:: GetAuthorList() + + See :attr:`authors` + + .. method:: SetAuthorList(list) + + See :attr:`authors` diff --git a/modules/io/pymod/__init__.py b/modules/io/pymod/__init__.py index 1eb524a2f8cd65dc81a73eb6b1b2ab1de69031d4..87e060687a8e590f87d5b198e90199f3cf1f7bf9 100644 --- a/modules/io/pymod/__init__.py +++ b/modules/io/pymod/__init__.py @@ -264,7 +264,7 @@ def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM', raise ValueError("No DCD filename given") return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load) -def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False): +def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False, info=False): """ Load MMCIF file from disk and return one or more entities. Several options allow to customize the exact behaviour of the MMCIF import. For more @@ -283,13 +283,16 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non pdb id. :rtype: :class:`~ost.mol.EntityHandle`. - - :param seqres: Whether to read SEQRES records. If set to true, the loaded - entity and seqres entry will be returned as a tuple. :param strict_hydrogens: If set, overrides the value of :attr:`IOProfile.strict_hydrogens`. + :param seqres: Whether to read SEQRES records. If set to true, the loaded + entity and seqres entry will be returned as second item. + + :param info: Whether to return an info container with the other output. + Returns a :class:`MMCifInfo` object as last item. 
+ :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or inexistent file """ @@ -328,8 +331,12 @@ def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=Non conop_inst.ConnectAll(builder, ent, 0) #else: # raise IOError("File doesn't contain any entities") + if seqres and info: + return ent, reader.seqres, reader.info if seqres: return ent, reader.seqres + if info: + return ent, reader.info return ent except: raise diff --git a/modules/io/pymod/export_mmcif_io.cc b/modules/io/pymod/export_mmcif_io.cc index 9fb3740887f4e7c4f3122be992a6416c692474ec..4e1a7e60af45d51fa8b03c2ab92c887eb5f42374 100644 --- a/modules/io/pymod/export_mmcif_io.cc +++ b/modules/io/pymod/export_mmcif_io.cc @@ -17,10 +17,12 @@ // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA //------------------------------------------------------------------------------ #include <boost/python.hpp> +#include <boost/python/suite/indexing/vector_indexing_suite.hpp> using namespace boost::python; #include <ost/io/mol/io_profile.hh> #include <ost/io/mol/mmcif_reader.hh> +#include <ost/io/mol/mmcif_info.hh> using namespace ost; using namespace ost::io; using namespace ost::mol; @@ -32,6 +34,8 @@ void export_mmcif_io() .def("SetRestrictChains", &MMCifParser::SetRestrictChains) .def("SetReadCanonicalSeqRes", &MMCifParser::SetReadCanonicalSeqRes) .def("GetSeqRes", &MMCifParser::GetSeqRes) + .def("GetInfo", make_function(&MMCifParser::GetInfo, + return_value_policy<copy_const_reference>())) .add_property("restrict_chains", make_function(&MMCifParser::GetRestrictChains, return_value_policy<copy_const_reference>()), @@ -39,5 +43,71 @@ void export_mmcif_io() .add_property("seqres", &MMCifParser::GetSeqRes) .add_property("read_seqres", &MMCifParser::GetReadSeqRes, &MMCifParser::SetReadSeqRes) + .add_property("info", make_function(&MMCifParser::GetInfo, + return_value_policy<copy_const_reference>())) + ; + + class_<MMCifInfoCitation, 
boost::noncopyable>("MMCifInfoCitation", init<>()) + .def("SetID", &MMCifInfoCitation::SetID) + .def("GetID", &MMCifInfoCitation::GetID) + .def("SetCAS", &MMCifInfoCitation::SetCAS) + .def("GetCAS", &MMCifInfoCitation::GetCAS) + .def("SetISBN", &MMCifInfoCitation::SetISBN) + .def("GetISBN", &MMCifInfoCitation::GetISBN) + .def("SetPublishedIn", &MMCifInfoCitation::SetPublishedIn) + .def("GetPublishedIn", &MMCifInfoCitation::GetPublishedIn) + .def("SetVolume", &MMCifInfoCitation::SetVolume) + .def("GetVolume", &MMCifInfoCitation::GetVolume) + .def("SetPageFirst", &MMCifInfoCitation::SetPageFirst) + .def("GetPageFirst", &MMCifInfoCitation::GetPageFirst) + .def("SetPageLast", &MMCifInfoCitation::SetPageLast) + .def("GetPageLast", &MMCifInfoCitation::GetPageLast) + .def("SetDOI", &MMCifInfoCitation::SetDOI) + .def("GetDOI", &MMCifInfoCitation::GetDOI) + .def("SetPubMed", &MMCifInfoCitation::SetPubMed) + .def("GetPubMed", &MMCifInfoCitation::GetPubMed) + .def("SetYear", &MMCifInfoCitation::SetYear) + .def("GetYear", &MMCifInfoCitation::GetYear) + .def("SetTitle", &MMCifInfoCitation::SetTitle) + .def("GetTitle", &MMCifInfoCitation::GetTitle) + .def("SetAuthorList", &MMCifInfoCitation::SetAuthorList) + .def("GetAuthorList", make_function(&MMCifInfoCitation::GetAuthorList, + return_value_policy<copy_const_reference>())) + .add_property("id", &MMCifInfoCitation::GetID, &MMCifInfoCitation::SetID) + .add_property("cas", &MMCifInfoCitation::GetCAS, &MMCifInfoCitation::SetCAS) + .add_property("isbn", &MMCifInfoCitation::GetISBN, + &MMCifInfoCitation::SetISBN) + .add_property("published_in", &MMCifInfoCitation::GetPublishedIn, + &MMCifInfoCitation::SetPublishedIn) + .add_property("volume", &MMCifInfoCitation::GetVolume, + &MMCifInfoCitation::SetVolume) + .add_property("page_first", &MMCifInfoCitation::GetPageFirst, + &MMCifInfoCitation::SetPageFirst) + .add_property("page_last", &MMCifInfoCitation::GetPageLast, + &MMCifInfoCitation::SetPageLast) + .add_property("doi", 
&MMCifInfoCitation::GetDOI, &MMCifInfoCitation::SetDOI) + .add_property("pubmed", &MMCifInfoCitation::GetPubMed, + &MMCifInfoCitation::SetPubMed) + .add_property("year", &MMCifInfoCitation::GetYear, + &MMCifInfoCitation::SetYear) + .add_property("title", &MMCifInfoCitation::GetTitle, + &MMCifInfoCitation::SetTitle) + .add_property("authors", make_function(&MMCifInfoCitation::GetAuthorList, + return_value_policy<copy_const_reference>()), + &MMCifInfoCitation::SetAuthorList) + ; + + typedef std::vector<MMCifInfoCitation> MMCifInfoCitationList; + class_<std::vector<MMCifInfoCitation> >("MMCifInfoCitationList", init<>()) + .def(vector_indexing_suite<std::vector<MMCifInfoCitation> >()) + ; + + class_<MMCifInfo, boost::noncopyable>("MMCifInfo", init<>()) + .def("AddCitation", &MMCifInfo::AddCitation) + .def("GetCitations", make_function(&MMCifInfo::GetCitations, + return_value_policy<copy_const_reference>())) + .def("AddAuthorsToCitation", &MMCifInfo::AddAuthorsToCitation) + .add_property("citations", make_function(&MMCifInfo::GetCitations, + return_value_policy<copy_const_reference>())) ; } diff --git a/modules/io/src/mol/CMakeLists.txt b/modules/io/src/mol/CMakeLists.txt index db328bb9c366cdba2a6d6354348e78213773dfef..05c813456155e14b540e7f00b3848932124db523 100644 --- a/modules/io/src/mol/CMakeLists.txt +++ b/modules/io/src/mol/CMakeLists.txt @@ -16,6 +16,7 @@ io_profile.cc dcd_io.cc star_parser.cc mmcif_reader.cc +mmcif_info.cc PARENT_SCOPE ) @@ -23,6 +24,7 @@ set(OST_IO_MOL_HEADERS chemdict_parser.hh star_parser.hh mmcif_reader.hh +mmcif_info.hh io_profile.hh dcd_io.hh entity_io_crd_handler.hh diff --git a/modules/io/src/mol/mmcif_info.cc b/modules/io/src/mol/mmcif_info.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e4c8928123b010f211564635cfa5a1e9ba3189d --- /dev/null +++ b/modules/io/src/mol/mmcif_info.cc @@ -0,0 +1,39 @@ +//------------------------------------------------------------------------------ +// This file is part of the 
OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ost/io/io_exception.hh> +#include <ost/io/mol/mmcif_info.hh> + +namespace ost { namespace io { + +void MMCifInfo::AddAuthorsToCitation(StringRef id, std::vector<String> list) +{ + // find citation + std::vector<MMCifInfoCitation>::iterator cit_it; + for (cit_it = citations_.begin(); cit_it != citations_.end(); ++cit_it) { + if (id == StringRef(cit_it->GetID().c_str(), cit_it->GetID().length())) { + cit_it->SetAuthorList(list); + return; + } + } + + throw IOException("No citation for identifier '" + id.str() + "' found."); +} + +}} //ns diff --git a/modules/io/src/mol/mmcif_info.hh b/modules/io/src/mol/mmcif_info.hh new file mode 100644 index 0000000000000000000000000000000000000000..70624d4f74eb0356f8f618cc2d496357451fa90e --- /dev/null +++ b/modules/io/src/mol/mmcif_info.hh @@ -0,0 +1,281 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is 
free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ +#ifndef OST_MMCIF_INFO_HH +#define OST_MMCIF_INFO_HH + +#include <vector> +#include <ost/string_ref.hh> +#include <ost/io/module_config.hh> + +namespace ost { namespace io { + +class DLLEXPORT_OST_IO MMCifInfoCitation { +public: + /// \brief Create a citation. 
+ MMCifInfoCitation(): id_(""), where_(UNKNOWN), cas_(""), published_in_(""), + volume_(""), page_first_(""), page_last_(""), doi_(""), pubmed_(0), + year_(0), title_("") {}; + + /// \brief Set ID + /// + /// \param id ID + void SetID(String id) { id_ = id; } + /// \brief Get ID + /// + /// \return ID + String GetID() const { return id_; } + + /// \brief Set a CAS identifier + /// + /// \param id CAS identifier + void SetCAS(String id) { cas_ = id; } + /// \brief Get a CAS identifier + /// + /// \return CAS identifier + String GetCAS() const { return cas_; } + + /// \brief Set an ISBN code + /// + /// \param code ISBN code + void SetISBN(String code) { isbn_ = code; } + + /// \brief Get an ISBN code + /// + /// \return ISBN code + String GetISBN() const { return isbn_; } + + /// \brief Set a book title or journal name + /// + /// \param title where published + void SetPublishedIn(String title) { published_in_ = title; } + + /// \brief Get a book title or journal name + /// + /// \return title + String GetPublishedIn() const { return published_in_; } + + /// \brief Set a journal volume + /// + /// \param volume + void SetVolume(String volume) { volume_ = volume; } + + /// \brief Get a journal volume + /// + /// \return volume + String GetVolume() const { return volume_; } + + /// \brief Set the start page for a publication + /// + /// \param start + void SetPageFirst(String first) { page_first_ = first; } + + /// \brief Get the start page of a publication + /// + /// \return first page + String GetPageFirst() const { return page_first_; } + + /// \brief Set the end page for a publication + /// + /// \param end + void SetPageLast(String last) { page_last_ = last; } + + /// \brief Get the last page of a publication + /// + /// \return last page + String GetPageLast() const { return page_last_; } + + /// \brief Set the DOI of a document + /// + /// \param doi + void SetDOI(String doi) { doi_ = doi; } + + + /// \brief Get the DOI of a document + /// + /// \return DOI + 
String GetDOI() const { return doi_; } + + /// \brief Set the PubMed accession number + /// + /// \param no + void SetPubMed(int no) { pubmed_ = no; } + + /// \brief Get the PubMed accession number + /// + /// \return PubMed accession + int GetPubMed() const { return pubmed_; } + + /// \brief Set the year of a publication + /// + /// \param year + void SetYear(int year) { year_ = year; } + + + /// \brief Get the year of a publication + /// + /// \return year + int GetYear() const { return year_; } + + + /// \brief Set the title of a publication + /// + /// \param title + void SetTitle(String title) { title_ = title; } + + /// \brief Get the title of a publication + /// + /// \return title + String GetTitle() const { return title_; } + + /// \brief Set the list of authors + /// + /// \param list + void SetAuthorList(std::vector<String> list) { authors_ = list; } + + /// \brief Get the list of authors + /// + /// \return list + const std::vector<String>& GetAuthorList() const { return authors_; } + + bool operator==(const MMCifInfoCitation& cit) const { + if (this->year_ != cit.year_) { + return false; + } + if (this->pubmed_ != cit.pubmed_) { + return false; + } + if (this->where_ != cit.where_) { + return false; + } + if (StringRef(this->id_.c_str(), this->id_.length()) != + StringRef(cit.id_.c_str(), cit.id_.length())) { + return false; + } + if (StringRef(this->cas_.c_str(), this->cas_.length()) != + StringRef(cit.cas_.c_str(), cit.cas_.length())) { + return false; + } + if (StringRef(this->isbn_.c_str(), this->isbn_.length()) != + StringRef(cit.isbn_.c_str(), cit.isbn_.length())) { + return false; + } + if (StringRef(this->published_in_.c_str(), this->published_in_.length()) != + StringRef(cit.published_in_.c_str(), cit.published_in_.length())) { + return false; + } + if (StringRef(this->volume_.c_str(), this->volume_.length()) != + StringRef(cit.volume_.c_str(), cit.volume_.length())) { + return false; + } + if (StringRef(this->page_first_.c_str(), 
this->page_first_.length()) != + StringRef(cit.page_first_.c_str(), cit.page_first_.length())) { + return false; + } + if (StringRef(this->page_last_.c_str(), this->page_last_.length()) != + StringRef(cit.page_last_.c_str(), cit.page_last_.length())) { + return false; + } + if (StringRef(this->doi_.c_str(), this->doi_.length()) != + StringRef(cit.doi_.c_str(), cit.doi_.length())) { + return false; + } + if (StringRef(this->title_.c_str(), this->title_.length()) != + StringRef(cit.title_.c_str(), cit.title_.length())) { + return false; + } + if (this->authors_ != cit.authors_) { + return false; + } + + return true; + } + + bool operator!=(const MMCifInfoCitation& cit) const { + return !this->operator==(cit); + } + +private: + /// \enum types of citations + typedef enum { + JOURNAL, + BOOK, + UNKNOWN + } MMCifInfoCType; + + //CITATION_ID + String id_; ///< internal identifier + MMCifInfoCType where_; ///< journal or book? + String cas_; ///< CAS identifier + String isbn_; ///< ISBN no. of medium + String published_in_; ///< book title or full journal name + String volume_; ///< journal volume + String page_first_; ///< first page + String page_last_; ///< last page + String doi_; ///< DOI identifier + int pubmed_; ///< accession no. + int year_; ///< year of publication + String title_; ///< title of the publication + std::vector<String> authors_; ///< author information +}; + +/// \brief container class for additional information from MMCif files +/// +/// \section mmcif annotation information +/// +/// MMCif files contain loads of additional information beside coordinates. +/// This class is set up to capture some of it. In detail, we have: +/// +/// \li citations +class DLLEXPORT_OST_IO MMCifInfo { +public: + /// \brief Create an info object. 
+ MMCifInfo() {}; + + /// \brief Add an item to the list of citations + /// + /// \param citation to be added + void AddCitation(MMCifInfoCitation citation) // unit test + { + citations_.push_back(citation); + } + + /// \brief Add a list of authors to a specific citation. + /// + /// \param id identifier of the citation to be modified. + /// \param list list of authors to be added. + void AddAuthorsToCitation(StringRef id, std::vector<String> list); //unit test + + /// \brief Get the list of citations stored in an info object. + /// + /// \return vector of MMCifInfoCitation objects + const std::vector<MMCifInfoCitation>& GetCitations() const + { + return citations_; + } + +//protected: + +private: + // members + std::vector<MMCifInfoCitation> citations_; ///< list of citations +}; + +}} // ns + +#endif diff --git a/modules/io/src/mol/mmcif_reader.cc b/modules/io/src/mol/mmcif_reader.cc index dc384b57f8de98163474437056824d442f6ba5cf..5b85c1340ec990377e9f66deea17ea5620f2ecfe 100644 --- a/modules/io/src/mol/mmcif_reader.cc +++ b/modules/io/src/mol/mmcif_reader.cc @@ -62,6 +62,7 @@ void MMCifParser::Init() seqres_ = seq::CreateSequenceList(); read_seqres_ = false; warned_rule_based_ = false; + info_ = MMCifInfo(); } void MMCifParser::ClearState() @@ -74,7 +75,9 @@ void MMCifParser::ClearState() category_ = DONT_KNOW; warned_name_mismatch_ = false; seqres_ = seq::CreateSequenceList(); + info_ = MMCifInfo(); entity_desc_map_.clear(); + authors_map_.clear(); } void MMCifParser::SetRestrictChains(const String& restrict_chains) @@ -160,7 +163,7 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) } else if (header.GetCategory() == "entity_poly") { category_ = ENTITY_POLY; // mandatory - this->TryStoreIdx(ENTITY_ID, "entity_id", header); + this->TryStoreIdx(ENTITY_ID, "entity_id", header); // optional indices_[EP_TYPE] = header.GetIndex("type"); indices_[PDBX_SEQ_ONE_LETTER_CODE] = @@ -171,7 +174,7 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) } else 
if (header.GetCategory() == "citation") { category_ = CITATION; // mandatory items - this->TryStoreIdx(CITATION_ID, "id", header); + this->TryStoreIdx(CITATION_ID, "id", header); // optional indices_[ABSTRACT_ID_CAS] = header.GetIndex("abstract_id_CAS"); indices_[BOOK_ID_ISBN] = header.GetIndex("book_id_ISBN"); @@ -186,9 +189,15 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header) indices_[PDBX_DATABASE_ID_PUBMED] = header.GetIndex("pdbx_database_id_PubMed"); cat_available = true; - } - /*else if (header.GetCategory()=="pdbx_poly_seq_scheme") { - } else if (header.GetCategory()=="pdbx_struct_assembly") { + } else if (header.GetCategory()=="citation_author") { + category_ = CITATION_AUTHOR; + // mandatory items + this->TryStoreIdx(AUTHOR_CITATION_ID, "citation_id", header); + this->TryStoreIdx(AUTHOR_NAME, "name", header); + this->TryStoreIdx(ORDINAL, "ordinal", header); + cat_available = true; + } /* +else if (header.GetCategory()=="pdbx_struct_assembly") { } else if (header.GetCategory()=="struct_conf") { }*/ category_counts_[category_]++; @@ -590,7 +599,109 @@ String MMCifParser::ConvertSEQRES(const String& seqres, void MMCifParser::ParseCitation(const std::vector<StringRef>& columns) { - // fetch dependencies from dscription, like article requires year + // create citation object + MMCifInfoCitation cit = MMCifInfoCitation(); + // just add info + cit.SetID(columns[indices_[CITATION_ID]].str()); + if (indices_[ABSTRACT_ID_CAS] != -1) { + cit.SetCAS(columns[indices_[ABSTRACT_ID_CAS]].str()); + } + if (indices_[BOOK_ID_ISBN] != -1) { + cit.SetISBN(columns[indices_[BOOK_ID_ISBN]].str()); + } + if (indices_[BOOK_TITLE] != -1) { + if (columns[indices_[BOOK_TITLE]] != StringRef(".", 1)) { + cit.SetPublishedIn(columns[indices_[BOOK_TITLE]].str()); + } + } + if (indices_[JOURNAL_FULL] != -1) { + if (columns[indices_[JOURNAL_FULL]] != StringRef(".", 1)) { + if (cit.GetPublishedIn().length() > 0) { + throw IOException(this->FormatDiagnostic(STAR_DIAG_WARNING, +
"citation.book_title already occupies the 'published_in' field of this citation, cannot add " + + columns[indices_[JOURNAL_FULL]].str() + + ".", + this->GetCurrentLinenum())); + } else { + cit.SetPublishedIn(columns[indices_[JOURNAL_FULL]].str()); + } + } + } + if (indices_[JOURNAL_VOLUME] != -1) { + cit.SetVolume(columns[indices_[JOURNAL_VOLUME]].str()); + } + if (indices_[PAGE_FIRST] != -1) { + cit.SetPageFirst(columns[indices_[PAGE_FIRST]].str()); + } + if (indices_[PAGE_LAST] != -1) { + cit.SetPageLast(columns[indices_[PAGE_LAST]].str()); + } + if (indices_[PDBX_DATABASE_ID_DOI] != -1) { + cit.SetDOI(columns[indices_[PDBX_DATABASE_ID_DOI]].str()); + } + if (indices_[PDBX_DATABASE_ID_PUBMED] != -1) { + cit.SetPubMed(this->TryGetInt(columns[indices_[PDBX_DATABASE_ID_PUBMED]], + "citation.pdbx_database_id_PubMed")); + } + if (indices_[YEAR] != -1) { + cit.SetYear(this->TryGetInt(columns[indices_[YEAR]], "citation.year")); + } + if (indices_[TITLE] != -1) { + cit.SetTitle(columns[indices_[TITLE]].str()); + } + + // store citation (wo author, yet) + info_.AddCitation(cit); +} + +void MMCifParser::ParseCitationAuthor(const std::vector<StringRef>& columns) +{ + // get/ pack values + MMCifCitationAuthorMap::iterator atm_it; + std::vector<String> at_vec; + std::vector<int> pos_vec; + atm_it = authors_map_.find(columns[indices_[AUTHOR_CITATION_ID]].str()); + if (atm_it != authors_map_.end()) { + at_vec = atm_it->second.second; + pos_vec = atm_it->second.first; + } + at_vec.push_back(columns[indices_[AUTHOR_NAME]].str()); + pos_vec.push_back(this->TryGetInt(columns[indices_[ORDINAL]], + "citation_author.ordinal")); + + // sort new author into right position + std::vector<int>::iterator pos_it; + std::vector<String>::iterator atv_it; + int ti; + String ts; + pos_it = pos_vec.end(); + atv_it = at_vec.end(); + --pos_it; + --atv_it; + for (; pos_it != pos_vec.begin(); --pos_it, --atv_it) { + if (*pos_it < *(pos_it-1)) { + ti = *pos_it; + *pos_it = *(pos_it-1); + *(pos_it-1) 
= ti; + ts = *atv_it; + *atv_it = *(atv_it-1); + *(atv_it-1) = ts; + } + else { + break; + } + } + + // store new values in map + if (atm_it != authors_map_.end()) { + atm_it->second.second = at_vec; + atm_it->second.first = pos_vec; + } else { + authors_map_.insert(MMCifCitationAuthorMap::value_type( + columns[indices_[AUTHOR_CITATION_ID]].str(), + std::pair<std::vector<int>, std::vector<String> >(pos_vec, at_vec) + )); + } } void MMCifParser::OnDataRow(const StarLoopDesc& header, @@ -610,9 +721,13 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header, this->ParseEntityPoly(columns); break; case CITATION: - LOG_TRACE("processing citation entry") + LOG_TRACE("processing citation entry"); this->ParseCitation(columns); break; + case CITATION_AUTHOR: + LOG_TRACE("processing citation_author entry") + this->ParseCitationAuthor(columns); + break; default: throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR, "Uncatched category '"+ header.GetCategory() +"' found.", @@ -621,141 +736,6 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header, } } - /* -void PDBReader::Import(mol::EntityHandle& ent, - const String& restrict_chains) -{ - do { - switch(curr_line[0]) { - case 'A': - case 'a': - if (IEquals(curr_line.substr(0, 6), StringRef("ANISOU", 6))) { - if (!charmm_style_) { - LOG_TRACE("processing ANISOU entry"); - this->ParseAnisou(curr_line, line_num_, ent); - } - } - break; - case 'C': - case 'c': - if (curr_line.size()<20) { - LOG_TRACE("skipping entry"); - continue; - } - if (IEquals(curr_line.substr(0, 6), StringRef("COMPND", 6))) { - LOG_TRACE("processing COMPND entry"); - this->ParseCompndEntry(curr_line, line_num_); - } - break; - case 'E': - case 'e': - if (curr_line.size()<3) { - continue; - } - if (IEquals(curr_line.rtrim(), StringRef("END", 3))) { - hard_end_=true; - go_on=false; - break; - } - if (curr_line.size()<6) { - continue; - } - if (IEquals(curr_line.substr(0, 6), StringRef("ENDMDL", 6))) { - go_on=false; - num_model_records_=0; - break; - 
} - case 'H': - case 'h': - if (curr_line.size()<6) { - continue; - } - if (IEquals(curr_line.substr(0, 6), StringRef("HETATM", 6))) { - if (profile_.no_hetatms) - continue; - LOG_TRACE("processing HETATM entry"); - this->ParseAndAddAtom(curr_line, line_num_, ent, - StringRef("HETATM", 6)); - } else if (IEquals(curr_line.substr(0, 6), StringRef("HELIX ", 6))) { - if (!charmm_style_) { - this->ParseHelixEntry(curr_line); - } - } else if (IEquals(curr_line.substr(0, 6), StringRef("HET ", 6))) { - // remember het entry to mark the residues as ligand during ATOM import - char chain=curr_line[12]; - std::pair<bool, int> num=curr_line.substr(13, 4).ltrim().to_int(); - if (!num.first) { - if (profile_.fault_tolerant) { - LOG_WARNING("Invalid HET entry on line " << line_num_); - continue; - } else { - String msg=str(format("Invalid HET entry on line %d")%line_num_); - throw IOException(msg); - } - } - hets_.push_back(HetEntry(chain, to_res_num(num.second, - curr_line[17]))); - } - break; - case 'M': - case 'm': - if (curr_line.size()<6) { - continue; - } - if (IEquals(curr_line.substr(0, 6), StringRef("MODEL ", 6))) { - ++num_model_records_; - if (num_model_records_<2) { - continue; - } - if (profile_.fault_tolerant) { - go_on=false; - num_model_records_=1; - break; - } - String msg=str(format("MODEL record without matching ENDMDL on line %d")%line_num_); - throw IOException(msg); - } - break; - case 'S': - case 's': - if (curr_line.size()<6) { - continue; - } - if (IEquals(curr_line.substr(0, 6), StringRef("SHEET ", 6))) { - if (!charmm_style_) { - this->ParseStrandEntry(curr_line); - } - } - break; - default: - break; - } - } while (std::getline(in_, curr_line_) && ++line_num_ && go_on); - LOG_INFO("imported " - << chain_count_ << " chains, " - << residue_count_ << " residues, " - << atom_count_ << " atoms; with " - << helix_list_.size() << " helices and " - << strand_list_.size() << " strands"); - this->AssignSecStructure(ent); - this->AssignMolIds(ent); - for 
(HetList::const_iterator i=hets_.begin(), e=hets_.end(); i!=e; ++i) { - mol::ResidueHandle res=ent.FindResidue(String(1, i->chain), i->num); - if (res.IsValid()) { - res.SetIsLigand(true); - } - } -} - */ - - - /* - virtual void OnEndLoop() { } - - virtual void OnDataItem(const StarDataItem& item) { } - - };*/ - void MMCifParser::OnEndData() { mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT); @@ -779,6 +759,17 @@ void MMCifParser::OnEndData() } } + // process citations (couple with authors + // iterate citations + MMCifCitationAuthorMap::const_iterator atm_it; + std::vector<String>::const_iterator atv_it; + std::vector<int>::const_iterator pos_it; + for (atm_it = authors_map_.begin(); atm_it != authors_map_.end(); ++atm_it) { + info_.AddAuthorsToCitation(StringRef(atm_it->first.c_str(), + atm_it->first.length()), + atm_it->second.second); + } + LOG_INFO("imported " << chain_count_ << " chains, " << residue_count_ << " residues, " diff --git a/modules/io/src/mol/mmcif_reader.hh b/modules/io/src/mol/mmcif_reader.hh index d82b9fb103d8dcf269af4463f9d6b51ab2106e57..ecd9f31a39a0982dc8d0f5f65f8001056bbce00f 100644 --- a/modules/io/src/mol/mmcif_reader.hh +++ b/modules/io/src/mol/mmcif_reader.hh @@ -31,6 +31,7 @@ #include <ost/io/mol/io_profile.hh> #include <ost/io/io_exception.hh> #include <ost/io/mol/star_parser.hh> +#include <ost/io/mol/mmcif_info.hh> namespace ost { namespace io { @@ -43,6 +44,9 @@ namespace ost { namespace io { /// parser: /// /// \li atom_site +/// \li entity +/// \li entity_poly +/// \li citation class DLLEXPORT_OST_IO MMCifParser : public StarParser { public: /// \brief create a MMCifParser @@ -133,7 +137,12 @@ public: return read_seqres_; } - protected: + /// \brief Get additional information of the MMCif file. + /// + /// \return MMCitfInfo object + const MMCifInfo& GetInfo() { return info_; } + +protected: /// \brief Store an item index from loop header in preparation for reading a /// row. 
Throws an exception if the item does not exist. /// @@ -171,6 +180,8 @@ public: /// \param[out] res_name fetches atom_site.label_comp_id /// \param[out] resnum gets atom_site.label_seq_id if available, consecutive /// numbers, otherwise + /// \param[out] valid_res_num shows if we have a valid residue number or if + /// we have to invent our own /// \param[out] atom_name corresponds to label_atom_id /// \param[out] alt_loc gets first letter of atom_site.label_alt_id bool ParseAtomIdent(const std::vector<StringRef>& columns, @@ -210,6 +221,11 @@ public: /// to some of the residues. To be consistent, we have to do the conversion on /// our own. String ConvertSEQRES(const String& seqres, conop::CompoundLibPtr compound_lib); + /// \brief Fetch MMCif citation_author information + /// + /// \param columns data row + void ParseCitationAuthor(const std::vector<StringRef>& columns); + private: /// \enum magic numbers of this class typedef enum { @@ -270,12 +286,20 @@ private: TITLE ///< title of the citation } CitationItems; + /// \enum items of the citation_author category + typedef enum { + AUTHOR_CITATION_ID, ///< link to CITATION_ID + AUTHOR_NAME, ///< name of an author + ORDINAL ///< position in author list + } CitationAuthorItems; + /// \enum categories of the mmcif format typedef enum { ATOM_SITE, ENTITY, ENTITY_POLY, CITATION, + CITATION_AUTHOR, DONT_KNOW } MMCifCategory; @@ -287,6 +311,9 @@ private: } MMCifEntityDesc; typedef std::map<String, MMCifEntityDesc> MMCifEntityDescMap; + //typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > > + typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > > + MMCifCitationAuthorMap; // members MMCifCategory category_; @@ -312,6 +339,8 @@ private: MMCifEntityDescMap entity_desc_map_; ///< stores entity items seq::SequenceList seqres_; bool read_seqres_; + MMCifInfo info_; ///< info container + MMCifCitationAuthorMap authors_map_; }; }} diff --git a/modules/io/tests/CMakeLists.txt 
b/modules/io/tests/CMakeLists.txt index bcad4b24e6e76e55b6c08f40bcd9ea69670ca704..c4b3486d52aeb26db1865488f61e42c750f9c67c 100644 --- a/modules/io/tests/CMakeLists.txt +++ b/modules/io/tests/CMakeLists.txt @@ -1,5 +1,6 @@ set(OST_IO_UNIT_TESTS test_io_pdb.py + test_io_mmcif.py test_clustal.cc test_io_pdb.cc test_io_crd.cc @@ -9,6 +10,7 @@ set(OST_IO_UNIT_TESTS tests.cc test_star_parser.cc test_mmcif_reader.cc + test_mmcif_info.cc ) if (ENABLE_IMG) list(APPEND OST_IO_UNIT_TESTS test_io_img.cc) diff --git a/modules/io/tests/test_io_mmcif.py b/modules/io/tests/test_io_mmcif.py new file mode 100644 index 0000000000000000000000000000000000000000..47c52b7ece410092f5e4f46464680f143be3af1f --- /dev/null +++ b/modules/io/tests/test_io_mmcif.py @@ -0,0 +1,64 @@ +import unittest +from ost import * + +class TestPDB(unittest.TestCase): + def setUp(self): + pass + + def test_mmcifinfo(self): + c = io.MMCifInfoCitation() + # test ID setting/ getting + c.SetID('ID') + self.assertEquals(c.GetID(), 'ID') + # test CAS setting/ getting + c.SetCAS('FOO') + self.assertEquals(c.GetCAS(), 'FOO') + # test ISBN setting/ getting + c.SetISBN('0-0-0-0-0-0') + self.assertEquals(c.GetISBN(), '0-0-0-0-0-0') + # test published_in setting/ getting + c.SetPublishedIn('Best Book Ever') + self.assertEquals(c.GetPublishedIn(), 'Best Book Ever') + # test volume setting/ getting + c.SetVolume('3') + self.assertEquals(c.GetVolume(), '3') + # test page setting/ getting + c.SetPageFirst('1') + self.assertEquals(c.GetPageFirst(), '1') + c.SetPageLast('10') + self.assertEquals(c.GetPageLast(), '10') + # test doi setting/ getting + c.SetDOI('HERE') + self.assertEquals(c.GetDOI(), 'HERE') + # test PubMed setting/ getting + c.SetPubMed(815) + self.assertEquals(c.GetPubMed(), 815) + # test year setting/ getting + c.SetYear(815) + self.assertEquals(c.GetYear(), 815) + # test title setting/ getting + c.SetTitle('Foo') + self.assertEquals(c.GetTitle(), 'Foo') + # test auhtors setting/ getting + s = ost.StringList() 
+ s.append('Foo') + c.SetAuthorList(s) + s2 = c.GetAuthorList() + self.assertEquals(s2[0], 'Foo') + + i = io.MMCifInfo() + i.AddCitation(c) + s.append('Bar') + i.AddAuthorsToCitation('ID', s) + + cl = i.GetCitations() + self.assertEquals(len(cl), 1) + al = cl[0].GetAuthorList() + self.assertEquals(len(al), 2) + self.assertEquals(al[0], 'Foo') + self.assertEquals(al[1], 'Bar') + +if __name__== '__main__': + unittest.main() + + diff --git a/modules/io/tests/test_mmcif_info.cc b/modules/io/tests/test_mmcif_info.cc new file mode 100644 index 0000000000000000000000000000000000000000..97346d7ff5ee6b6bbc090822497b3d391a634da9 --- /dev/null +++ b/modules/io/tests/test_mmcif_info.cc @@ -0,0 +1,82 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2011 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + +#include <ost/io/io_exception.hh> +#include <ost/io/mol/mmcif_info.hh> + +#define BOOST_AUTO_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + + +using namespace ost; +using namespace ost::io; + +BOOST_AUTO_TEST_SUITE( io ); + +BOOST_AUTO_TEST_CASE(mmcif_info_citation) +{ + BOOST_MESSAGE(" Running mmcif_info_citation tests..."); + + MMCifInfoCitation cit = MMCifInfoCitation(); + std::vector<String> author_list; + + author_list.push_back("Kabel, H."); + + cit.SetID("ID"); + cit.SetCAS("FOO"); + cit.SetISBN("0-0-0-0-0"); + cit.SetPublishedIn("Journal of Uncanny Science"); + cit.SetVolume("3"); + cit.SetPageFirst("1"); + cit.SetPageLast("10"); + cit.SetDOI("HERE"); + cit.SetPubMed(815); + cit.SetYear(815); + cit.SetTitle("Foo"); + cit.SetAuthorList(author_list); + author_list.clear(); + + BOOST_CHECK(cit.GetID() == "ID"); + BOOST_CHECK(cit.GetCAS() == "FOO"); + BOOST_CHECK(cit.GetISBN() == "0-0-0-0-0"); + BOOST_CHECK(cit.GetPublishedIn() == "Journal of Uncanny Science"); + BOOST_CHECK(cit.GetVolume() == "3"); + BOOST_CHECK(cit.GetPageFirst() == "1"); + BOOST_CHECK(cit.GetPageLast() == "10"); + BOOST_CHECK(cit.GetDOI() == "HERE"); + BOOST_CHECK(cit.GetPubMed() == 815); + BOOST_CHECK(cit.GetYear() == 815); + BOOST_CHECK(cit.GetTitle() == "Foo"); + author_list = cit.GetAuthorList(); + BOOST_CHECK(author_list.back() == "Kabel, H."); + + BOOST_MESSAGE(" done."); + BOOST_MESSAGE(" trying to add everything to an info object"); + MMCifInfo info = MMCifInfo(); + info.AddCitation(cit); + std::vector<MMCifInfoCitation> citations = info.GetCitations(); + BOOST_CHECK(citations.size() == 1); + BOOST_CHECK(citations.back() == cit); + 
BOOST_CHECK_THROW(info.AddAuthorsToCitation(StringRef("Foo", 3), + author_list), + IOException); + BOOST_MESSAGE(" done."); +} + +BOOST_AUTO_TEST_SUITE_END(); diff --git a/modules/io/tests/test_mmcif_reader.cc b/modules/io/tests/test_mmcif_reader.cc index 08b3a8bc972e1e53b0f2d06eae5e81795c237559..d534e834a072416f086dc85cd5e8201483424d12 100644 --- a/modules/io/tests/test_mmcif_reader.cc +++ b/modules/io/tests/test_mmcif_reader.cc @@ -53,6 +53,7 @@ public: using MMCifParser::ParseAndAddAtom; using MMCifParser::ParseEntity; using MMCifParser::ParseEntityPoly; + using MMCifParser::ParseCitation; using MMCifParser::TryStoreIdx; using MMCifParser::SetReadSeqRes; using MMCifParser::SetReadCanonicalSeqRes; @@ -507,6 +508,65 @@ columns.push_back(StringRef("polydeoxyribonucleotide/polyribonucleotide hybrid", conop::Conopology::Instance().SetDefaultBuilder("HEURISTIC"); } +BOOST_AUTO_TEST_CASE(mmcif_citation_tests) +{ + BOOST_MESSAGE(" Running mmcif_citation_tests..."); + //build dummy citation + mol::EntityHandle eh; + TestMMCifParserProtected tmmcif_p("testfiles/mmcif/atom_site.mmcif", eh); + StarLoopDesc tmmcif_h; + std::vector<StringRef> columns; + + tmmcif_h.SetCategory(StringRef("citation", 8)); + tmmcif_h.Add(StringRef("id", 2)); + tmmcif_h.Add(StringRef("book_title", 10)); + tmmcif_h.Add(StringRef("journal_full", 12)); + tmmcif_p.OnBeginLoop(tmmcif_h); + + columns.push_back(StringRef("Foo", 3)); + columns.push_back(StringRef("The Guide", 9)); + columns.push_back(StringRef(".", 1)); + + BOOST_CHECK_NO_THROW(tmmcif_p.ParseCitation(columns)); + + columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef(".", 1)); + columns.push_back(StringRef("Hitch", 5)); + + BOOST_CHECK_NO_THROW(tmmcif_p.ParseCitation(columns)); + + columns.pop_back(); + columns.pop_back(); + columns.push_back(StringRef("The Guide", 9)); + columns.push_back(StringRef("Hitch", 5)); + + BOOST_CHECK_THROW(tmmcif_p.ParseCitation(columns), IOException); + + BOOST_MESSAGE(" done."); +} + 
+BOOST_AUTO_TEST_CASE(mmcif_citation_author_tests) +{ + BOOST_MESSAGE(" Running mmcif_citation_author_tests..."); + + mol::EntityHandle eh = mol::CreateEntity(); + std::ifstream s("testfiles/mmcif/atom_site.mmcif"); + IOProfile profile; + MMCifParser mmcif_p(s, eh, profile); + BOOST_CHECK_NO_THROW(mmcif_p.Parse()); + + std::vector<String> authors = + mmcif_p.GetInfo().GetCitations().back().GetAuthorList(); + + BOOST_CHECK(authors.size() == 3); + BOOST_CHECK(authors[0] == "Whiskers, P.D."); + BOOST_CHECK(authors[1] == "McCheese, B.M."); + BOOST_CHECK(authors[2] == "Van Hummel, J.F."); + + BOOST_MESSAGE(" done."); +} + BOOST_AUTO_TEST_CASE(mmcif_parseatomident) { BOOST_MESSAGE(" Running mmcif_parseatomident tests..."); @@ -520,6 +580,8 @@ BOOST_AUTO_TEST_CASE(mmcif_parseatomident) String chain_name; StringRef res_name; mol::ResNum resnum(0); + bool valid_res_num = false; + char alt_loc; //StringRef atom_name; BOOST_MESSAGE(" testing valid line"); diff --git a/modules/io/tests/testfiles/mmcif/atom_site.mmcif b/modules/io/tests/testfiles/mmcif/atom_site.mmcif index eaa46793866696ac28513af3c1c467f7dfe005fa..2d2322af617a75d450de0f054869b5dbe5d38830 100644 --- a/modules/io/tests/testfiles/mmcif/atom_site.mmcif +++ b/modules/io/tests/testfiles/mmcif/atom_site.mmcif @@ -21,6 +21,28 @@ _entity_poly.nstd_linkage no _entity_poly.nstd_monomer no _entity_poly.pdbx_seq_one_letter_code 'VTI' +loop_ +_citation.id +_citation.abstract_id_CAS +_citation.title +_citation.journal_volume +_citation.journal_full +_citation.page_first +_citation.page_last +_citation.pdbx_database_id_DOI +_citation.pdbx_database_id_PubMed +_citation.year +primary 58-08-2 'Very important, but we won't tell' 1 'Some Journal' 0 10 0815 0815 2011 + +# the authors are not ordered on purpose for unit tests +loop_ +_citation_author.citation_id +_citation_author.ordinal +_citation_author.name + primary 1 'Whiskers, P.D.' + primary 3 'Van Hummel, J.F.' + primary 2 'McCheese, B.M.' 
+ loop_ _atom_site.group_PDB _atom_site.type_symbol