Skip to content
Snippets Groups Projects
Commit 57b3db46 authored by Bienchen's avatar Bienchen
Browse files

Added first bit of struct_conf parsing to the mmCif parser, NO UNIT TESTS, yet

parent a1c64afa
Branches
Tags
No related merge requests found
......@@ -80,6 +80,7 @@ void MMCifParser::ClearState()
authors_map_.clear();
bu_origin_map_.clear();
bu_assemblies_.clear();
secstruct_list_.clear();
}
void MMCifParser::SetRestrictChains(const String& restrict_chains)
......@@ -260,6 +261,20 @@ bool MMCifParser::OnBeginLoop(const StarLoopDesc& header)
= header.GetIndex("pdbx_model_type_details");
indices_[STRUCT_TITLE] = header.GetIndex("title");
cat_available = true;
} else if (header.GetCategory() == "struct_conf") {
category_ = STRUCT_CONF;
// mandatory items
this->TryStoreIdx(BEG_LABEL_ASYM_ID, "beg_label_asym_id", header);
this->TryStoreIdx(BEG_LABEL_COMP_ID, "beg_label_comp_id", header);
this->TryStoreIdx(BEG_LABEL_SEQ_ID, "beg_label_seq_id", header);
this->TryStoreIdx(CONF_TYPE_ID, "conf_type_id", header);
this->TryStoreIdx(END_LABEL_ASYM_ID, "end_label_asym_id", header);
this->TryStoreIdx(END_LABEL_COMP_ID, "end_label_comp_id", header);
this->TryStoreIdx(END_LABEL_SEQ_ID, "end_label_seq_id", header);
this->TryStoreIdx(STRUCT_CONF_ID, "id", header);
// optional items
indices_[BEG_AUTH_ASYM_ID] = header.GetIndex("beg_auth_asym_id");
cat_available = true;
}
category_counts_[category_]++;
return cat_available;
......@@ -1008,6 +1023,34 @@ void MMCifParser::ParseStruct(const std::vector<StringRef>& columns)
info_.SetStructDetails(details);
}
// Store one row of the struct_conf category (secondary structure element) in
// secstruct_list_.
//
// \param columns data row; accessed via the column indices recorded in
//                indices_ for the struct_conf items
//
// Throws IOException if author chain names are requested (auth_chain_id_) but
// the optional item struct_conf.beg_auth_asym_id is not part of the category.
//
// TODO: not covered by unit tests yet. Needed cases: conversion of both
//       sequence ids, auth_chain_id_ switched on and off, and a missing
//       beg_auth_asym_id item.
void MMCifParser::ParseStructConf(const std::vector<StringRef>& columns)
{
  // fetch start and end of the element
  const int s_res_num = this->TryGetInt(columns[indices_[BEG_LABEL_SEQ_ID]],
                                        "struct_conf.beg_label_seq_id");
  const int e_res_num = this->TryGetInt(columns[indices_[END_LABEL_SEQ_ID]],
                                        "struct_conf.end_label_seq_id");

  // pick the chain name according to the naming scheme in use
  StringRef chain_name;
  if (auth_chain_id_) {
    // beg_auth_asym_id is optional; its index is -1 if the item is absent
    if (indices_[BEG_AUTH_ASYM_ID] == -1) {
      throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
"Chain name by author requested but 'struct_conf.beg_auth_asym_id' is not set.",
                                               this->GetCurrentLinenum()));
    }
    chain_name = columns[indices_[BEG_AUTH_ASYM_ID]];
  } else {
    chain_name = columns[indices_[BEG_LABEL_ASYM_ID]];
  }

  // NOTE(review): ' ' is presumably the (empty) insertion code - confirm
  //               against to_res_num
  MMCifHSEntry hse = {to_res_num(s_res_num, ' '),
                      to_res_num(e_res_num, ' '),
                      chain_name.str(),
                      columns[indices_[CONF_TYPE_ID]].str()};
  secstruct_list_.push_back(hse);
}
void MMCifParser::OnDataRow(const StarLoopDesc& header,
const std::vector<StringRef>& columns)
{
......@@ -1056,6 +1099,10 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
LOG_TRACE("processing struct entry")
this->ParseStruct(columns);
break;
case STRUCT_CONF:
LOG_TRACE("processing struct_conf entry")
this->ParseStructConf(columns);
break;
default:
throw IOException(this->FormatDiagnostic(STAR_DIAG_ERROR,
"Uncatched category '"+ header.GetCategory() +"' found.",
......@@ -1064,6 +1111,18 @@ void MMCifParser::OnDataRow(const StarLoopDesc& header,
}
}
// Transform the data collected from struct_conf entries into secondary
// structure.
//
// NOTE: stub only - the body contains no code yet and 'ent' is unused. The
//       comments below outline the planned steps.
void MMCifParser::AssignSecStructure(mol::EntityHandle ent)
{
  // planned, for each helix:
  //   - take its chain
  //   - check for overlaps (visual artifacts)
  //   - assign helix to chain
  // planned, for each strand:
  //   - take its chain
  //   - check for overlaps
  //   - assign strand to chain
}
void MMCifParser::OnEndData()
{
mol::XCSEditor editor=ent_handle_.EditXCS(mol::BUFFERED_EDIT);
......@@ -1155,6 +1214,8 @@ void MMCifParser::OnEndData()
}
bu_assemblies_.clear();
// create secondary structure from struct_conf info
LOG_INFO("imported "
<< chain_count_ << " chains, "
<< residue_count_ << " residues, "
......
......@@ -270,6 +270,14 @@ protected:
/// \param columns data row
void ParseStruct(const std::vector<StringRef>& columns);
/// \brief Fetch MMCif struct_conf (secondary structure) information
///
/// \param columns data row
void ParseStructConf(const std::vector<StringRef>& columns);
/// \brief Transform data from struct_conf entry into secondary structure
void AssignSecStructure(mol::EntityHandle ent);
private:
/// \enum magic numbers of this class
typedef enum {
......@@ -394,6 +402,19 @@ private:
STRUCT_TITLE ///< title for the data block
} StructItems;
/// \enum items of the struct_conf category
///
/// The enumerators are used as keys into the column index table (indices_)
/// when struct_conf rows are parsed. No comma after the last enumerator: a
/// trailing comma is rejected by pedantic C++98/03 builds and the other item
/// enums of this class do not have one either.
typedef enum {
  BEG_AUTH_ASYM_ID,  ///< Starting residue, points to atom_site.auth_asym_id
  BEG_LABEL_ASYM_ID, ///< Starting residue, points to atom_site.label_asym_id
  BEG_LABEL_COMP_ID, ///< Starting residue, points to atom_site.label_comp_id
  BEG_LABEL_SEQ_ID,  ///< Starting residue, points to atom_site.label_seq_id
  CONF_TYPE_ID,      ///< Pointer to struct_conf_type.id
  END_LABEL_ASYM_ID, ///< Ending residue, points to atom_site.label_asym_id
  END_LABEL_COMP_ID, ///< Ending residue, points to atom_site.label_comp_id
  END_LABEL_SEQ_ID,  ///< Ending residue, points to atom_site.label_seq_id
  STRUCT_CONF_ID     ///< Unique identifier
} StructConfItems;
/// \enum categories of the mmcif format
typedef enum {
ATOM_SITE,
......@@ -407,6 +428,7 @@ private:
PDBX_STRUCT_ASSEMBLY_GEN,
PDBX_STRUCT_OPER_LIST,
STRUCT,
STRUCT_CONF,
DONT_KNOW
} MMCifCategory;
......@@ -429,6 +451,15 @@ private:
typedef std::map<String, std::pair<std::vector<int>, std::vector<String> > >
MMCifCitationAuthorMap;
/// \struct one secondary structure element as read from a struct_conf row
struct MMCifHSEntry {
  mol::ResNum start;  ///< first residue, built from beg_label_seq_id
  mol::ResNum end;    ///< last residue, built from end_label_seq_id
  String chain_name;  ///< chain name, from the auth or label asym id column
  String type;        ///< value of struct_conf.conf_type_id
};
/// list of struct_conf entries, in the order they were parsed
typedef std::vector<MMCifHSEntry> MMCifHSVector;
// members
MMCifCategory category_;
int category_counts_[DONT_KNOW+1]; ///< overall no. of atom_site loops
......@@ -457,6 +488,7 @@ private:
MMCifCitationAuthorMap authors_map_;
MMCifBioUAssemblyVector bu_assemblies_;
std::map<String, String> bu_origin_map_; ///< pdbx_struct_assembly.details
MMCifHSVector secstruct_list_; ///< for storing struct_conf sec.struct. data
};
}}
......
......@@ -724,8 +724,6 @@ BOOST_AUTO_TEST_CASE(mmcif_biounit_tests)
tmmcif_h.Add(StringRef("matrix[3][2]", 12));
tmmcif_h.Add(StringRef("matrix[3][3]", 12));
tmmcif_p.OnBeginLoop(tmmcif_h);
columns.pop_back();
......@@ -940,6 +938,8 @@ BOOST_AUTO_TEST_CASE(mmcif_testreader)
BOOST_CHECK_EQUAL(rs->GetNumber().GetNum(), i);
}
// add checking of struct_conf info, here
BOOST_MESSAGE(" done.");
BOOST_MESSAGE(" reading data fields which should not fail...");
......
......@@ -94,6 +94,27 @@ _struct.pdbx_formula_weight_method 'Good Guess'
_struct.pdbx_model_details 'Even better guessing'
_struct.pdbx_model_type_details 'Guess'
loop_
_struct_conf.id
_struct_conf.conf_type_id
_struct_conf.beg_label_comp_id
_struct_conf.beg_label_asym_id
_struct_conf.beg_label_seq_id
_struct_conf.end_label_comp_id
_struct_conf.end_label_asym_id
_struct_conf.end_label_seq_id
_struct_conf.details
HELX1 HELX_RH_AL_P ARG A 87 GLN A 92 .
HELX2 HELX_RH_AL_P ARG B 287 GLN B 292 .
STRN1 STRN_P PRO A 1 LEU A 5 .
STRN2 STRN_P CYS B 295 PHE B 299 .
STRN3 STRN_P CYS A 95 PHE A 299 .
STRN4 STRN_P PRO B 201 LEU B 205 .
TURN1 TURN_TY1P_P ILE A 15 GLN A 18 .
TURN2 TURN_TY2_P GLY A 49 GLY A 52 .
TURN3 TURN_TY1P_P ILE A 55 HIS A 69 .
TURN4 TURN_TY1_P THR A 91 GLY A 94 .
loop_
_atom_site.group_PDB
_atom_site.type_symbol
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment