diff --git a/.gitignore b/.gitignore index f9b94e12aec3f7a29cb4490cb370d0b3b9ef66d5..1f397aa5f693201f35466de2f13d01b29c657485 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ qmean.files* local_settings.py doc/build doc/source/example_scripts/example_out/* +.DS_Store diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e961c2623d8c53a78a164c4716f22ab65c291507..94bb55fee86ace449443c8dadf6a834a0e729490 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,10 @@ +Changes in Release 4.1.0 +-------------------------------------------------------------------------------- + + * GMQE (Global Model Quality Estimate) to predict expected quality of a protein + model given a certain template structure. + * Several minor bug fixes, improvements + Changes in Release 4.0.0 -------------------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index dd06b55a5393ca09aadef86f2eb29e943cc373c6..a5546a95e4d9a0573ac576b8109ff3e8f06cc619 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ cmake_minimum_required(VERSION 3.12.1 FATAL_ERROR) cmake_policy(SET CMP0060 NEW) set (QMEAN_VERSION_MAJOR 4) -set (QMEAN_VERSION_MINOR 0) +set (QMEAN_VERSION_MINOR 1) set (QMEAN_VERSION_PATCH 0) set (QMEAN_VERSION_STRING ${QMEAN_VERSION_MAJOR}.${QMEAN_VERSION_MINOR}.${QMEAN_VERSION_PATCH} ) @@ -61,6 +61,8 @@ endif() file(MAKE_DIRECTORY ${STAGE_DIR} ${HEADER_STAGE_PATH} ${LIB_STAGE_PATH}) +setup_compiler_flags() + # Python needed before Boost find_package(Python 3.6.0 REQUIRED) # Split version string @@ -68,7 +70,7 @@ string(REPLACE "." 
";" _python_version_list ${PYTHON_VERSION}) list(GET _python_version_list 0 PYTHON_VERSION_MAJOR) list(GET _python_version_list 1 PYTHON_VERSION_MINOR) -find_package(OPENSTRUCTURE 2.0.0 REQUIRED +find_package(OPENSTRUCTURE 2.1.0 REQUIRED COMPONENTS mol seq seq_alg mol_alg conop) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY) diff --git a/NOTICE b/NOTICE index c8b8f59eca52718aa35f95007e4c15453122e08c..127007646656280b1bf85b0515b5cce07319da76 100644 --- a/NOTICE +++ b/NOTICE @@ -7,17 +7,16 @@ The main authors are: Gabriel Studer, Pascal Benkert and Marco Biasini If you find this software useful, please cite: +Reference for the QMEANDisCo scoring function: +Studer, G., Rempfer, C., Waterhouse, A.M., Gumienny, G., Haas, J., Schwede, T. +QMEANDisCo - distance constraints applied on model quality estimation. +Bioinformatics 36, 1765-1771 (2020). + Reference for the QMEAN scoring function: Benkert, P., Biasini, M., Schwede, T. Toward the estimation of the absolute quality of individual protein structure models. Bioinformatics 27, 343-350 (2011). -Reference for the QMEANDisCo scoring function: -Waterhouse, A., Bertoni, M., Bienert, S., Studer, G., Tauriello, G., -Gumienny, R., Heer, F.T., de Beer, T.A.P., Rempfer, C., Bordoli, L., Lepore, R., -Schwede, T. SWISS-MODEL: homology modelling of protein structures and complexes. -Nucleic Acids Res. 46(W1), W296-W303 (2018). - Reference for the QMEANBrane scoring function: Studer, G., Biasini, M., Schwede, T. Assessing the local structural quality of transmembrane protein models using statistical potentials (QMEANBrane). 
diff --git a/cmake_support/QMEAN2.cmake b/cmake_support/QMEAN2.cmake index 38dd051cc0d97482d775165cac8b4659d03f3248..6d6123065202a91d4f662c20418a7fd58a8325d6 100644 --- a/cmake_support/QMEAN2.cmake +++ b/cmake_support/QMEAN2.cmake @@ -899,3 +899,53 @@ macro(find_path_recursive VARIABLE) set(_fst_subs ${_tmp_dlist}) endwhile(_fst_subs) endmacro(find_path_recursive)
+
+
+# Returns the C++ compiler version as "<major><minor>" in the variable named by
+# _OUTPUT_VERSION, e.g. "44" for 4.4.7 and "102" for 10.2.0.
+#
+# The version comes from CMAKE_CXX_COMPILER_VERSION rather than from running
+# "<compiler> -dumpversion": exec_program() is deprecated, the old single-digit
+# regex mangled versions such as 10.2.0, and depending on how GCC was configured
+# -dumpversion may print only the major version ("9").
+function(get_compiler_version _OUTPUT_VERSION)
+  set(_COMPILER_VERSION "")
+  if(CMAKE_CXX_COMPILER_VERSION MATCHES "^([0-9]+)(\\.([0-9]+))?")
+    set(_MAJOR "${CMAKE_MATCH_1}")
+    set(_MINOR "${CMAKE_MATCH_3}")
+    if("${_MINOR}" STREQUAL "")
+      # No minor component reported: treat it as ".0".
+      set(_MINOR 0)
+    endif()
+    set(_COMPILER_VERSION "${_MAJOR}${_MINOR}")
+  endif()
+  set(${_OUTPUT_VERSION} "${_COMPILER_VERSION}" PARENT_SCOPE)
+endfunction(get_compiler_version)
+
+
+# Appends the default warning/standard flags to CMAKE_CXX_FLAGS when the C++
+# compiler is GCC; for any other compiler this macro does nothing.
+macro(setup_compiler_flags)
+  if(CMAKE_COMPILER_IS_GNUCXX)
+    # A macro runs in the caller's scope, so _GCC_VERSION stays visible after
+    # the call; it is still set for anything that reads it.
+    get_compiler_version(_GCC_VERSION)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+    #message(STATUS "GCC VERSION " ${_GCC_VERSION})
+    # Compare proper version numbers: the concatenated form cannot tell a bare
+    # major version "9" from a pre-6.0 release, and MATCHES "44" is only a
+    # substring test.
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "4.4" AND
+       CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.5")
+      # gcc 4.4 is very strict about aliasing rules. The shared_count
+      # implementation used by boost's shared_ptr violates these rules. To
+      # silence the warnings and prevent miscompiles, enable
+      # -fno-strict-aliasing.
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
+    endif()
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0")
+      # Older compilers (GCC < 6) need C++11 enabled explicitly.
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+    endif()
+  endif()
+endmacro(setup_compiler_flags)
+
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt index f78bde4c5bbb803616e1a62ed3a63a4356995617..fa84c0b8d733bac2857322da9b80e766dda0999a 100644 --- a/data/CMakeLists.txt +++ b/data/CMakeLists.txt @@ -41,6 +41,27 @@ ${LOCAL_NN_SCORER_DIR}/nn_14.dat ${LOCAL_NN_SCORER_DIR}/nn_15.dat ) +set(GMQE_SCORER_DIR "${QMEAN_DATA_DIR}/scorer/gmqe_scorer") +set(GMQE_SCORER_FILES +${GMQE_SCORER_DIR}/feature_groups.json +${GMQE_SCORER_DIR}/nn_0.dat +${GMQE_SCORER_DIR}/nn_1.dat +${GMQE_SCORER_DIR}/nn_2.dat +${GMQE_SCORER_DIR}/nn_3.dat +${GMQE_SCORER_DIR}/nn_4.dat +${GMQE_SCORER_DIR}/nn_5.dat +${GMQE_SCORER_DIR}/nn_6.dat +${GMQE_SCORER_DIR}/nn_7.dat +${GMQE_SCORER_DIR}/nn_8.dat +${GMQE_SCORER_DIR}/nn_9.dat +${GMQE_SCORER_DIR}/nn_10.dat +${GMQE_SCORER_DIR}/nn_11.dat +${GMQE_SCORER_DIR}/nn_12.dat +${GMQE_SCORER_DIR}/nn_13.dat +${GMQE_SCORER_DIR}/nn_14.dat +${GMQE_SCORER_DIR}/nn_15.dat +) + add_custom_target(qmean_data_files ALL) copy_if_different("./" "${SHARED_DATA_PATH}/potentials" @@ -56,6 +77,10 @@ copy_if_different("./" "${SHARED_DATA_PATH}/scorer" copy_if_different("./" "${SHARED_DATA_PATH}/scorer/local_nn_scorer" "${LOCAL_NN_SCORER_FILES}" "QMEAN_LOCAL_NN_SCORER_FILES" + qmean_data_files) + +copy_if_different("./" "${SHARED_DATA_PATH}/scorer/gmqe_scorer" + "${GMQE_SCORER_FILES}" "QMEAN_GMQE_SCORER_FILES" qmean_data_files) @@ -63,4 +88,5 @@ install(FILES ${POTENTIAL_FILES} DESTINATION "share/qmean/potentials") install(FILES ${REFERENCE_TABLE_FILES} DESTINATION "share/qmean/reference_values") install(FILES ${LINEAR_SCORER_FILES} DESTINATION "share/qmean/scorer") install(FILES 
${LOCAL_NN_SCORER_FILES} DESTINATION "share/qmean/scorer/local_nn_scorer") +install(FILES ${GMQE_SCORER_FILES} DESTINATION "share/qmean/scorer/gmqe_scorer") diff --git a/data/qmean/scorer/gmqe_scorer/feature_groups.json b/data/qmean/scorer/gmqe_scorer/feature_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..18ef755a9328913c5c13b43ab8f08139ff810534 --- /dev/null +++ b/data/qmean/scorer/gmqe_scorer/feature_groups.json @@ -0,0 +1 @@ +[["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "ss_agreement", "cb_packing", "reduced", "torsion", "dist_const"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "ss_agreement", "cb_packing", "reduced", "torsion", "dist_const"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "ss_agreement", "cb_packing", "reduced", "torsion"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "ss_agreement", "cb_packing", "reduced", "torsion"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "cb_packing", "reduced", "torsion", "dist_const"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "cb_packing", "reduced", "torsion", "dist_const"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "cb_packing", "reduced", "torsion"], ["QMEANDisCo", "coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "cb_packing", "reduced", "torsion"], ["coverage", "seqres_coverage", "seqres_length", 
"n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "ss_agreement", "cb_packing", "reduced", "torsion", "dist_const"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "ss_agreement", "cb_packing", "reduced", "torsion", "dist_const"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "ss_agreement", "cb_packing", "reduced", "torsion"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "ss_agreement", "cb_packing", "reduced", "torsion"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "cb_packing", "reduced", "torsion", "dist_const"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "cb_packing", "reduced", "torsion", "dist_const"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "profile_aln_score", "avg_entropy", "cb_packing", "reduced", "torsion"], ["coverage", "seqres_coverage", "seqres_length", "n_insertions", "n_deletions", "seq_id", "seq_sim", "cb_packing", "reduced", "torsion"]] diff --git a/data/qmean/scorer/gmqe_scorer/nn_0.dat b/data/qmean/scorer/gmqe_scorer/nn_0.dat new file mode 100644 index 0000000000000000000000000000000000000000..c5ab98bcdb3a469e055492ad46e701b3851f68c1 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_0.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_1.dat b/data/qmean/scorer/gmqe_scorer/nn_1.dat new file mode 100644 index 0000000000000000000000000000000000000000..4575aa33cfa3b9220f5b311285d8aefb6fd941af Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_1.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_10.dat b/data/qmean/scorer/gmqe_scorer/nn_10.dat new file mode 100644 index 
0000000000000000000000000000000000000000..2b5dd5bcd2cd6120caa90133c31a20a8d1762b8a Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_10.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_11.dat b/data/qmean/scorer/gmqe_scorer/nn_11.dat new file mode 100644 index 0000000000000000000000000000000000000000..4bc9f5da1ec9677f87cfba2e1ca543b9158e116f Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_11.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_12.dat b/data/qmean/scorer/gmqe_scorer/nn_12.dat new file mode 100644 index 0000000000000000000000000000000000000000..69bccbcb39b16c3154535ab7e44c8b4c44ce76b1 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_12.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_13.dat b/data/qmean/scorer/gmqe_scorer/nn_13.dat new file mode 100644 index 0000000000000000000000000000000000000000..b7429bf876d1e0c2e27a631e042695ba0c6dd716 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_13.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_14.dat b/data/qmean/scorer/gmqe_scorer/nn_14.dat new file mode 100644 index 0000000000000000000000000000000000000000..cf332d2601aa2d24bdece64fb66499a83b9a9d7c Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_14.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_15.dat b/data/qmean/scorer/gmqe_scorer/nn_15.dat new file mode 100644 index 0000000000000000000000000000000000000000..fc4612b6de351a86bac17589e23ad6a5f87179c4 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_15.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_2.dat b/data/qmean/scorer/gmqe_scorer/nn_2.dat new file mode 100644 index 0000000000000000000000000000000000000000..11bfd5c3bd10bf575f30a5291b4ae861d4602e7f Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_2.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_3.dat b/data/qmean/scorer/gmqe_scorer/nn_3.dat new file mode 100644 index 
0000000000000000000000000000000000000000..ea868ed6c9b69bfa7bef9bb983b6962db414026a Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_3.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_4.dat b/data/qmean/scorer/gmqe_scorer/nn_4.dat new file mode 100644 index 0000000000000000000000000000000000000000..0eaf9113228465bc369c7d58b4bb96b627f5dc7f Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_4.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_5.dat b/data/qmean/scorer/gmqe_scorer/nn_5.dat new file mode 100644 index 0000000000000000000000000000000000000000..c014522d48b626387a79751cb14403f1dfadfd7f Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_5.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_6.dat b/data/qmean/scorer/gmqe_scorer/nn_6.dat new file mode 100644 index 0000000000000000000000000000000000000000..7e787f8102b5276a6aa945673d1e1369ae72c084 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_6.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_7.dat b/data/qmean/scorer/gmqe_scorer/nn_7.dat new file mode 100644 index 0000000000000000000000000000000000000000..4275c7b1effd5ccfc6a3f259484ed7729e11af69 Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_7.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_8.dat b/data/qmean/scorer/gmqe_scorer/nn_8.dat new file mode 100644 index 0000000000000000000000000000000000000000..0797f41292517faed9bd40957e6ee8c20ff6269a Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_8.dat differ diff --git a/data/qmean/scorer/gmqe_scorer/nn_9.dat b/data/qmean/scorer/gmqe_scorer/nn_9.dat new file mode 100644 index 0000000000000000000000000000000000000000..2a4925d500d45e7dc684698babb29e9947d2bf3a Binary files /dev/null and b/data/qmean/scorer/gmqe_scorer/nn_9.dat differ diff --git a/doc/source/conf.py b/doc/source/conf.py index b241fa8e548e8cdcdd3bf545c3d2f5c68c0f3c41..44ed4397cbedb96d98462312b36cd218f090951e 100644 --- a/doc/source/conf.py 
+++ b/doc/source/conf.py @@ -70,9 +70,9 @@ copyright = u'2016-2020, Gabriel Studer' # built documents. # # The short X.Y version. -release = '4.0.0' +release = '4.1.0' # The full version, including alpha/beta/rc tags. -release = '4.0.0' +release = '4.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/source/example_scripts/CMakeLists.txt b/doc/source/example_scripts/CMakeLists.txt index 955447637ab5521472fb71c438487d4b67cafb73..c60641a701395dbba8e20afe83dc44a1486c1186 100644 --- a/doc/source/example_scripts/CMakeLists.txt +++ b/doc/source/example_scripts/CMakeLists.txt @@ -74,6 +74,12 @@ set(DOC_TEST_DATA example_data/shift_in_front_helix_four_local_scores.txt example_data/shift_into_middle_local_scores.txt example_data/shift_towards_cter_local_scores.txt + example_data/housing.csv + example_data/1crn.1.A.hhm + example_data/1crn_dc.dat + example_data/1crn_3szs_aln.fasta + example_data/3szs.2.A.pdb + example_data/3szs.2.A.hhm ) set (DOC_TEST_SCRIPTS @@ -93,6 +99,8 @@ set (DOC_TEST_SCRIPTS assess_model_quality_example.py assess_membrane_model_quality.py reproduce_fig4_from_publication.py + regressor_training.py + gmqe_example.py ) add_custom_target(doctest) diff --git a/doc/source/example_scripts/example_data/1crn.1.A.hhm b/doc/source/example_scripts/example_data/1crn.1.A.hhm new file mode 100644 index 0000000000000000000000000000000000000000..eb35d07151bbc66ea6f979f155f3f1fef9cf2d40 --- /dev/null +++ b/doc/source/example_scripts/example_data/1crn.1.A.hhm @@ -0,0 +1,180 @@ +HHsearch 1.5 +NAME c6a0deb50c4f69619d193f0fde0517c2 +FAM +FILE seq01 +COM /scicore/soft/apps/HH-suite/2.0.16-goolf-1.4.10/bin/hhmake -i /scratch/14369431.1.short.q/tmpqOaEJI/seq01.a3m -o /scratch/14369431.1.short.q/tmpqOaEJI/seq01.hhm +DATE Mon Mar 7 14:19:45 2016 +LENG 46 match states, 46 columns in multiple alignment +FILT 55 out of 57 sequences passed filter (-id 90 -cov 0 -qid 0 -qsc -20.00 -diff 100) +NEFF 3.8 
+SEQ +>ss_pred PSIPRED predicted secondary structure +CCCCCCHHHHHHHHHCCCCCCCHHHHHHHCCCEEECCCCCCCCCCC +>ss_conf PSIPRED confidence values +9877783554421111168998455542048366208989999999 +>Consensus +xsCCpstxaRnxYnxCrxxgxsxxxCaxxsgCkixsgxxCPxxyxx +>c6a0deb50c4f69619d193f0fde0517c2 +TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN +>gi|115605364|gb|ABJ15789.1| putative thionin precursor [Polygonum sibiricum] +-SCCQTTTARNIYNSCRLAGGSRERCASLSGCKHVTGNTCSPGWEK +>gi|4007745|emb|CAA65316.1| purothionin [Secale cereale] +-SCCKSTLGRNCYNLCRTRGAQK-LCANFCRCKLISSTSCPKEFPK +>gi|17381172|gb|AAL36398.1| putative thionin protein [Arabidopsis thaliana]gi|62320644|dbj|BAD95310.1| putative thionin [Arabidopsis thaliana] +-TCCPSQSTRKGFEDCISEGNLQILCSAESGCRDTYVGYCPSGFPY +>gi|802170|gb|AAB33011.1| crambin precursor=thionin variant Thi2Ca12 [Crambe abyssinica, seeds, Peptide Partial, 135 aa] +-SCCPTKSARNTFDVCRLTGTSMGLCAAISECKILSVTKCPSNLPY +>gi|21553588|gb|AAM62681.1| thionin Thi2.2 [Arabidopsis thaliana] +-ICCPTKDDRSVYFVCMLSVSSQFYCLLKSKCKNTSQTICPPGYTN +>gi|15218931|ref|NP_176784.1| thionin [Arabidopsis thaliana]gi|44888531|sp|Q9C8D6.1|THN24_ARATH RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|12322605|gb|AAG51299.1|AC026480_6 thionin, putative [Arabidopsis thaliana]gi|14190505|gb|AAK55733.1|AF380652_1 At1g66100/F15E12_20 [Arabidopsis thaliana]gi|15809774|gb|AAL06815.1| At1g66100/F15E12_20 [Arabidopsis thaliana]gi|332196341|gb|AEE34462.1| thionin [Arabidopsis thaliana] +-ICCPSIQARTFYNACLFAVGSPSSCIRNSSCLDISESTCPRGYTN +>gi|1729954|sp|Q05806.1|THN5_WHEAT RecName: Full=Type-5 thionin; Contains: RecName: Full=Type-5 thionin; AltName: Full=Type V thionin; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|21885|emb|CAA43844.1| wheat type V thionin [Triticum aestivum]gi|21887|emb|CAA43845.1| wheat type V thionin [Triticum aestivum] +-DCGANPFKVACFNSCLLGPSTVFQCADFCACRLPAG--------- 
+>gi|120564556|gb|ABM30200.1| thionin [Brassica juncea] +-SCCPSTAARWAYYLCTNSWPLTPLCISHTGC-IESETTCPPGYPY +>gi|545031|gb|AAB29760.1| thionin precursor {clone Thi1Va1} [Viscum album=mistletoe, Peptide, 115 aa] +-ICCRAPAGKKCYNLCTA-lLSSE-TCANTCYCKDVSGETCPAD--- +# +NULL 3706 5728 4211 4064 4839 3729 4763 4308 4069 3323 5509 4640 4464 4937 4285 4423 3815 3783 6325 4665 +HMM A C D E F G H I K L M N P Q R S T V W Y + M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D + 0 * * 0 * 0 * * * * +T 1 * * * * * * * * * * * * * * * * 0 * * * 1 + 0 * * * * * * 1000 0 0 + +T 2 * * 4186 * * 5283 * 2677 * 5264 * * * * * 833 3153 3986 * * 2 + 0 * * * * * * 3941 0 0 + +C 3 * 0 * * * * * * * * * * * * * * * * * * 3 + 0 * * * * * * 3941 0 0 + +C 4 * 122 * * 5259 4186 * * * * * * * * * * * * * * 4 + 0 * * * * * * 3941 0 0 + +P 5 4186 * * * * * * * 3250 * * * 549 5753 2856 * * * * * 5 + 0 * * * * * * 3941 0 0 + +S 6 5271 * 4429 * * * * * * * * 2329 * * 5941 866 2612 * * * 6 + 0 * * * * * * 3941 0 0 + +I 7 * * 5440 4820 * * * 3810 4212 * 5935 4983 3272 3386 4684 5283 1068 5174 * * 7 + 0 * * * * * * 3941 0 0 + +V 8 2614 * 4805 * 4186 * * 3036 * 3320 * * * 4094 * 2750 1838 5660 5873 * 8 + 0 * * * * * * 3941 0 0 + +A 9 646 * 4264 * * 2749 * * 4186 * * * * * * 3803 4903 * * * 9 + 0 * * * * * * 3941 0 0 + +R 10 * * * * * * * 5975 4265 * * * * * 189 * * 4186 * * 10 + 0 * * * * * * 3941 0 0 + +S 11 4186 * * 3803 * * * 5935 3608 * * 836 * * * 2953 5214 * 4935 5259 11 + 0 * * * * * * 3941 0 0 + +N 12 4935 1544 * 4787 5214 3942 * 2698 5264 * 4684 3736 * 5164 5174 * 3934 3625 * * 12 + 0 * * * * * * 3941 0 0 + +F 13 * * * * 2199 * * * * * * * * * * * * * * 354 13 + 0 * * * * * * 3941 0 0 + +N 14 * * 4222 3844 4805 * * * * 5264 * 510 * * * 4983 5283 5464 * 4935 14 + 0 * * * * * * 3941 0 0 + +V 15 2800 * 3844 * * * * 5271 * 2403 5174 * * * * 2663 3074 1902 * * 15 + 0 * * * * * * 3941 0 0 + +C 16 * 0 * * * * * * * * * * * * * * * * * * 16 + 0 * * * * * * 3941 0 0 + +R 17 * * * * * * 6200 2932 * 3611 4805 
* * * 558 * 4093 * * * 17 + 0 * * * * * * 3941 0 0 + +L 18 3969 * * * 2171 * * 3956 * 1318 5174 4935 * * 5264 3115 6073 4979 * * 18 + 38 * 5271 * * * * 3941 0 0 + +P 19 3003 * * 4863 * 3441 * * * 5901 * * 2369 4784 2418 3812 2907 3735 * 4814 19 + 41 * 5155 0 * 0 * 3898 1025 1025 + +G 20 * * * * 4949 454 6123 * * 4542 * * 3604 * * * * 3948 4909 * 20 + 72 * 4354 * * * 0 3901 0 1033 + +T 21 2209 5224 * * * 2278 * * * * * 3752 4867 * * 2344 2028 * * * 21 + 39 5224 * 0 * 669 1430 3845 1023 1191 + +P 22 5624 * * * * * * 5870 * 3138 * * 1935 4054 * 1200 3439 * * * 22 + 0 * * * * 0 * 3903 0 1033 + +E 23 5952 6850 * 3099 * * 4802 5475 3316 * 4903 * 4232 2872 1627 5880 4176 4193 * 5287 23 + 184 * 3059 * * * * 3943 0 0 + +A 24 4442 * 3710 2779 2989 4157 * 4702 * * * * 1565 * 5707 4382 3234 * * * 24 + 28 5707 * 0 * 0 * 3843 1000 1329 + +I 25 4358 * * * 6658 * 5279 3244 4611 2579 * 5958 * 4193 3566 3476 2812 2484 * 4816 25 + 0 * * * * * * 3943 0 0 + +C 26 * 31 * * * * * * * * * * * * * 5573 * * * * 26 + 0 * * * * * * 3943 0 0 + +A 27 560 6231 * * * 4233 * 3636 * 4029 4985 * * * * 3609 * * * * 27 + 0 * * * * * * 3943 0 0 + +T 28 2460 * 3819 * * 4273 * 5850 2738 4816 * 3203 * 4985 3239 2607 3596 * * * 28 + 0 * * * * * * 3943 0 0 + +Y 29 4929 * 5658 4919 2879 4802 4950 3737 4252 1699 5444 5287 5995 * 5262 * 3899 3734 * 4566 29 + 0 * * * * * * 3943 0 0 + +T 30 3911 1554 * * * * * * * * * * * * * 1040 3227 * * * 30 + 0 * * * * * * 3943 0 0 + +G 31 4193 * 3877 5850 * 811 * * 4816 * * * * * 2699 4815 4659 * * 5301 31 + 0 * * * * * * 3943 0 0 + +C 32 * 0 * * * * * * * * * * * * * * * * * * 32 + 47 * 4950 * * * * 3943 0 0 + +I 33 * * * 4672 * * * 2865 809 3414 * * * 4933 3475 * * 4789 * * 33 + 0 * * * * 0 * 3904 0 1044 + +I 34 * * 3573 4802 * * 3933 921 * 2779 * 3898 * * 4839 * 5475 5952 * * 34 + 0 * * * * * * 3943 0 0 + +I 35 * * 5287 4950 4358 * * 1603 * 3869 * * 4193 6130 * 3498 3006 2227 * * 35 + 46 * 4985 * * * * 3943 0 0 + +P 36 4173 * 2874 * 5250 * * 5139 * 4792 * * 4633 4645 
5394 1055 4050 * * 3805 36 + 201 * 2941 * * * 0 3881 0 1045 + +G 37 5480 * 5485 3849 * 869 * * * * * * * 4548 5245 3819 4686 2648 * * 37 + 22 * 6064 * * 0 * 3796 0 1453 + +A 38 4564 * 4944 5167 * 1910 * * 4653 4470 * 3572 4946 * 5111 2927 1892 * * * 38 + 0 * * * * 0 * 3775 0 1000 + +T 39 * 4660 * 5366 4744 6406 * 3919 2193 * * 4099 * 5382 5207 3976 1348 * * 4782 39 + 0 * * * * * * 3764 0 0 + +C 40 * 58 * * * * * * 4660 * * * * * * * * * * * 40 + 0 * * * * * * 3764 0 0 + +P 41 * * 4306 * * * * * 3731 * * 6351 322 4587 * 5643 * * * * 41 + 0 * * * * * * 3764 0 0 + +G 42 4398 * 6402 * * 4592 * * 4079 * * 4374 1200 * 3395 2109 5063 * * * 42 + 0 * * * * * * 3764 0 0 + +D 43 * * 2024 6014 4769 1231 4660 * * * * 4386 3096 4744 * 4862 * * * * 43 + 0 * * * * * * 3764 0 0 + +Y 44 * * * * 2991 * 4830 * * 3705 * * * * * * * * 2806 691 44 + 0 * * * * * * 3718 0 0 + +A 45 4566 * 3231 5056 * * * 3467 * * * 5405 1034 * 5100 6384 2896 4532 * * 45 + 35 5366 * 4087 87 * * 3718 1002 0 + +N 46 * * 4370 * * * 2588 * 1723 * * 2200 * * 5953 * * * * 2008 46 + 0 * * 0 * * * 3699 0 0 + +// diff --git a/doc/source/example_scripts/example_data/1crn_3szs_aln.fasta b/doc/source/example_scripts/example_data/1crn_3szs_aln.fasta new file mode 100644 index 0000000000000000000000000000000000000000..1c5d1b33460e4f279960b31ccd0dd2b98e13b3ec --- /dev/null +++ b/doc/source/example_scripts/example_data/1crn_3szs_aln.fasta @@ -0,0 +1,5 @@ +>crambin +TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN +>3szs.2.A Hellethionin-D +KSCCRNTLARNCYNACRFTGGSQPTCGILCDCIHVTTTTCPSSHPS + diff --git a/doc/source/example_scripts/example_data/1crn_dc.dat b/doc/source/example_scripts/example_data/1crn_dc.dat new file mode 100644 index 0000000000000000000000000000000000000000..ec8a62dd754c39431f73022e44b842a50c5d7e36 Binary files /dev/null and b/doc/source/example_scripts/example_data/1crn_dc.dat differ diff --git a/doc/source/example_scripts/example_data/3szs.2.A.hhm b/doc/source/example_scripts/example_data/3szs.2.A.hhm new 
file mode 100644 index 0000000000000000000000000000000000000000..477ed3e84981c5c4fc5db1bea159baf91f616aea --- /dev/null +++ b/doc/source/example_scripts/example_data/3szs.2.A.hhm @@ -0,0 +1,180 @@ +HHsearch 1.5 +NAME c38e8578723b5c8dc4c2b2b22def8e66 +FAM +FILE seq01 +COM /scicore/soft/apps/HH-suite/2.0.16-goolf-1.4.10/bin/hhmake -i /scratch/14393521.1.short.q/tmpxV21rm/seq01.a3m -o /scratch/14393521.1.short.q/tmpxV21rm/seq01.hhm +DATE Mon Mar 7 22:58:26 2016 +LENG 46 match states, 46 columns in multiple alignment +FILT 55 out of 57 sequences passed filter (-id 90 -cov 0 -qid 0 -qsc -20.00 -diff 100) +NEFF 3.9 +SEQ +>ss_pred PSIPRED predicted secondary structure +CCCCCCHHHHHHHHHCEECCCCCCHHHHCCCCEEEECCCCCCCCCC +>ss_conf PSIPRED confidence values +9867773554321000105889131100166177418989999999 +>Consensus +ksCCpstxaRnxYnxCrxxgxsxxxCaxxsgCkiisgxxCPxxyxx +>c38e8578723b5c8dc4c2b2b22def8e66 +KSCCRNTLARNCYNACRFTGGSQPTCGILCDCIHVTTTTCPSSHPS +>gi|242038833|ref|XP_002466811.1| hypothetical protein SORBIDRAFT_01g014470 [Sorghum bicolor]gi|241920665|gb|EER93809.1| hypothetical protein SORBIDRAFT_01g014470 [Sorghum bicolor] +NkSCCPSRIARNMYNTCRFRGASRETCARFARCEIVQGKCKDPHYID +>gi|21069041|dbj|BAB93114.1| leaf thionin Asthi3 [Avena sativa] +NTCCKDDIARNCYNVCRIPGTPTFICANMCRCIITRRNECPNDYPK +>gi|17381172|gb|AAL36398.1| putative thionin protein [Arabidopsis thaliana]gi|62320644|dbj|BAD95310.1| putative thionin [Arabidopsis thaliana] +QkTCCPSQSTRKGFEDCISEGNLQILCSAESGCRDTYVGYCPSGFPY +>gi|802170|gb|AAB33011.1| crambin precursor=thionin variant Thi2Ca12 [Crambe abyssinica, seeds, Peptide Partial, 135 aa] +KSCCPTKSARNTFDVCRLTGTSMGLCAAISECKILSVTKCPSNLPY +>gi|21553588|gb|AAM62681.1| thionin Thi2.2 [Arabidopsis thaliana] +KICCPTKDDRSVYFVCMLSVSSQFYCLLKSKCKNTSQTICPPGYTN +>gi|15218931|ref|NP_176784.1| thionin [Arabidopsis thaliana]gi|44888531|sp|Q9C8D6.1|THN24_ARATH RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Probable thionin-2.4; Contains: RecName: Full=Acidic protein; Flags: 
Precursorgi|12322605|gb|AAG51299.1|AC026480_6 thionin, putative [Arabidopsis thaliana]gi|14190505|gb|AAK55733.1|AF380652_1 At1g66100/F15E12_20 [Arabidopsis thaliana]gi|15809774|gb|AAL06815.1| At1g66100/F15E12_20 [Arabidopsis thaliana]gi|332196341|gb|AEE34462.1| thionin [Arabidopsis thaliana] +NICCPSIQARTFYNACLFAVGSPSSCIRNSSCLDISESTCPRGYTN +>gi|1729954|sp|Q05806.1|THN5_WHEAT RecName: Full=Type-5 thionin; Contains: RecName: Full=Type-5 thionin; AltName: Full=Type V thionin; Contains: RecName: Full=Acidic protein; Flags: Precursorgi|21885|emb|CAA43844.1| wheat type V thionin [Triticum aestivum]gi|21887|emb|CAA43845.1| wheat type V thionin [Triticum aestivum] +VDCGANPFKVACFNSCLLGPSTVFQCADFCACRLPAG--------- +>gi|1006767|emb|CAA57354.1| Thionin class 4 [Tulipa gesneriana] +KSCFPSTAAKYCYNACRLPGCrPETICAARCGCKIISSGNCPPGYDY +>gi|545031|gb|AAB29760.1| thionin precursor {clone Thi1Va1} [Viscum album=mistletoe, Peptide, 115 aa] +-skICCRAPAGKKCYNLCTA-lLSSE-TCANTCYCKDVSGETCPAD--- +# +NULL 3706 5728 4211 4064 4839 3729 4763 4308 4069 3323 5509 4640 4464 4937 4285 4423 3815 3783 6325 4665 +HMM A C D E F G H I K L M N P Q R S T V W Y + M->M M->I M->D I->M I->I D->M D->D Neff Neff_I Neff_D + 0 * * 0 * 0 * * * * +K 1 * * * * * * * * 458 * * 2963 * 3592 * * * 4044 * * 1 + 248 2663 * 626 1507 * * 3800 1553 0 + +S 2 * * 4102 * * * * 2670 * 5237 * * * * * 784 3114 4012 * * 2 + 0 * * * * * * 3904 0 0 + +C 3 * 0 * * * * * * * * * * * * * * * * * * 3 + 0 * * * * * * 3904 0 0 + +C 4 * 127 * * 5274 4102 * * * * * * * * * * * * * * 4 + 0 * * * * * * 3904 0 0 + +R 5 4102 * * * * * * * 3234 * * * 563 5792 2829 * * * * * 5 + 0 * * * * * * 3904 0 0 + +N 6 5324 * 4346 * * * * * * * * 2217 * * 5975 913 2609 * * * 6 + 0 * * * * * * 3904 0 0 + +T 7 * * 5372 4868 * * * 3609 4203 * 5932 4966 3222 3323 4577 * 1058 5171 * * 7 + 0 * * * * * * 3904 0 0 + +L 8 2633 * 4790 * 4102 * * 2976 * 3305 6398 * * 4074 * 2680 1976 5798 5916 * 8 + 0 * * * * * * 3904 0 0 + +A 9 721 * 4264 * * 2676 * * 4102 * * * * * * 
3785 4833 5723 * * 9 + 0 * * * * * * 3904 0 0 + +R 10 * * * * * * * 6010 4299 * * * * * 192 * * 4102 * * 10 + 0 * * * * * * 3904 0 0 + +N 11 4102 * * 3785 * * * 5932 3574 * * 852 * * * 2953 5191 * 4949 5274 11 + 0 * * * * * * 3904 0 0 + +C 12 4949 1516 * 4705 5191 3898 * 2896 5237 * 4577 3777 * 5156 5171 * 3904 3525 * * 12 + 0 * * * * * * 3904 0 0 + +Y 13 * * * * 2167 * * * * * * * * * * * * * * 364 13 + 0 * * * * * * 3904 0 0 + +N 14 * * 4255 3768 4790 * * * * 5237 * 467 * * * 4966 * 5384 * 4949 14 + 0 * * * * * * 3904 0 0 + +A 15 2759 * 3768 * * * * 5258 * 2422 5171 * * * * 2831 2966 1890 * * 15 + 0 * * * * * * 3904 0 0 + +C 16 * 0 * * * * * * * * * * * * * * * * * * 16 + 0 * * * * * * 3904 0 0 + +R 17 * * * * * * 6247 2898 * 3546 4790 * * * 570 * 4124 * * * 17 + 0 * * * * * * 3904 0 0 + +F 18 3993 * * * 2163 * * 3878 * 1350 5171 4949 * * 5237 3083 6124 4853 * * 18 + 36 * 5324 * * * * 3904 0 0 + +T 19 2869 * * 4797 * 3408 * * * 5900 * * 2341 4702 2393 3812 2888 4337 * 4863 19 + 41 * 5153 0 * 0 * 3864 1027 1027 + +G 20 * * * * 4921 461 6144 * * 4567 * * 3538 * * * * 3939 4923 * 20 + 74 * 4329 * * * 0 3866 0 1036 + +G 21 2203 5224 * * * 2367 * * * * * 3681 4879 * * 2318 2002 * * * 21 + 39 5224 * 0 * 666 1435 3805 1024 1201 + +S 22 5563 * * * * * * 5913 * 3097 * * 2021 4069 * 1175 3372 * * * 22 + 0 * * * * 0 * 3867 0 1036 + +Q 23 5939 6865 * 3364 * * 4720 5395 3318 * 4922 * 3802 2849 1548 5923 4156 4107 * * 23 + 183 * 3071 * * * * 3905 0 0 + +P 24 5261 * 4330 2470 2933 4155 * 4648 * * * * 1550 * 5746 4363 3210 * * * 24 + 56 4716 * 611 1533 0 * 3822 1055 1342 + +T 25 4249 * * * 6664 * 5251 3523 4605 2432 * 5992 * 4107 3591 3918 2775 2373 * 4800 25 + 0 * * * * * * 3905 0 0 + +C 26 * 31 * * * * * * * * * * * * * 5563 * * * * 26 + 0 * * * * * * 3905 0 0 + +G 27 576 6218 * * * 4125 * 3610 * 4008 4968 * * * * 3603 * * * * 27 + 0 * * * * * * 3905 0 0 + +I 28 2431 * 3750 * * 4305 * 5688 2616 4800 * 3211 * 4968 3447 2682 3578 * * * 28 + 0 * * * * * * 3905 0 0 + +L 29 4939 * 
5594 4849 2818 4720 4964 3597 4255 1786 5376 5260 6032 * 5280 * 3895 3736 * 4584 29 + 0 * * * * * * 3905 0 0 + +C 30 3846 1526 * * * * * * * * * * * * * 1062 3259 * * * 30 + 0 * * * * * * 3905 0 0 + +D 31 4107 * 3807 5855 * 855 * * 4800 * * * * * 2651 4798 4471 * * 5354 31 + 0 * * * * * * 3905 0 0 + +C 32 * 0 * * * * * * * * * * * * * * * * * * 32 + 47 * 4964 * * * * 3905 0 0 + +I 33 * * * 4567 * * * 2834 841 3375 * * * 4917 3397 * * 4835 * * 33 + 49 * 4917 * * 0 * 3870 0 1047 + +H 34 * * 3476 4687 * * 3777 909 * 2720 * 4740 * * 4829 * 5345 5877 * * 34 + 0 * * * * * 0 3841 0 1049 + +V 35 * * * 4911 4207 * * 1514 * 3827 * * 4085 6094 * 4043 2913 2151 * * 35 + 0 * * * * 0 * 3841 0 1049 + +T 36 4107 * 2871 * * * * 5176 * 4888 * 4968 4694 4578 5376 1073 4069 * * 3783 36 + 161 * 3246 * * * * 3905 0 0 + +T 37 5071 * 5558 3987 * 924 * * * * * * * 4661 5293 3282 4719 2708 * * 37 + 20 * 6181 * * 0 * 3855 0 1295 + +T 38 4581 * 4921 5218 * 2008 * * 4535 4455 * 3505 4982 * 5103 2823 1884 * * * 38 + 0 * * * * 0 * 3725 0 1000 + +T 39 * 4541 * 5276 4635 6389 * 3900 2161 * * 4119 * 5305 * 3867 1327 * * 4695 39 + 0 * * * * * * 3715 0 0 + +C 40 * 63 * * * * * * 4541 * * * * * * * * * * * 40 + 0 * * * * * * 3715 0 0 + +P 41 * * 4213 * * * * * 3716 * * 6360 289 5637 * 5679 * * * * 41 + 0 * * * * * * 3715 0 0 + +S 42 4344 * 6422 * * 4608 * * 4038 * * 4291 1266 * 3392 2027 5072 * * * 42 + 13 * 6761 * * * * 3715 0 0 + +S 43 * * 1996 6054 4811 1245 4531 * * * * 4378 3364 4632 * 4381 * * * * 43 + 0 * * * * 0 * 3721 0 1000 + +H 44 * * * * 2971 * 4777 * * 3730 * * * * * * * * 2809 694 44 + 0 * * * * * * 3673 0 0 + +P 45 4583 * 3567 5091 * * * 3411 * * * 5414 982 * 5094 6405 2882 4501 * * 45 + 35 5391 * 4087 87 * * 3673 1007 0 + +S 46 * * 4479 * * * 2559 * 1676 * * 2332 * * 6234 5575 * * * 2061 46 + 0 * * 0 * * * 3610 0 0 + +// diff --git a/doc/source/example_scripts/example_data/3szs.2.A.pdb b/doc/source/example_scripts/example_data/3szs.2.A.pdb new file mode 100644 index 
0000000000000000000000000000000000000000..928e42a2ebe198885818696fdab1c98f4007332d --- /dev/null +++ b/doc/source/example_scripts/example_data/3szs.2.A.pdb @@ -0,0 +1,338 @@ +ATOM 1 N LYS A 1 70.311 12.038 10.336 1.00 22.93 N +ATOM 2 CA LYS A 1 71.294 12.686 11.260 1.00 22.56 C +ATOM 3 C LYS A 1 72.452 13.260 10.462 1.00 21.30 C +ATOM 4 O LYS A 1 73.096 12.533 9.698 1.00 21.31 O +ATOM 5 CB LYS A 1 71.859 11.674 12.245 1.00 23.27 C +ATOM 6 CG LYS A 1 72.843 12.288 13.270 1.00 23.94 C +ATOM 7 CD LYS A 1 73.166 11.283 14.358 1.00 24.98 C +ATOM 8 CE LYS A 1 74.183 11.806 15.377 1.00 26.61 C +ATOM 9 NZ LYS A 1 73.782 13.125 15.944 1.00 28.35 N +ATOM 10 N SER A 2 72.731 14.545 10.678 1.00 20.37 N +ATOM 11 CA SER A 2 73.890 15.221 10.092 1.00 20.07 C +ATOM 12 C SER A 2 75.195 14.836 10.802 1.00 20.07 C +ATOM 13 O SER A 2 75.248 14.771 12.031 1.00 20.36 O +ATOM 14 CB SER A 2 73.714 16.747 10.170 1.00 19.20 C +ATOM 15 OG SER A 2 73.533 17.173 11.514 1.00 20.46 O +ATOM 16 N CYS A 3 76.239 14.589 10.014 1.00 19.64 N +ATOM 17 CA CYS A 3 77.558 14.249 10.537 1.00 20.30 C +ATOM 18 C CYS A 3 78.617 15.040 9.792 1.00 19.61 C +ATOM 19 O CYS A 3 78.695 14.954 8.563 1.00 20.12 O +ATOM 20 CB CYS A 3 77.797 12.750 10.372 1.00 21.18 C +ATOM 21 SG CYS A 3 76.652 11.750 11.345 1.00 22.37 S +ATOM 22 N CYS A 4 79.433 15.811 10.506 1.00 19.31 N +ATOM 23 CA CYS A 4 80.345 16.748 9.827 1.00 19.39 C +ATOM 24 C CYS A 4 81.802 16.342 9.899 1.00 19.98 C +ATOM 25 O CYS A 4 82.223 15.698 10.850 1.00 20.02 O +ATOM 26 CB CYS A 4 80.143 18.164 10.350 1.00 19.05 C +ATOM 27 SG CYS A 4 78.503 18.799 9.901 1.00 18.94 S +ATOM 28 N ARG A 5 82.564 16.710 8.869 1.00 20.20 N +ATOM 29 CA ARG A 5 83.977 16.352 8.800 1.00 20.98 C +ATOM 30 C ARG A 5 84.748 16.747 10.068 1.00 20.80 C +ATOM 31 O ARG A 5 85.565 15.975 10.581 1.00 22.01 O +ATOM 32 CB ARG A 5 84.614 17.014 7.586 1.00 20.96 C +ATOM 33 CG ARG A 5 86.036 16.582 7.338 1.00 24.42 C +ATOM 34 CD ARG A 5 86.135 15.161 6.807 1.00 27.64 C +ATOM 35 NE ARG A 5 
85.600 15.082 5.454 1.00 29.44 N +ATOM 36 CZ ARG A 5 85.560 13.982 4.708 1.00 32.67 C +ATOM 37 NH1 ARG A 5 86.048 12.833 5.172 1.00 34.44 N +ATOM 38 NH2 ARG A 5 85.035 14.042 3.475 1.00 33.54 N +ATOM 39 N ASN A 6 84.494 17.955 10.559 1.00 20.02 N +ATOM 40 CA ASN A 6 85.205 18.479 11.733 1.00 19.38 C +ATOM 41 C ASN A 6 84.437 19.666 12.324 1.00 19.02 C +ATOM 42 O ASN A 6 83.356 20.004 11.836 1.00 17.94 O +ATOM 43 CB ASN A 6 86.643 18.861 11.347 1.00 20.02 C +ATOM 44 CG ASN A 6 86.703 19.850 10.201 1.00 19.73 C +ATOM 45 OD1 ASN A 6 85.924 20.804 10.146 1.00 17.07 O +ATOM 46 ND2 ASN A 6 87.643 19.631 9.271 1.00 21.16 N +ATOM 47 N THR A 7 84.979 20.306 13.359 1.00 18.62 N +ATOM 48 CA THR A 7 84.223 21.353 14.068 1.00 19.19 C +ATOM 49 C THR A 7 83.900 22.537 13.137 1.00 18.40 C +ATOM 50 O THR A 7 82.778 23.048 13.119 1.00 16.86 O +ATOM 51 CB THR A 7 84.970 21.852 15.312 1.00 19.22 C +ATOM 52 OG1 THR A 7 85.357 20.727 16.114 1.00 20.09 O +ATOM 53 CG2 THR A 7 84.103 22.810 16.134 1.00 21.31 C +ATOM 54 N LEU A 8 84.868 22.958 12.347 1.00 19.63 N +ATOM 55 CA LEU A 8 84.600 24.017 11.381 1.00 20.21 C +ATOM 56 C LEU A 8 83.437 23.658 10.448 1.00 19.26 C +ATOM 57 O LEU A 8 82.557 24.490 10.209 1.00 19.57 O +ATOM 58 CB LEU A 8 85.866 24.371 10.602 1.00 21.43 C +ATOM 59 CG LEU A 8 85.756 25.581 9.676 1.00 23.72 C +ATOM 60 CD1 LEU A 8 85.328 26.823 10.475 1.00 24.02 C +ATOM 61 CD2 LEU A 8 87.087 25.805 8.970 1.00 26.06 C +ATOM 62 N ALA A 9 83.410 22.418 9.949 1.00 19.33 N +ATOM 63 CA ALA A 9 82.345 21.959 9.048 1.00 18.44 C +ATOM 64 C ALA A 9 80.989 22.024 9.732 1.00 18.06 C +ATOM 65 O ALA A 9 79.979 22.411 9.114 1.00 16.97 O +ATOM 66 CB ALA A 9 82.621 20.541 8.548 1.00 19.16 C +ATOM 67 N ARG A 10 80.973 21.678 11.023 1.00 17.67 N +ATOM 68 CA ARG A 10 79.755 21.730 11.808 1.00 17.51 C +ATOM 69 C ARG A 10 79.241 23.158 11.960 1.00 16.99 C +ATOM 70 O ARG A 10 78.040 23.409 11.754 1.00 17.07 O +ATOM 71 CB ARG A 10 79.972 21.090 13.187 1.00 17.88 C +ATOM 72 CG ARG A 10 78.826 21.282 
14.152 1.00 18.66 C +ATOM 73 CD ARG A 10 77.530 20.692 13.634 1.00 21.42 C +ATOM 74 NE ARG A 10 77.618 19.239 13.469 1.00 20.95 N +ATOM 75 CZ ARG A 10 76.639 18.479 12.982 1.00 22.34 C +ATOM 76 NH1 ARG A 10 75.483 19.030 12.594 1.00 22.21 N +ATOM 77 NH2 ARG A 10 76.814 17.161 12.883 1.00 20.57 N +ATOM 78 N ASN A 11 80.141 24.077 12.316 1.00 16.56 N +ATOM 79 CA ASN A 11 79.817 25.507 12.390 1.00 16.69 C +ATOM 80 C ASN A 11 79.237 26.005 11.062 1.00 16.42 C +ATOM 81 O ASN A 11 78.198 26.685 11.049 1.00 15.86 O +ATOM 82 CB ASN A 11 81.056 26.328 12.771 1.00 17.51 C +ATOM 83 CG ASN A 11 80.771 27.815 12.922 1.00 18.18 C +ATOM 84 OD1 ASN A 11 81.482 28.643 12.352 1.00 19.27 O +ATOM 85 ND2 ASN A 11 79.740 28.161 13.691 1.00 18.56 N +ATOM 86 N CYS A 12 79.896 25.643 9.964 1.00 16.24 N +ATOM 87 CA CYS A 12 79.424 25.982 8.617 1.00 17.02 C +ATOM 88 C CYS A 12 78.004 25.455 8.381 1.00 16.88 C +ATOM 89 O CYS A 12 77.130 26.184 7.915 1.00 16.89 O +ATOM 90 CB CYS A 12 80.408 25.432 7.556 1.00 16.67 C +ATOM 91 SG CYS A 12 79.954 25.602 5.811 1.00 19.92 S +ATOM 92 N TYR A 13 77.796 24.183 8.710 1.00 17.23 N +ATOM 93 CA TYR A 13 76.505 23.534 8.539 1.00 17.15 C +ATOM 94 C TYR A 13 75.391 24.218 9.323 1.00 16.90 C +ATOM 95 O TYR A 13 74.308 24.472 8.786 1.00 17.24 O +ATOM 96 CB TYR A 13 76.610 22.063 8.929 1.00 17.17 C +ATOM 97 CG TYR A 13 75.362 21.252 8.654 1.00 18.64 C +ATOM 98 CD1 TYR A 13 75.146 20.657 7.416 1.00 18.95 C +ATOM 99 CD2 TYR A 13 74.401 21.068 9.652 1.00 20.68 C +ATOM 100 CE1 TYR A 13 73.991 19.899 7.169 1.00 18.84 C +ATOM 101 CE2 TYR A 13 73.257 20.316 9.415 1.00 19.83 C +ATOM 102 CZ TYR A 13 73.060 19.738 8.192 1.00 19.78 C +ATOM 103 OH TYR A 13 71.932 19.005 8.005 1.00 19.96 O +ATOM 104 N ASN A 14 75.656 24.519 10.582 1.00 17.14 N +ATOM 105 CA ASN A 14 74.704 25.222 11.418 1.00 17.68 C +ATOM 106 C ASN A 14 74.348 26.605 10.880 1.00 17.66 C +ATOM 107 O ASN A 14 73.169 26.963 10.784 1.00 18.32 O +ATOM 108 CB ASN A 14 75.253 25.348 12.855 1.00 17.72 C +ATOM 109 CG 
ASN A 14 75.306 24.013 13.575 1.00 19.67 C +ATOM 110 OD1 ASN A 14 74.654 23.046 13.158 1.00 22.82 O +ATOM 111 ND2 ASN A 14 76.082 23.946 14.661 1.00 18.11 N +ATOM 112 N ALA A 15 75.364 27.377 10.512 1.00 17.55 N +ATOM 113 CA ALA A 15 75.137 28.709 9.980 1.00 17.93 C +ATOM 114 C ALA A 15 74.311 28.626 8.703 1.00 17.52 C +ATOM 115 O ALA A 15 73.399 29.430 8.500 1.00 16.69 O +ATOM 116 CB ALA A 15 76.454 29.414 9.719 1.00 18.56 C +ATOM 117 N CYS A 16 74.618 27.623 7.877 1.00 17.39 N +ATOM 118 CA CYS A 16 73.929 27.427 6.593 1.00 17.98 C +ATOM 119 C CYS A 16 72.462 27.063 6.825 1.00 17.83 C +ATOM 120 O CYS A 16 71.571 27.653 6.222 1.00 18.86 O +ATOM 121 CB CYS A 16 74.638 26.342 5.771 1.00 17.65 C +ATOM 122 SG CYS A 16 73.777 25.835 4.258 1.00 19.41 S +ATOM 123 N ARG A 17 72.218 26.107 7.715 1.00 18.46 N +ATOM 124 CA ARG A 17 70.862 25.737 8.098 1.00 18.74 C +ATOM 125 C ARG A 17 70.118 26.930 8.708 1.00 19.22 C +ATOM 126 O ARG A 17 68.932 27.097 8.471 1.00 19.08 O +ATOM 127 CB ARG A 17 70.874 24.563 9.092 1.00 19.18 C +ATOM 128 CG ARG A 17 71.326 23.228 8.472 1.00 19.31 C +ATOM 129 CD ARG A 17 70.287 22.654 7.561 1.00 18.70 C +ATOM 130 NE ARG A 17 69.170 22.065 8.306 1.00 20.36 N +ATOM 131 CZ ARG A 17 67.899 22.060 7.899 1.00 19.65 C +ATOM 132 NH1 ARG A 17 67.528 22.642 6.765 1.00 17.51 N +ATOM 133 NH2 ARG A 17 66.979 21.474 8.645 1.00 19.41 N +ATOM 134 N PHE A 18 70.829 27.770 9.453 1.00 19.70 N +ATOM 135 CA PHE A 18 70.206 28.886 10.168 1.00 20.37 C +ATOM 136 C PHE A 18 69.643 29.923 9.214 1.00 20.88 C +ATOM 137 O PHE A 18 68.658 30.572 9.546 1.00 21.08 O +ATOM 138 CB PHE A 18 71.193 29.520 11.143 1.00 20.71 C +ATOM 139 CG PHE A 18 70.549 30.365 12.225 1.00 20.37 C +ATOM 140 CD1 PHE A 18 69.911 29.772 13.308 1.00 22.54 C +ATOM 141 CD2 PHE A 18 70.620 31.741 12.175 1.00 21.02 C +ATOM 142 CE1 PHE A 18 69.336 30.546 14.315 1.00 20.68 C +ATOM 143 CE2 PHE A 18 70.055 32.528 13.181 1.00 22.19 C +ATOM 144 CZ PHE A 18 69.424 31.926 14.254 1.00 21.66 C +ATOM 145 N THR A 19 
70.252 30.046 8.029 1.00 21.98 N +ATOM 146 CA THR A 19 69.770 30.920 6.961 1.00 22.61 C +ATOM 147 C THR A 19 68.661 30.273 6.130 1.00 22.47 C +ATOM 148 O THR A 19 68.081 30.924 5.263 1.00 22.47 O +ATOM 149 CB THR A 19 70.929 31.344 5.983 1.00 23.17 C +ATOM 150 OG1 THR A 19 71.280 30.251 5.123 1.00 23.78 O +ATOM 151 CG2 THR A 19 72.169 31.797 6.752 1.00 25.16 C +ATOM 152 N GLY A 20 68.382 28.993 6.379 1.00 21.35 N +ATOM 153 CA GLY A 20 67.351 28.273 5.648 1.00 20.98 C +ATOM 154 C GLY A 20 67.865 27.338 4.553 1.00 20.17 C +ATOM 155 O GLY A 20 67.079 26.853 3.769 1.00 20.69 O +ATOM 156 N GLY A 21 69.165 27.075 4.506 1.00 19.15 N +ATOM 157 CA GLY A 21 69.719 26.131 3.527 1.00 18.96 C +ATOM 158 C GLY A 21 69.222 24.717 3.814 1.00 18.25 C +ATOM 159 O GLY A 21 69.095 24.319 4.958 1.00 17.43 O +ATOM 160 N SER A 22 68.936 23.969 2.765 1.00 17.84 N +ATOM 161 CA SER A 22 68.477 22.583 2.892 1.00 17.92 C +ATOM 162 C SER A 22 69.593 21.702 3.453 1.00 17.21 C +ATOM 163 O SER A 22 70.770 22.046 3.360 1.00 16.70 O +ATOM 164 CB SER A 22 67.997 22.027 1.545 1.00 17.63 C +ATOM 165 OG SER A 22 69.083 21.592 0.727 1.00 18.27 O +ATOM 166 N GLN A 23 69.201 20.585 4.067 1.00 17.80 N +ATOM 167 CA GLN A 23 70.167 19.648 4.618 1.00 17.70 C +ATOM 168 C GLN A 23 71.170 19.221 3.535 1.00 17.54 C +ATOM 169 O GLN A 23 72.371 19.361 3.739 1.00 17.16 O +ATOM 170 CB GLN A 23 69.479 18.477 5.354 1.00 18.67 C +ATOM 171 CG GLN A 23 68.876 18.924 6.731 1.00 17.97 C +ATOM 172 CD GLN A 23 68.445 17.796 7.678 1.00 22.26 C +ATOM 173 OE1 GLN A 23 67.403 17.151 7.476 1.00 22.83 O +ATOM 174 NE2 GLN A 23 69.214 17.607 8.772 1.00 23.55 N +ATOM 175 N PRO A 24 70.696 18.793 2.351 1.00 17.99 N +ATOM 176 CA PRO A 24 71.686 18.441 1.321 1.00 18.40 C +ATOM 177 C PRO A 24 72.554 19.613 0.820 1.00 18.14 C +ATOM 178 O PRO A 24 73.747 19.426 0.504 1.00 17.53 O +ATOM 179 CB PRO A 24 70.816 17.909 0.178 1.00 18.94 C +ATOM 180 CG PRO A 24 69.562 17.447 0.858 1.00 19.55 C +ATOM 181 CD PRO A 24 69.328 18.474 1.913 1.00 17.85 C 
+ATOM 182 N THR A 25 71.962 20.807 0.737 1.00 18.19 N +ATOM 183 CA THR A 25 72.713 21.978 0.283 1.00 18.06 C +ATOM 184 C THR A 25 73.837 22.266 1.269 1.00 17.73 C +ATOM 185 O THR A 25 74.991 22.407 0.876 1.00 17.73 O +ATOM 186 CB THR A 25 71.797 23.202 0.104 1.00 18.71 C +ATOM 187 OG1 THR A 25 70.930 22.994 -1.032 1.00 18.11 O +ATOM 188 CG2 THR A 25 72.627 24.481 -0.137 1.00 19.04 C +ATOM 189 N CYS A 26 73.500 22.321 2.560 1.00 17.12 N +ATOM 190 CA CYS A 26 74.485 22.597 3.594 1.00 17.31 C +ATOM 191 C CYS A 26 75.511 21.455 3.700 1.00 17.50 C +ATOM 192 O CYS A 26 76.674 21.684 4.043 1.00 17.00 O +ATOM 193 CB CYS A 26 73.789 22.850 4.931 1.00 17.11 C +ATOM 194 SG CYS A 26 72.627 24.265 4.865 1.00 18.63 S +ATOM 195 N GLY A 27 75.076 20.241 3.364 1.00 17.89 N +ATOM 196 CA GLY A 27 75.953 19.078 3.340 1.00 18.54 C +ATOM 197 C GLY A 27 77.126 19.253 2.403 1.00 19.06 C +ATOM 198 O GLY A 27 78.272 19.097 2.826 1.00 19.52 O +ATOM 199 N ILE A 28 76.835 19.593 1.142 1.00 19.65 N +ATOM 200 CA ILE A 28 77.877 19.787 0.141 1.00 20.34 C +ATOM 201 C ILE A 28 78.639 21.110 0.330 1.00 19.77 C +ATOM 202 O ILE A 28 79.835 21.185 0.047 1.00 20.21 O +ATOM 203 CB ILE A 28 77.366 19.572 -1.315 1.00 21.07 C +ATOM 204 CG1 ILE A 28 76.453 20.688 -1.797 1.00 22.48 C +ATOM 205 CG2 ILE A 28 76.637 18.210 -1.450 1.00 21.65 C +ATOM 206 CD1 ILE A 28 76.044 20.528 -3.270 1.00 24.19 C +ATOM 207 N LEU A 29 77.973 22.132 0.855 1.00 19.54 N +ATOM 208 CA LEU A 29 78.654 23.388 1.188 1.00 19.57 C +ATOM 209 C LEU A 29 79.729 23.179 2.259 1.00 19.41 C +ATOM 210 O LEU A 29 80.827 23.757 2.183 1.00 19.88 O +ATOM 211 CB LEU A 29 77.647 24.431 1.716 1.00 19.38 C +ATOM 212 CG LEU A 29 78.204 25.839 1.927 1.00 19.81 C +ATOM 213 CD1 LEU A 29 78.692 26.428 0.576 1.00 18.44 C +ATOM 214 CD2 LEU A 29 77.181 26.742 2.592 1.00 20.59 C +ATOM 215 N CYS A 30 79.399 22.375 3.261 1.00 19.10 N +ATOM 216 CA CYS A 30 80.169 22.325 4.508 1.00 19.09 C +ATOM 217 C CYS A 30 80.912 21.016 4.776 1.00 19.97 C +ATOM 218 O CYS A 30 
81.642 20.927 5.771 1.00 20.08 O +ATOM 219 CB CYS A 30 79.247 22.637 5.688 1.00 18.70 C +ATOM 220 SG CYS A 30 78.424 24.247 5.557 1.00 18.13 S +ATOM 221 N ASP A 31 80.741 20.039 3.880 1.00 20.09 N +ATOM 222 CA ASP A 31 81.329 18.698 3.990 1.00 20.85 C +ATOM 223 C ASP A 31 80.717 17.909 5.170 1.00 20.29 C +ATOM 224 O ASP A 31 81.415 17.441 6.049 1.00 19.79 O +ATOM 225 CB ASP A 31 82.865 18.789 4.084 1.00 21.96 C +ATOM 226 CG ASP A 31 83.562 17.436 3.894 1.00 24.45 C +ATOM 227 OD1 ASP A 31 82.897 16.423 3.577 1.00 26.46 O +ATOM 228 OD2 ASP A 31 84.790 17.383 4.121 1.00 27.56 O +ATOM 229 N CYS A 32 79.394 17.778 5.150 1.00 19.75 N +ATOM 230 CA CYS A 32 78.658 16.990 6.113 1.00 19.07 C +ATOM 231 C CYS A 32 77.784 15.973 5.364 1.00 19.52 C +ATOM 232 O CYS A 32 77.138 16.299 4.361 1.00 18.21 O +ATOM 233 CB CYS A 32 77.794 17.884 7.017 1.00 18.79 C +ATOM 234 SG CYS A 32 78.701 19.198 7.910 1.00 18.39 S +ATOM 235 N ILE A 33 77.791 14.738 5.865 1.00 20.72 N +ATOM 236 CA ILE A 33 76.974 13.656 5.319 1.00 20.98 C +ATOM 237 C ILE A 33 75.741 13.530 6.193 1.00 21.76 C +ATOM 238 O ILE A 33 75.681 14.106 7.284 1.00 20.83 O +ATOM 239 CB ILE A 33 77.741 12.298 5.271 1.00 21.56 C +ATOM 240 CG1 ILE A 33 78.106 11.791 6.676 1.00 22.35 C +ATOM 241 CG2 ILE A 33 79.004 12.439 4.424 1.00 20.82 C +ATOM 242 CD1 ILE A 33 78.919 10.490 6.675 1.00 23.33 C +ATOM 243 N HIS A 34 74.755 12.786 5.703 1.00 22.40 N +ATOM 244 CA HIS A 34 73.565 12.479 6.479 1.00 23.27 C +ATOM 245 C HIS A 34 73.418 10.953 6.563 1.00 24.17 C +ATOM 246 O HIS A 34 73.487 10.272 5.546 1.00 25.10 O +ATOM 247 CB HIS A 34 72.353 13.169 5.838 1.00 23.46 C +ATOM 248 CG HIS A 34 72.441 14.664 5.902 1.00 22.56 C +ATOM 249 ND1 HIS A 34 73.126 15.408 4.966 1.00 21.86 N +ATOM 250 CD2 HIS A 34 72.018 15.542 6.847 1.00 22.81 C +ATOM 251 CE1 HIS A 34 73.081 16.687 5.306 1.00 22.57 C +ATOM 252 NE2 HIS A 34 72.418 16.796 6.448 1.00 22.11 N +ATOM 253 N VAL A 35 73.259 10.437 7.777 1.00 24.23 N +ATOM 254 CA VAL A 35 73.207 8.999 8.043 1.00 
25.09 C +ATOM 255 C VAL A 35 71.888 8.595 8.719 1.00 26.18 C +ATOM 256 O VAL A 35 71.207 9.427 9.317 1.00 26.05 O +ATOM 257 CB VAL A 35 74.404 8.557 8.932 1.00 25.15 C +ATOM 258 CG1 VAL A 35 75.735 8.949 8.274 1.00 23.07 C +ATOM 259 CG2 VAL A 35 74.302 9.157 10.357 1.00 24.54 C +ATOM 260 N THR A 36 71.541 7.315 8.624 1.00 27.52 N +ATOM 261 CA THR A 36 70.333 6.776 9.270 1.00 28.30 C +ATOM 262 C THR A 36 70.643 6.067 10.584 1.00 29.62 C +ATOM 263 O THR A 36 69.738 5.527 11.233 1.00 31.58 O +ATOM 264 CB THR A 36 69.618 5.789 8.349 1.00 29.01 C +ATOM 265 OG1 THR A 36 70.509 4.712 8.043 1.00 28.97 O +ATOM 266 CG2 THR A 36 69.192 6.488 7.065 1.00 28.06 C +ATOM 267 N THR A 37 71.923 6.042 10.957 1.00 29.19 N +ATOM 268 CA THR A 37 72.365 5.504 12.240 1.00 29.38 C +ATOM 269 C THR A 37 72.195 6.567 13.332 1.00 29.33 C +ATOM 270 O THR A 37 72.099 7.763 13.031 1.00 27.15 O +ATOM 271 CB THR A 37 73.842 5.055 12.167 1.00 29.16 C +ATOM 272 OG1 THR A 37 74.641 6.115 11.629 1.00 27.58 O +ATOM 273 CG2 THR A 37 73.991 3.802 11.276 1.00 29.61 C +ATOM 274 N THR A 38 72.156 6.126 14.595 1.00 30.62 N +ATOM 275 CA THR A 38 71.964 7.034 15.736 1.00 31.37 C +ATOM 276 C THR A 38 73.212 7.835 16.122 1.00 30.82 C +ATOM 277 O THR A 38 73.105 8.884 16.765 1.00 30.28 O +ATOM 278 CB THR A 38 71.510 6.288 16.989 1.00 32.71 C +ATOM 279 OG1 THR A 38 72.491 5.300 17.339 1.00 35.71 O +ATOM 280 CG2 THR A 38 70.163 5.622 16.750 1.00 34.63 C +ATOM 281 N THR A 39 74.384 7.320 15.764 1.00 30.91 N +ATOM 282 CA THR A 39 75.637 8.042 15.952 1.00 30.11 C +ATOM 283 C THR A 39 76.334 8.208 14.595 1.00 28.86 C +ATOM 284 O THR A 39 75.969 7.560 13.606 1.00 28.43 O +ATOM 285 CB THR A 39 76.561 7.267 16.867 1.00 31.03 C +ATOM 286 OG1 THR A 39 76.864 6.014 16.238 1.00 33.89 O +ATOM 287 CG2 THR A 39 75.899 7.005 18.240 1.00 32.89 C +ATOM 288 N CYS A 40 77.352 9.059 14.565 1.00 27.94 N +ATOM 289 CA CYS A 40 78.088 9.340 13.332 1.00 27.72 C +ATOM 290 C CYS A 40 79.250 8.372 13.098 1.00 28.42 C +ATOM 291 O CYS A 40 
79.897 7.941 14.044 1.00 28.34 O +ATOM 292 CB CYS A 40 78.625 10.767 13.383 1.00 26.93 C +ATOM 293 SG CYS A 40 77.327 12.012 13.255 1.00 27.36 S +ATOM 294 N PRO A 41 79.536 8.047 11.829 1.00 28.52 N +ATOM 295 CA PRO A 41 80.726 7.248 11.537 1.00 29.51 C +ATOM 296 C PRO A 41 82.013 8.042 11.714 1.00 29.57 C +ATOM 297 O PRO A 41 81.996 9.284 11.718 1.00 28.72 O +ATOM 298 CB PRO A 41 80.549 6.908 10.064 1.00 29.64 C +ATOM 299 CG PRO A 41 79.817 8.051 9.530 1.00 28.15 C +ATOM 300 CD PRO A 41 78.841 8.443 10.592 1.00 27.70 C +ATOM 301 N SER A 42 83.130 7.331 11.793 1.00 30.71 N +ATOM 302 CA SER A 42 84.424 7.963 12.094 1.00 30.86 C +ATOM 303 C SER A 42 84.881 8.956 11.022 1.00 29.47 C +ATOM 304 O SER A 42 85.664 9.838 11.319 1.00 29.17 O +ATOM 305 CB SER A 42 85.511 6.896 12.302 1.00 32.43 C +ATOM 306 OG SER A 42 85.792 6.214 11.091 1.00 34.43 O +ATOM 307 N SER A 43 84.406 8.810 9.784 1.00 28.78 N +ATOM 308 CA SER A 43 84.816 9.708 8.708 1.00 28.12 C +ATOM 309 C SER A 43 84.309 11.142 8.929 1.00 26.81 C +ATOM 310 O SER A 43 84.923 12.100 8.436 1.00 26.40 O +ATOM 311 CB SER A 43 84.327 9.184 7.344 1.00 28.35 C +ATOM 312 OG SER A 43 82.915 9.350 7.213 1.00 29.22 O +ATOM 313 N HIS A 44 83.185 11.271 9.650 1.00 25.93 N +ATOM 314 CA HIS A 44 82.505 12.548 9.874 1.00 24.57 C +ATOM 315 C HIS A 44 81.993 12.591 11.304 1.00 24.77 C +ATOM 316 O HIS A 44 80.791 12.498 11.548 1.00 24.29 O +ATOM 317 CB HIS A 44 81.344 12.732 8.891 1.00 23.80 C +ATOM 318 CG HIS A 44 81.774 12.881 7.462 1.00 24.20 C +ATOM 319 ND1 HIS A 44 82.243 11.826 6.714 1.00 26.24 N +ATOM 320 CD2 HIS A 44 81.811 13.963 6.647 1.00 25.14 C +ATOM 321 CE1 HIS A 44 82.556 12.248 5.501 1.00 25.40 C +ATOM 322 NE2 HIS A 44 82.301 13.542 5.434 1.00 24.56 N +ATOM 323 N PRO A 45 82.910 12.745 12.265 1.00 25.81 N +ATOM 324 CA PRO A 45 82.577 12.633 13.681 1.00 25.99 C +ATOM 325 C PRO A 45 82.004 13.886 14.353 1.00 25.64 C +ATOM 326 O PRO A 45 81.619 13.821 15.519 1.00 25.77 O +ATOM 327 CB PRO A 45 83.928 12.287 14.312 1.00 
27.41 C +ATOM 328 CG PRO A 45 84.949 12.959 13.405 1.00 27.45 C +ATOM 329 CD PRO A 45 84.323 13.112 12.051 1.00 26.09 C +ATOM 330 N SER A 46 81.948 15.020 13.663 1.00 24.53 N +ATOM 331 CA SER A 46 81.615 16.268 14.358 1.00 23.88 C +ATOM 332 C SER A 46 80.196 16.745 14.083 1.00 23.19 C +ATOM 333 O SER A 46 79.450 16.126 13.305 1.00 22.75 O +ATOM 334 CB SER A 46 82.623 17.351 13.992 1.00 24.12 C +ATOM 335 OG SER A 46 83.907 16.976 14.429 1.00 24.47 O +ATOM 336 OXT SER A 46 79.765 17.766 14.647 1.00 22.39 O +TER 337 SER A 46 +END diff --git a/doc/source/example_scripts/example_data/housing.csv b/doc/source/example_scripts/example_data/housing.csv new file mode 100644 index 0000000000000000000000000000000000000000..ae24ab833cf7811e6f566566746ec959101bab87 --- /dev/null +++ b/doc/source/example_scripts/example_data/housing.csv @@ -0,0 +1,507 @@ +CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV +0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98,24.00 +0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14,21.60 +0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03,34.70 +0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94,33.40 +0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33,36.20 +0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21,28.70 +0.08829,12.50,7.870,0,0.5240,6.0120,66.60,5.5605,5,311.0,15.20,395.60,12.43,22.90 +0.14455,12.50,7.870,0,0.5240,6.1720,96.10,5.9505,5,311.0,15.20,396.90,19.15,27.10 +0.21124,12.50,7.870,0,0.5240,5.6310,100.00,6.0821,5,311.0,15.20,386.63,29.93,16.50 +0.17004,12.50,7.870,0,0.5240,6.0040,85.90,6.5921,5,311.0,15.20,386.71,17.10,18.90 +0.22489,12.50,7.870,0,0.5240,6.3770,94.30,6.3467,5,311.0,15.20,392.52,20.45,15.00 +0.11747,12.50,7.870,0,0.5240,6.0090,82.90,6.2267,5,311.0,15.20,396.90,13.27,18.90 +0.09378,12.50,7.870,0,0.5240,5.8890,39.00,5.4509,5,311.0,15.20,390.50,15.71,21.70 
+0.62976,0.00,8.140,0,0.5380,5.9490,61.80,4.7075,4,307.0,21.00,396.90,8.26,20.40 +0.63796,0.00,8.140,0,0.5380,6.0960,84.50,4.4619,4,307.0,21.00,380.02,10.26,18.20 +0.62739,0.00,8.140,0,0.5380,5.8340,56.50,4.4986,4,307.0,21.00,395.62,8.47,19.90 +1.05393,0.00,8.140,0,0.5380,5.9350,29.30,4.4986,4,307.0,21.00,386.85,6.58,23.10 +0.78420,0.00,8.140,0,0.5380,5.9900,81.70,4.2579,4,307.0,21.00,386.75,14.67,17.50 +0.80271,0.00,8.140,0,0.5380,5.4560,36.60,3.7965,4,307.0,21.00,288.99,11.69,20.20 +0.72580,0.00,8.140,0,0.5380,5.7270,69.50,3.7965,4,307.0,21.00,390.95,11.28,18.20 +1.25179,0.00,8.140,0,0.5380,5.5700,98.10,3.7979,4,307.0,21.00,376.57,21.02,13.60 +0.85204,0.00,8.140,0,0.5380,5.9650,89.20,4.0123,4,307.0,21.00,392.53,13.83,19.60 +1.23247,0.00,8.140,0,0.5380,6.1420,91.70,3.9769,4,307.0,21.00,396.90,18.72,15.20 +0.98843,0.00,8.140,0,0.5380,5.8130,100.00,4.0952,4,307.0,21.00,394.54,19.88,14.50 +0.75026,0.00,8.140,0,0.5380,5.9240,94.10,4.3996,4,307.0,21.00,394.33,16.30,15.60 +0.84054,0.00,8.140,0,0.5380,5.5990,85.70,4.4546,4,307.0,21.00,303.42,16.51,13.90 +0.67191,0.00,8.140,0,0.5380,5.8130,90.30,4.6820,4,307.0,21.00,376.88,14.81,16.60 +0.95577,0.00,8.140,0,0.5380,6.0470,88.80,4.4534,4,307.0,21.00,306.38,17.28,14.80 +0.77299,0.00,8.140,0,0.5380,6.4950,94.40,4.4547,4,307.0,21.00,387.94,12.80,18.40 +1.00245,0.00,8.140,0,0.5380,6.6740,87.30,4.2390,4,307.0,21.00,380.23,11.98,21.00 +1.13081,0.00,8.140,0,0.5380,5.7130,94.10,4.2330,4,307.0,21.00,360.17,22.60,12.70 +1.35472,0.00,8.140,0,0.5380,6.0720,100.00,4.1750,4,307.0,21.00,376.73,13.04,14.50 +1.38799,0.00,8.140,0,0.5380,5.9500,82.00,3.9900,4,307.0,21.00,232.60,27.71,13.20 +1.15172,0.00,8.140,0,0.5380,5.7010,95.00,3.7872,4,307.0,21.00,358.77,18.35,13.10 +1.61282,0.00,8.140,0,0.5380,6.0960,96.90,3.7598,4,307.0,21.00,248.31,20.34,13.50 +0.06417,0.00,5.960,0,0.4990,5.9330,68.20,3.3603,5,279.0,19.20,396.90,9.68,18.90 +0.09744,0.00,5.960,0,0.4990,5.8410,61.40,3.3779,5,279.0,19.20,377.56,11.41,20.00 
+0.08014,0.00,5.960,0,0.4990,5.8500,41.50,3.9342,5,279.0,19.20,396.90,8.77,21.00 +0.17505,0.00,5.960,0,0.4990,5.9660,30.20,3.8473,5,279.0,19.20,393.43,10.13,24.70 +0.02763,75.00,2.950,0,0.4280,6.5950,21.80,5.4011,3,252.0,18.30,395.63,4.32,30.80 +0.03359,75.00,2.950,0,0.4280,7.0240,15.80,5.4011,3,252.0,18.30,395.62,1.98,34.90 +0.12744,0.00,6.910,0,0.4480,6.7700,2.90,5.7209,3,233.0,17.90,385.41,4.84,26.60 +0.14150,0.00,6.910,0,0.4480,6.1690,6.60,5.7209,3,233.0,17.90,383.37,5.81,25.30 +0.15936,0.00,6.910,0,0.4480,6.2110,6.50,5.7209,3,233.0,17.90,394.46,7.44,24.70 +0.12269,0.00,6.910,0,0.4480,6.0690,40.00,5.7209,3,233.0,17.90,389.39,9.55,21.20 +0.17142,0.00,6.910,0,0.4480,5.6820,33.80,5.1004,3,233.0,17.90,396.90,10.21,19.30 +0.18836,0.00,6.910,0,0.4480,5.7860,33.30,5.1004,3,233.0,17.90,396.90,14.15,20.00 +0.22927,0.00,6.910,0,0.4480,6.0300,85.50,5.6894,3,233.0,17.90,392.74,18.80,16.60 +0.25387,0.00,6.910,0,0.4480,5.3990,95.30,5.8700,3,233.0,17.90,396.90,30.81,14.40 +0.21977,0.00,6.910,0,0.4480,5.6020,62.00,6.0877,3,233.0,17.90,396.90,16.20,19.40 +0.08873,21.00,5.640,0,0.4390,5.9630,45.70,6.8147,4,243.0,16.80,395.56,13.45,19.70 +0.04337,21.00,5.640,0,0.4390,6.1150,63.00,6.8147,4,243.0,16.80,393.97,9.43,20.50 +0.05360,21.00,5.640,0,0.4390,6.5110,21.10,6.8147,4,243.0,16.80,396.90,5.28,25.00 +0.04981,21.00,5.640,0,0.4390,5.9980,21.40,6.8147,4,243.0,16.80,396.90,8.43,23.40 +0.01360,75.00,4.000,0,0.4100,5.8880,47.60,7.3197,3,469.0,21.10,396.90,14.80,18.90 +0.01311,90.00,1.220,0,0.4030,7.2490,21.90,8.6966,5,226.0,17.90,395.93,4.81,35.40 +0.02055,85.00,0.740,0,0.4100,6.3830,35.70,9.1876,2,313.0,17.30,396.90,5.77,24.70 +0.01432,100.00,1.320,0,0.4110,6.8160,40.50,8.3248,5,256.0,15.10,392.90,3.95,31.60 +0.15445,25.00,5.130,0,0.4530,6.1450,29.20,7.8148,8,284.0,19.70,390.68,6.86,23.30 +0.10328,25.00,5.130,0,0.4530,5.9270,47.20,6.9320,8,284.0,19.70,396.90,9.22,19.60 +0.14932,25.00,5.130,0,0.4530,5.7410,66.20,7.2254,8,284.0,19.70,395.11,13.15,18.70 
+0.17171,25.00,5.130,0,0.4530,5.9660,93.40,6.8185,8,284.0,19.70,378.08,14.44,16.00 +0.11027,25.00,5.130,0,0.4530,6.4560,67.80,7.2255,8,284.0,19.70,396.90,6.73,22.20 +0.12650,25.00,5.130,0,0.4530,6.7620,43.40,7.9809,8,284.0,19.70,395.58,9.50,25.00 +0.01951,17.50,1.380,0,0.4161,7.1040,59.50,9.2229,3,216.0,18.60,393.24,8.05,33.00 +0.03584,80.00,3.370,0,0.3980,6.2900,17.80,6.6115,4,337.0,16.10,396.90,4.67,23.50 +0.04379,80.00,3.370,0,0.3980,5.7870,31.10,6.6115,4,337.0,16.10,396.90,10.24,19.40 +0.05789,12.50,6.070,0,0.4090,5.8780,21.40,6.4980,4,345.0,18.90,396.21,8.10,22.00 +0.13554,12.50,6.070,0,0.4090,5.5940,36.80,6.4980,4,345.0,18.90,396.90,13.09,17.40 +0.12816,12.50,6.070,0,0.4090,5.8850,33.00,6.4980,4,345.0,18.90,396.90,8.79,20.90 +0.08826,0.00,10.810,0,0.4130,6.4170,6.60,5.2873,4,305.0,19.20,383.73,6.72,24.20 +0.15876,0.00,10.810,0,0.4130,5.9610,17.50,5.2873,4,305.0,19.20,376.94,9.88,21.70 +0.09164,0.00,10.810,0,0.4130,6.0650,7.80,5.2873,4,305.0,19.20,390.91,5.52,22.80 +0.19539,0.00,10.810,0,0.4130,6.2450,6.20,5.2873,4,305.0,19.20,377.17,7.54,23.40 +0.07896,0.00,12.830,0,0.4370,6.2730,6.00,4.2515,5,398.0,18.70,394.92,6.78,24.10 +0.09512,0.00,12.830,0,0.4370,6.2860,45.00,4.5026,5,398.0,18.70,383.23,8.94,21.40 +0.10153,0.00,12.830,0,0.4370,6.2790,74.50,4.0522,5,398.0,18.70,373.66,11.97,20.00 +0.08707,0.00,12.830,0,0.4370,6.1400,45.80,4.0905,5,398.0,18.70,386.96,10.27,20.80 +0.05646,0.00,12.830,0,0.4370,6.2320,53.70,5.0141,5,398.0,18.70,386.40,12.34,21.20 +0.08387,0.00,12.830,0,0.4370,5.8740,36.60,4.5026,5,398.0,18.70,396.06,9.10,20.30 +0.04113,25.00,4.860,0,0.4260,6.7270,33.50,5.4007,4,281.0,19.00,396.90,5.29,28.00 +0.04462,25.00,4.860,0,0.4260,6.6190,70.40,5.4007,4,281.0,19.00,395.63,7.22,23.90 +0.03659,25.00,4.860,0,0.4260,6.3020,32.20,5.4007,4,281.0,19.00,396.90,6.72,24.80 +0.03551,25.00,4.860,0,0.4260,6.1670,46.70,5.4007,4,281.0,19.00,390.64,7.51,22.90 +0.05059,0.00,4.490,0,0.4490,6.3890,48.00,4.7794,3,247.0,18.50,396.90,9.62,23.90 
+0.05735,0.00,4.490,0,0.4490,6.6300,56.10,4.4377,3,247.0,18.50,392.30,6.53,26.60 +0.05188,0.00,4.490,0,0.4490,6.0150,45.10,4.4272,3,247.0,18.50,395.99,12.86,22.50 +0.07151,0.00,4.490,0,0.4490,6.1210,56.80,3.7476,3,247.0,18.50,395.15,8.44,22.20 +0.05660,0.00,3.410,0,0.4890,7.0070,86.30,3.4217,2,270.0,17.80,396.90,5.50,23.60 +0.05302,0.00,3.410,0,0.4890,7.0790,63.10,3.4145,2,270.0,17.80,396.06,5.70,28.70 +0.04684,0.00,3.410,0,0.4890,6.4170,66.10,3.0923,2,270.0,17.80,392.18,8.81,22.60 +0.03932,0.00,3.410,0,0.4890,6.4050,73.90,3.0921,2,270.0,17.80,393.55,8.20,22.00 +0.04203,28.00,15.040,0,0.4640,6.4420,53.60,3.6659,4,270.0,18.20,395.01,8.16,22.90 +0.02875,28.00,15.040,0,0.4640,6.2110,28.90,3.6659,4,270.0,18.20,396.33,6.21,25.00 +0.04294,28.00,15.040,0,0.4640,6.2490,77.30,3.6150,4,270.0,18.20,396.90,10.59,20.60 +0.12204,0.00,2.890,0,0.4450,6.6250,57.80,3.4952,2,276.0,18.00,357.98,6.65,28.40 +0.11504,0.00,2.890,0,0.4450,6.1630,69.60,3.4952,2,276.0,18.00,391.83,11.34,21.40 +0.12083,0.00,2.890,0,0.4450,8.0690,76.00,3.4952,2,276.0,18.00,396.90,4.21,38.70 +0.08187,0.00,2.890,0,0.4450,7.8200,36.90,3.4952,2,276.0,18.00,393.53,3.57,43.80 +0.06860,0.00,2.890,0,0.4450,7.4160,62.50,3.4952,2,276.0,18.00,396.90,6.19,33.20 +0.14866,0.00,8.560,0,0.5200,6.7270,79.90,2.7778,5,384.0,20.90,394.76,9.42,27.50 +0.11432,0.00,8.560,0,0.5200,6.7810,71.30,2.8561,5,384.0,20.90,395.58,7.67,26.50 +0.22876,0.00,8.560,0,0.5200,6.4050,85.40,2.7147,5,384.0,20.90,70.80,10.63,18.60 +0.21161,0.00,8.560,0,0.5200,6.1370,87.40,2.7147,5,384.0,20.90,394.47,13.44,19.30 +0.13960,0.00,8.560,0,0.5200,6.1670,90.00,2.4210,5,384.0,20.90,392.69,12.33,20.10 +0.13262,0.00,8.560,0,0.5200,5.8510,96.70,2.1069,5,384.0,20.90,394.05,16.47,19.50 +0.17120,0.00,8.560,0,0.5200,5.8360,91.90,2.2110,5,384.0,20.90,395.67,18.66,19.50 +0.13117,0.00,8.560,0,0.5200,6.1270,85.20,2.1224,5,384.0,20.90,387.69,14.09,20.40 +0.12802,0.00,8.560,0,0.5200,6.4740,97.10,2.4329,5,384.0,20.90,395.24,12.27,19.80 
+0.26363,0.00,8.560,0,0.5200,6.2290,91.20,2.5451,5,384.0,20.90,391.23,15.55,19.40 +0.10793,0.00,8.560,0,0.5200,6.1950,54.40,2.7778,5,384.0,20.90,393.49,13.00,21.70 +0.10084,0.00,10.010,0,0.5470,6.7150,81.60,2.6775,6,432.0,17.80,395.59,10.16,22.80 +0.12329,0.00,10.010,0,0.5470,5.9130,92.90,2.3534,6,432.0,17.80,394.95,16.21,18.80 +0.22212,0.00,10.010,0,0.5470,6.0920,95.40,2.5480,6,432.0,17.80,396.90,17.09,18.70 +0.14231,0.00,10.010,0,0.5470,6.2540,84.20,2.2565,6,432.0,17.80,388.74,10.45,18.50 +0.17134,0.00,10.010,0,0.5470,5.9280,88.20,2.4631,6,432.0,17.80,344.91,15.76,18.30 +0.13158,0.00,10.010,0,0.5470,6.1760,72.50,2.7301,6,432.0,17.80,393.30,12.04,21.20 +0.15098,0.00,10.010,0,0.5470,6.0210,82.60,2.7474,6,432.0,17.80,394.51,10.30,19.20 +0.13058,0.00,10.010,0,0.5470,5.8720,73.10,2.4775,6,432.0,17.80,338.63,15.37,20.40 +0.14476,0.00,10.010,0,0.5470,5.7310,65.20,2.7592,6,432.0,17.80,391.50,13.61,19.30 +0.06899,0.00,25.650,0,0.5810,5.8700,69.70,2.2577,2,188.0,19.10,389.15,14.37,22.00 +0.07165,0.00,25.650,0,0.5810,6.0040,84.10,2.1974,2,188.0,19.10,377.67,14.27,20.30 +0.09299,0.00,25.650,0,0.5810,5.9610,92.90,2.0869,2,188.0,19.10,378.09,17.93,20.50 +0.15038,0.00,25.650,0,0.5810,5.8560,97.00,1.9444,2,188.0,19.10,370.31,25.41,17.30 +0.09849,0.00,25.650,0,0.5810,5.8790,95.80,2.0063,2,188.0,19.10,379.38,17.58,18.80 +0.16902,0.00,25.650,0,0.5810,5.9860,88.40,1.9929,2,188.0,19.10,385.02,14.81,21.40 +0.38735,0.00,25.650,0,0.5810,5.6130,95.60,1.7572,2,188.0,19.10,359.29,27.26,15.70 +0.25915,0.00,21.890,0,0.6240,5.6930,96.00,1.7883,4,437.0,21.20,392.11,17.19,16.20 +0.32543,0.00,21.890,0,0.6240,6.4310,98.80,1.8125,4,437.0,21.20,396.90,15.39,18.00 +0.88125,0.00,21.890,0,0.6240,5.6370,94.70,1.9799,4,437.0,21.20,396.90,18.34,14.30 +0.34006,0.00,21.890,0,0.6240,6.4580,98.90,2.1185,4,437.0,21.20,395.04,12.60,19.20 +1.19294,0.00,21.890,0,0.6240,6.3260,97.70,2.2710,4,437.0,21.20,396.90,12.26,19.60 +0.59005,0.00,21.890,0,0.6240,6.3720,97.90,2.3274,4,437.0,21.20,385.76,11.12,23.00 
+0.32982,0.00,21.890,0,0.6240,5.8220,95.40,2.4699,4,437.0,21.20,388.69,15.03,18.40 +0.97617,0.00,21.890,0,0.6240,5.7570,98.40,2.3460,4,437.0,21.20,262.76,17.31,15.60 +0.55778,0.00,21.890,0,0.6240,6.3350,98.20,2.1107,4,437.0,21.20,394.67,16.96,18.10 +0.32264,0.00,21.890,0,0.6240,5.9420,93.50,1.9669,4,437.0,21.20,378.25,16.90,17.40 +0.35233,0.00,21.890,0,0.6240,6.4540,98.40,1.8498,4,437.0,21.20,394.08,14.59,17.10 +0.24980,0.00,21.890,0,0.6240,5.8570,98.20,1.6686,4,437.0,21.20,392.04,21.32,13.30 +0.54452,0.00,21.890,0,0.6240,6.1510,97.90,1.6687,4,437.0,21.20,396.90,18.46,17.80 +0.29090,0.00,21.890,0,0.6240,6.1740,93.60,1.6119,4,437.0,21.20,388.08,24.16,14.00 +1.62864,0.00,21.890,0,0.6240,5.0190,100.00,1.4394,4,437.0,21.20,396.90,34.41,14.40 +3.32105,0.00,19.580,1,0.8710,5.4030,100.00,1.3216,5,403.0,14.70,396.90,26.82,13.40 +4.09740,0.00,19.580,0,0.8710,5.4680,100.00,1.4118,5,403.0,14.70,396.90,26.42,15.60 +2.77974,0.00,19.580,0,0.8710,4.9030,97.80,1.3459,5,403.0,14.70,396.90,29.29,11.80 +2.37934,0.00,19.580,0,0.8710,6.1300,100.00,1.4191,5,403.0,14.70,172.91,27.80,13.80 +2.15505,0.00,19.580,0,0.8710,5.6280,100.00,1.5166,5,403.0,14.70,169.27,16.65,15.60 +2.36862,0.00,19.580,0,0.8710,4.9260,95.70,1.4608,5,403.0,14.70,391.71,29.53,14.60 +2.33099,0.00,19.580,0,0.8710,5.1860,93.80,1.5296,5,403.0,14.70,356.99,28.32,17.80 +2.73397,0.00,19.580,0,0.8710,5.5970,94.90,1.5257,5,403.0,14.70,351.85,21.45,15.40 +1.65660,0.00,19.580,0,0.8710,6.1220,97.30,1.6180,5,403.0,14.70,372.80,14.10,21.50 +1.49632,0.00,19.580,0,0.8710,5.4040,100.00,1.5916,5,403.0,14.70,341.60,13.28,19.60 +1.12658,0.00,19.580,1,0.8710,5.0120,88.00,1.6102,5,403.0,14.70,343.28,12.12,15.30 +2.14918,0.00,19.580,0,0.8710,5.7090,98.50,1.6232,5,403.0,14.70,261.95,15.79,19.40 +1.41385,0.00,19.580,1,0.8710,6.1290,96.00,1.7494,5,403.0,14.70,321.02,15.12,17.00 +3.53501,0.00,19.580,1,0.8710,6.1520,82.60,1.7455,5,403.0,14.70,88.01,15.02,15.60 +2.44668,0.00,19.580,0,0.8710,5.2720,94.00,1.7364,5,403.0,14.70,88.63,16.14,13.10 
+1.22358,0.00,19.580,0,0.6050,6.9430,97.40,1.8773,5,403.0,14.70,363.43,4.59,41.30 +1.34284,0.00,19.580,0,0.6050,6.0660,100.00,1.7573,5,403.0,14.70,353.89,6.43,24.30 +1.42502,0.00,19.580,0,0.8710,6.5100,100.00,1.7659,5,403.0,14.70,364.31,7.39,23.30 +1.27346,0.00,19.580,1,0.6050,6.2500,92.60,1.7984,5,403.0,14.70,338.92,5.50,27.00 +1.46336,0.00,19.580,0,0.6050,7.4890,90.80,1.9709,5,403.0,14.70,374.43,1.73,50.00 +1.83377,0.00,19.580,1,0.6050,7.8020,98.20,2.0407,5,403.0,14.70,389.61,1.92,50.00 +1.51902,0.00,19.580,1,0.6050,8.3750,93.90,2.1620,5,403.0,14.70,388.45,3.32,50.00 +2.24236,0.00,19.580,0,0.6050,5.8540,91.80,2.4220,5,403.0,14.70,395.11,11.64,22.70 +2.92400,0.00,19.580,0,0.6050,6.1010,93.00,2.2834,5,403.0,14.70,240.16,9.81,25.00 +2.01019,0.00,19.580,0,0.6050,7.9290,96.20,2.0459,5,403.0,14.70,369.30,3.70,50.00 +1.80028,0.00,19.580,0,0.6050,5.8770,79.20,2.4259,5,403.0,14.70,227.61,12.14,23.80 +2.30040,0.00,19.580,0,0.6050,6.3190,96.10,2.1000,5,403.0,14.70,297.09,11.10,23.80 +2.44953,0.00,19.580,0,0.6050,6.4020,95.20,2.2625,5,403.0,14.70,330.04,11.32,22.30 +1.20742,0.00,19.580,0,0.6050,5.8750,94.60,2.4259,5,403.0,14.70,292.29,14.43,17.40 +2.31390,0.00,19.580,0,0.6050,5.8800,97.30,2.3887,5,403.0,14.70,348.13,12.03,19.10 +0.13914,0.00,4.050,0,0.5100,5.5720,88.50,2.5961,5,296.0,16.60,396.90,14.69,23.10 +0.09178,0.00,4.050,0,0.5100,6.4160,84.10,2.6463,5,296.0,16.60,395.50,9.04,23.60 +0.08447,0.00,4.050,0,0.5100,5.8590,68.70,2.7019,5,296.0,16.60,393.23,9.64,22.60 +0.06664,0.00,4.050,0,0.5100,6.5460,33.10,3.1323,5,296.0,16.60,390.96,5.33,29.40 +0.07022,0.00,4.050,0,0.5100,6.0200,47.20,3.5549,5,296.0,16.60,393.23,10.11,23.20 +0.05425,0.00,4.050,0,0.5100,6.3150,73.40,3.3175,5,296.0,16.60,395.60,6.29,24.60 +0.06642,0.00,4.050,0,0.5100,6.8600,74.40,2.9153,5,296.0,16.60,391.27,6.92,29.90 +0.05780,0.00,2.460,0,0.4880,6.9800,58.40,2.8290,3,193.0,17.80,396.90,5.04,37.20 +0.06588,0.00,2.460,0,0.4880,7.7650,83.30,2.7410,3,193.0,17.80,395.56,7.56,39.80 
+0.06888,0.00,2.460,0,0.4880,6.1440,62.20,2.5979,3,193.0,17.80,396.90,9.45,36.20 +0.09103,0.00,2.460,0,0.4880,7.1550,92.20,2.7006,3,193.0,17.80,394.12,4.82,37.90 +0.10008,0.00,2.460,0,0.4880,6.5630,95.60,2.8470,3,193.0,17.80,396.90,5.68,32.50 +0.08308,0.00,2.460,0,0.4880,5.6040,89.80,2.9879,3,193.0,17.80,391.00,13.98,26.40 +0.06047,0.00,2.460,0,0.4880,6.1530,68.80,3.2797,3,193.0,17.80,387.11,13.15,29.60 +0.05602,0.00,2.460,0,0.4880,7.8310,53.60,3.1992,3,193.0,17.80,392.63,4.45,50.00 +0.07875,45.00,3.440,0,0.4370,6.7820,41.10,3.7886,5,398.0,15.20,393.87,6.68,32.00 +0.12579,45.00,3.440,0,0.4370,6.5560,29.10,4.5667,5,398.0,15.20,382.84,4.56,29.80 +0.08370,45.00,3.440,0,0.4370,7.1850,38.90,4.5667,5,398.0,15.20,396.90,5.39,34.90 +0.09068,45.00,3.440,0,0.4370,6.9510,21.50,6.4798,5,398.0,15.20,377.68,5.10,37.00 +0.06911,45.00,3.440,0,0.4370,6.7390,30.80,6.4798,5,398.0,15.20,389.71,4.69,30.50 +0.08664,45.00,3.440,0,0.4370,7.1780,26.30,6.4798,5,398.0,15.20,390.49,2.87,36.40 +0.02187,60.00,2.930,0,0.4010,6.8000,9.90,6.2196,1,265.0,15.60,393.37,5.03,31.10 +0.01439,60.00,2.930,0,0.4010,6.6040,18.80,6.2196,1,265.0,15.60,376.70,4.38,29.10 +0.01381,80.00,0.460,0,0.4220,7.8750,32.00,5.6484,4,255.0,14.40,394.23,2.97,50.00 +0.04011,80.00,1.520,0,0.4040,7.2870,34.10,7.3090,2,329.0,12.60,396.90,4.08,33.30 +0.04666,80.00,1.520,0,0.4040,7.1070,36.60,7.3090,2,329.0,12.60,354.31,8.61,30.30 +0.03768,80.00,1.520,0,0.4040,7.2740,38.30,7.3090,2,329.0,12.60,392.20,6.62,34.60 +0.03150,95.00,1.470,0,0.4030,6.9750,15.30,7.6534,3,402.0,17.00,396.90,4.56,34.90 +0.01778,95.00,1.470,0,0.4030,7.1350,13.90,7.6534,3,402.0,17.00,384.30,4.45,32.90 +0.03445,82.50,2.030,0,0.4150,6.1620,38.40,6.2700,2,348.0,14.70,393.77,7.43,24.10 +0.02177,82.50,2.030,0,0.4150,7.6100,15.70,6.2700,2,348.0,14.70,395.38,3.11,42.30 +0.03510,95.00,2.680,0,0.4161,7.8530,33.20,5.1180,4,224.0,14.70,392.78,3.81,48.50 +0.02009,95.00,2.680,0,0.4161,8.0340,31.90,5.1180,4,224.0,14.70,390.55,2.88,50.00 
+0.13642,0.00,10.590,0,0.4890,5.8910,22.30,3.9454,4,277.0,18.60,396.90,10.87,22.60 +0.22969,0.00,10.590,0,0.4890,6.3260,52.50,4.3549,4,277.0,18.60,394.87,10.97,24.40 +0.25199,0.00,10.590,0,0.4890,5.7830,72.70,4.3549,4,277.0,18.60,389.43,18.06,22.50 +0.13587,0.00,10.590,1,0.4890,6.0640,59.10,4.2392,4,277.0,18.60,381.32,14.66,24.40 +0.43571,0.00,10.590,1,0.4890,5.3440,100.00,3.8750,4,277.0,18.60,396.90,23.09,20.00 +0.17446,0.00,10.590,1,0.4890,5.9600,92.10,3.8771,4,277.0,18.60,393.25,17.27,21.70 +0.37578,0.00,10.590,1,0.4890,5.4040,88.60,3.6650,4,277.0,18.60,395.24,23.98,19.30 +0.21719,0.00,10.590,1,0.4890,5.8070,53.80,3.6526,4,277.0,18.60,390.94,16.03,22.40 +0.14052,0.00,10.590,0,0.4890,6.3750,32.30,3.9454,4,277.0,18.60,385.81,9.38,28.10 +0.28955,0.00,10.590,0,0.4890,5.4120,9.80,3.5875,4,277.0,18.60,348.93,29.55,23.70 +0.19802,0.00,10.590,0,0.4890,6.1820,42.40,3.9454,4,277.0,18.60,393.63,9.47,25.00 +0.04560,0.00,13.890,1,0.5500,5.8880,56.00,3.1121,5,276.0,16.40,392.80,13.51,23.30 +0.07013,0.00,13.890,0,0.5500,6.6420,85.10,3.4211,5,276.0,16.40,392.78,9.69,28.70 +0.11069,0.00,13.890,1,0.5500,5.9510,93.80,2.8893,5,276.0,16.40,396.90,17.92,21.50 +0.11425,0.00,13.890,1,0.5500,6.3730,92.40,3.3633,5,276.0,16.40,393.74,10.50,23.00 +0.35809,0.00,6.200,1,0.5070,6.9510,88.50,2.8617,8,307.0,17.40,391.70,9.71,26.70 +0.40771,0.00,6.200,1,0.5070,6.1640,91.30,3.0480,8,307.0,17.40,395.24,21.46,21.70 +0.62356,0.00,6.200,1,0.5070,6.8790,77.70,3.2721,8,307.0,17.40,390.39,9.93,27.50 +0.61470,0.00,6.200,0,0.5070,6.6180,80.80,3.2721,8,307.0,17.40,396.90,7.60,30.10 +0.31533,0.00,6.200,0,0.5040,8.2660,78.30,2.8944,8,307.0,17.40,385.05,4.14,44.80 +0.52693,0.00,6.200,0,0.5040,8.7250,83.00,2.8944,8,307.0,17.40,382.00,4.63,50.00 +0.38214,0.00,6.200,0,0.5040,8.0400,86.50,3.2157,8,307.0,17.40,387.38,3.13,37.60 +0.41238,0.00,6.200,0,0.5040,7.1630,79.90,3.2157,8,307.0,17.40,372.08,6.36,31.60 +0.29819,0.00,6.200,0,0.5040,7.6860,17.00,3.3751,8,307.0,17.40,377.51,3.92,46.70 
+0.44178,0.00,6.200,0,0.5040,6.5520,21.40,3.3751,8,307.0,17.40,380.34,3.76,31.50 +0.53700,0.00,6.200,0,0.5040,5.9810,68.10,3.6715,8,307.0,17.40,378.35,11.65,24.30 +0.46296,0.00,6.200,0,0.5040,7.4120,76.90,3.6715,8,307.0,17.40,376.14,5.25,31.70 +0.57529,0.00,6.200,0,0.5070,8.3370,73.30,3.8384,8,307.0,17.40,385.91,2.47,41.70 +0.33147,0.00,6.200,0,0.5070,8.2470,70.40,3.6519,8,307.0,17.40,378.95,3.95,48.30 +0.44791,0.00,6.200,1,0.5070,6.7260,66.50,3.6519,8,307.0,17.40,360.20,8.05,29.00 +0.33045,0.00,6.200,0,0.5070,6.0860,61.50,3.6519,8,307.0,17.40,376.75,10.88,24.00 +0.52058,0.00,6.200,1,0.5070,6.6310,76.50,4.1480,8,307.0,17.40,388.45,9.54,25.10 +0.51183,0.00,6.200,0,0.5070,7.3580,71.60,4.1480,8,307.0,17.40,390.07,4.73,31.50 +0.08244,30.00,4.930,0,0.4280,6.4810,18.50,6.1899,6,300.0,16.60,379.41,6.36,23.70 +0.09252,30.00,4.930,0,0.4280,6.6060,42.20,6.1899,6,300.0,16.60,383.78,7.37,23.30 +0.11329,30.00,4.930,0,0.4280,6.8970,54.30,6.3361,6,300.0,16.60,391.25,11.38,22.00 +0.10612,30.00,4.930,0,0.4280,6.0950,65.10,6.3361,6,300.0,16.60,394.62,12.40,20.10 +0.10290,30.00,4.930,0,0.4280,6.3580,52.90,7.0355,6,300.0,16.60,372.75,11.22,22.20 +0.12757,30.00,4.930,0,0.4280,6.3930,7.80,7.0355,6,300.0,16.60,374.71,5.19,23.70 +0.20608,22.00,5.860,0,0.4310,5.5930,76.50,7.9549,7,330.0,19.10,372.49,12.50,17.60 +0.19133,22.00,5.860,0,0.4310,5.6050,70.20,7.9549,7,330.0,19.10,389.13,18.46,18.50 +0.33983,22.00,5.860,0,0.4310,6.1080,34.90,8.0555,7,330.0,19.10,390.18,9.16,24.30 +0.19657,22.00,5.860,0,0.4310,6.2260,79.20,8.0555,7,330.0,19.10,376.14,10.15,20.50 +0.16439,22.00,5.860,0,0.4310,6.4330,49.10,7.8265,7,330.0,19.10,374.71,9.52,24.50 +0.19073,22.00,5.860,0,0.4310,6.7180,17.50,7.8265,7,330.0,19.10,393.74,6.56,26.20 +0.14030,22.00,5.860,0,0.4310,6.4870,13.00,7.3967,7,330.0,19.10,396.28,5.90,24.40 +0.21409,22.00,5.860,0,0.4310,6.4380,8.90,7.3967,7,330.0,19.10,377.07,3.59,24.80 +0.08221,22.00,5.860,0,0.4310,6.9570,6.80,8.9067,7,330.0,19.10,386.09,3.53,29.60 
+0.36894,22.00,5.860,0,0.4310,8.2590,8.40,8.9067,7,330.0,19.10,396.90,3.54,42.80 +0.04819,80.00,3.640,0,0.3920,6.1080,32.00,9.2203,1,315.0,16.40,392.89,6.57,21.90 +0.03548,80.00,3.640,0,0.3920,5.8760,19.10,9.2203,1,315.0,16.40,395.18,9.25,20.90 +0.01538,90.00,3.750,0,0.3940,7.4540,34.20,6.3361,3,244.0,15.90,386.34,3.11,44.00 +0.61154,20.00,3.970,0,0.6470,8.7040,86.90,1.8010,5,264.0,13.00,389.70,5.12,50.00 +0.66351,20.00,3.970,0,0.6470,7.3330,100.00,1.8946,5,264.0,13.00,383.29,7.79,36.00 +0.65665,20.00,3.970,0,0.6470,6.8420,100.00,2.0107,5,264.0,13.00,391.93,6.90,30.10 +0.54011,20.00,3.970,0,0.6470,7.2030,81.80,2.1121,5,264.0,13.00,392.80,9.59,33.80 +0.53412,20.00,3.970,0,0.6470,7.5200,89.40,2.1398,5,264.0,13.00,388.37,7.26,43.10 +0.52014,20.00,3.970,0,0.6470,8.3980,91.50,2.2885,5,264.0,13.00,386.86,5.91,48.80 +0.82526,20.00,3.970,0,0.6470,7.3270,94.50,2.0788,5,264.0,13.00,393.42,11.25,31.00 +0.55007,20.00,3.970,0,0.6470,7.2060,91.60,1.9301,5,264.0,13.00,387.89,8.10,36.50 +0.76162,20.00,3.970,0,0.6470,5.5600,62.80,1.9865,5,264.0,13.00,392.40,10.45,22.80 +0.78570,20.00,3.970,0,0.6470,7.0140,84.60,2.1329,5,264.0,13.00,384.07,14.79,30.70 +0.57834,20.00,3.970,0,0.5750,8.2970,67.00,2.4216,5,264.0,13.00,384.54,7.44,50.00 +0.54050,20.00,3.970,0,0.5750,7.4700,52.60,2.8720,5,264.0,13.00,390.30,3.16,43.50 +0.09065,20.00,6.960,1,0.4640,5.9200,61.50,3.9175,3,223.0,18.60,391.34,13.65,20.70 +0.29916,20.00,6.960,0,0.4640,5.8560,42.10,4.4290,3,223.0,18.60,388.65,13.00,21.10 +0.16211,20.00,6.960,0,0.4640,6.2400,16.30,4.4290,3,223.0,18.60,396.90,6.59,25.20 +0.11460,20.00,6.960,0,0.4640,6.5380,58.70,3.9175,3,223.0,18.60,394.96,7.73,24.40 +0.22188,20.00,6.960,1,0.4640,7.6910,51.80,4.3665,3,223.0,18.60,390.77,6.58,35.20 +0.05644,40.00,6.410,1,0.4470,6.7580,32.90,4.0776,4,254.0,17.60,396.90,3.53,32.40 +0.09604,40.00,6.410,0,0.4470,6.8540,42.80,4.2673,4,254.0,17.60,396.90,2.98,32.00 +0.10469,40.00,6.410,1,0.4470,7.2670,49.00,4.7872,4,254.0,17.60,389.25,6.05,33.20 
+0.06127,40.00,6.410,1,0.4470,6.8260,27.60,4.8628,4,254.0,17.60,393.45,4.16,33.10 +0.07978,40.00,6.410,0,0.4470,6.4820,32.10,4.1403,4,254.0,17.60,396.90,7.19,29.10 +0.21038,20.00,3.330,0,0.4429,6.8120,32.20,4.1007,5,216.0,14.90,396.90,4.85,35.10 +0.03578,20.00,3.330,0,0.4429,7.8200,64.50,4.6947,5,216.0,14.90,387.31,3.76,45.40 +0.03705,20.00,3.330,0,0.4429,6.9680,37.20,5.2447,5,216.0,14.90,392.23,4.59,35.40 +0.06129,20.00,3.330,1,0.4429,7.6450,49.70,5.2119,5,216.0,14.90,377.07,3.01,46.00 +0.01501,90.00,1.210,1,0.4010,7.9230,24.80,5.8850,1,198.0,13.60,395.52,3.16,50.00 +0.00906,90.00,2.970,0,0.4000,7.0880,20.80,7.3073,1,285.0,15.30,394.72,7.85,32.20 +0.01096,55.00,2.250,0,0.3890,6.4530,31.90,7.3073,1,300.0,15.30,394.72,8.23,22.00 +0.01965,80.00,1.760,0,0.3850,6.2300,31.50,9.0892,1,241.0,18.20,341.60,12.93,20.10 +0.03871,52.50,5.320,0,0.4050,6.2090,31.30,7.3172,6,293.0,16.60,396.90,7.14,23.20 +0.04590,52.50,5.320,0,0.4050,6.3150,45.60,7.3172,6,293.0,16.60,396.90,7.60,22.30 +0.04297,52.50,5.320,0,0.4050,6.5650,22.90,7.3172,6,293.0,16.60,371.72,9.51,24.80 +0.03502,80.00,4.950,0,0.4110,6.8610,27.90,5.1167,4,245.0,19.20,396.90,3.33,28.50 +0.07886,80.00,4.950,0,0.4110,7.1480,27.70,5.1167,4,245.0,19.20,396.90,3.56,37.30 +0.03615,80.00,4.950,0,0.4110,6.6300,23.40,5.1167,4,245.0,19.20,396.90,4.70,27.90 +0.08265,0.00,13.920,0,0.4370,6.1270,18.40,5.5027,4,289.0,16.00,396.90,8.58,23.90 +0.08199,0.00,13.920,0,0.4370,6.0090,42.30,5.5027,4,289.0,16.00,396.90,10.40,21.70 +0.12932,0.00,13.920,0,0.4370,6.6780,31.10,5.9604,4,289.0,16.00,396.90,6.27,28.60 +0.05372,0.00,13.920,0,0.4370,6.5490,51.00,5.9604,4,289.0,16.00,392.85,7.39,27.10 +0.14103,0.00,13.920,0,0.4370,5.7900,58.00,6.3200,4,289.0,16.00,396.90,15.84,20.30 +0.06466,70.00,2.240,0,0.4000,6.3450,20.10,7.8278,5,358.0,14.80,368.24,4.97,22.50 +0.05561,70.00,2.240,0,0.4000,7.0410,10.00,7.8278,5,358.0,14.80,371.58,4.74,29.00 +0.04417,70.00,2.240,0,0.4000,6.8710,47.40,7.8278,5,358.0,14.80,390.86,6.07,24.80 
+0.03537,34.00,6.090,0,0.4330,6.5900,40.40,5.4917,7,329.0,16.10,395.75,9.50,22.00 +0.09266,34.00,6.090,0,0.4330,6.4950,18.40,5.4917,7,329.0,16.10,383.61,8.67,26.40 +0.10000,34.00,6.090,0,0.4330,6.9820,17.70,5.4917,7,329.0,16.10,390.43,4.86,33.10 +0.05515,33.00,2.180,0,0.4720,7.2360,41.10,4.0220,7,222.0,18.40,393.68,6.93,36.10 +0.05479,33.00,2.180,0,0.4720,6.6160,58.10,3.3700,7,222.0,18.40,393.36,8.93,28.40 +0.07503,33.00,2.180,0,0.4720,7.4200,71.90,3.0992,7,222.0,18.40,396.90,6.47,33.40 +0.04932,33.00,2.180,0,0.4720,6.8490,70.30,3.1827,7,222.0,18.40,396.90,7.53,28.20 +0.49298,0.00,9.900,0,0.5440,6.6350,82.50,3.3175,4,304.0,18.40,396.90,4.54,22.80 +0.34940,0.00,9.900,0,0.5440,5.9720,76.70,3.1025,4,304.0,18.40,396.24,9.97,20.30 +2.63548,0.00,9.900,0,0.5440,4.9730,37.80,2.5194,4,304.0,18.40,350.45,12.64,16.10 +0.79041,0.00,9.900,0,0.5440,6.1220,52.80,2.6403,4,304.0,18.40,396.90,5.98,22.10 +0.26169,0.00,9.900,0,0.5440,6.0230,90.40,2.8340,4,304.0,18.40,396.30,11.72,19.40 +0.26938,0.00,9.900,0,0.5440,6.2660,82.80,3.2628,4,304.0,18.40,393.39,7.90,21.60 +0.36920,0.00,9.900,0,0.5440,6.5670,87.30,3.6023,4,304.0,18.40,395.69,9.28,23.80 +0.25356,0.00,9.900,0,0.5440,5.7050,77.70,3.9450,4,304.0,18.40,396.42,11.50,16.20 +0.31827,0.00,9.900,0,0.5440,5.9140,83.20,3.9986,4,304.0,18.40,390.70,18.33,17.80 +0.24522,0.00,9.900,0,0.5440,5.7820,71.70,4.0317,4,304.0,18.40,396.90,15.94,19.80 +0.40202,0.00,9.900,0,0.5440,6.3820,67.20,3.5325,4,304.0,18.40,395.21,10.36,23.10 +0.47547,0.00,9.900,0,0.5440,6.1130,58.80,4.0019,4,304.0,18.40,396.23,12.73,21.00 +0.16760,0.00,7.380,0,0.4930,6.4260,52.30,4.5404,5,287.0,19.60,396.90,7.20,23.80 +0.18159,0.00,7.380,0,0.4930,6.3760,54.30,4.5404,5,287.0,19.60,396.90,6.87,23.10 +0.35114,0.00,7.380,0,0.4930,6.0410,49.90,4.7211,5,287.0,19.60,396.90,7.70,20.40 +0.28392,0.00,7.380,0,0.4930,5.7080,74.30,4.7211,5,287.0,19.60,391.13,11.74,18.50 +0.34109,0.00,7.380,0,0.4930,6.4150,40.10,4.7211,5,287.0,19.60,396.90,6.12,25.00 
+0.19186,0.00,7.380,0,0.4930,6.4310,14.70,5.4159,5,287.0,19.60,393.68,5.08,24.60 +0.30347,0.00,7.380,0,0.4930,6.3120,28.90,5.4159,5,287.0,19.60,396.90,6.15,23.00 +0.24103,0.00,7.380,0,0.4930,6.0830,43.70,5.4159,5,287.0,19.60,396.90,12.79,22.20 +0.06617,0.00,3.240,0,0.4600,5.8680,25.80,5.2146,4,430.0,16.90,382.44,9.97,19.30 +0.06724,0.00,3.240,0,0.4600,6.3330,17.20,5.2146,4,430.0,16.90,375.21,7.34,22.60 +0.04544,0.00,3.240,0,0.4600,6.1440,32.20,5.8736,4,430.0,16.90,368.57,9.09,19.80 +0.05023,35.00,6.060,0,0.4379,5.7060,28.40,6.6407,1,304.0,16.90,394.02,12.43,17.10 +0.03466,35.00,6.060,0,0.4379,6.0310,23.30,6.6407,1,304.0,16.90,362.25,7.83,19.40 +0.05083,0.00,5.190,0,0.5150,6.3160,38.10,6.4584,5,224.0,20.20,389.71,5.68,22.20 +0.03738,0.00,5.190,0,0.5150,6.3100,38.50,6.4584,5,224.0,20.20,389.40,6.75,20.70 +0.03961,0.00,5.190,0,0.5150,6.0370,34.50,5.9853,5,224.0,20.20,396.90,8.01,21.10 +0.03427,0.00,5.190,0,0.5150,5.8690,46.30,5.2311,5,224.0,20.20,396.90,9.80,19.50 +0.03041,0.00,5.190,0,0.5150,5.8950,59.60,5.6150,5,224.0,20.20,394.81,10.56,18.50 +0.03306,0.00,5.190,0,0.5150,6.0590,37.30,4.8122,5,224.0,20.20,396.14,8.51,20.60 +0.05497,0.00,5.190,0,0.5150,5.9850,45.40,4.8122,5,224.0,20.20,396.90,9.74,19.00 +0.06151,0.00,5.190,0,0.5150,5.9680,58.50,4.8122,5,224.0,20.20,396.90,9.29,18.70 +0.01301,35.00,1.520,0,0.4420,7.2410,49.30,7.0379,1,284.0,15.50,394.74,5.49,32.70 +0.02498,0.00,1.890,0,0.5180,6.5400,59.70,6.2669,1,422.0,15.90,389.96,8.65,16.50 +0.02543,55.00,3.780,0,0.4840,6.6960,56.40,5.7321,5,370.0,17.60,396.90,7.18,23.90 +0.03049,55.00,3.780,0,0.4840,6.8740,28.10,6.4654,5,370.0,17.60,387.97,4.61,31.20 +0.03113,0.00,4.390,0,0.4420,6.0140,48.50,8.0136,3,352.0,18.80,385.64,10.53,17.50 +0.06162,0.00,4.390,0,0.4420,5.8980,52.30,8.0136,3,352.0,18.80,364.61,12.67,17.20 +0.01870,85.00,4.150,0,0.4290,6.5160,27.70,8.5353,4,351.0,17.90,392.43,6.36,23.10 +0.01501,80.00,2.010,0,0.4350,6.6350,29.70,8.3440,4,280.0,17.00,390.94,5.99,24.50 
+0.02899,40.00,1.250,0,0.4290,6.9390,34.50,8.7921,1,335.0,19.70,389.85,5.89,26.60 +0.06211,40.00,1.250,0,0.4290,6.4900,44.40,8.7921,1,335.0,19.70,396.90,5.98,22.90 +0.07950,60.00,1.690,0,0.4110,6.5790,35.90,10.7103,4,411.0,18.30,370.78,5.49,24.10 +0.07244,60.00,1.690,0,0.4110,5.8840,18.50,10.7103,4,411.0,18.30,392.33,7.79,18.60 +0.01709,90.00,2.020,0,0.4100,6.7280,36.10,12.1265,5,187.0,17.00,384.46,4.50,30.10 +0.04301,80.00,1.910,0,0.4130,5.6630,21.90,10.5857,4,334.0,22.00,382.80,8.05,18.20 +0.10659,80.00,1.910,0,0.4130,5.9360,19.50,10.5857,4,334.0,22.00,376.04,5.57,20.60 +8.98296,0.00,18.100,1,0.7700,6.2120,97.40,2.1222,24,666.0,20.20,377.73,17.60,17.80 +3.84970,0.00,18.100,1,0.7700,6.3950,91.00,2.5052,24,666.0,20.20,391.34,13.27,21.70 +5.20177,0.00,18.100,1,0.7700,6.1270,83.40,2.7227,24,666.0,20.20,395.43,11.48,22.70 +4.26131,0.00,18.100,0,0.7700,6.1120,81.30,2.5091,24,666.0,20.20,390.74,12.67,22.60 +4.54192,0.00,18.100,0,0.7700,6.3980,88.00,2.5182,24,666.0,20.20,374.56,7.79,25.00 +3.83684,0.00,18.100,0,0.7700,6.2510,91.10,2.2955,24,666.0,20.20,350.65,14.19,19.90 +3.67822,0.00,18.100,0,0.7700,5.3620,96.20,2.1036,24,666.0,20.20,380.79,10.19,20.80 +4.22239,0.00,18.100,1,0.7700,5.8030,89.00,1.9047,24,666.0,20.20,353.04,14.64,16.80 +3.47428,0.00,18.100,1,0.7180,8.7800,82.90,1.9047,24,666.0,20.20,354.55,5.29,21.90 +4.55587,0.00,18.100,0,0.7180,3.5610,87.90,1.6132,24,666.0,20.20,354.70,7.12,27.50 +3.69695,0.00,18.100,0,0.7180,4.9630,91.40,1.7523,24,666.0,20.20,316.03,14.00,21.90 +13.52220,0.00,18.100,0,0.6310,3.8630,100.00,1.5106,24,666.0,20.20,131.42,13.33,23.10 +4.89822,0.00,18.100,0,0.6310,4.9700,100.00,1.3325,24,666.0,20.20,375.52,3.26,50.00 +5.66998,0.00,18.100,1,0.6310,6.6830,96.80,1.3567,24,666.0,20.20,375.33,3.73,50.00 +6.53876,0.00,18.100,1,0.6310,7.0160,97.50,1.2024,24,666.0,20.20,392.05,2.96,50.00 +9.23230,0.00,18.100,0,0.6310,6.2160,100.00,1.1691,24,666.0,20.20,366.15,9.53,50.00 
+8.26725,0.00,18.100,1,0.6680,5.8750,89.60,1.1296,24,666.0,20.20,347.88,8.88,50.00 +11.10810,0.00,18.100,0,0.6680,4.9060,100.00,1.1742,24,666.0,20.20,396.90,34.77,13.80 +18.49820,0.00,18.100,0,0.6680,4.1380,100.00,1.1370,24,666.0,20.20,396.90,37.97,13.80 +19.60910,0.00,18.100,0,0.6710,7.3130,97.90,1.3163,24,666.0,20.20,396.90,13.44,15.00 +15.28800,0.00,18.100,0,0.6710,6.6490,93.30,1.3449,24,666.0,20.20,363.02,23.24,13.90 +9.82349,0.00,18.100,0,0.6710,6.7940,98.80,1.3580,24,666.0,20.20,396.90,21.24,13.30 +23.64820,0.00,18.100,0,0.6710,6.3800,96.20,1.3861,24,666.0,20.20,396.90,23.69,13.10 +17.86670,0.00,18.100,0,0.6710,6.2230,100.00,1.3861,24,666.0,20.20,393.74,21.78,10.20 +88.97620,0.00,18.100,0,0.6710,6.9680,91.90,1.4165,24,666.0,20.20,396.90,17.21,10.40 +15.87440,0.00,18.100,0,0.6710,6.5450,99.10,1.5192,24,666.0,20.20,396.90,21.08,10.90 +9.18702,0.00,18.100,0,0.7000,5.5360,100.00,1.5804,24,666.0,20.20,396.90,23.60,11.30 +7.99248,0.00,18.100,0,0.7000,5.5200,100.00,1.5331,24,666.0,20.20,396.90,24.56,12.30 +20.08490,0.00,18.100,0,0.7000,4.3680,91.20,1.4395,24,666.0,20.20,285.83,30.63,8.80 +16.81180,0.00,18.100,0,0.7000,5.2770,98.10,1.4261,24,666.0,20.20,396.90,30.81,7.20 +24.39380,0.00,18.100,0,0.7000,4.6520,100.00,1.4672,24,666.0,20.20,396.90,28.28,10.50 +22.59710,0.00,18.100,0,0.7000,5.0000,89.50,1.5184,24,666.0,20.20,396.90,31.99,7.40 +14.33370,0.00,18.100,0,0.7000,4.8800,100.00,1.5895,24,666.0,20.20,372.92,30.62,10.20 +8.15174,0.00,18.100,0,0.7000,5.3900,98.90,1.7281,24,666.0,20.20,396.90,20.85,11.50 +6.96215,0.00,18.100,0,0.7000,5.7130,97.00,1.9265,24,666.0,20.20,394.43,17.11,15.10 +5.29305,0.00,18.100,0,0.7000,6.0510,82.50,2.1678,24,666.0,20.20,378.38,18.76,23.20 +11.57790,0.00,18.100,0,0.7000,5.0360,97.00,1.7700,24,666.0,20.20,396.90,25.68,9.70 +8.64476,0.00,18.100,0,0.6930,6.1930,92.60,1.7912,24,666.0,20.20,396.90,15.17,13.80 +13.35980,0.00,18.100,0,0.6930,5.8870,94.70,1.7821,24,666.0,20.20,396.90,16.35,12.70 
+8.71675,0.00,18.100,0,0.6930,6.4710,98.80,1.7257,24,666.0,20.20,391.98,17.12,13.10 +5.87205,0.00,18.100,0,0.6930,6.4050,96.00,1.6768,24,666.0,20.20,396.90,19.37,12.50 +7.67202,0.00,18.100,0,0.6930,5.7470,98.90,1.6334,24,666.0,20.20,393.10,19.92,8.50 +38.35180,0.00,18.100,0,0.6930,5.4530,100.00,1.4896,24,666.0,20.20,396.90,30.59,5.00 +9.91655,0.00,18.100,0,0.6930,5.8520,77.80,1.5004,24,666.0,20.20,338.16,29.97,6.30 +25.04610,0.00,18.100,0,0.6930,5.9870,100.00,1.5888,24,666.0,20.20,396.90,26.77,5.60 +14.23620,0.00,18.100,0,0.6930,6.3430,100.00,1.5741,24,666.0,20.20,396.90,20.32,7.20 +9.59571,0.00,18.100,0,0.6930,6.4040,100.00,1.6390,24,666.0,20.20,376.11,20.31,12.10 +24.80170,0.00,18.100,0,0.6930,5.3490,96.00,1.7028,24,666.0,20.20,396.90,19.77,8.30 +41.52920,0.00,18.100,0,0.6930,5.5310,85.40,1.6074,24,666.0,20.20,329.46,27.38,8.50 +67.92080,0.00,18.100,0,0.6930,5.6830,100.00,1.4254,24,666.0,20.20,384.97,22.98,5.00 +20.71620,0.00,18.100,0,0.6590,4.1380,100.00,1.1781,24,666.0,20.20,370.22,23.34,11.90 +11.95110,0.00,18.100,0,0.6590,5.6080,100.00,1.2852,24,666.0,20.20,332.09,12.13,27.90 +7.40389,0.00,18.100,0,0.5970,5.6170,97.90,1.4547,24,666.0,20.20,314.64,26.40,17.20 +14.43830,0.00,18.100,0,0.5970,6.8520,100.00,1.4655,24,666.0,20.20,179.36,19.78,27.50 +51.13580,0.00,18.100,0,0.5970,5.7570,100.00,1.4130,24,666.0,20.20,2.60,10.11,15.00 +14.05070,0.00,18.100,0,0.5970,6.6570,100.00,1.5275,24,666.0,20.20,35.05,21.22,17.20 +18.81100,0.00,18.100,0,0.5970,4.6280,100.00,1.5539,24,666.0,20.20,28.79,34.37,17.90 +28.65580,0.00,18.100,0,0.5970,5.1550,100.00,1.5894,24,666.0,20.20,210.97,20.08,16.30 +45.74610,0.00,18.100,0,0.6930,4.5190,100.00,1.6582,24,666.0,20.20,88.27,36.98,7.00 +18.08460,0.00,18.100,0,0.6790,6.4340,100.00,1.8347,24,666.0,20.20,27.25,29.05,7.20 +10.83420,0.00,18.100,0,0.6790,6.7820,90.80,1.8195,24,666.0,20.20,21.57,25.79,7.50 +25.94060,0.00,18.100,0,0.6790,5.3040,89.10,1.6475,24,666.0,20.20,127.36,26.64,10.40 
+73.53410,0.00,18.100,0,0.6790,5.9570,100.00,1.8026,24,666.0,20.20,16.45,20.62,8.80 +11.81230,0.00,18.100,0,0.7180,6.8240,76.50,1.7940,24,666.0,20.20,48.45,22.74,8.40 +11.08740,0.00,18.100,0,0.7180,6.4110,100.00,1.8589,24,666.0,20.20,318.75,15.02,16.70 +7.02259,0.00,18.100,0,0.7180,6.0060,95.30,1.8746,24,666.0,20.20,319.98,15.70,14.20 +12.04820,0.00,18.100,0,0.6140,5.6480,87.60,1.9512,24,666.0,20.20,291.55,14.10,20.80 +7.05042,0.00,18.100,0,0.6140,6.1030,85.10,2.0218,24,666.0,20.20,2.52,23.29,13.40 +8.79212,0.00,18.100,0,0.5840,5.5650,70.60,2.0635,24,666.0,20.20,3.65,17.16,11.70 +15.86030,0.00,18.100,0,0.6790,5.8960,95.40,1.9096,24,666.0,20.20,7.68,24.39,8.30 +12.24720,0.00,18.100,0,0.5840,5.8370,59.70,1.9976,24,666.0,20.20,24.65,15.69,10.20 +37.66190,0.00,18.100,0,0.6790,6.2020,78.70,1.8629,24,666.0,20.20,18.82,14.52,10.90 +7.36711,0.00,18.100,0,0.6790,6.1930,78.10,1.9356,24,666.0,20.20,96.73,21.52,11.00 +9.33889,0.00,18.100,0,0.6790,6.3800,95.60,1.9682,24,666.0,20.20,60.72,24.08,9.50 +8.49213,0.00,18.100,0,0.5840,6.3480,86.10,2.0527,24,666.0,20.20,83.45,17.64,14.50 +10.06230,0.00,18.100,0,0.5840,6.8330,94.30,2.0882,24,666.0,20.20,81.33,19.69,14.10 +6.44405,0.00,18.100,0,0.5840,6.4250,74.80,2.2004,24,666.0,20.20,97.95,12.03,16.10 +5.58107,0.00,18.100,0,0.7130,6.4360,87.90,2.3158,24,666.0,20.20,100.19,16.22,14.30 +13.91340,0.00,18.100,0,0.7130,6.2080,95.00,2.2222,24,666.0,20.20,100.63,15.17,11.70 +11.16040,0.00,18.100,0,0.7400,6.6290,94.60,2.1247,24,666.0,20.20,109.85,23.27,13.40 +14.42080,0.00,18.100,0,0.7400,6.4610,93.30,2.0026,24,666.0,20.20,27.49,18.05,9.60 +15.17720,0.00,18.100,0,0.7400,6.1520,100.00,1.9142,24,666.0,20.20,9.32,26.45,8.70 +13.67810,0.00,18.100,0,0.7400,5.9350,87.90,1.8206,24,666.0,20.20,68.95,34.02,8.40 +9.39063,0.00,18.100,0,0.7400,5.6270,93.90,1.8172,24,666.0,20.20,396.90,22.88,12.80 +22.05110,0.00,18.100,0,0.7400,5.8180,92.40,1.8662,24,666.0,20.20,391.45,22.11,10.50 
+9.72418,0.00,18.100,0,0.7400,6.4060,97.20,2.0651,24,666.0,20.20,385.96,19.52,17.10 +5.66637,0.00,18.100,0,0.7400,6.2190,100.00,2.0048,24,666.0,20.20,395.69,16.59,18.40 +9.96654,0.00,18.100,0,0.7400,6.4850,100.00,1.9784,24,666.0,20.20,386.73,18.85,15.40 +12.80230,0.00,18.100,0,0.7400,5.8540,96.60,1.8956,24,666.0,20.20,240.52,23.79,10.80 +10.67180,0.00,18.100,0,0.7400,6.4590,94.80,1.9879,24,666.0,20.20,43.06,23.98,11.80 +6.28807,0.00,18.100,0,0.7400,6.3410,96.40,2.0720,24,666.0,20.20,318.01,17.79,14.90 +9.92485,0.00,18.100,0,0.7400,6.2510,96.60,2.1980,24,666.0,20.20,388.52,16.44,12.60 +9.32909,0.00,18.100,0,0.7130,6.1850,98.70,2.2616,24,666.0,20.20,396.90,18.13,14.10 +7.52601,0.00,18.100,0,0.7130,6.4170,98.30,2.1850,24,666.0,20.20,304.21,19.31,13.00 +6.71772,0.00,18.100,0,0.7130,6.7490,92.60,2.3236,24,666.0,20.20,0.32,17.44,13.40 +5.44114,0.00,18.100,0,0.7130,6.6550,98.20,2.3552,24,666.0,20.20,355.29,17.73,15.20 +5.09017,0.00,18.100,0,0.7130,6.2970,91.80,2.3682,24,666.0,20.20,385.09,17.27,16.10 +8.24809,0.00,18.100,0,0.7130,7.3930,99.30,2.4527,24,666.0,20.20,375.87,16.74,17.80 +9.51363,0.00,18.100,0,0.7130,6.7280,94.10,2.4961,24,666.0,20.20,6.68,18.71,14.90 +4.75237,0.00,18.100,0,0.7130,6.5250,86.50,2.4358,24,666.0,20.20,50.92,18.13,14.10 +4.66883,0.00,18.100,0,0.7130,5.9760,87.90,2.5806,24,666.0,20.20,10.48,19.01,12.70 +8.20058,0.00,18.100,0,0.7130,5.9360,80.30,2.7792,24,666.0,20.20,3.50,16.94,13.50 +7.75223,0.00,18.100,0,0.7130,6.3010,83.70,2.7831,24,666.0,20.20,272.21,16.23,14.90 +6.80117,0.00,18.100,0,0.7130,6.0810,84.40,2.7175,24,666.0,20.20,396.90,14.70,20.00 +4.81213,0.00,18.100,0,0.7130,6.7010,90.00,2.5975,24,666.0,20.20,255.23,16.42,16.40 +3.69311,0.00,18.100,0,0.7130,6.3760,88.40,2.5671,24,666.0,20.20,391.43,14.65,17.70 +6.65492,0.00,18.100,0,0.7130,6.3170,83.00,2.7344,24,666.0,20.20,396.90,13.99,19.50 +5.82115,0.00,18.100,0,0.7130,6.5130,89.90,2.8016,24,666.0,20.20,393.82,10.29,20.20 
+7.83932,0.00,18.100,0,0.6550,6.2090,65.40,2.9634,24,666.0,20.20,396.90,13.22,21.40 +3.16360,0.00,18.100,0,0.6550,5.7590,48.20,3.0665,24,666.0,20.20,334.40,14.13,19.90 +3.77498,0.00,18.100,0,0.6550,5.9520,84.70,2.8715,24,666.0,20.20,22.01,17.15,19.00 +4.42228,0.00,18.100,0,0.5840,6.0030,94.50,2.5403,24,666.0,20.20,331.29,21.32,19.10 +15.57570,0.00,18.100,0,0.5800,5.9260,71.00,2.9084,24,666.0,20.20,368.74,18.13,19.10 +13.07510,0.00,18.100,0,0.5800,5.7130,56.70,2.8237,24,666.0,20.20,396.90,14.76,20.10 +4.34879,0.00,18.100,0,0.5800,6.1670,84.00,3.0334,24,666.0,20.20,396.90,16.29,19.90 +4.03841,0.00,18.100,0,0.5320,6.2290,90.70,3.0993,24,666.0,20.20,395.33,12.87,19.60 +3.56868,0.00,18.100,0,0.5800,6.4370,75.00,2.8965,24,666.0,20.20,393.37,14.36,23.20 +4.64689,0.00,18.100,0,0.6140,6.9800,67.60,2.5329,24,666.0,20.20,374.68,11.66,29.80 +8.05579,0.00,18.100,0,0.5840,5.4270,95.40,2.4298,24,666.0,20.20,352.58,18.14,13.80 +6.39312,0.00,18.100,0,0.5840,6.1620,97.40,2.2060,24,666.0,20.20,302.76,24.10,13.30 +4.87141,0.00,18.100,0,0.6140,6.4840,93.60,2.3053,24,666.0,20.20,396.21,18.68,16.70 +15.02340,0.00,18.100,0,0.6140,5.3040,97.30,2.1007,24,666.0,20.20,349.48,24.91,12.00 +10.23300,0.00,18.100,0,0.6140,6.1850,96.70,2.1705,24,666.0,20.20,379.70,18.03,14.60 +14.33370,0.00,18.100,0,0.6140,6.2290,88.00,1.9512,24,666.0,20.20,383.32,13.11,21.40 +5.82401,0.00,18.100,0,0.5320,6.2420,64.70,3.4242,24,666.0,20.20,396.90,10.74,23.00 +5.70818,0.00,18.100,0,0.5320,6.7500,74.90,3.3317,24,666.0,20.20,393.07,7.74,23.70 +5.73116,0.00,18.100,0,0.5320,7.0610,77.00,3.4106,24,666.0,20.20,395.28,7.01,25.00 +2.81838,0.00,18.100,0,0.5320,5.7620,40.30,4.0983,24,666.0,20.20,392.92,10.42,21.80 +2.37857,0.00,18.100,0,0.5830,5.8710,41.90,3.7240,24,666.0,20.20,370.73,13.34,20.60 +3.67367,0.00,18.100,0,0.5830,6.3120,51.90,3.9917,24,666.0,20.20,388.62,10.58,21.20 +5.69175,0.00,18.100,0,0.5830,6.1140,79.80,3.5459,24,666.0,20.20,392.68,14.98,19.10 
+4.83567,0.00,18.100,0,0.5830,5.9050,53.20,3.1523,24,666.0,20.20,388.22,11.45,20.60 +0.15086,0.00,27.740,0,0.6090,5.4540,92.70,1.8209,4,711.0,20.10,395.09,18.06,15.20 +0.18337,0.00,27.740,0,0.6090,5.4140,98.30,1.7554,4,711.0,20.10,344.05,23.97,7.00 +0.20746,0.00,27.740,0,0.6090,5.0930,98.00,1.8226,4,711.0,20.10,318.43,29.68,8.10 +0.10574,0.00,27.740,0,0.6090,5.9830,98.80,1.8681,4,711.0,20.10,390.11,18.07,13.60 +0.11132,0.00,27.740,0,0.6090,5.9830,83.50,2.1099,4,711.0,20.10,396.90,13.35,20.10 +0.17331,0.00,9.690,0,0.5850,5.7070,54.00,2.3817,6,391.0,19.20,396.90,12.01,21.80 +0.27957,0.00,9.690,0,0.5850,5.9260,42.60,2.3817,6,391.0,19.20,396.90,13.59,24.50 +0.17899,0.00,9.690,0,0.5850,5.6700,28.80,2.7986,6,391.0,19.20,393.29,17.60,23.10 +0.28960,0.00,9.690,0,0.5850,5.3900,72.90,2.7986,6,391.0,19.20,396.90,21.14,19.70 +0.26838,0.00,9.690,0,0.5850,5.7940,70.60,2.8927,6,391.0,19.20,396.90,14.10,18.30 +0.23912,0.00,9.690,0,0.5850,6.0190,65.30,2.4091,6,391.0,19.20,396.90,12.92,21.20 +0.17783,0.00,9.690,0,0.5850,5.5690,73.50,2.3999,6,391.0,19.20,395.77,15.10,17.50 +0.22438,0.00,9.690,0,0.5850,6.0270,79.70,2.4982,6,391.0,19.20,396.90,14.33,16.80 +0.06263,0.00,11.930,0,0.5730,6.5930,69.10,2.4786,1,273.0,21.00,391.99,9.67,22.40 +0.04527,0.00,11.930,0,0.5730,6.1200,76.70,2.2875,1,273.0,21.00,396.90,9.08,20.60 +0.06076,0.00,11.930,0,0.5730,6.9760,91.00,2.1675,1,273.0,21.00,396.90,5.64,23.90 +0.10959,0.00,11.930,0,0.5730,6.7940,89.30,2.3889,1,273.0,21.00,393.45,6.48,22.00 +0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88,11.90 \ No newline at end of file diff --git a/doc/source/example_scripts/gmqe_example.py b/doc/source/example_scripts/gmqe_example.py new file mode 100644 index 0000000000000000000000000000000000000000..0be8cf749de556bfb4b9d36badb1a8d4cbdafc9f --- /dev/null +++ b/doc/source/example_scripts/gmqe_example.py @@ -0,0 +1,41 @@ +from ost import io +from ost import seq +from ost import mol +from qmean import PSIPREDHandler +from qmean import 
DisCoContainer +from qmean import GMQE + +# Let's model crambin! +trg_sequence = seq.CreateSequence('crambin', 'TTCCPSIVARSNFNVCRLPGTPEAICATYTGCIIIPGATCPGDYAN') +trg_profile = io.LoadSequenceProfile('example_data/1crn.1.A.hhm') +dc = DisCoContainer.Load('example_data/1crn_dc.dat') + +# Setup PSIPREDHandler +data = dict() +data["seq"] = str(trg_sequence) +data["ss"] = 'CCCCCCHHHHHHHCCCCCCCCCHHHHHHCCCCEECCCCCCCCCCCC' +data["conf"] = '9888980234220123269989453431158088299999999989' +psipred_handler = PSIPREDHandler(data) + +# GMQE is calculated by using the structure with PDB ID 3szs as template +seqres_aln = io.LoadAlignment('example_data/1crn_3szs_aln.fasta') +tpl = io.LoadPDB('example_data/3szs.2.A.pdb') +tpl_profile = io.LoadSequenceProfile('example_data/3szs.2.A.hhm') + +# Assign secondary structure, the ss_agreement score is invalid otherwise +mol.alg.AssignSecStruct(tpl) + +# The template has no gaps, the atomseq alignment therefore matches the +# seqres alignment. Be aware that the AttachView function doesn't check +# whether sequence and attached view match. 
+aln = seqres_aln +aln.AttachView(1, tpl.CreateFullView()) + +# Generate target specific GMQE object and estimate GMQE +# For optimal performance you need to provide a context profile database +# (see documentation) +scorer = GMQE(trg_sequence, psipred_handler, disco = dc, profile = trg_profile) +gmqe_result = scorer.PredictGMQE(aln, seqres_aln, tpl_profile = tpl_profile) +print('Expected model quality when using 3szs as template: ', gmqe_result[0]) +print('Scores: ', gmqe_result[1]) + diff --git a/doc/source/example_scripts/regressor_training.py b/doc/source/example_scripts/regressor_training.py new file mode 100644 index 0000000000000000000000000000000000000000..0a1d9b542644f9898b2c636ee4c061770c08dc69 --- /dev/null +++ b/doc/source/example_scripts/regressor_training.py @@ -0,0 +1,41 @@ +import pandas as pd +import numpy as np +from qmean.mlp_regressor import TrainRegressor + +# Example training of multi-layer perceptron on a toy data set. +# The Boston House Price Dataset involves the prediction of a +# house price in thousands of dollars given details of the house +# and its neighborhood. +df = pd.read_csv('example_data/housing.csv') + +df_train = df.loc[:400] +df_test = df.loc[400:] + +# search for boston housing data set in the net... 
I'm sure +# you'll find a description of the single features +features = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', + 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'] + +# thats the median value of the houses we want to predict +target = 'MEDV' + +# define architecture and training parameters +topology = [len(features), 20, 20, 1] +loss_function = 'mean_squared_error' +optimizer = 'adam' +epochs = 100 +batch_size = 10 + +# train and predict +regressor = TrainRegressor(df_train, features, target,loss_function, + optimizer, topology, epochs, batch_size) +regressor_in = df_test[features].values +predictions = np.zeros(regressor_in.shape[0]) +for idx in range(regressor_in.shape[0]): + predictions[idx] = regressor.Predict(regressor_in[idx]) + +# estimate root mean square error +ref = df_test[target].values +rmse = np.sqrt(np.mean(np.square(predictions-df_test[target].values))) +print("testing rmse:", rmse) + diff --git a/doc/source/example_scripts/test_doctests.py b/doc/source/example_scripts/test_doctests.py index 76ea16e73273db740cdddfaf7b80e295df7e30f1..036bf0c32fe488359d2d3f702fd8ee69ec7d1cd4 100644 --- a/doc/source/example_scripts/test_doctests.py +++ b/doc/source/example_scripts/test_doctests.py @@ -512,6 +512,43 @@ class DocTests(unittest.TestCase): shutil.rmtree('shift_towards_cter') os.remove('alignment_comparison.png') + def testRegressorTraining(self): + return_code, sout, serr = self.runScript('regressor_training.py') + if return_code != 0: + if "ModuleNotFoundError" in serr: + print("Could not import keras/pandas, skip regressor training test") + return + self.assertEqual(return_code, 0) + testing_rmse = float(sout.splitlines()[-1].split()[-1]) + # there is some randomness in training but we should for sure have an + # rmse below 10 if everything went well... 
+ self.assertTrue(testing_rmse < 10.0) + + def testGMQE(self): + return_code, sout, serr = self.runScript('gmqe_example.py') + self.assertEqual(return_code, 0) + exp_gmqe = 0.73466 + exp_scores = {'dist_const': 0.8093481659889221, + 'reduced': 0.15201421082019806, + 'cb_packing': 0.003967251628637314, + 'torsion': -0.18035820126533508, + 'ss_agreement': 0.2522673010826111, + 'coverage': 1.0, 'seq_id': 30.434782028198242, + 'seq_sim': 0.39275363087654114, 'n_insertions': 0, + 'n_deletions': 0, 'seqres_length': 46, + 'seqres_coverage': 1.0, + 'profile_aln_score': 137.37026977539062, + 'avg_entropy': 1.3449710607528687} + sout = sout.splitlines() + self.assertEqual(len(sout), 2) + gmqe = float(sout[0].split()[-1]) + scores = eval(sout[1][7:]) + self.assertAlmostEqual(gmqe, exp_gmqe, 2) + for score_name in exp_scores.keys(): + self.assertTrue(score_name in scores) + self.assertAlmostEqual(exp_scores[score_name], + scores[score_name], 2) + if __name__ == "__main__": from ost import testutils testutils.RunTests() diff --git a/doc/source/gmqe.rst b/doc/source/gmqe.rst new file mode 100644 index 0000000000000000000000000000000000000000..78c509405a5c8a1dec9a8c9c0c77af829e3cddf3 --- /dev/null +++ b/doc/source/gmqe.rst @@ -0,0 +1,217 @@ +.. Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +.. Biozentrum - University of Basel +.. +.. Licensed under the Apache License, Version 2.0 (the "License"); +.. you may not use this file except in compliance with the License. +.. You may obtain a copy of the License at +.. +.. http://www.apache.org/licenses/LICENSE-2.0 +.. +.. Unless required by applicable law or agreed to in writing, software +.. distributed under the License is distributed on an "AS IS" BASIS, +.. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. See the License for the specific language governing permissions and +.. limitations under the License. 
+ +GMQE +================================================================================ + +.. currentmodule:: qmean + +The Global Model Quality Estimate (GMQE) aims to predict the expected quality of +a protein model before it is actually built. It bases the prediction on the +input data used for a common comparative modelling approach. This can be +features that are extracted from the target-template sequence alignment +(e.g. sequence identity etc.) or features derived from structural data +(e.g. statistical potentials/DisCo applied on a 'raw-model', i.e. a model where +the coordinates of the template have been transferred to an initial model +exhibiting the target sequence but without further processing such as loop +modelling etc.). +The features are combined with a :class:`NNScorer` which is trained to predict +the full-atomic lDDT score of the final model. + +.. literalinclude:: example_scripts/gmqe_example.py + +.. class:: GMQE(seqres, psipred, disco=None, profile=None, crf_file = None) + + Performs all preprocessing required to score any number of models for + a certain target sequence. + + :param seqres: Target sequence + :param psipred: PSIPRED prediction that must match *seqres* + :param disco: DisCo Container that must match *seqres*. This is + optional but decreases accuracy of quality predictions + if omitted. + :param profile: Profile that must match *seqres*. This is optional + but decreases accuracy of quality predictions if + omitted. The profile is used to calculate an + HMM-HMM alignment score which aims to reproduce + the score which is optimized in alignments + generated by HHblits. + :param crf_file: When calculating the HMM-HMM alignment score with + the *profile*, pseudo counts are assigned. + The *crf_file* contains a path to the file containing + context profiles in the HH-suite. 
Given a default + HH-suite installation at <PATH_TO_HHSUITE>, this file + can typically be found at: + <PATH_TO_HHSUITE>/data/context_data.crf + The pseudo counts are assigned with + :func:`ost.seq.alg.AddAAPseudoCounts`. If *crf_file* + is given, pseudo counts are added using the context + profiles. If not, they are added using a substitution + matrix. + + :type seqres: :class:`str` or :class:`ost.seq.SequenceHandle` + :type psipred: :class:`PSIPREDHandler` + :type disco: :class:`DisCoContainer` + :type profile: :class:`ost.seq.ProfileHandle` + :type crf_file: :class:`str` + + .. method:: GetScores(aln, seqres_aln, residue_numbers = None,\ + n_positions = None, ca_positions = None,\ + c_positions = None, cb_positions = None,\ + dssp_states = None, profile_aln_score = None,\ + tpl_profile = None) + + Calculates all scores it possibly can given the input of this function and + the input given at initialization of :class:`GMQE`. Alignment specific scores + are directly extracted from the ATOMSEQ alignment (*aln*: alignment that has + target sequence as first sequence and template sequence of amino acids that + are actually covered by structural data as second sequence) and SEQRES + alignment (*seqres_aln*: same as before, but second sequence is the SEQRES, + i.e. the target sequence of the template which is not necessarily fully + covered by structural data). The second sequence in *seqres_aln* must be + consistent with *tpl_profile*. + + There are two ways to pass structural data for scoring: + + * manually pass the information by providing *residue_numbers*, + *n_positions*, *ca_positions*, *c_positions*, *cb_positions* and + *dssp_states* + * :class:`ost.mol.EntityView` attached to second sequence (the template + sequence) in aln (see: :meth:`ost.seq.AlignmentHandle.AttachView`). + To get meaningful results for the ss_agreement score (described below), + secondary structure must be assigned (e.g. by executing + :meth:`ost.mol.alg.AssignSecStruct`).
+ + If you pass the stuff manually (option one), any :class:`ost.mol.EntityView` + attached to *aln* is ignored. + + + Scores that are calculated: + + * seq_id: Fraction of conserved amino acids in *aln* + * seq_sim: Normalized substitution score in *aln* given BLOSUM62 + * n_insertions: The number of insertions in *aln* + * n_deletions: The number of deletions in *aln* + * seqres_length: The length of the target seqres + * coverage: Fraction of amino acid in target sequence of *aln* that are covered by an amino acid in the template seq + * seqres_coverage: Fraction of amino acid in target sequence of *seqres_aln* that are covered by an amino acid in the template seq + * profile_aln_score: Only set if *profile_aln_score* is given or if you provide a target profile at :class:`GMQE` initialization as well as *tpl_profile*. If you provided *profile_aln_score*, that's directly set. Otherwise it executes :meth:`ost.seq.alg.HMMScore` using *seqres_aln*, *tpl_profile* and the target profile given at :class:`GMQE` initialization + * avg_entropy: Only set if you provide a target profile at :class:`GMQE` initialization. Average entropy of the columns in the target profile + * dist_const: Only set if you provide a :class:`DisCoContainer` at :class:`GMQE` initialization. Represents the DisCo score calculated on the raw-model + * reduced: Reduced score on raw-model (see: :class:`ReducedPotential`) + * cb_packing: CBPacking score on raw-model (see: :class:`CBPackingPotential`) + * torsion: Torsion score on raw-model (see: :class:`TorsionPotential`) + * ss_agreement: SSAgreementScore on raw model (see :class:`SSAgreement`) + + :param aln: The ATOMSEQ alignment with target sequence as first + sequence and the sequence of the template you want to + use for modelling as second sequence. The template + sequence must only contain the amino acid for which we + have structural coverage. 
One way of passing structural + data to the function is to attach a + :class:`ost.mol.EntityView` to the template sequence. + :param seqres_aln: The SEQRES alignment with target sequence as first + sequence and the sequence of the template you want to + use for modelling as second sequence. + :param residue_numbers: Required if you pass structural data using the + *residue_numbers*, *n_positions*, *ca_positions*, + *c_positions*, *cb_positions* and *dssp_states* + parameters. Maps the elements of the passed lists + to the target sequence with 1-based indexing scheme + (first residue in target sequence has index 1). + :param n_positions: Contains the nitrogen positions of the template + residues which are mapped to the target sequence + using *residue_numbers*. + :param ca_positions: Contains the CA carbon positions of the template + residues which are mapped to the target sequence + using *residue_numbers*. + :param c_positions: Contains the backbone carbon positions of the template + residues which are mapped to the target sequence + using *residue_numbers*. + :param cb_positions: Contains the cb carbon positions of the template + residues which are mapped to the target sequence + using *residue_numbers*. In case of glycine, + construct a fake CB position using + :meth:`ost.mol.alg.CBetaPosition` + :param dssp_states: DSSP states of the template residues which are + mapped to the target sequence using *residue_numbers*. + Valid states: ['H', 'E', 'C', 'G', 'B', 'S', 'T', 'I']. + If you assigned the secondary structure to an + :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`, + for example with :meth:`ost.mol.alg.AssignSecStruct`, you + can extract this state for every + :class:`ost.mol.ResidueHandle`/:class:`ost.mol.ResidueView` + r by simply using str(r.GetSecStructure()) + :param profile_aln_score: If set, this value is directly set as profile_aln_score in + the return dict without calculating anything.
+ :param tpl_profile: Required to calculate profile_aln_score, must match second + sequence in *seqres_aln*. + + :type aln: :class:`ost.seq.AlignmentHandle` + :type seqres_aln: :class:`ost.seq.AlignmentHandle` + :type residue_numbers: :class:`list` of :class:`int` + :type n_positions: :class:`ost.geom.Vec3List` + :type ca_positions: :class:`ost.geom.Vec3List` + :type c_positions: :class:`ost.geom.Vec3List` + :type cb_positions: :class:`ost.geom.Vec3List` + :type dssp_states: :class:`list` of :class:`str` + :type profile_aln_score: :class:`float` + :type tpl_profile: :class:`ost.seq.ProfileHandle` + + :returns: A :class:`dict` with all scores it can calculate + + + .. method:: PredictGMQE(aln, seqres_aln, residue_numbers = None, n_positions = None, \ + ca_positions = None, c_positions = None, cb_positions = None, \ + dssp_states = None, profile_aln_score = None, tpl_profile = None, \ + QMEANDisCo=None) + + Invokes :meth:`GMQE.GetScores` and returns one final GMQE value using an internal + :class:`NNScorer`. + + :param aln: Passed to :meth:`GMQE.GetScores` + :param seqres_aln: Passed to :meth:`GMQE.GetScores` + :param residue_numbers: Passed to :meth:`GMQE.GetScores` + :param n_positions: Passed to :meth:`GMQE.GetScores` + :param ca_positions: Passed to :meth:`GMQE.GetScores` + :param c_positions: Passed to :meth:`GMQE.GetScores` + :param cb_positions: Passed to :meth:`GMQE.GetScores` + :param dssp_states: Passed to :meth:`GMQE.GetScores` + :param profile_aln_score: Passed to :meth:`GMQE.GetScores` + :param tpl_profile: Passed to :meth:`GMQE.GetScores` + :param QMEANDisCo: If you already built a model, you can further increase the + accuracy of the lDDT prediction by passing its global + QMEANDisCo score. That's the avg_local_score property of + the :class:`QMEANScorer` which must be set up with DisCo + and all that stuff.
+ + :type aln: :class:`ost.seq.AlignmentHandle` + :type seqres_aln: :class:`ost.seq.AlignmentHandle` + :type residue_numbers: :class:`list` of :class:`int` + :type n_positions: :class:`ost.geom.Vec3List` + :type ca_positions: :class:`ost.geom.Vec3List` + :type c_positions: :class:`ost.geom.Vec3List` + :type cb_positions: :class:`ost.geom.Vec3List` + :type dssp_states: :class:`list` of :class:`str` + :type profile_aln_score: :class:`float` + :type tpl_profile: :class:`ost.seq.ProfileHandle` + :type QMEANDisCo: :class:`float` + + + + :returns: A :class:`tuple` with the predicted lDDT (GMQE) + as first element and a :class:`dict` containing + the underlying scores as second element. + diff --git a/doc/source/index.rst b/doc/source/index.rst index adb2721effbb19169b7056d045e61ae7cf602cb6..f4d4e25991f6a533362a3fb518a0a221f7fcb544 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -32,4 +32,5 @@ Contents: Combining Scores <score_combination> Membrane Detection <membrane> Distance Constraints (DisCo) <disco> + Global Model Quality Estimate (GMQE) <gmqe> License <license> diff --git a/doc/source/score_combination.rst b/doc/source/score_combination.rst index d13a46424738a43a211c2060826bd35d7015e44f..ce3ba037af6d1078cb6b37eec32c5561cb708928 100644 --- a/doc/source/score_combination.rst +++ b/doc/source/score_combination.rst @@ -201,20 +201,162 @@ usage of the ost table class. Multi-Layer Perceptron scoring -------------------------------------------------------------------------------- -To be independent from any machine learning library, QMEAN comes with an own -implementation of a multi-layer perceptron to evaluate a fully connected -feed-forward neural network, the :class:`Regressor`. The idea is to use -whatever machine learning library to train such a network and then dump -it to disk in a way that it can be read by the :class:`Regressor`. - -Right now, theres no documentation on :class:`Regressor`. 
The interested -user can figure out the data format and functionality by studying the file -mlp_regressor.py. - -Similar problems as for the linear combination apply. There's no guarantee -that all input features are valid. The idea is to again train several -regressors and let :class:`NNScorer` figure out what regressor we need. -Again, code is the documentation. +To be independent from any machine learning library, QMEAN comes with a +lightweight multi-layer perceptron implementation: the :class:`Regressor`. +The idea is to use whatever machine learning library to train such a network and +transform it into a :class:`Regressor`. For the simplest training cases +:func:`TrainRegressor` should be sufficient. + +.. class:: Regressor(n_input_features, mean=None, std=None) + + Lightweight implementation of a fully connected multi-layer perceptron with + in-built data normalization. Before traversing the layers, data is normalized + as (input-mean)/std. + + :param n_input_features: The number of input features to expect + :param mean: Mean value for every input feature (*n_input_features* values) + for data normalization. All values set to 0.0 if not provided. + :param std: Standard deviation for every input feature (*n_input_features* + values) for data normalization. All values set to 1.0 if not + provided. + + :type n_input_features: :class:`int` + :type mean: :class:`list` of :class:`float` or :class:`numpy.ndarray` + :type std: :class:`list` of :class:`float` or :class:`numpy.ndarray` + + + .. method:: AddLayer(weights, activation_function, bias=None) + + Adds another layer *i*. Given the weight matrix M, the new layer + is derived from a simple matrix multiplication M*layer[*i*-1], where + the input layer is considered to be layer 0. Postprocessing + consists of adding bias values and performing a final activation. + + :param weights: Weight matrix with shape (m,n).
n is the number of + elements of the previous layer and m determines the + number of elements in the added layer. + :param activation_function: 0: no activation, 1: ReLU (Rectified Linear + Unit) activation + :param bias: Bias that is applied to layer before executing the + activation function. Size must be consistent with + number of rows (m) in *weights*. All values set to 0.0 if + not provided. + :type weights: :class:`numpy.ndarray` + :type activation_function: :class:`int` + :type bias: :class:`list` of :class:`float` or :class:`numpy.ndarray` + + .. method:: Save(filepath) + + Dumps regressor in binary format + + :param filepath: Path to dump regressor + :type filepath: :class:`str` + + .. method:: Load(filepath) + + Static method to load previously dumped regressor + + :param filepath: Path to dumped regressor + :type filepath: :class:`str` + + .. method:: Predict(input) + + Normalizes input, traverses all layers and returns first element of last + layer: the prediction + + :param input: Input of size *n_input_features* to feed into network + :type input: :class:`list` of :class:`float` or :class:`numpy.ndarray` + + +.. function:: TrainRegressor(data_frame, features, target, loss_function,\ + optimizer, topology, epochs, batch_size,\ + randomize=False) + + Trains a :class:`Regressor` using keras given a pandas dataframe as input. + This obviously adds pandas and keras as dependencies. Check the example script! + + :param data_frame: Data with a column for each feature as well as the target + :param features: Describes input features, features must be present in + *data_frame* + :param target: The target, must be present in *data_frame* + :param loss_function: Any function that keras understands. E.g. + "mean_squared_error" or "mean_absolute_error" + :param optimizer: Any optimizer that keras understands. E.g.
+ "sgd", "rmsprop", "adagrad", "adadelta", "adam" + :param topology: Every element describes number of nodes of a layer + with first layer being the input layer that must have + the same size as *features*. The last layer is the + output layer that must be of size 1. + :param epochs: Number of training epochs + :param batch_size: Training batch size + :param randomize: Whether to randomize the order of training data prior to + training + + :type data_frame: :class:`pandas.DataFrame` + :type features: :class:`list` of :class:`str` + :type target: :class:`str` + :type loss_function: :class:`str` + :type optimizer: :class:`str` + :type topology: :class:`list` of :class:`int` + :type epochs: :class:`int` + :type batch_size: :class:`int` + :type randomize: :class:`bool` + +.. literalinclude:: example_scripts/regressor_training.py + + +The :class:`Regressor` requires a fixed set of input scores and cannot +flexibly adapt if certain features are missing. Examples include scores for the +first/last two residues in the :class:`qmean.TorsionPotential` due to missing +dihedral angles. The :class:`NNScorer` offers a naive solution and selects the +appropriate :class:`Regressor` in a set of alternatives that cover the possible +combinations of input scores. + +.. class:: NNScorer(path) + + Organizes multiple :class:`Regressor` which cover alternative combinations of + input scores and selects the right one for scoring. The object must be + constructed manually and is loaded from disk. + + :param path: Directory containing a file 'feature_groups.json' and several + stored :class:`Regressor` with naming 'nn_<idx>.dat'. The first + is a json file containing a list of items. Each item at location + idx is a list of score names that represent the input for + the :class:`Regressor` named 'nn_<idx>.dat'. + :type path: :class:`str` + + + ..
method:: GetScore(score_dict, olc=None) + + Extracts all valid scores from *score_dict*, selects the right internal + :class:`Regressor` and returns a score. + + :param score_dict: Keys relate to the internal feature group list. All + values that are neither None nor NaN are selected and the + internal list of feature groups is iterated until + a group can be identified for which all values are + valid. Ordering of the internal feature groups therefore + matters. The values are ordered as defined in the found + feature group and passed to the corresponding + :class:`Regressor`. The function returns 0.0 if no + feature group can be identified. + :param olc: A common use case for the :class:`NNScorer` is to + estimate residue specific scores. When defining *olc*, + a one-hot array with 20 elements is added in front + of the values that are passed to the selected + :class:`Regressor`. All elements are zero, the location + of *olc* in the string "ACDEFGHIKLMNPQRSTVWY" is set + to one. The function returns 0.0 if *olc* is not found in + the specified string.
+ :type score_dict: :class:`dict` + :type olc: :class:`str` + + + + + + + diff --git a/pymod/CMakeLists.txt b/pymod/CMakeLists.txt index a73250e69ed276974f89cc330eb880c56a285e9b..719d04f2e2b633b2610a59755366901f9d613a19 100644 --- a/pymod/CMakeLists.txt +++ b/pymod/CMakeLists.txt @@ -9,6 +9,7 @@ set(QMEAN_MODULES score_calculator.py reference_set.py mlp_regressor.py + gmqe.py ) set(QMEAN_PYMOD @@ -24,6 +25,8 @@ export_base.cc export_spherical_smoother.cc export_disco.cc export_clash.cc +export_gmqe.cc +export_extract_data_helper.cc wrap_qmean.cc ) diff --git a/pymod/__init__.py b/pymod/__init__.py index f6289d733ce302c5c74fb0d672cf2029cc2f98dc..0774388637b36ca9517dee256e6ee76bb304a8bc 100644 --- a/pymod/__init__.py +++ b/pymod/__init__.py @@ -21,3 +21,4 @@ from qmean.score_calculator import GlobalScorer from qmean.mqa_result_membrane import AssessMembraneModelQuality from qmean.mqa_result_membrane import GenerateEnergyGapPlot from qmean.mqa_result import QMEANScorer +from qmean.gmqe import GMQE diff --git a/pymod/conf.py b/pymod/conf.py index 8e9e5dd1aa0e128d936c3f74dc00ffcce7d9dd3f..6904f2293088248a38d7c753e8948f1c059c81e1 100644 --- a/pymod/conf.py +++ b/pymod/conf.py @@ -38,6 +38,7 @@ class SwissmodelSettings: self.local_potentials = os.path.join(POTENTIAL_DIR, 'soluble_local_potentials.dat') self.global_potentials = os.path.join(POTENTIAL_DIR, 'soluble_global_potentials.dat') self.local_scorer = os.path.join(SCORER_DIR, 'local_nn_scorer') + self.gmqe_scorer = os.path.join(SCORER_DIR, 'gmqe_scorer') self.linear_local_scorer = os.path.join(SCORER_DIR, 'promod_local_scorer.dat') # deprecated self.global_scorer = os.path.join(SCORER_DIR, 'promod_global_scorer.dat') self.reference_tab = os.path.join(REFERENCE_DIR, 'reference_tab_promod_scorer.txt') diff --git a/pymod/export_clash.cc b/pymod/export_clash.cc index 99055724bda61b3778a4e2a50c18afd9b51c96d4..926f124af0d88ce2a6b79291ad6d62ede159f312 100644 --- a/pymod/export_clash.cc +++ b/pymod/export_clash.cc @@ -22,16 
+22,6 @@ using namespace qmean; using namespace boost::python; - - -namespace { -list WrapGetClashScores(ost::mol::EntityView& target, ost::mol::EntityView& env){ - std::vector<Real> scores = GetClashScores(target, env); - list ret_list=VecToList<Real>(scores); - return ret_list; -} -} - void export_clash() { def("GetClashScores", &GetClashScores, (arg("target"), arg("env"))); } diff --git a/pymod/export_extract_data_helper.cc b/pymod/export_extract_data_helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..e6f397cabe5b1ab4bb3f7fb019905a11979f0d88 --- /dev/null +++ b/pymod/export_extract_data_helper.cc @@ -0,0 +1,222 @@ +//------------------------------------------------------------------------------ +// This file is part of the OpenStructure project <www.openstructure.org> +// +// Copyright (C) 2008-2020 by the OpenStructure authors +// +// This library is free software; you can redistribute it and/or modify it under +// the terms of the GNU Lesser General Public License as published by the Free +// Software Foundation; either version 3.0 of the License, or (at your option) +// any later version. +// This library is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +// details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with this library; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +//------------------------------------------------------------------------------ + + +#include <boost/python.hpp> +#include <ost/db/linear_indexer.hh> +#include <ost/db/binary_container.hh> +#include <qmean/extract_data_helper.hh> + +using namespace boost::python; +using namespace ost::db; + + +namespace{ + + +template<typename T> +void VecToList(const std::vector<T>& v, list& l) { + l = list(); + for(typename std::vector<T>::const_iterator i = v.begin(); + i != v.end(); ++i) { + l.append(*i); + } +} + +///////////////////////////// +// Wrapper for DisCo Stuff // +///////////////////////////// + +// helper struct to reduce number of input parameters +struct DisCoDataContainers { + + DisCoDataContainers(const String& indexer_path, + const String& seqres_container_path, + const String& atomseq_container_path, + const String& position_container_path) { + indexer = LinearIndexer::Load(indexer_path); + seqres_container = LinearCharacterContainer::Load(seqres_container_path); + atomseq_container = LinearCharacterContainer::Load(atomseq_container_path); + position_container = LinearPositionContainer::Load(position_container_path); + } + + LinearIndexerPtr indexer; + LinearCharacterContainerPtr seqres_container; + LinearCharacterContainerPtr atomseq_container; + LinearPositionContainerPtr position_container; +}; + +// helper struct to wrap output + +struct ExtractedDisCoData{ + std::vector<int> residue_numbers; + geom::Vec3List ca_positions; +}; + +typedef boost::shared_ptr<ExtractedDisCoData> ExtractedDisCoDataPtr; +list WrapExtractedDisCoDataGetResNums(ExtractedDisCoDataPtr ptr) { + list residue_numbers; + VecToList(ptr->residue_numbers, residue_numbers); + return residue_numbers; +} + +ExtractedDisCoDataPtr WrapExtractTemplateDataDisCo(const 
String& entry_name, + const String& chain_name, + const ost::seq::AlignmentHandle& aln, + DisCoDataContainers& data_containers) { + ExtractedDisCoDataPtr ptr(new ExtractedDisCoData); + qmean::ExtractTemplateDataDisCo(entry_name, chain_name, aln, + data_containers.indexer, + data_containers.seqres_container, + data_containers.atomseq_container, + data_containers.position_container, + ptr->residue_numbers, + ptr->ca_positions); + return ptr; +} + +//////////////////////////// +// Wrapper for GMQE Stuff // +//////////////////////////// + +// helper struct to reduce number of input parameters +struct GMQEDataContainers { + + GMQEDataContainers(const String& indexer_path, + const String& seqres_container_path, + const String& atomseq_container_path, + const String& dssp_container_path, + const String& n_position_container_path, + const String& ca_position_container_path, + const String& c_position_container_path, + const String& cb_position_container_path) { + indexer = LinearIndexer::Load(indexer_path); + seqres_container = LinearCharacterContainer::Load(seqres_container_path); + atomseq_container = LinearCharacterContainer::Load(atomseq_container_path); + dssp_container = LinearCharacterContainer::Load(dssp_container_path); + n_position_container = LinearPositionContainer::Load(n_position_container_path); + ca_position_container = LinearPositionContainer::Load(ca_position_container_path); + c_position_container = LinearPositionContainer::Load(c_position_container_path); + cb_position_container = LinearPositionContainer::Load(cb_position_container_path); + } + + LinearIndexerPtr indexer; + LinearCharacterContainerPtr seqres_container; + LinearCharacterContainerPtr atomseq_container; + LinearCharacterContainerPtr dssp_container; + LinearPositionContainerPtr n_position_container; + LinearPositionContainerPtr ca_position_container; + LinearPositionContainerPtr c_position_container; + LinearPositionContainerPtr cb_position_container; +}; + +// helper struct to wrap output 
+struct ExtractedGMQEData{ + std::vector<int> residue_numbers; + String dssp; + geom::Vec3List n_positions; + geom::Vec3List ca_positions; + geom::Vec3List c_positions; + geom::Vec3List cb_positions; +}; +typedef boost::shared_ptr<ExtractedGMQEData> ExtractedGMQEDataPtr; +list WrapExtractedGMQEDataGetResNums(ExtractedGMQEDataPtr ptr) { + list residue_numbers; + VecToList(ptr->residue_numbers, residue_numbers); + return residue_numbers; +} + +ExtractedGMQEDataPtr WrapExtractTemplateDataGMQE(const String& entry_name, + const String& chain_name, + const ost::seq::AlignmentHandle& aln, + GMQEDataContainers& data_containers) { + ExtractedGMQEDataPtr ptr(new ExtractedGMQEData); + qmean::ExtractTemplateDataGMQE(entry_name, chain_name, aln, + data_containers.indexer, + data_containers.seqres_container, + data_containers.atomseq_container, + data_containers.dssp_container, + data_containers.n_position_container, + data_containers.ca_position_container, + data_containers.c_position_container, + data_containers.cb_position_container, + ptr->residue_numbers, ptr->dssp, + ptr->n_positions, ptr->ca_positions, + ptr->c_positions, ptr->cb_positions); + return ptr; +} + +} + + +void export_extract_data_helper() { + + class_<DisCoDataContainers>("DisCoDataContainers", init<const String&, + const String&, + const String&, + const String&>()) + .def_readonly("indexer", &DisCoDataContainers::indexer) + .def_readonly("seqres_container", &DisCoDataContainers::seqres_container) + .def_readonly("atomseq_container", &DisCoDataContainers::atomseq_container) + .def_readonly("ca_position_container", &DisCoDataContainers::position_container) + ; + + class_<ExtractedDisCoData, ExtractedDisCoDataPtr>("ExtractedDisCoData", no_init) + .add_property("residue_numbers", &WrapExtractedDisCoDataGetResNums) + .def_readonly("ca_positions", &ExtractedDisCoData::ca_positions) + ; + + def("ExtractTemplateDataDisCo", &WrapExtractTemplateDataDisCo, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + 
arg("data_containers"))); + + class_<GMQEDataContainers>("GMQEDataContainers", init<const String&, + const String&, + const String&, + const String&, + const String&, + const String&, + const String&, + const String&>()) + .def_readonly("indexer", &GMQEDataContainers::indexer) + .def_readonly("seqres_container", &GMQEDataContainers::seqres_container) + .def_readonly("atomseq_container", &GMQEDataContainers::atomseq_container) + .def_readonly("dssp_container", &GMQEDataContainers::dssp_container) + .def_readonly("n_position_container", &GMQEDataContainers::n_position_container) + .def_readonly("ca_position_container", &GMQEDataContainers::ca_position_container) + .def_readonly("c_position_container", &GMQEDataContainers::c_position_container) + .def_readonly("cb_position_container", &GMQEDataContainers::cb_position_container) + ; + + class_<ExtractedGMQEData, ExtractedGMQEDataPtr>("ExtractedGMQEData", no_init) + .add_property("residue_numbers", &WrapExtractedGMQEDataGetResNums) + .def_readonly("dssp", &ExtractedGMQEData::dssp) + .def_readonly("n_positions", &ExtractedGMQEData::n_positions) + .def_readonly("ca_positions", &ExtractedGMQEData::ca_positions) + .def_readonly("c_positions", &ExtractedGMQEData::c_positions) + .def_readonly("cb_positions", &ExtractedGMQEData::cb_positions) + ; + + def("ExtractTemplateDataGMQE", &WrapExtractTemplateDataGMQE, (arg("entry_name"), + arg("chain_name"), + arg("aln"), + arg("data_containers"))); +} + diff --git a/pymod/export_gmqe.cc b/pymod/export_gmqe.cc new file mode 100644 index 0000000000000000000000000000000000000000..48785d467296cf7a1e4d55300594046637163c69 --- /dev/null +++ b/pymod/export_gmqe.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <boost/python.hpp> +#include <boost/python/register_ptr_to_python.hpp> + +#include <qmean/gmqe_scores.hh> +#include <qmean/trg_tpl_similarity.hh> +#include <qmean/vec_list_magic.hh> + + +using namespace qmean; +using namespace boost::python; + + +namespace{ + +GMQEScoreCalculatorPtr WrapInit(PotentialContainerPtr potentials, + DisCoContainerPtr disco_container, + const ost::seq::SequenceHandle& seqres, + const ost::seq::SequenceHandle& psipred_pred, + const boost::python::list& psipred_cfi) { + std::vector<int> v_psipred_cfi = ListToVec<int>(psipred_cfi); + GMQEScoreCalculatorPtr p(new GMQEScoreCalculator(potentials, disco_container, + seqres, psipred_pred, + v_psipred_cfi)); + return p; +} + +boost::python::dict WrapEval(const GMQEScoreCalculatorPtr p, + const geom::Vec3List& n_positions, + const geom::Vec3List& ca_positions, + const geom::Vec3List& c_positions, + const geom::Vec3List& cb_positions, + const ost::seq::SequenceHandle& dssp_states, + const boost::python::list& residue_numbers) { + std::vector<int> v_residue_numbers = ListToVec<int>(residue_numbers); + Real dist_const, reduced, cb_packing, torsion, ss_agreement, coverage; + p->Eval(n_positions, ca_positions, c_positions, cb_positions, + dssp_states, v_residue_numbers, dist_const, reduced, cb_packing, + torsion, ss_agreement, coverage); + boost::python::dict return_dict; + return_dict["dist_const"] = dist_const; + return_dict["reduced"] = reduced; + return_dict["cb_packing"] = cb_packing; + return_dict["torsion"] = torsion; + return_dict["ss_agreement"] = ss_agreement; + 
return_dict["coverage"] = coverage; + + return return_dict; +} + +TrgTplSimilarityPtr WrapSimilarityInit(const String& seqres, + const boost::python::list& residue_numbers, + const geom::Vec3List& ca_pos, + Real distance_threshold) { + std::vector<int> v_residue_numbers = ListToVec<int>(residue_numbers); + TrgTplSimilarityPtr p(new TrgTplSimilarity(seqres, v_residue_numbers, + ca_pos, distance_threshold)); + return p; +} + +Real WrapGetSimilarity(TrgTplSimilarityPtr p, + const boost::python::list& residue_numbers, + const geom::Vec3List& ca_pos) { + std::vector<int> v_residue_numbers = ListToVec<int>(residue_numbers); + return p->GetSimilarity(v_residue_numbers, ca_pos); +} + +} + + +void export_gmqe() { + + class_<GMQEScoreCalculator>("GMQEScoreCalculator", no_init) + .def("__init__", make_constructor(&WrapInit)) + .def("Eval", &WrapEval, (arg("n_positions"), arg("ca_positions"), + arg("c_positions"), arg("cb_positions"), + arg("dssp_states"), arg("residue_numbers"))) + ; + + class_<TrgTplSimilarity>("TrgTplSimilarity", no_init) + .def("__init__", make_constructor(&WrapSimilarityInit)) + .def("GetSimilarity", &WrapGetSimilarity) + ; +} diff --git a/pymod/gmqe.py b/pymod/gmqe.py new file mode 100644 index 0000000000000000000000000000000000000000..0e8f476e899a31e1f8d4067c0bf6210bfaff22ae --- /dev/null +++ b/pymod/gmqe.py @@ -0,0 +1,233 @@ +# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +# Biozentrum - University of Basel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from ost import seq
from ost import mol
from ost import geom
from qmean import DisCoContainer
from qmean import conf
from qmean import GMQEScoreCalculator
from qmean import PotentialContainer
from qmean.score_calculator import NNScorer

class GMQE:
    """Collects per-template scores for a target sequence (SEQRES) and feeds
    them into a neural network scorer to predict a GMQE value.

    The object is set up once per target sequence. Scores for a template are
    obtained with :meth:`GetScores`, the final prediction with
    :meth:`PredictGMQE`.
    """

    def __init__(self, seqres, psipred, disco=None, profile=None,
                 crf_file = None):
        """
        :param seqres: Target sequence, either as :class:`str` or as sequence
                       object. Must not contain gaps.
        :param psipred: Object providing the attributes *seq*, *ss* and *conf*
                        (presumably a qmean PSIPREDHandler, as the error
                        message suggests - confirm against callers). *seq*
                        must match *seqres*.
        :param disco: Optional DisCoContainer whose SEQRES must match
                      *seqres*. If None, an empty container is created and
                      the "dist_const" score is dropped in :meth:`GetScores`.
        :param profile: Optional profile providing *sequence* and
                        *avg_entropy*; its sequence must match *seqres*.
        :param crf_file: Optional path passed to
                         ``seq.alg.ContextProfileDB.FromCRF`` when pseudo
                         counts are added to *profile*.

        :raises: :class:`RuntimeError` if *seqres* contains gaps or if
                 *psipred*, *disco* or *profile* are inconsistent with it.
        """

        if isinstance(seqres, str):
            self.seqres = seq.CreateSequence('seqres', seqres)
        else:
            self.seqres = seqres
        if '-' in str(self.seqres):
            raise RuntimeError("SEQRES must contain no gaps")

        if str(psipred.seq) != str(self.seqres):
            raise RuntimeError('PSIPREDHandler is inconsistent with SEQRES')

        # we're not directly using the QMEAN psipred scoring capabilities
        # but rather hack something together... lets store the actual psipred
        # prediction as a sequence that can then be fed into the
        # GMQEScoreCalculator
        self.psipred_pred = seq.CreateSequence('psipred', ''.join(psipred.ss))
        self.psipred_cfi = [int(cfi) for cfi in psipred.conf]

        # make DisCoContainer optional... We still need to feed "some"
        # DisCoContainer into the GMQEScoreCalculator... If it's only an empty
        # container, the resulting score will be zero and useless. We therefore
        # keep track of that situation with the fake_disco variable.
        if disco is None:
            self.disco = DisCoContainer(self.seqres)
            self.fake_disco = True
        else:
            if str(disco.GetSeqres()) != str(self.seqres):
                raise RuntimeError('DisCoContainer is inconsistent with SEQRES')
            self.disco = disco
            self.fake_disco = False

        # lazily filled by the profile_with_pseudo_counts property
        self._profile_with_pseudo_counts = None
        # NOTE(review): truthiness test, GetScores checks "is not None" -
        # a falsy but non-None profile is treated as "no profile" here
        if profile:
            if str(profile.sequence) != str(self.seqres):
                raise RuntimeError('Profile is inconsistent with SEQRES')
            self.profile = profile
            self.profile_avg_entropy = profile.avg_entropy
        else:
            self.profile = None
            self.profile_avg_entropy = None

        settings = conf.SwissmodelSettings()
        self.potentials = PotentialContainer.Load(settings.global_potentials)
        self.nn_scorer = NNScorer(settings.gmqe_scorer)
        self.crf_file = crf_file
        self.score_calculator = GMQEScoreCalculator(self.potentials,
                                                    self.disco,
                                                    self.seqres,
                                                    self.psipred_pred,
                                                    self.psipred_cfi)


    @property
    def profile_with_pseudo_counts(self):
        """Copy of the profile given at initialization with transition, amino
        acid and null pseudo counts added. Computed at first access and
        cached. None if no profile has been provided.
        """
        if self._profile_with_pseudo_counts is None:
            if self.profile is None:
                return None
            # enforce deep copy by using Extract function
            self._profile_with_pseudo_counts = self.profile.Extract(0,
                                             len(str(self.profile.sequence)))
            seq.alg.AddTransitionPseudoCounts(self._profile_with_pseudo_counts)
            if self.crf_file:
                crf_lib = seq.alg.ContextProfileDB.FromCRF(self.crf_file)
                seq.alg.AddAAPseudoCounts(self._profile_with_pseudo_counts,
                                          crf_lib)
            else:
                seq.alg.AddAAPseudoCounts(self._profile_with_pseudo_counts)
            seq.alg.AddNullPseudoCounts(self._profile_with_pseudo_counts)

        return self._profile_with_pseudo_counts


    def GetScores(self, aln, seqres_aln, residue_numbers = None,
                  n_positions = None, ca_positions = None,
                  c_positions = None, cb_positions = None, dssp_states = None,
                  profile_aln_score = None, tpl_profile = None):
        """Computes all scores for one target-template alignment.

        :param aln: Alignment with exactly two sequences, the first must match
                    the SEQRES given at initialization when gaps are removed.
                    If structural data must be extracted (see below), an
                    EntityView must be attached to the second sequence.
        :param seqres_aln: Alignment with exactly two sequences, the first
                           must match the SEQRES when gaps are removed. Used
                           for "seqres_coverage" and the profile alignment
                           score.
        :param residue_numbers: Residue numbers of the template positions
        :param n_positions: N positions
        :param ca_positions: CA positions
        :param c_positions: C positions
        :param cb_positions: CB positions
        :param dssp_states: Secondary structure states, string or list of
                            characters
        :param profile_aln_score: Precomputed profile alignment score. If
                                  given, it is used as is.
        :param tpl_profile: Template profile. Only used if
                            *profile_aln_score* is None and a profile has
                            been given at initialization.

        If ANY of *residue_numbers*, the four position lists or *dssp_states*
        is None, ALL of them are extracted from the view attached to the
        second sequence of *aln* - values that were passed in are discarded.

        :returns: :class:`dict` with the scores returned by the
                  GMQEScoreCalculator ("dist_const" removed if no
                  DisCoContainer has been given at initialization) plus
                  "seq_id", "seq_sim", "n_insertions", "n_deletions",
                  "seqres_length", "seqres_coverage" and, if available,
                  "profile_aln_score" and "avg_entropy".

        :raises: :class:`RuntimeError` if the alignments are inconsistent
                 with the SEQRES or if structural data must be extracted but
                 no view is attached.
        """

        # expect exactly two sequences in aln
        if aln.GetCount() != 2:
            raise RuntimeError("Expect exactly two sequences in aln")

        # expect first sequence to match seqres
        if str(aln.GetSequence(0).GetGaplessString()) != str(self.seqres):
            raise RuntimeError("Expect first seq in aln to match SEQRES")

        # expect exactly two sequences in seqres_aln
        if seqres_aln.GetCount() != 2:
            raise RuntimeError("Expect exactly two sequences in seqres_aln")

        # expect first sequence to match seqres
        if str(seqres_aln.GetSequence(0).GetGaplessString()) != str(self.seqres):
            raise RuntimeError("Expect first seq in seqres_aln to match SEQRES")

        if residue_numbers is None or n_positions is None or \
           ca_positions is None or c_positions is None or\
           cb_positions is None or dssp_states is None:
            # at least one of the positions is None, expect view to be attached
            # to sequence 1
            s = aln.GetSequence(1)
            if not s.HasAttachedView():
                raise RuntimeError("Not all positions and dssp states provided, "\
                                   "need to extract them from attached " +\
                                   "EntityView. " +\
                                   "But nothing is attached to second sequence")

            n_positions = geom.Vec3List()
            ca_positions = geom.Vec3List()
            c_positions = geom.Vec3List()
            cb_positions = geom.Vec3List()
            dssp_states = list()
            residue_numbers = list()

            # 1-based index into the SEQRES, advanced for every non-gap
            # character of the first sequence
            current_rnum = 0
            for col in aln:
                if col[0] != '-':
                    current_rnum += 1
                if col[0] != '-' and col[1] != '-':
                    r = col.GetResidue(1)
                    if r.IsValid():
                        n = r.FindAtom("N")
                        ca = r.FindAtom("CA")
                        c = r.FindAtom("C")
                        cb = r.FindAtom("CB")
                        # residues with incomplete backbone are skipped
                        if n.IsValid() and ca.IsValid() and c.IsValid():
                            dssp_states.append(str(r.GetSecStructure()))
                            residue_numbers.append(current_rnum)
                            n_positions.append(n.GetPos())
                            ca_positions.append(ca.GetPos())
                            c_positions.append(c.GetPos())
                            if cb.IsValid():
                                cb_positions.append(cb.GetPos())
                            else:
                                # no CB present (e.g. glycine): construct
                                # one from the backbone
                                cb_pos = mol.alg.CBetaPosition(n.GetPos(),
                                                               ca.GetPos(),
                                                               c.GetPos())
                                cb_positions.append(cb_pos)

        # expect dssp states either as string or list of characters but
        # need ost.SequenceHandle for score_calculator
        d = seq.CreateSequence('dssp', ''.join([item for item in dssp_states]))

        scores = self.score_calculator.Eval(n_positions, ca_positions,
                                            c_positions, cb_positions,
                                            d, residue_numbers)

        # if disco was not given at initialization, "dist_const" in scores
        # is invalid and needs to be removed
        if self.fake_disco:
            del scores["dist_const"]

        scores['seq_id'] = seq.alg.SequenceIdentity(aln)
        scores['seq_sim'] = seq.alg.SequenceSimilarity(aln, seq.alg.BLOSUM62,
                                                       normalize=True)
        # count stretches of gaps, terminal gaps are stripped beforehand
        scores['n_insertions'] = len(re.findall("-+",
                                     str(aln.GetSequence(1)).strip('-')))
        scores['n_deletions'] = len(re.findall("-+",
                                    str(aln.GetSequence(0)).strip('-')))
        scores['seqres_length'] = len(self.seqres.GetGaplessString())

        # fraction of SEQRES residues aligned to a template residue
        seqres_covered = 0
        seqres_tot = 0
        for col in seqres_aln:
            if col[0] != '-':
                seqres_tot += 1
                if col[1] != '-':
                    seqres_covered += 1
        # NOTE(review): raises ZeroDivisionError for an empty SEQRES
        scores["seqres_coverage"] = float(seqres_covered)/seqres_tot

        if profile_aln_score is not None:
            scores["profile_aln_score"] = profile_aln_score
            scores["avg_entropy"] = self.profile_avg_entropy
        elif self.profile is not None and tpl_profile is not None:
            # work on a copy, the input profile is not modified
            tpl_profile_copy = tpl_profile.Extract(0, len(tpl_profile.sequence))
            seq.alg.AddTransitionPseudoCounts(tpl_profile_copy)
            seq.alg.AddAAPseudoCounts(tpl_profile_copy)
            seq.alg.AddNullPseudoCounts(tpl_profile_copy)
            scores["profile_aln_score"] = \
            seq.alg.HMMScore(self.profile_with_pseudo_counts,
                             tpl_profile_copy, seqres_aln, 0, 1)
            scores["avg_entropy"] = self.profile_avg_entropy

        return scores


    def PredictGMQE(self, aln, seqres_aln, residue_numbers = None,
                    n_positions = None, ca_positions = None, c_positions = None,
                    cb_positions = None, dssp_states = None,
                    profile_aln_score = None, tpl_profile = None, QMEANDisCo=None):
        """Calls :meth:`GetScores` with the given arguments and feeds the
        result into the neural network scorer.

        :param QMEANDisCo: If not None, added to the scores with key
                           "QMEANDisCo" before they are passed to the scorer.
                           All other parameters are described in
                           :meth:`GetScores`.

        :returns: :class:`tuple` with the value returned by the scorer as
                  first and the score :class:`dict` as second element.
        """

        scores = self.GetScores(aln, seqres_aln,
                                residue_numbers = residue_numbers,
                                n_positions = n_positions,
                                ca_positions = ca_positions,
                                c_positions = c_positions,
                                cb_positions = cb_positions,
                                dssp_states = dssp_states,
                                profile_aln_score = profile_aln_score,
                                tpl_profile = tpl_profile)

        if QMEANDisCo is not None:
            scores["QMEANDisCo"] = QMEANDisCo

        return (self.nn_scorer.GetScore(scores), scores)

# ----------------------------------------------------------------------------
# Header of the next file in this patch, kept verbatim as comment:
# diff --git a/pymod/mlp_regressor.py b/pymod/mlp_regressor.py
# index 857725b35ef039b64254b7e6f278f13ac0df2975..13fffde2f17982fc3113bc9c07ab1cefa692636a 100644
# --- a/pymod/mlp_regressor.py
# +++ b/pymod/mlp_regressor.py
# @@ -1,88 +1,185 @@
# Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
#                          Biozentrum - University of Basel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import numpy as np


def _ReadArray(fh, dtype, count, what):
    """Reads *count* items of type *dtype* from the open binary file *fh*.

    :raises: :class:`RuntimeError` if the file ends before *count* items
             could be read. *what* names the data in the error message.
    """
    count = int(count)
    data = np.fromfile(fh, dtype=dtype, count=count)
    if data.size != count:
        raise RuntimeError("Unexpected end of file when reading %s of mlp "
                           "regressor!" % (what))
    return data


class Regressor:
    """Minimal multi layer perceptron for inference with numpy.

    Input features are normalized, (x - mean) / std, and then propagated
    through the layers added with :meth:`AddLayer`. Two activation functions
    are available: 0 (identity) and 1 (ReLU).
    """

    def __init__(self, n_input_features, mean=None, std=None):
        """
        :param n_input_features: Number of input features
        :param mean: Per feature mean used for normalization, defaults to 0.0
        :param std: Per feature standard deviation used for normalization,
                    defaults to 1.0

        :raises: :class:`ValueError` if size of *mean* or *std* is not
                 *n_input_features*.
        """
        self._n_input_features = n_input_features
        if mean is None:
            self._mean = np.zeros((self._n_input_features, 1))
        else:
            m = np.array(mean)
            if m.size != self._n_input_features:
                raise ValueError("Mean size must be equal n_input_features.")
            self._mean = m.reshape(self._n_input_features, 1)

        if std is None:
            self._std = np.ones((self._n_input_features, 1))
        else:
            m = np.array(std)
            if m.size != self._n_input_features:
                raise ValueError("Std size must be equal n_input_features")
            self._std = m.reshape(self._n_input_features, 1)
        # internally, the first layer is an input layer for data normalization
        # bias/weights at index 0 are placeholders to keep indices aligned
        self._n_layers = 1
        self._layer_sizes = [self._n_input_features]
        self._activation_functions = [0]
        self._bias = [None]
        self._weights = [None]

    def AddLayer(self, weights, activation_function, bias=None):
        """Appends a layer.

        :param weights: Matrix of shape (layer size, size of previous layer).
                        Anything that numpy can convert to a 2D array.
        :param activation_function: 0 (identity) or 1 (ReLU)
        :param bias: One value per row in *weights*, defaults to 0.0

        :raises: :class:`ValueError` if *weights* is not a matrix, if shapes
                 are inconsistent or if *activation_function* is invalid.
        """
        # convert first, so that plain (nested) lists are valid input too
        w = np.array(weights)
        if w.ndim != 2:
            raise ValueError("Weights must be a two dimensional matrix")
        if w.shape[1] != self._layer_sizes[-1]:
            raise ValueError("Weights Cols must be equal size of prev layer")
        if activation_function not in [0, 1]:
            raise ValueError("Expect activation_function to be in [0,1]")
        if bias is None:
            b = np.zeros((w.shape[0], 1))
        else:
            b = np.array(bias)
            if b.size != w.shape[0]:
                raise ValueError("Weights rows must be equal to size of bias")
            b = b.reshape(b.size, 1)
        # only modify internal state once all checks passed
        self._bias.append(b)
        self._weights.append(w)
        self._activation_functions.append(activation_function)
        self._n_layers += 1
        self._layer_sizes.append(w.shape[0])

    @staticmethod
    def Load(filepath):
        """Loads a regressor from a file written by :meth:`Save`.

        File layout (int32 / float32, machine byte order): magic number
        444222, version 1, number of layers, layer sizes, activation
        functions, mean, std, bias of all layers except the input layer,
        weights of all layers except the input layer.

        :raises: :class:`RuntimeError` if magic number or version do not
                 match, if the layer description is invalid or if the file
                 is truncated.
        """
        with open(filepath, "rb") as fh:
            magic_number = _ReadArray(fh, np.int32, 1, "magic number")[0]
            if magic_number != 444222:
                raise RuntimeError("Inconsistency when reading mlp regressor!")
            version = _ReadArray(fh, np.int32, 1, "version")[0]
            if version != 1:
                raise RuntimeError("Unsupported file version: %s" % (version))
            n_layers = int(_ReadArray(fh, np.int32, 1, "number of layers")[0])
            # a negative count would make np.fromfile read the full file
            if n_layers < 1:
                raise RuntimeError("Invalid number of layers in mlp "
                                   "regressor: %s" % (n_layers))
            layer_sizes = _ReadArray(fh, np.int32, n_layers, "layer sizes")
            if (layer_sizes < 1).any():
                raise RuntimeError("Invalid layer size in mlp regressor!")
            layer_sizes = [int(s) for s in layer_sizes]
            activations = _ReadArray(fh, np.int32, n_layers,
                                     "activation functions")
            mean = _ReadArray(fh, np.float32, layer_sizes[0], "mean")
            std = _ReadArray(fh, np.float32, layer_sizes[0], "std")
            bias = list()
            for i in range(1, n_layers):
                v = _ReadArray(fh, np.float32, layer_sizes[i], "bias")
                bias.append(v.reshape(layer_sizes[i], 1))
            weights = list()
            for i in range(1, n_layers):
                n = layer_sizes[i - 1] * layer_sizes[i]
                v = _ReadArray(fh, np.float32, n, "weights")
                weights.append(v.reshape(layer_sizes[i], layer_sizes[i - 1]))
        regressor = Regressor(layer_sizes[0], mean, std)
        for w, b, a in zip(weights, bias, activations[1:]):
            regressor.AddLayer(w, a, b)
        return regressor

    def Save(self, filepath):
        """Dumps the regressor into a file that can be read by :meth:`Load`.
        All floating point data is stored as float32.

        :raises: :class:`RuntimeError` if the last layer is not of size 1.
        """
        if self._layer_sizes[-1] != 1:
            raise RuntimeError("Expect size 1 for last (output) layer.")
        with open(filepath, "wb") as fh:
            np.array([444222], dtype=np.int32).tofile(fh)
            np.array([1], dtype=np.int32).tofile(fh)
            np.array([self._n_layers], dtype=np.int32).tofile(fh)
            np.array(self._layer_sizes, dtype=np.int32).tofile(fh)
            np.array(self._activation_functions, dtype=np.int32).tofile(fh)
            self._mean.astype(np.float32).tofile(fh)
            self._std.astype(np.float32).tofile(fh)
            for b in self._bias[1:]:
                b.astype(np.float32).tofile(fh)
            for w in self._weights[1:]:
                w.astype(np.float32).tofile(fh)

    def Predict(self, features):
        """Normalizes *features* and propagates them through all layers.

        :param features: One value per input feature
        :returns: First element of the last layer
        """
        features = np.array(features).reshape(self._layer_sizes[0], 1)
        layer = np.divide(features - self._mean, self._std)
        self._Activate(layer, 0)
        for i in range(1, self._n_layers):
            layer = np.dot(self._weights[i], layer) + self._bias[i]
            self._Activate(layer, i)
        return layer[(0, 0)]

    def _Activate(self, layer, layer_idx):
        """Applies the activation function of layer *layer_idx* on *layer*,
        the array is modified in place.
        """
        if self._activation_functions[layer_idx] == 0:
            return
        elif self._activation_functions[layer_idx] == 1:
            layer[layer < 0] = 0
        else:
            raise RuntimeError("Invalid activation function in regressor.")


def TrainRegressor(
    data_frame,
    features,
    target,
    loss_function,
    optimizer,
    topology,
    epochs,
    batch_size,
    randomize=False,
):
    """Trains a keras model and converts it into a :class:`Regressor`.

    :param data_frame: Data frame containing columns for all *features* and
                       for *target*. Rows with a null value in any of these
                       columns are ignored.
    :param features: :class:`list` of column names used as input
    :param target: Name of column containing the value to predict
    :param loss_function: Passed as *loss* to the keras compile function
    :param optimizer: Passed as *optimizer* to the keras compile function
    :param topology: Layer sizes, first element must be len(*features*),
                     last element must be 1. One Dense layer with ReLU
                     activation is created per element.
    :param epochs: Passed to the keras fit function
    :param batch_size: Passed to the keras fit function
    :param randomize: Whether to shuffle the training data before training

    :returns: :class:`Regressor` with the per feature mean / std of the
              training data and the weights of the trained model.
    :raises: :class:`RuntimeError` if required columns are missing in
             *data_frame* or if *topology* is invalid.
    """

    # hidden imports which are only required for training
    import keras
    from keras.models import Sequential
    from keras.layers import Dense

    # check whether all required data is present in data frame
    for f in features:
        if f not in data_frame.keys():
            raise RuntimeError('Feature "%s" not present in data frame' % (f))
    if target not in data_frame.keys():
        raise RuntimeError('Target "%s" not present in data frame' % (target))

    # check topology
    if topology[0] != len(features):
        raise RuntimeError("First layer in topology must be size of features")
    if topology[-1] != 1:
        raise RuntimeError("Last layer in topology must be of size 1")

    # prepare / normalize training data
    data_frame_sel = data_frame[features + [target]]
    data_frame_sel = data_frame_sel[data_frame_sel.notnull().all(axis=1)]
    # explicit float copy: integer columns cannot be normalized in place and
    # the data frame must not be modified
    x = data_frame_sel[features].values.astype(np.float64)
    y = data_frame_sel[target].values
    if randomize:
        random_indices = list(range(x.shape[0]))
        random.shuffle(random_indices)
        x = x[random_indices]
        y = y[random_indices]
    means = x.mean(axis=0)
    stds = x.std(axis=0)
    # constant features have a std of zero, avoid division by zero. They are
    # normalized to 0.0 with a std of 1.0
    stds[stds == 0.0] = 1.0
    x = (x - means) / stds

    # train
    keras_model = Sequential()
    keras_model.add(Dense(topology[0], activation="relu", input_dim=x.shape[1]))
    for n in topology[1:]:
        keras_model.add(Dense(n, activation="relu"))
    keras_model.compile(loss=loss_function, optimizer=optimizer)
    keras_model.fit(x, y, epochs=epochs, batch_size=batch_size)

    # build regressor and return
    # keras stores weights as (input size, layer size) => transpose
    regressor = Regressor(len(features), means, stds)
    for layer in keras_model.layers:
        regressor.AddLayer(
            layer.weights[0].numpy().transpose(), 1, bias=layer.weights[1].numpy()
        )
    return regressor

# ----------------------------------------------------------------------------
# Header of the next file in this patch, kept verbatim as comment:
# diff --git a/pymod/predicted_sequence_features.py b/pymod/predicted_sequence_features.py index
36d002e38ed61116ef01b179ce5d66277e7e0c2a..1d4e46cb0e0d79546e0700e9de734f6f34b7aab5 100644 --- a/pymod/predicted_sequence_features.py +++ b/pymod/predicted_sequence_features.py @@ -27,7 +27,7 @@ def AlignChainToSEQRES(chain, seqres): """ try: return seq.alg.AlignToSEQRES(chain.Select(''), seqres, - try_resnum_first=False,validate=False) + try_resnum_first=True, validate=False) except Exception as e: print(e) diff --git a/pymod/score_calculator.py b/pymod/score_calculator.py index efb5bcc17d7a601e109d029f1a050dd95de882a8..d88699fbb65c50261c59f1c469d32c66ce36782d 100644 --- a/pymod/score_calculator.py +++ b/pymod/score_calculator.py @@ -433,23 +433,14 @@ class NNScorer: raise RuntimeError("Specified NNScorer directory does not contain "\ "a neural network for every entry in "\ "feature_groups.json") - self.nn.append(Regressor(nn_path)) - if self.nn[-1].layer_sizes[0] != len(fg) + len(self.aa_string): - raise RuntimeError("Input layer of loaded NN is inconsistent with "\ - "number of features as defined in "\ - "feature_groups.json!") + self.nn.append(Regressor.Load(nn_path)) - def GetScore(self, score_dict, olc): - - try: - aa_idx = self.aa_string.index(olc) - except: - return 0.0 + def GetScore(self, score_dict, olc = None): valid_scores = dict() for k, v in score_dict.items(): - if v == v: + if v == v and v is not None: valid_scores[k] = v final_fg_idx = -1 @@ -466,10 +457,16 @@ class NNScorer: if final_fg_idx == -1: return 0.0 - input_features = list([0.0] * len(self.aa_string)) - input_features[aa_idx] = 1.0 + input_features = list() + if olc is not None: + try: + aa_idx = self.aa_string.index(olc) + input_features = list([0.0] * len(self.aa_string)) + input_features[aa_idx] = 1.0 + except: + return 0.0 + for f in self.feature_groups[final_fg_idx]: input_features.append(valid_scores[f]) return self.nn[final_fg_idx].Predict(input_features) - diff --git a/pymod/wrap_qmean.cc b/pymod/wrap_qmean.cc index 
d8c82ce47cff4cdcd11a166a564375b2b3865625..e348516a73b8c7f47e5104884d396ce9202271ce 100644 --- a/pymod/wrap_qmean.cc +++ b/pymod/wrap_qmean.cc @@ -29,6 +29,8 @@ void export_CBPacking(); void export_HBond(); void export_disco(); void export_clash(); +void export_gmqe(); +void export_extract_data_helper(); using namespace boost::python; @@ -53,5 +55,7 @@ BOOST_PYTHON_MODULE(_qmean) export_HBond(); export_disco(); export_clash(); + export_gmqe(); + export_extract_data_helper(); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ddade4bea0418da8b83a7295d6efad4f072f9a40..32e36f3ae8b7288b0b1436994f05837e6ac7afe0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,9 @@ vec_list_magic.hh version.hh disco.hh clash_score.hh +gmqe_scores.hh +trg_tpl_similarity.hh +extract_data_helper.hh impl/reduced_impl.hh impl/torsion_impl.hh impl/packing_impl.hh @@ -58,6 +61,9 @@ ss_agreement.cc spherical_smoother.cc disco.cc clash_score.cc +gmqe_scores.cc +trg_tpl_similarity.cc +extract_data_helper.cc impl/reduced_impl.cc impl/torsion_impl.cc impl/packing_impl.cc diff --git a/src/cb_packing_statistics.cc b/src/cb_packing_statistics.cc index 63ceb7f2d4236a0a3bc7f26054e9e65a858c853a..40803d29ebf0e68113fa90133c7f5563a9f6f2e2 100644 --- a/src/cb_packing_statistics.cc +++ b/src/cb_packing_statistics.cc @@ -82,7 +82,7 @@ Real CBPackingStatistic::GetCount(ost::conop::AminoAcid aa, uint bin) const{ throw std::runtime_error("Cannot get count for invalid amino acid!"); } - if(bin >= opts_.max_counts+1){ + if(static_cast<int>(bin) >= opts_.max_counts+1){ throw std::runtime_error("Cannot get count for invalid bin!"); } diff --git a/src/cbeta_statistics.cc b/src/cbeta_statistics.cc index 9e4b7398b2992d3e356e19b6d54ec2a418802fee..5ac9204b02546fab25c0ba849af6063d670806b5 100644 --- a/src/cbeta_statistics.cc +++ b/src/cbeta_statistics.cc @@ -74,7 +74,7 @@ Real CBetaStatistic::GetCount(ost::conop::AminoAcid a, ost::conop::AminoAcid b, throw std::runtime_error("Cannot get count 
for invalid AminoAcid!"); } - if(dist_bin >= opts_.number_of_bins){ + if(static_cast<int>(dist_bin) >= opts_.number_of_bins){ throw std::runtime_error("Cannot get count for invalid bin!"); } diff --git a/src/disco.cc b/src/disco.cc index d24a21781b17c0ccd2643685f8edc763d94abe64..019421d8e86865f067ade610fdc795b2b885c177 100644 --- a/src/disco.cc +++ b/src/disco.cc @@ -599,7 +599,7 @@ DisCoContainer::DisCoContainer(const ost::seq::SequenceHandle& seqres): seqsim_clustering_cutoff_(std::numeric_limits<Real>::quiet_NaN()) { // check whether the seqres contains a gap - if(seqres_.GetLength() != seqres_.GetGaplessString().size()) { + if(seqres_.GetLength() != static_cast<int>(seqres_.GetGaplessString().size())) { throw std::runtime_error("The provided SEQRES must not contain a gap!"); } @@ -811,7 +811,7 @@ void DisCoContainer::AddData(const ost::seq::AlignmentHandle& aln, "exactly match with the internal SEQRES!"); } - int max_idx = seqres_.GetLength(); + uint max_idx = seqres_.GetLength(); for(uint i = 0; i < pos_seqres_mapping.size(); ++i) { if(pos_seqres_mapping[i] < 1 || pos_seqres_mapping[i] > max_idx) { std::stringstream ss; @@ -901,7 +901,6 @@ void DisCoContainer::CalculateConstraints(Real dist_cutoff, Real gamma, cluster_weights, avg_cluster_seqsim, avg_cluster_seqid); // estimate all the distances - Real squared_dist_cutoff = dist_cutoff_ * dist_cutoff_; // the pair describes: (idx in pos_ / aln_, dist multiplied by 1000) ost::TriMatrix<std::vector<std::pair<uint16_t, uint16_t> >* > pairwise_distances(0); diff --git a/src/extract_data_helper.cc b/src/extract_data_helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..eceeb849e0184b444de7658249a558900777fa3c --- /dev/null +++ b/src/extract_data_helper.cc @@ -0,0 +1,173 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <qmean/extract_data_helper.hh> + +namespace qmean { + +void ExtractTemplateDataDisCo(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + ost::db::LinearIndexerPtr indexer, + ost::db::LinearCharacterContainerPtr seqres_container, + ost::db::LinearCharacterContainerPtr atomseq_container, + ost::db::LinearPositionContainerPtr& position_container, + std::vector<int>& residue_numbers, + geom::Vec3List& positions) { + + std::pair<uint64_t, uint64_t> data_range = indexer->GetDataRange(entry_name, + chain_name); + + String template_seqres = aln.GetSequence(1).GetGaplessString(); + data_range.first += aln.GetSequence(1).GetOffset(); + data_range.second = data_range.first + template_seqres.size(); + + // check, whether the the template seqres is consistent with what + // we find in seqres_container + String expected_template_seqres; + seqres_container->GetCharacters(data_range, expected_template_seqres); + if(expected_template_seqres != template_seqres) { + throw std::runtime_error("Template sequence in input alignment is " + "inconsistent with sequence in SEQRES container!"); + } + + String template_atomseq; + atomseq_container->GetCharacters(data_range, template_atomseq); + geom::Vec3List extracted_positions; + position_container->GetPositions(data_range, extracted_positions); + + // prepare output + uint template_atomseq_size = template_atomseq.size(); + residue_numbers.clear(); + residue_numbers.reserve(template_atomseq_size); + positions.clear(); + 
positions.reserve(template_atomseq_size); + + // extract + uint current_rnum = aln.GetSequence(0).GetOffset() + 1; + uint current_template_pos = 0; + String seqres_seq = aln.GetSequence(0).GetString(); + String template_seq = aln.GetSequence(1).GetString(); + + for(int i = 0; i < aln.GetLength(); ++i) { + if(seqres_seq[i] != '-' && template_seq[i] != '-') { + if(template_atomseq[current_template_pos] != '-') { + // it is aligned and we have a valid position! + residue_numbers.push_back(current_rnum); + positions.push_back(extracted_positions[current_template_pos]); + } + } + + if(seqres_seq[i] != '-') { + ++current_rnum; + } + + if(template_seq[i] != '-') { + ++current_template_pos; + } + } +} + +void ExtractTemplateDataGMQE(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + ost::db::LinearIndexerPtr indexer, + ost::db::LinearCharacterContainerPtr seqres_container, + ost::db::LinearCharacterContainerPtr atomseq_container, + ost::db::LinearCharacterContainerPtr dssp_container, + ost::db::LinearPositionContainerPtr& n_position_container, + ost::db::LinearPositionContainerPtr& ca_position_container, + ost::db::LinearPositionContainerPtr& c_position_container, + ost::db::LinearPositionContainerPtr& cb_position_container, + std::vector<int>& residue_numbers, + String& dssp, + geom::Vec3List& n_positions, + geom::Vec3List& ca_positions, + geom::Vec3List& c_positions, + geom::Vec3List& cb_positions) { + + std::pair<uint64_t, uint64_t> data_range = indexer->GetDataRange(entry_name, + chain_name); + + String template_seqres = aln.GetSequence(1).GetGaplessString(); + data_range.first += aln.GetSequence(1).GetOffset(); + data_range.second = data_range.first + template_seqres.size(); + + // check, whether the the template seqres is consistent with what + // we find in seqres_container + String expected_template_seqres; + seqres_container->GetCharacters(data_range, expected_template_seqres); + if(expected_template_seqres != 
template_seqres) { + throw std::runtime_error("Template sequence in input alignment is " + "inconsistent with sequence in SEQRES container!"); + } + + String template_atomseq; + atomseq_container->GetCharacters(data_range, template_atomseq); + String extracted_dssp; + dssp_container->GetCharacters(data_range, extracted_dssp); + geom::Vec3List extracted_n_positions; + n_position_container->GetPositions(data_range, extracted_n_positions); + geom::Vec3List extracted_ca_positions; + ca_position_container->GetPositions(data_range, extracted_ca_positions); + geom::Vec3List extracted_c_positions; + c_position_container->GetPositions(data_range, extracted_c_positions); + geom::Vec3List extracted_cb_positions; + cb_position_container->GetPositions(data_range, extracted_cb_positions); + + // prepare output + uint template_atomseq_size = template_atomseq.size(); + residue_numbers.clear(); + residue_numbers.reserve(template_atomseq_size); + dssp.clear(); + dssp.reserve(template_atomseq_size); + n_positions.clear(); + n_positions.reserve(template_atomseq_size); + ca_positions.clear(); + ca_positions.reserve(template_atomseq_size); + c_positions.clear(); + c_positions.reserve(template_atomseq_size); + cb_positions.clear(); + cb_positions.reserve(template_atomseq_size); + + // extract + uint current_rnum = aln.GetSequence(0).GetOffset() + 1; + uint current_template_pos = 0; + String seqres_seq = aln.GetSequence(0).GetString(); + String template_seq = aln.GetSequence(1).GetString(); + + for(int i = 0; i < aln.GetLength(); ++i) { + if(seqres_seq[i] != '-' && template_seq[i] != '-') { + if(template_atomseq[current_template_pos] != '-') { + // it is aligned and we have a valid position! 
+ residue_numbers.push_back(current_rnum); + dssp.push_back(extracted_dssp[current_template_pos]); + n_positions.push_back(extracted_n_positions[current_template_pos]); + ca_positions.push_back(extracted_ca_positions[current_template_pos]); + c_positions.push_back(extracted_c_positions[current_template_pos]); + cb_positions.push_back(extracted_cb_positions[current_template_pos]); + } + } + + if(seqres_seq[i] != '-') { + ++current_rnum; + } + + if(template_seq[i] != '-') { + ++current_template_pos; + } + } +} + +} //ns + diff --git a/src/extract_data_helper.hh b/src/extract_data_helper.hh new file mode 100644 index 0000000000000000000000000000000000000000..461bc75a40858d4142edabb64c7c122eb79f8d25 --- /dev/null +++ b/src/extract_data_helper.hh @@ -0,0 +1,54 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef QMEAN_EXTRACT_DATA_HELPER_HH +#define QMEAN_EXTRACT_DATA_HELPER_HH + +#include <ost/geom/vec3.hh> +#include <ost/seq/sequence_handle.hh> +#include <ost/db/linear_indexer.hh> +#include <ost/db/binary_container.hh> +#include <ost/seq/alignment_handle.hh> + +namespace qmean { + +void ExtractTemplateDataDisCo(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + ost::db::LinearIndexerPtr indexer, + ost::db::LinearCharacterContainerPtr seqres_container, + ost::db::LinearCharacterContainerPtr atomseq_container, + ost::db::LinearPositionContainerPtr& position_container, + std::vector<int>& residue_numbers, + geom::Vec3List& positions); + +void ExtractTemplateDataGMQE(const String& entry_name, const String& chain_name, + const ost::seq::AlignmentHandle& aln, + ost::db::LinearIndexerPtr indexer, + ost::db::LinearCharacterContainerPtr seqres_container, + ost::db::LinearCharacterContainerPtr atomseq_container, + ost::db::LinearCharacterContainerPtr dssp_container, + ost::db::LinearPositionContainerPtr& n_position_container, + ost::db::LinearPositionContainerPtr& ca_position_container, + ost::db::LinearPositionContainerPtr& c_position_container, + ost::db::LinearPositionContainerPtr& cb_position_container, + std::vector<int>& residue_numbers, + String& dssp, + geom::Vec3List& n_positions, + geom::Vec3List& ca_positions, + geom::Vec3List& c_positions, + geom::Vec3List& cb_positions); +} //ns + +#endif diff --git a/src/gmqe_scores.cc b/src/gmqe_scores.cc new file mode 100644 index 0000000000000000000000000000000000000000..d2b7de31069e22c2297452b8f90db5432f4d6075 --- /dev/null +++ b/src/gmqe_scores.cc @@ -0,0 +1,451 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <qmean/gmqe_scores.hh> + +namespace qmean { + +GMQEScoreCalculator::GMQEScoreCalculator(PotentialContainerPtr potentials, + DisCoContainerPtr disco_container, + const ost::seq::SequenceHandle& seqres, + const ost::seq::SequenceHandle& psipred_pred, + const std::vector<int>& psipred_cfi) { + + // some consistency checks + if(seqres.GetString() != disco_container->GetSeqres().GetString()) { + throw std::runtime_error("Provided SEQRES does not match SEQRES in " + "DisCoContainer!"); + } + + // check whether the psipred_pred prediction only contains valid symbols + // [H, E, C] is further down... 
+ if(seqres.GetLength() != psipred_pred.GetLength()) { + throw std::runtime_error("Provided SEQRES is inconsistent with provided " + "psipred prediction!"); + } + + if(seqres.GetLength() != static_cast<int>(psipred_cfi.size())) { + throw std::runtime_error("Provided SEQRES is inconsistent with provided " + "psipred confidences!"); + } + + // check, whether all required potentials are present + std::vector<String> potential_keys = potentials->GetKeys(); + + if(std::find(potential_keys.begin(), potential_keys.end(), "reduced_coil") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: reduced_coil to be " + "present in provided potential container!"); + } + + if(std::find(potential_keys.begin(), potential_keys.end(), "reduced_helix") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: reduced_helix to be " + "present in provided potential container!"); + } + + if(std::find(potential_keys.begin(), potential_keys.end(), "reduced_extended") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: reduced__extended to " + "be present in provided potential container!"); + } + + if(std::find(potential_keys.begin(), potential_keys.end(), "cb_packing_coil") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: cb_packing_coil to be " + "present in provided potential container!"); + } + + if(std::find(potential_keys.begin(), potential_keys.end(), "cb_packing_helix") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: cb_packing_helix to be " + "present in provided potential container!"); + } + + if(std::find(potential_keys.begin(), potential_keys.end(), "cb_packing_extended") == + potential_keys.end()) { + throw std::runtime_error("Expect a potential with key: cb_packing_extended to " + "be present in provided potential container!"); + } + + // The potentials stored in the PotentialContainer are of basetype + // 
PotentialBase. Lets do some casting... + qmean::ReducedPotentialPtr reduced_coil = + boost::dynamic_pointer_cast<qmean::ReducedPotential>( + (*potentials)["reduced_coil"]); + + qmean::ReducedPotentialPtr reduced_helix = + boost::dynamic_pointer_cast<qmean::ReducedPotential>( + (*potentials)["reduced_helix"]); + + qmean::ReducedPotentialPtr reduced_extended = + boost::dynamic_pointer_cast<qmean::ReducedPotential>( + (*potentials)["reduced_extended"]); + + qmean::CBPackingPotentialPtr cb_packing_coil = + boost::dynamic_pointer_cast<qmean::CBPackingPotential>( + (*potentials)["cb_packing_coil"]); + + qmean::CBPackingPotentialPtr cb_packing_helix = + boost::dynamic_pointer_cast<qmean::CBPackingPotential>( + (*potentials)["cb_packing_helix"]); + + qmean::CBPackingPotentialPtr cb_packing_extended = + boost::dynamic_pointer_cast<qmean::CBPackingPotential>( + (*potentials)["cb_packing_extended"]); + + qmean::TorsionPotentialPtr torsion = + boost::dynamic_pointer_cast<qmean::TorsionPotential>( + (*potentials)["torsion"]); + + if(!(reduced_coil && reduced_helix && reduced_extended && + cb_packing_coil && cb_packing_helix && cb_packing_extended && + torsion)) { + throw std::runtime_error("Could not apply dynamic cast to potential stored " + "in the provided PotentialContainer. 
They must be " + "of type CBPackingPotential, " + "ReducedPotential and TorsionPotential!"); + } + + // check, whether the potential parametrizations are consistent + qmean::impl::ReducedOpts reduced_coil_opts= reduced_coil->GetOpts(); + qmean::impl::ReducedOpts reduced_helix_opts = reduced_helix->GetOpts(); + qmean::impl::ReducedOpts reduced_extended_opts = reduced_extended->GetOpts(); + + qmean::impl::CBPackingOpts cb_packing_coil_opts= cb_packing_coil->GetOpts(); + qmean::impl::CBPackingOpts cb_packing_helix_opts = cb_packing_helix->GetOpts(); + qmean::impl::CBPackingOpts cb_packing_extended_opts = cb_packing_extended->GetOpts(); + + + if(reduced_coil_opts != reduced_helix_opts || + reduced_coil_opts != reduced_extended_opts) { + throw std::runtime_error("Options of provided reduced potentials must be " + " consistent!"); + } + + if(cb_packing_coil_opts != cb_packing_helix_opts || + cb_packing_coil_opts != cb_packing_extended_opts) { + throw std::runtime_error("Options of provided cb_packing potentials must be " + " consistent!"); + } + + // to simplify the code later on, we assume a lower cutoff of 0.0 for the + // reduced potential. + if(reduced_coil_opts.lower_cutoff != 0.0) { + throw std::runtime_error("The reduced potentials are assumed to have a " + "lower cutoff of 0.0!"); + } + + disco_container_ = disco_container; + + reduced_cutoff_ = reduced_coil_opts.upper_cutoff; + reduced_squared_cutoff_ = reduced_cutoff_ * reduced_cutoff_; + reduced_seq_sep_ = reduced_coil_opts.sequence_sep; + cb_packing_squared_cutoff_ = cb_packing_coil_opts.cutoff * + cb_packing_coil_opts.cutoff; + cb_packing_max_count_ = cb_packing_coil_opts.max_counts; + disco_cutoff_ = disco_container_->GetDistCutoff(); + disco_squared_cutoff_ = disco_cutoff_ * disco_cutoff_; + disco_bin_size_ = disco_container_->GetBinSize(); + + // We only store the raw data of the potentials. 
However, we keep a shared + // pointer as a member in order to keep the reference counts up and avoid + // invalidation of the raw data. + potential_container_ = potentials; + + // the actual amino acids defined in ost::conop is the only thing we need + for(int i = 0; i < seqres.GetLength(); ++i) { + ost::conop::AminoAcid aa = ost::conop::OneLetterCodeToAminoAcid(seqres[i]); + if(aa == ost::conop::XXX) { + throw std::runtime_error("Only standard amino acids allowed in SEQRES!"); + } + amino_acids_.push_back(aa); + } + + for(int i = 0; i < psipred_pred.GetLength(); ++i) { + if(psipred_pred[i] == 'H') { + cb_packing_energies_.push_back(cb_packing_helix->Data()); + reduced_energies_.push_back(reduced_helix->Data()); + } + else if(psipred_pred[i] == 'E') { + cb_packing_energies_.push_back(cb_packing_extended->Data()); + reduced_energies_.push_back(reduced_extended->Data()); + } + else if(psipred_pred[i] == 'C') { + cb_packing_energies_.push_back(cb_packing_coil->Data()); + reduced_energies_.push_back(reduced_coil->Data()); + } + else { + throw std::runtime_error("Expect only characters in ['H', 'E', 'C'] in " + "psipred_pred!"); + } + psipred_pred_.push_back(psipred_pred[i]); + } + psipred_cfi_ = psipred_cfi; + + // the torsion potential is agnostic of secondary structure prediction... + std::vector<String> aa_names; + for(std::vector<ost::conop::AminoAcid>::iterator it = amino_acids_.begin(); + it != amino_acids_.end(); ++it) { + aa_names.push_back(ost::conop::AminoAcidToResidueName(*it)); + } + + std::vector<String> aa_triplet; + aa_triplet.push_back("ALA"); + aa_triplet.push_back("ALA"); + aa_triplet.push_back("ALA"); + + // first will be invalid anyway... + torsion_energies_.push_back(NULL); + for(int i = 1; i < static_cast<int>(aa_names.size()) - 1; ++i) { + aa_triplet[0] = aa_names[i-1]; + aa_triplet[1] = aa_names[i]; + aa_triplet[2] = aa_names[i+1]; + torsion_energies_.push_back(torsion->Data(aa_triplet)); + } + // last one is again invalid... 
+ torsion_energies_.push_back(NULL); +} + +void GMQEScoreCalculator::Eval(const geom::Vec3List& n_positions, + const geom::Vec3List& ca_positions, + const geom::Vec3List& c_positions, + const geom::Vec3List& cb_positions, + const ost::seq::SequenceHandle& dssp_states, + const std::vector<int>& residue_numbers, + Real& dist_const, Real& reduced, + Real& cb_packing, Real& torsion, + Real& ss_agreement, Real& coverage) const { + + // some consistency checks + if(residue_numbers.size() != n_positions.size() || + residue_numbers.size() != ca_positions.size() || + residue_numbers.size() != c_positions.size() || + residue_numbers.size() != cb_positions.size()) { + throw std::runtime_error("Expect resnums and positions to be of same size"); + } + + if(static_cast<int>(residue_numbers.size()) != dssp_states.GetLength()) { + throw std::runtime_error("Expect resnums and dssp states to be of same size!"); + } + + int seqres_size = amino_acids_.size(); + for(uint i = 0; i < residue_numbers.size(); ++i) { + if(residue_numbers[i] < 1 || residue_numbers[i] > seqres_size) { + throw std::runtime_error("Invalid resnum"); + } + } + + int num_positions = ca_positions.size(); + int pos_idx = 0; + std::vector<Real> squared_ca_distances(num_positions * num_positions / 2); + for(int i = 0; i < num_positions; ++i) { + for(int j = i + 1; j < num_positions; ++j, ++pos_idx) { + squared_ca_distances[pos_idx] = geom::Length2(ca_positions[i] - + ca_positions[j]); + } + } + + Real max_squared_ca_cutoff = std::max(reduced_squared_cutoff_, + disco_squared_cutoff_); + + std::vector<Real> reduced_scores(num_positions, 0.0); + std::vector<Real> disco_scores(num_positions, 0.0); + + std::vector<int> cb_packing_counts(num_positions, 0); + std::vector<int> reduced_counts(num_positions, 0); + std::vector<int> disco_counts(num_positions, 0); + + geom::Vec3List direction_vectors(num_positions); + for(int i = 0; i < num_positions; ++i) { + direction_vectors[i] = + 
geom::Normalize(geom::Normalize(ca_positions[i]-n_positions[i]) + + geom::Normalize(ca_positions[i]-c_positions[i])); + } + + pos_idx = 0; + for(int i = 0; i < num_positions; ++i) { + for(int j = i + 1; j < num_positions; ++j, ++pos_idx) { + + if(squared_ca_distances[pos_idx] < max_squared_ca_cutoff) { + + int rnum_i = residue_numbers[i]; + int rnum_j = residue_numbers[j]; + + // let the scoring action begin + Real ca_distance = std::sqrt(squared_ca_distances[pos_idx]); + Real squared_cb_distance = geom::Length2(cb_positions[i] - + cb_positions[j]); + + if(squared_cb_distance < cb_packing_squared_cutoff_) { + cb_packing_counts[i] += 1; + cb_packing_counts[j] += 1; + } + + if(ca_distance < disco_cutoff_ && + disco_container_->HasConstraint(rnum_i, rnum_j)) { + + const std::vector<Real>& constraint = + disco_container_->GetConstraint(rnum_i, rnum_j); + + // we perform a linear interpolation + uint bin_lower = static_cast<uint>(ca_distance / disco_bin_size_); + uint bin_upper = bin_lower + 1; + Real w_one = (bin_upper * disco_bin_size_ - ca_distance) / + disco_bin_size_; + Real w_two = Real(1.0) - w_one; + Real score = w_one * constraint[bin_lower] + + w_two * constraint[bin_upper]; + + disco_scores[i] += score; + disco_scores[j] += score; + disco_counts[i] += 1; + disco_counts[j] += 1; + } + + if(ca_distance < reduced_cutoff_ && + std::abs(rnum_i - rnum_j) >= reduced_seq_sep_) { + + geom::Vec3 v_i = direction_vectors[i]; + geom::Vec3 v_j = direction_vectors[j]; + geom::Vec3 v_ij = (ca_positions[j] - ca_positions[i]); + v_ij /= ca_distance; + + Real dot_product = geom::Dot(v_i, v_ij); + dot_product = std::max(Real(-0.999999), dot_product); + dot_product = std::min(Real(1.0), dot_product); + Real alpha = std::acos(dot_product); + + dot_product = -geom::Dot(v_ij, v_j); + dot_product = std::max(Real(-0.999999), dot_product); + dot_product = std::min(Real(1.0), dot_product); + Real beta = std::acos(dot_product); + + geom::Vec3 v_i_ij_cross = -geom::Cross(v_i, v_ij); + 
geom::Vec3 v_ij_j_cross = geom::Cross(v_ij, v_j); + Real gamma = std::atan2(-geom::Dot(v_i, v_ij_j_cross), + geom::Dot(v_i_ij_cross,v_ij_j_cross)); + + ost::conop::AminoAcid aa_i = amino_acids_[rnum_i - 1]; + ost::conop::AminoAcid aa_j = amino_acids_[rnum_j - 1]; + + Real e_i = reduced_energies_[rnum_i-1]->Get(aa_i, aa_j, ca_distance, + alpha, beta, gamma); + Real e_j = reduced_energies_[rnum_j-1]->Get(aa_j, aa_i, ca_distance, + beta, alpha, gamma); + + reduced_scores[i] += e_i; + reduced_scores[j] += e_j; + + reduced_counts[i] += 1; + reduced_counts[j] += 1; + } + } + } + } + + Real summed_reduced = 0.0; + Real summed_cb_packing = 0.0; + Real summed_disco = 0.0; + Real summed_ss_agreement = 0.0; + + for(int i = 0; i < num_positions; ++i) { + + // for cb_packing we still need to extract the actual energies + int rnum = residue_numbers[i]; + ost::conop::AminoAcid aa = amino_acids_[rnum - 1]; + int c = std::min(cb_packing_counts[i], cb_packing_max_count_); + summed_cb_packing += cb_packing_energies_[rnum-1]->Get(aa, c); + + summed_ss_agreement += ss_agreement_.LogOddScore(dssp_states[i], + psipred_pred_[rnum-1], + psipred_cfi_[rnum-1]); + + // the other two are trivial + if(reduced_counts[i] > 0) { + summed_reduced += reduced_scores[i] / reduced_counts[i]; + } + + if(disco_counts[i] > 0) { + summed_disco += disco_scores[i] / disco_counts[i]; + } + } + + // torsion is done separately + std::vector<Real> + phi_angles(num_positions, std::numeric_limits<Real>::quiet_NaN()); + std::vector<Real> + psi_angles(num_positions, std::numeric_limits<Real>::quiet_NaN()); + + // do phi angles + for(int i = 1; i < num_positions; ++i) { + if(residue_numbers[i-1]+1 == residue_numbers[i] && + geom::Length2(c_positions[i-1]-n_positions[i]) < Real(9.0)) { + phi_angles[i] = geom::DihedralAngle(c_positions[i-1], n_positions[i], + ca_positions[i], c_positions[i]); + } + } + + // do psi angles + for(int i = 0; i < num_positions-1; ++i) { + if(residue_numbers[i]+1 == residue_numbers[i+1] && + 
geom::Length2(c_positions[i]-n_positions[i+1]) < Real(9.0)) { + psi_angles[i] = geom::DihedralAngle(n_positions[i], ca_positions[i], + c_positions[i], n_positions[i+1]); + } + } + + Real summed_torsion = 0.0; + int n_torsions = 0; + for(int i = 1; i < num_positions-1; ++i) { + if(phi_angles[i-1] == phi_angles[i-1] && + psi_angles[i-1] == psi_angles[i-1] && + phi_angles[i] == phi_angles[i] && + psi_angles[i] == psi_angles[i] && + phi_angles[i+1] == phi_angles[i+1] && + psi_angles[i+1] == psi_angles[i+1]) { + summed_torsion += + torsion_energies_[residue_numbers[i]-1]->Get(phi_angles[i-1], + psi_angles[i-1], + phi_angles[i], + psi_angles[i], + phi_angles[i+1], + psi_angles[i+1]); + ++n_torsions; + } + } + + dist_const = summed_disco; + reduced = summed_reduced; + cb_packing = summed_cb_packing; + torsion = summed_torsion; + ss_agreement = summed_ss_agreement; + + if(num_positions > 0) { + dist_const /= num_positions; + reduced /= num_positions; + cb_packing /= num_positions; + ss_agreement /= num_positions; + } + + if(n_torsions > 0) { + torsion /= n_torsions; + } + + coverage = static_cast<Real>(num_positions) / amino_acids_.size(); +} + + +} // ns diff --git a/src/gmqe_scores.hh b/src/gmqe_scores.hh new file mode 100644 index 0000000000000000000000000000000000000000..2cd2fbc9575cb3a901f0291b7701a18af1729bfe --- /dev/null +++ b/src/gmqe_scores.hh @@ -0,0 +1,79 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GMQE_SCORES_HH
+#define GMQE_SCORES_HH
+
+#include <qmean/disco.hh>
+#include <qmean/potential_base.hh>
+#include <qmean/cb_packing_potential.hh>
+#include <qmean/reduced_potential.hh>
+#include <qmean/torsion_potential.hh>
+#include <qmean/cbeta_potential.hh>
+#include <qmean/ss_agreement.hh>
+
+
+namespace qmean {
+
+
+
+class GMQEScoreCalculator;
+typedef boost::shared_ptr<GMQEScoreCalculator> GMQEScoreCalculatorPtr;
+
+// Calculates the scores that enter GMQE for a set of backbone/CB positions
+// mapped onto a target sequence (SEQRES): DisCo distance constraints, reduced,
+// CB packing and torsion energies, secondary structure agreement and coverage.
+// Everything that only depends on the target is set up once in the
+// constructor; Eval can then be called repeatedly.
+class GMQEScoreCalculator {
+
+public:
+
+  // potentials       container providing the reduced and cb_packing
+  //                  potentials for coil, helix and extended as well as the
+  //                  torsion potential
+  // disco_container  distance constraints, its SEQRES must equal seqres
+  // seqres           target sequence, standard amino acids only
+  // psipred_pred     one character in ['H', 'E', 'C'] per SEQRES position
+  // psipred_cfi      one confidence value per SEQRES position
+  //
+  // Throws std::runtime_error if the input is inconsistent.
+  GMQEScoreCalculator(PotentialContainerPtr potentials,
+                      DisCoContainerPtr disco_container,
+                      const ost::seq::SequenceHandle& seqres,
+                      const ost::seq::SequenceHandle& psipred_pred,
+                      const std::vector<int>& psipred_cfi);
+
+
+  // Evaluates one set of positions. Entry i of the four position lists and of
+  // dssp_states belongs to SEQRES position residue_numbers[i] (1-based). The
+  // six scores are returned through the Real& parameters. Throws
+  // std::runtime_error if the input lists differ in size or a residue number
+  // is outside [1, SEQRES length].
+  void Eval(const geom::Vec3List& n_positions,
+            const geom::Vec3List& ca_positions,
+            const geom::Vec3List& c_positions,
+            const geom::Vec3List& cb_positions,
+            const ost::seq::SequenceHandle& dssp_states,
+            const std::vector<int>& residue_numbers,
+            Real& dist_const, Real& reduced,
+            Real& cb_packing, Real& torsion,
+            Real& ss_agreement, Real& coverage) const;
+
+private:
+
+  // Raw energy tables, one entry per SEQRES position. The pointers are
+  // non-owning; the data belongs to the potentials in potential_container_.
+  // cb_packing/reduced entries are chosen according to the psipred state of
+  // the position; the torsion entries of the first and last position are null.
+  std::vector<CBPackingEnergies*> cb_packing_energies_;
+  std::vector<ReducedEnergies*> reduced_energies_;
+  std::vector<TorsionEnergies*> torsion_energies_;
+  DisCoContainerPtr disco_container_;
+  // per SEQRES position data
+  std::vector<ost::conop::AminoAcid> amino_acids_;
+  std::vector<char> psipred_pred_;
+  std::vector<int> psipred_cfi_;
+  // parametrization copied from the potential options / the DisCoContainer
+  Real reduced_cutoff_;
+  Real reduced_squared_cutoff_;
+  int reduced_seq_sep_;
+  Real cb_packing_squared_cutoff_;
+  int cb_packing_max_count_;
+  Real disco_cutoff_;
+  Real disco_squared_cutoff_;
+  Real disco_bin_size_;
+  // only held to keep the potentials, and with them the raw energy tables
+  // above, alive
+  PotentialContainerPtr potential_container_;
+  SSAgreement ss_agreement_;
+};
+
+} // ns
+
+#endif
diff --git
a/src/hbond_potential.cc +++ b/src/hbond_potential.cc @@ -75,10 +75,10 @@ void HBondPotential::Fill(HBondStatisticPtr stat){ throw std::runtime_error(ss.str()); } expectation_value = 0.0; - for(uint j = 0; j < opts_.d_bins; ++j){ - for(uint k = 0; k < opts_.alpha_bins; ++k){ - for(uint l = 0; l < opts_.beta_bins; ++l){ - for(uint m = 0; m < opts_.gamma_bins; ++m){ + for(int j = 0; j < opts_.d_bins; ++j){ + for(int k = 0; k < opts_.alpha_bins; ++k){ + for(int l = 0; l < opts_.beta_bins; ++l){ + for(int m = 0; m < opts_.gamma_bins; ++m){ count = stat->GetCount(i,j,k,l,m); frac = std::max((count/state_count)/ref_value,1.0); e = -log(frac); @@ -89,10 +89,10 @@ void HBondPotential::Fill(HBondStatisticPtr stat){ } } expectation_value = std::abs(expectation_value); - for(uint j = 0; j < opts_.d_bins; ++j){ - for(uint k = 0; k < opts_.alpha_bins; ++k){ - for(uint l = 0; l < opts_.beta_bins; ++l){ - for(uint m = 0; m < opts_.gamma_bins; ++m){ + for(int j = 0; j < opts_.d_bins; ++j){ + for(int k = 0; k < opts_.alpha_bins; ++k){ + for(int l = 0; l < opts_.beta_bins; ++l){ + for(int m = 0; m < opts_.gamma_bins; ++m){ e = energies_.Get(Index(i,j,k,l,m)); energies_.Set(Index(i,j,k,l,m),e/expectation_value); } diff --git a/src/hbond_statistics.cc b/src/hbond_statistics.cc index ad1ad3de8ff8ac83a1d267dacab2eaaa46781241..3a646637e988f53f7525fb052c4149748d9dda5a 100644 --- a/src/hbond_statistics.cc +++ b/src/hbond_statistics.cc @@ -91,15 +91,15 @@ Real HBondStatistic::GetCount(uint state, uint d_bin, uint alpha_bin, uint beta_ throw std::runtime_error("Cannot get count for invalid state!"); } - if(alpha_bin >= opts_.alpha_bins){ + if(static_cast<int>(alpha_bin) >= opts_.alpha_bins){ throw std::runtime_error("Cannot get count for invalid alpha bin!"); } - if(beta_bin >= opts_.beta_bins){ + if(static_cast<int>(beta_bin) >= opts_.beta_bins){ throw std::runtime_error("Cannot get count for invalid beta bin!"); } - if(gamma_bin >= opts_.gamma_bins){ + if(static_cast<int>(gamma_bin) >= 
opts_.gamma_bins){ throw std::runtime_error("Cannot get count for invalid gamma bin!"); } diff --git a/src/impl/hbond_impl.cc b/src/impl/hbond_impl.cc index 64dfe29260478b438b5a1c40cd4257ba947ac2ec..20032078318dd218d2373e86bf286cab51bb453b 100644 --- a/src/impl/hbond_impl.cc +++ b/src/impl/hbond_impl.cc @@ -152,7 +152,6 @@ bool HBondPotentialImpl::VisitResidue(const ost::mol::ResidueHandle& res){ std::vector<HBondSpatialOrganizerItem> in_reach = env_.FindWithin(ca_pos, 9.0); int num = res.GetNumber().GetNum(); String chain_name = res.GetChain().GetName(); - int diff; if(!is_proline){ //current residue as donor diff --git a/src/impl/torsion_impl.cc b/src/impl/torsion_impl.cc index 659425212c5932c6e578601f557a0411d05ce0f7..343e39f8b5bcc38385b5a8857d5af9161e209bb9 100644 --- a/src/impl/torsion_impl.cc +++ b/src/impl/torsion_impl.cc @@ -79,11 +79,11 @@ String TorsionOpts::FindStat(const std::vector<String>& residues){ //iterate over all three positions for(int i=0;i<3;++i){ //"all" matches every residue - if(single_ids[i].str().find("all")!=(-1)){ + if(single_ids[i].str().find("all")!=std::string::npos){ continue; } //check, whether currrent residue matches current id position - if(single_ids[i].str().find(residues[i])==-1){ + if(single_ids[i].str().find(residues[i])==std::string::npos){ match=false; break; } diff --git a/src/impl/torsion_impl.hh b/src/impl/torsion_impl.hh index 655584ddb75185dbf0a81740c2fb9679e3fe5663..d26a73920a87410f47efb001f56fa1e05599398c 100644 --- a/src/impl/torsion_impl.hh +++ b/src/impl/torsion_impl.hh @@ -35,7 +35,7 @@ public: void Serialize(DS& ds){ if(ds.IsSource()){ - int num_of_groups; + int num_of_groups = 0; ds & num_of_groups; for(int i=0;i<6;++i){ num_of_bins.push_back(0); diff --git a/src/interaction_statistics.cc b/src/interaction_statistics.cc index f422896a43cec56d32ccafd03015be30d204a669..1fccb26f83c0aac47cf8d80e096dde83d7093669 100644 --- a/src/interaction_statistics.cc +++ b/src/interaction_statistics.cc @@ -74,7 +74,7 @@ Real 
InteractionStatistic::GetCount(atom::ChemType a, atom::ChemType b, uint dis throw std::runtime_error("Cannot get count for invalid ChemType!"); } - if(dist_bin >= opts_.number_of_bins){ + if(static_cast<int>(dist_bin) >= opts_.number_of_bins){ throw std::runtime_error("Cannot get count for invalid distance bin!"); } diff --git a/src/multi_classifier.hh b/src/multi_classifier.hh index 86bbc4bd543705b5d5bbee4a03432a44ce409c6d..f0c9770bd63ca781edab4e19c290311858b783c3 100644 --- a/src/multi_classifier.hh +++ b/src/multi_classifier.hh @@ -429,7 +429,7 @@ public: uint32_t total=this->CalculateNumberOfBuckets(); buckets_.resize(total); - for (int i=0; i<total; ++i) + for (uint32_t i=0; i<total; ++i) buckets_[i] = rhs.buckets_.at(i); //memcpy(&buckets_.front(), &rhs.buckets_.front(), sizeof(V)*total); } diff --git a/src/packing_statistics.cc b/src/packing_statistics.cc index ef704231141212fb07b3a9f959791176947444e3..41e5979463a7913e192290aaffdcdcc690717d82 100644 --- a/src/packing_statistics.cc +++ b/src/packing_statistics.cc @@ -91,7 +91,7 @@ Real PackingStatistic::GetCount(atom::ChemType a, uint bin) const{ throw std::runtime_error("Cannot get count for invalid atom!"); } - if(bin > opts_.max_counts){ + if(static_cast<int>(bin) > opts_.max_counts){ throw std::runtime_error("Cannot get count for invalid bin!"); } @@ -100,7 +100,7 @@ Real PackingStatistic::GetCount(atom::ChemType a, uint bin) const{ Real PackingStatistic::GetCount(uint bin) const{ - if(bin > opts_.max_counts){ + if(static_cast<int>(bin) > opts_.max_counts){ throw std::runtime_error("Cannot get count for invalid bin!"); } diff --git a/src/reduced_potential.cc b/src/reduced_potential.cc index ee155913c9619cd076e9b7687fb5a88d7ac78b73..96d8a6712fd0ea773b40654bb874c23e87d7166c 100644 --- a/src/reduced_potential.cc +++ b/src/reduced_potential.cc @@ -90,13 +90,13 @@ void ReducedPotential::Fill(ReducedStatisticPtr stats, const String& reference_s throw io::IOException(ss.str()); } - for (size_t i=0; 
i<ost::conop::XXX; ++i) { - for (size_t j=0; j<ost::conop::XXX; ++j) { + for (int i=0; i<ost::conop::XXX; ++i) { + for (int j=0; j<ost::conop::XXX; ++j) { Real sequence_count = stats->GetCount(ost::conop::AminoAcid(i), ost::conop::AminoAcid(j)); - for (size_t k=0; k<opts_.num_dist_bins; ++k) { - for (size_t l=0; l<opts_.num_angle_bins; ++l) { - for (size_t m=0; m<opts_.num_angle_bins; ++m) { - for (size_t n=0; n<opts_.num_dihedral_bins; ++n) { + for (int k=0; k<opts_.num_dist_bins; ++k) { + for (int l=0; l<opts_.num_angle_bins; ++l) { + for (int m=0; m<opts_.num_angle_bins; ++m) { + for (int n=0; n<opts_.num_dihedral_bins; ++n) { Real propensity = 0.0; Real sequence_conformation_count = stats->GetCount(ost::conop::AminoAcid(i), ost::conop::AminoAcid(j), k, l, m, n); if(sequence_count>0 && reference[k][l][m][n]>0){ diff --git a/src/reduced_statistics.cc b/src/reduced_statistics.cc index ea00a33213539b56139366e8387d76743e5c4918..5e692ef2faeed193985f85c6e5d060d0e23defe1 100644 --- a/src/reduced_statistics.cc +++ b/src/reduced_statistics.cc @@ -65,12 +65,12 @@ Real ReducedStatistic::GetTotalCount() const{ typedef ReducedHistogram::IndexType Index; Real count=0.0; - for (size_t i=0; i<ost::conop::XXX; ++i) { - for (size_t j=0; j<ost::conop::XXX; ++j) { - for (size_t k=0; k<opts_.num_dist_bins; ++k) { - for (size_t l=0; l<opts_.num_angle_bins; ++l) { - for (size_t m=0; m<opts_.num_angle_bins; ++m) { - for (size_t n=0; n<opts_.num_dihedral_bins; ++n) { + for (int i=0; i<ost::conop::XXX; ++i) { + for (int j=0; j<ost::conop::XXX; ++j) { + for (int k=0; k<opts_.num_dist_bins; ++k) { + for (int l=0; l<opts_.num_angle_bins; ++l) { + for (int m=0; m<opts_.num_angle_bins; ++m) { + for (int n=0; n<opts_.num_dihedral_bins; ++n) { count+=histo_.Get(Index(i, j, k, l, m, n)); } } @@ -89,10 +89,10 @@ Real ReducedStatistic::GetCount(ost::conop::AminoAcid aa_one, ost::conop::AminoA typedef ReducedHistogram::IndexType Index; Real count=0.0; - for (size_t k=0; k<opts_.num_dist_bins; ++k) 
{ - for (size_t l=0; l<opts_.num_angle_bins; ++l) { - for (size_t m=0; m<opts_.num_angle_bins; ++m) { - for (size_t n=0; n<opts_.num_dihedral_bins; ++n) { + for (int k=0; k<opts_.num_dist_bins; ++k) { + for (int l=0; l<opts_.num_angle_bins; ++l) { + for (int m=0; m<opts_.num_angle_bins; ++m) { + for (int n=0; n<opts_.num_dihedral_bins; ++n) { count+=histo_.Get(Index(aa_one, aa_two, k, l, m, n)); } } @@ -108,19 +108,19 @@ Real ReducedStatistic::GetCount(ost::conop::AminoAcid aa_one, ost::conop::AminoA throw std::runtime_error("Cannot get count of invalid amino acid!"); } - if(dist_bin >= opts_.num_dist_bins){ + if(static_cast<int>(dist_bin) >= opts_.num_dist_bins){ throw std::runtime_error("Cannot get count of invalid distance bin!"); } - if(alpha_bin >= opts_.num_angle_bins){ + if(static_cast<int>(alpha_bin) >= opts_.num_angle_bins){ throw std::runtime_error("Cannot get count of invalid alpha bin!"); } - if(beta_bin >= opts_.num_angle_bins){ + if(static_cast<int>(beta_bin) >= opts_.num_angle_bins){ throw std::runtime_error("Cannot get count of invalid beta bin!"); } - if(gamma_bin >= opts_.num_dihedral_bins){ + if(static_cast<int>(gamma_bin) >= opts_.num_dihedral_bins){ throw std::runtime_error("Cannot get count of invalid gamma bin!"); } @@ -130,19 +130,19 @@ Real ReducedStatistic::GetCount(ost::conop::AminoAcid aa_one, ost::conop::AminoA Real ReducedStatistic::GetCount(uint dist_bin, uint alpha_bin, uint beta_bin, uint gamma_bin) const{ - if(dist_bin >= opts_.num_dist_bins){ + if(static_cast<int>(dist_bin) >= opts_.num_dist_bins){ throw std::runtime_error("Cannot get count of invalid distance bin!"); } - if(alpha_bin >= opts_.num_angle_bins){ + if(static_cast<int>(alpha_bin) >= opts_.num_angle_bins){ throw std::runtime_error("Cannot get count of invalid alpha bin!"); } - if(beta_bin >= opts_.num_angle_bins){ + if(static_cast<int>(beta_bin) >= opts_.num_angle_bins){ throw std::runtime_error("Cannot get count of invalid beta bin!"); } - if(gamma_bin >= 
opts_.num_dihedral_bins){ + if(static_cast<int>(gamma_bin) >= opts_.num_dihedral_bins){ throw std::runtime_error("Cannot get count of invalid gamma bin!"); } diff --git a/src/spherical_smoother.cc b/src/spherical_smoother.cc index a48471eb04b4a098a821da49335da56f5bdfbd8f..4d9e28194d01ea32057e9447d35320c0a87c0b22 100644 --- a/src/spherical_smoother.cc +++ b/src/spherical_smoother.cc @@ -65,7 +65,7 @@ std::vector<Real> SphericalSmoother::Smooth(std::vector<Real>& values){ //fill the values in the visitor pattern int j; - for(int i=0;i<values.size();++i){ + for(size_t i=0;i<values.size();++i){ j=0; for(std::vector<int>::iterator it=value_distributor_[i].begin();it!=value_distributor_[i].end();++it,++j){ visitor_pattern_[i][j].second=values[*it]; @@ -76,7 +76,7 @@ std::vector<Real> SphericalSmoother::Smooth(std::vector<Real>& values){ std::vector<Real> result; - for(int i=0;i<values.size();++i){ + for(size_t i=0;i<values.size();++i){ //check for nan if(values[i]!=values[i]){ result.push_back(std::numeric_limits<Real>::quiet_NaN()); diff --git a/src/torsion_potential.cc b/src/torsion_potential.cc index 98919efc24ce45ea1d153c5ef17508c95a718090..b1f1459d77d14d3aaa1c47653ab48e9cf77b54dc 100644 --- a/src/torsion_potential.cc +++ b/src/torsion_potential.cc @@ -26,7 +26,7 @@ TorsionPotentialPtr TorsionPotential::Create(TorsionStatisticPtr& stat, Real sig p->opts_=stat->GetOpts(); p->opts_.sigma=sigma; - for(int i=0;i<p->opts_.group_identifier.size();++i){ + for(size_t i=0;i<p->opts_.group_identifier.size();++i){ p->energies_[p->opts_.group_identifier[i]]=TorsionEnergies(0.0, ContinuousClassifier(p->opts_.num_of_bins[0], -M_PI, M_PI), ContinuousClassifier(p->opts_.num_of_bins[1], -M_PI, M_PI), @@ -108,7 +108,6 @@ Real TorsionPotential::GetEnergy(const String& group_id, std::vector<Real>& angl count_=0; std::vector<Real> new_angles(angles); - Real new_angle; if(energies_.find(group_id) == energies_.end()){ return std::numeric_limits<Real>::quiet_NaN(); diff --git 
a/src/torsion_statistics.cc b/src/torsion_statistics.cc index eb03838ad512a78354200a6f7734bf07d1c67029..25c3eec6beca66d0ee15e91b5d0eb6b0d906a420 100644 --- a/src/torsion_statistics.cc +++ b/src/torsion_statistics.cc @@ -19,7 +19,7 @@ namespace qmean{ TorsionStatistic::TorsionStatistic(std::vector<String>& gi, std::vector<int>& nob){ - for(int i=0;i<gi.size();++i){ + for(size_t i=0;i<gi.size();++i){ histos_[gi[i]]=TorsionHistogram(ContinuousClassifier(nob[0], -M_PI, M_PI), ContinuousClassifier(nob[1], -M_PI, M_PI), ContinuousClassifier(nob[2], -M_PI, M_PI), diff --git a/src/trg_tpl_similarity.cc b/src/trg_tpl_similarity.cc new file mode 100644 index 0000000000000000000000000000000000000000..512e76f61924806e189a89a2fb10c5f5fafa0378 --- /dev/null +++ b/src/trg_tpl_similarity.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and +// Biozentrum - University of Basel +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <cmath>
+#include <cstddef>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include <ost/geom/vecmat3_op.hh>
+#include <qmean/trg_tpl_similarity.hh>
+
+namespace qmean {
+
+namespace {
+
+// Input check shared by the constructor and GetSimilarity: both lists must
+// have the same length and every residue number must lie in [1, max_resnum].
+// Throws std::runtime_error otherwise.
+void CheckInput(const std::vector<int>& residue_numbers,
+                const geom::Vec3List& ca_pos,
+                int max_resnum) {
+  if(ca_pos.size() != residue_numbers.size()) {
+    throw std::runtime_error("ca_pos and residue_numbers inconsistent in size");
+  }
+  for(int resnum : residue_numbers) {
+    if(resnum < 1 || resnum > max_resnum) {
+      throw std::runtime_error("invalid residue number observed");
+    }
+  }
+}
+
+} // anonymous namespace
+
+
+// Builds the reference set of CA-CA distances.
+//
+// seqres:             reference sequence; only its length is used here, to
+//                     validate residue_numbers.
+// residue_numbers:    one-based position in seqres for every entry of ca_pos.
+// ca_pos:             CA positions, one per entry of residue_numbers.
+// distance_threshold: only pairs with a distance strictly below this value
+//                     are recorded.
+//
+// For every pair (i, j) with i < j in input order that passes the threshold,
+// ref_distances[i] receives (zero-based seqres index of j, distance);
+// n_interactions is the total number of recorded pairs.
+//
+// Throws std::runtime_error if ca_pos and residue_numbers differ in size or
+// if a residue number is outside [1, seqres.size()].
+TrgTplSimilarity::TrgTplSimilarity(const String& seqres,
+                                   const std::vector<int>& residue_numbers,
+                                   const geom::Vec3List& ca_pos,
+                                   Real distance_threshold) {
+
+  CheckInput(residue_numbers, ca_pos, static_cast<int>(seqres.size()));
+
+  ref_seq = seqres;
+
+  // residue numbers are one-based, the stored indices are zero-based
+  ref_indices.reserve(residue_numbers.size());
+  for(int resnum : residue_numbers) {
+    ref_indices.push_back(resnum - 1);
+  }
+
+  n_interactions = 0;
+  ref_distances.reserve(ca_pos.size());
+  for(std::size_t i = 0; i < ca_pos.size(); ++i) {
+    std::vector<std::pair<int, Real> > distances;
+    // j starts at i + 1, so every pair is recorded exactly once
+    for(std::size_t j = i + 1; j < ca_pos.size(); ++j) {
+      Real d = geom::Distance(ca_pos[i], ca_pos[j]);
+      if(d < distance_threshold) {
+        distances.push_back(std::make_pair(ref_indices[j], d));
+      }
+    }
+    n_interactions += static_cast<int>(distances.size());
+    ref_distances.push_back(distances);
+  }
+}
+
+
+// Scores how well the recorded reference distances are reproduced by ca_pos.
+//
+// residue_numbers: one-based position in the reference sequence for every
+//                  entry of ca_pos. If a number occurs more than once, the
+//                  last position given for it is used.
+// ca_pos:          CA positions, one per entry of residue_numbers.
+//
+// A reference pair is counted for a tolerance t in {0.5, 1, 2, 4} if both of
+// its residues are present in residue_numbers and the distance between them
+// differs from the reference distance by strictly less than t. The return
+// value is the mean over the four tolerances of (count / n_interactions), so
+// reference pairs with a missing residue lower the score. If no reference
+// pair was recorded, 0 is returned.
+//
+// Throws std::runtime_error if ca_pos and residue_numbers differ in size or
+// if a residue number is outside [1, ref_seq.size()].
+Real TrgTplSimilarity::GetSimilarity(const std::vector<int>& residue_numbers,
+                                     const geom::Vec3List& ca_pos) {
+
+  CheckInput(residue_numbers, ca_pos, static_cast<int>(ref_seq.size()));
+
+  // Without recorded reference pairs the normalisation below would evaluate
+  // 0/0 and hand NaN to the caller; report zero similarity instead.
+  if(n_interactions <= 0) {
+    return Real(0.0);
+  }
+
+  // Map the input onto reference sequence positions; positions that are not
+  // covered by residue_numbers stay flagged as invalid.
+  std::vector<bool> valid_positions(ref_seq.size(), false);
+  std::vector<geom::Vec3> positions(ref_seq.size(), geom::Vec3());
+  for(std::size_t i = 0; i < residue_numbers.size(); ++i) {
+    valid_positions[residue_numbers[i]-1] = true;
+    positions[residue_numbers[i]-1] = ca_pos[i];
+  }
+
+  int n_05 = 0;
+  int n_1 = 0;
+  int n_2 = 0;
+  int n_4 = 0;
+  for(std::size_t i = 0; i < ref_distances.size(); ++i) {
+    const int ref_idx = ref_indices[i];
+    if(!valid_positions[ref_idx]) {
+      continue;
+    }
+    for(auto it = ref_distances[i].begin(); it != ref_distances[i].end(); ++it) {
+      if(valid_positions[it->first]) {
+        Real d = geom::Distance(positions[ref_idx], positions[it->first]);
+        Real d_diff = std::abs(d - it->second);
+        // a comparison yields 0 or 1, so each line increments conditionally
+        n_4 += int((d_diff < Real(4.0)));
+        n_2 += int((d_diff < Real(2.0)));
+        n_1 += int((d_diff < Real(1.0)));
+        n_05 += int((d_diff < Real(0.5)));
+      }
+    }
+  }
+
+  const Real n_total = static_cast<Real>(n_interactions);
+  return Real(0.25) * (static_cast<Real>(n_4) / n_total +
+                       static_cast<Real>(n_2) / n_total +
+                       static_cast<Real>(n_1) / n_total +
+                       static_cast<Real>(n_05) / n_total);
+}
+
+}
+
diff --git a/src/trg_tpl_similarity.hh b/src/trg_tpl_similarity.hh
new file mode 100644
index 0000000000000000000000000000000000000000..6a886728112831dcc2bf47da8d599a5af6ba32ca
--- /dev/null
+++ b/src/trg_tpl_similarity.hh
@@ -0,0 +1,50 @@
+// Copyright (c) 2013-2020, SIB - Swiss Institute of Bioinformatics and
+// Biozentrum - University of Basel
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TRG_TPL_SIMILARITY_HH
+#define TRG_TPL_SIMILARITY_HH
+
+#include <utility>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+#include <ost/geom/vec3.hh>
+
+
+namespace qmean {
+
+struct TrgTplSimilarity;
+typedef boost::shared_ptr<TrgTplSimilarity> TrgTplSimilarityPtr;
+
+
+// Reference set of pairwise CA distances that other sets of CA positions,
+// mapped onto the same sequence, can be scored against with GetSimilarity.
+struct TrgTplSimilarity{
+
+// Records every pair of positions in ca_pos whose distance is below
+// distance_threshold.
+//
+// seqres:             reference sequence; residue numbers refer to it.
+// residue_numbers:    one-based position in seqres for every entry of ca_pos.
+// ca_pos:             CA positions, one per entry of residue_numbers.
+// distance_threshold: upper bound (exclusive) for a pair to be recorded;
+//                     presumably in Angstrom - confirm against the callers.
+//
+// Throws std::runtime_error if ca_pos and residue_numbers differ in size or
+// if a residue number is outside [1, seqres.size()].
+TrgTplSimilarity(const String& seqres,
+                 const std::vector<int>& residue_numbers,
+                 const geom::Vec3List& ca_pos,
+                 Real distance_threshold = 15.0);
+
+// Compares the recorded reference distances with the distances found in
+// ca_pos. For each tolerance in {0.5, 1, 2, 4} the recorded pairs are counted
+// for which both residues are present in residue_numbers and the distance
+// deviates from the reference by less than the tolerance; each count is
+// divided by n_interactions and the four ratios are averaged.
+//
+// residue_numbers: one-based position in ref_seq for every entry of ca_pos.
+// ca_pos:          CA positions, one per entry of residue_numbers.
+//
+// Throws std::runtime_error if ca_pos and residue_numbers differ in size or
+// if a residue number is outside [1, ref_seq.size()].
+Real GetSimilarity(const std::vector<int>& residue_numbers,
+                   const geom::Vec3List& ca_pos);
+
+// sequence passed to the constructor
+String ref_seq;
+// ref_distances[i] holds, for the i-th constructor input position, one
+// (zero-based seqres index, distance) entry per later input position that
+// lies within the distance threshold
+std::vector<std::vector<std::pair<int, Real> > > ref_distances;
+// zero-based seqres index of every constructor input position
+std::vector<int> ref_indices;
+// total number of entries over all of ref_distances
+int n_interactions;
+};
+
+
+}
+
+#endif
+