#include <sstream> #include <iostream> #include <fstream> #include <string> #include <iomanip> #include <cmath> #include <complex> #include <ctime> #include <stdlib.h> #include <float.h> #include <random> #include <gsl/gsl_linalg.h> #include <gsl/gsl_errno.h> #include <gsl/gsl_math.h> #include <gsl/gsl_min.h> #include <gsl/gsl_multimin.h> #define PI (3.14159265359) #define TIMING 0 using namespace std; #if TIMING == 1 #include <chrono> using namespace std::chrono; #endif /* * define a struct * that contains * the file information: * - file name length in characters * - file name * - length of file in lines */ struct input_file { const char* filename; int nbr_lines; }; /* * define a struct * that contains * the sequence information: * - length * - amount of IPs * - frequency prior * - protein concentration * (multiple of c_0) * - actual sequence encoded * in numbers from 0 to 3 */ struct sequence { int length; int amt; double freq; int *seq; }; /* * defines a struct * that contains * the current state of the parameters * subject to the EM algorithm */ struct state { double E0; double *wai; }; //include parameters first #include "params.h" static struct sequence* sequence_array; //global array of sequence-structs static bool masked_array[int(pow(4,RBP_BIND_LEN))]; //global array of masked sequences static int tot_nbr_sequences; //include headers with code #include "inout.h" #include "update.h" /* * main method */ int main(int argc,char *argv[]) { //declarations int i,j; double tmp,llhood; state curr_state,candidate; //allocations curr_state.wai = new double[4*RBP_BIND_LEN]; candidate.wai = new double[4*RBP_BIND_LEN]; //count total number of sequences tot_nbr_sequences = 0; for(i=0;i<NBR_FILES;i++) tot_nbr_sequences += input_files_array[i].nbr_lines; //initialize array of structs "sequence" sequence_array = new sequence[tot_nbr_sequences]; for(i=0;i<tot_nbr_sequences;i++) sequence_array[i].seq = new int[MAX_SEQ_LEN]; read_sequences(); //read sequences read_mask(mask_file); //read masked sequences srand(atoi(argv[argc-1])); //set random seed to thermal-noise generated random number gsl_set_error_handler_off(); //ignore gsl errors #if TIMING == 1 auto start = high_resolution_clock::now(); #endif //initialize matrix either randomly or with sequence if(argc == 3) { initialize_matrix(&curr_state); } else if (argc == 4) { initialize_matrix(&curr_state,argv[argc-2]); } else { printf("Wrong input, abort process.\n"); return 0; } renormalize(&curr_state); //save copy llhood = log_likelihood(&curr_state); copy_state(&curr_state,&candidate); //give initial output print_state(&curr_state); printf("%f\t0\n\n",llhood); //has to succeed print_state //iterate EM algorithm i = 0; do { i++; E_step(&candidate); M_step(&candidate); renormalize(&candidate); //check convergence if(metric_difference(&candidate,&curr_state) > precision || i < 5) copy_state(&candidate,&curr_state); //continue optimizing else { copy_state(&candidate,&curr_state); break; //maximum reached } } while(i < 200); //give final output print_state(&curr_state); printf("%f\t%d\n\n",log_likelihood(&curr_state),i); //has to succeed print_state #if TIMING == 1 auto stop = high_resolution_clock::now(); auto duration = duration_cast<microseconds>(stop - start); cout << duration.count()/3600000000.0 << "h" << endl; #endif //collect garbage delete[] curr_state.wai; delete[] candidate.wai; for(i=0;i<tot_nbr_sequences;i++) delete[] sequence_array[i].seq; delete[] sequence_array; return 0; }