#include <sstream>
#include <iostream>
#include <fstream>
#include <string>
#include <iomanip>
#include <cmath>
#include <complex>
#include <ctime>
#include <stdlib.h>
#include <float.h>
#include <random>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_min.h>
#include <gsl/gsl_multimin.h>


#define PI  (3.14159265359)
#define TIMING 0

using namespace std;

#if TIMING == 1
#include <chrono>
using namespace std::chrono;
#endif

/*
 * define a struct
 * that contains
 * the file information:
 * 	- file name length in characters
 * 	- file name
 * 	- length of file in lines
 */
struct input_file
{
	const char* 	filename;
	int		nbr_lines;
};


/* 
 * define a struct 
 * that contains 
 * the sequence information:
 * 	- length
 * 	- amount of IPs
 * 	- frequency prior
 * 	- protein concentration 
 * 		(multiple of c_0)
 * 	- actual sequence encoded 
 * 		in numbers from 0 to 3
 */
struct sequence 
{
	int	length;
	int	amt;
	double 	freq;
	int	*seq;
}; 


/*
 * defines a struct
 * that contains 
 * the current state of the parameters
 * subject to the EM algorithm
 */
struct state 
{
	double	E0;
	double	*wai;
};


//include parameters first
#include "params.h"

static struct sequence* sequence_array;			//global array of sequence-structs
static bool masked_array[int(pow(4,RBP_BIND_LEN))];	//global array of masked sequences

static int tot_nbr_sequences;

//include headers with code
#include "inout.h"
#include "update.h"




    
/*
 * main method
 */
int main(int argc,char *argv[])
{
	//declarations
	int i,j;
	double tmp,llhood;
	state 	curr_state,candidate;
	
	//allocations
	curr_state.wai 		= new double[4*RBP_BIND_LEN];
	candidate.wai 		= new double[4*RBP_BIND_LEN];
	
	//count total number of sequences
	tot_nbr_sequences = 0;
	for(i=0;i<NBR_FILES;i++)
		tot_nbr_sequences += input_files_array[i].nbr_lines;
	
	//initialize array of structs "sequence"
	sequence_array = new sequence[tot_nbr_sequences];
	for(i=0;i<tot_nbr_sequences;i++)
		sequence_array[i].seq = new int[MAX_SEQ_LEN];
	
	
	read_sequences();		//read sequences
	read_mask(mask_file);		//read masked sequences
	
	
	srand(atoi(argv[argc-1]));	//set random seed to thermal-noise generated random number
	gsl_set_error_handler_off();	//ignore gsl errors
	
#if TIMING == 1
	auto start = high_resolution_clock::now();
#endif
	
	//initialize matrix either randomly or with sequence
	if(argc == 3)
	{
		initialize_matrix(&curr_state);
	}
	else if (argc == 4)
	{
		initialize_matrix(&curr_state,argv[argc-2]);
	}
	else
	{
		printf("Wrong input, abort process.\n");
		return 0;
	}
	renormalize(&curr_state);
	
	//save copy
	llhood = log_likelihood(&curr_state);
	copy_state(&curr_state,&candidate);
	
	//give initial output
	print_state(&curr_state);
	printf("%f\t0\n\n",llhood);	//has to succeed print_state
	
	
	//iterate EM algorithm
	i = 0;
	do
	{
		i++;
		
		E_step(&candidate); 
		M_step(&candidate);
		
		renormalize(&candidate);
		
		
	  //check convergence
		if(metric_difference(&candidate,&curr_state) > precision || i < 5)
			copy_state(&candidate,&curr_state);	//continue optimizing
		else
		{
			copy_state(&candidate,&curr_state);
			break;	//maximum reached
		}
   }
   while(i < 200);
	
	
	//give final output
	print_state(&curr_state);
	printf("%f\t%d\n\n",log_likelihood(&curr_state),i);	//has to succeed print_state
	
#if TIMING == 1
	auto stop = high_resolution_clock::now();
	
	auto duration = duration_cast<microseconds>(stop - start);
	cout << duration.count()/3600000000.0 << "h" << endl;
#endif
	
	//collect garbage
	delete[] curr_state.wai;
	delete[] candidate.wai;
	for(i=0;i<tot_nbr_sequences;i++)
		delete[] sequence_array[i].seq;
	delete[] sequence_array;
	
	return 0;
}