diff --git a/Treemmer.py b/Treemmer.py index 123b3ee0becff8d911bcb03ffc0a9ab7a6c6c332..6ce4b454659f21e3aa13a76b6c0ac99a73c00cb9 100644 --- a/Treemmer.py +++ b/Treemmer.py @@ -70,7 +70,7 @@ def find_N(t,leaf): - if (flag == 0): #### this means that the leaf has not neighbors at one node of dist + if (flag == 0): #### this means that the leaf has no neighbors at one node of dist parent=parent.up #### therefore I climb the tree down towards the root of one more step and look for leaves multi_flag=0 if arguments.verbose==3: @@ -78,7 +78,7 @@ def find_N(t,leaf): print "gran parent" print parent temp_dlist={} - for n in range(0,len(parent.get_children())): #this for loop start from grean parent and climb up max one nodes, if it finds leaves calculate the distances, + for n in range(0,len(parent.get_children())): #this for loop start from gran parent and climb up max one nodes, if it finds leaves calculate the distances, if parent.is_root(): break if (parent.children[n].is_leaf()): @@ -108,7 +108,7 @@ def find_leaf_to_prune(dlist): #parse the list with all neighbor pairs and d d_min.update({k:v}) pair= str(random.choice(list(d_min))) - if arguments.random: + if arguments.prune_random: pair= str(random.choice(list(dlist))) pair=pair.split(",") leaf1 = t.search_nodes(name=pair[0])[0] @@ -213,12 +213,6 @@ def write_stop(t,output1,output2): F=open(output2,"w") F.write("\n".join(list_names)) F.close() -########################################## PLOT tree length decay ####################### - - - - - ###### SOFTWARE STARTS @@ -234,19 +228,19 @@ parser.add_argument('-X','--stop_at_X_leaves', metavar='0-n_leaves', default='0' parser.add_argument('-RTL','--stop_at_RTL', metavar='0-1', default='0', help='stop pruning when the relative tree length falls below RTL', type =restricted_float,nargs='?') parser.add_argument('-r','--resolution', metavar='INT', default=1,help='number of leaves to prune at each iteration (default: 1)',type =int, nargs='?') parser.add_argument('-p','--solve_polytomies',help='resolve polytomies at random (default: FALSE)',action='store_true',default =False) -parser.add_argument('-pr','--random',help='prune random leaves (default: FALSE)',action='store_true',default =False) -parser.add_argument('-lp','--leaves_pair', metavar='0,1,2', default=2,help='After the pair of leaves with the smallest distance is dentified Treemmer prunes: 0: the longest leaf\n1: the shortest leaf\n2: random choice (default)',type =int, nargs='?') +parser.add_argument('-pr','--prune_random',help='prune random leaves (default: FALSE)',action='store_true',default =False) +parser.add_argument('-lp','--leaves_pair', metavar='0,1,2', default=2,help='After the pair of leaves with the smallest distance is dentified Treemmer prunes: 0: the longest leaf\n1: the shortest leaf\n2: random choice (default: 2)',type =int, nargs='?') parser.add_argument('-np','--no_plot',help='do not load matplotlib and plot (default: FALSE)',action='store_true',default =False) parser.add_argument('-fp','--fine_plot',help='when --resolution > 1, plot RTL vs n leaves every time a leaf is pruned (default: FALSE => plot every X leaves (X = -r))',action='store_true',default =False) parser.add_argument('-c','--cpu', metavar='INT', default=1,help='number of cpu to use (default: 1)',type =int, nargs='?') -parser.add_argument('-v' ,'--verbose', metavar='0,1,2', default='0', help='0: silent, 1: show progress, 2: print tree at each iteration, 3: only for testing (findN), 4: only for testing (prune_t) (default: 1)', type =int, nargs='?',choices=[0,1,2,3,4]) +parser.add_argument('-v' ,'--verbose', metavar='0,1,2', default='1', help='0: silent, 1: show progress, 2: print tree at each iteration, 3: only for testing (findN), 4: only for testing (prune_t) (default: 1)', type =int, nargs='?',choices=[0,1,2,3,4]) arguments = parser.parse_args() if ((arguments.stop_at_RTL > 0) and (arguments.stop_at_X_leaves > 0)): - raise argparse.ArgumentTypeError("-X and -RTL are mutually exclusive arguments") + raise argparse.ArgumentTypeError("-X and -RTL are mutually exclusive options") @@ -262,14 +256,15 @@ ori_length = len(t) if arguments.solve_polytomies: t.resolve_polytomy() -if arguments.verbose > 0: +if arguments.verbose > 0: # print progress on standard output print "N of taxa in tree is : "+ str(len(t)) if arguments.solve_polytomies: print "\nPolytomies will be solved at random" else: print "\nPolytomies will be kept" - + if arguments.prune_random: + print "\nA random leaf is pruned at each iteration, you don't really need Treemmer to do this" if arguments.stop_at_X_leaves: print "\nTreemmer will reduce the tree to" + str(arguments.stop_at_X_leaves) + " leaves" else: @@ -283,7 +278,16 @@ if arguments.verbose > 0: x=[] y=[] -while (len(t) > 3): + +output.append ('1 ' + str(len(t))) #append first point to the output with RTL = 1 (before starting pruning)################################ + +length=len(t) +x.append(length) +y.append(1) + + + +while (len(t) > 3): #################### Main loop ################################ counter = counter +1 leaves = t.get_leaves() DLIST={} @@ -291,7 +295,7 @@ while (len(t) > 3): if arguments.verbose > 0: print "\niter " + str(counter) if arguments.verbose > 1: - print "calculationg distances" + print "calculating distances" DLIST = Parallel(n_jobs=arguments.cpu)(delayed(parallel_loop)(i) for i in range(0,arguments.cpu)) result = {} @@ -307,16 +311,16 @@ while (len(t) > 3): for r in range (1,arguments.resolution+1): - if (len(DLIST)==0): + if ((len(DLIST)==0) or (len(t)<4)): break (leaf_to_p,dist) = find_leaf_to_prune(DLIST) leaf_to_prune = t.search_nodes(name=leaf_to_p)[0] t = prune_t(leaf_to_p,t) - TL=TL-dist + TL= calculate_TL(t) DLIST=prune_dist_matrix(DLIST,leaf_to_p) rel_TL=TL/TOT_TL - + ################################# OUTPUT ########################################################## if (arguments.fine_plot): # plot point in rtld after every leaf independently of -r output.append (str(rel_TL) + ' ' + str(len(t))) @@ -340,7 +344,7 @@ while (len(t) > 3): stop=1 break - if arguments.verbose > 1: + if arguments.verbose > 1: # print progress on standard output print "\n ITERATION RESOLUTION: " + str(r) print "leaf to prune:\n" + str(leaf_to_p) + " " + str(dist) @@ -351,9 +355,10 @@ while (len(t) > 3): print DLIST if (stop ==1): + print "\nRTL : " + str(rel_TL) + " N_seq: " +str(len(t)) break - if not (arguments.fine_plot): + if not (arguments.fine_plot): # normal plot (with -fp = FALSE) output.append (str(rel_TL) + ' ' + str(len(t))) length=len(t) x.append(length) @@ -362,15 +367,15 @@ while (len(t) > 3): if arguments.verbose==1: print "\nRTL : " + str(rel_TL) + " N_seq: " +str(len(t)) - + -if stop == 0: +if stop == 0: # create file for plot of rltd F=open(arguments.INFILE+"_res_"+ str(arguments.resolution) + "_LD","w") F.write("\n".join(output)) - +################################################# make plot directly ########################################################## if not arguments.no_plot: import numpy as np import matplotlib.pyplot as plt @@ -378,12 +383,12 @@ if not arguments.no_plot: ax = plt.figure().gca() ax.xaxis.set_major_locator(MaxNLocator(integer=True)) plt.scatter(x, y, s= 2, c= 'black') - plt.xlim(ori_length, 3) - plt.ylim(0,1.1) + plt.xlim(ori_length,0) + plt.ylim(-0.02,1.02) plt.xlabel('Number of leaves') plt.ylabel('Relative tree length') - plt.savefig(arguments.INFILE+'_res_'+ str(arguments.resolution)+'_TLD.png') - + #plt.savefig(arguments.INFILE+'_res_'+ str(arguments.resolution)+'_TLD.png') + plt.savefig(arguments.INFILE+'_res_'+ str(arguments.resolution)+'_TLD.pdf')