diff --git a/Treemmer.py b/Treemmer.py index 73375c7b6685e1ec7cba2952e2db32cb2184fae3..049e15654d8f37ae978c256f0c5604845275d123 100644 --- a/Treemmer.py +++ b/Treemmer.py @@ -10,8 +10,6 @@ import argparse - - ############################################################ define arg type float 0 < X > 1 ############################################################### def restricted_float(x): @@ -221,7 +219,7 @@ def write_stop(t,output1,output2): -###### SOFTWARE START +###### SOFTWARE STARTS @@ -230,14 +228,15 @@ def write_stop(t,output1,output2): parser = argparse.ArgumentParser() parser.add_argument('INFILE',type=str,help='path to the newick tree') -parser.add_argument('-r','--resolution', metavar='INT', default=1,help='number of leaves top prune at each iteration (default: 1)',type =int, nargs='?') +parser.add_argument('-X','--stop_at_X_leaves', metavar='0-n_leaves', default='0', help='stop pruning when the number of leaves = X', type =int, nargs='?') +parser.add_argument('-RTL','--stop_at_RTL', metavar='0-1', default='0', help='stop pruning when the relative tree length falls below RTL', type =restricted_float,nargs='?') +parser.add_argument('-r','--resolution', metavar='INT', default=1,help='number of leaves to prune at each iteration (default: 1)',type =int, nargs='?') +parser.add_argument('-p','--solve_polytomies',help='resolve polytomies at random (default: FALSE)',action='store_true',default =False) +parser.add_argument('-lp','--leaves_pair', metavar='0,1,2', default=2,help='After the pair of leaves with the smallest distance is dentified Treemmer prunes: 0: the longest leaf\n1: the shortest leaf\n2: random choice (default)',type =int, nargs='?') +parser.add_argument('-np','--no_plot',help='do not load matplotlib and plot (default: FALSE)',action='store_true',default =False) +parser.add_argument('-fp','--fine_plot',help='when --resolution > 1, plot RTL vs n leaves every time a leaf is pruned (default: FALSE => plot every X leaves (X = 
-r))',action='store_true',default =False) parser.add_argument('-c','--cpu', metavar='INT', default=1,help='number of cpu to use (default: 1)',type =int, nargs='?') parser.add_argument('-v' ,'--verbose', metavar='0,1,2', default='0', help='0: silent, 1: show progress, 2: print tree at each iteration, 3: only for testing (findN), 4: only for testing (prune_t) (default: 1)', type =int, nargs='?',choices=[0,1,2,3,4]) -parser.add_argument('-p','--solve_polytomies',help='resolve polytmies at random (default: FALSE)',action='store_true',default =False) -parser.add_argument('-np','--no_plot',help='do not load matplotlib and plot (default: FALSE)',action='store_true',default =False) -parser.add_argument('-X','--stop_at_X_leaves', metavar='0-n_leaves', default='0', help='stop pruning when the number of leaves fall below X (integer)', type =int, nargs='?') -parser.add_argument('-RTL','--stop_at_RTL', metavar='0-1', default='0', help='stop pruning when the relative tree length falls below RTL (decimal number between 0 and 1)', type =restricted_float,nargs='?') -parser.add_argument('-lp','--leaves_pair', metavar='0,1,2', default=2,help='After the pair of leaves with the smallest distance is dentified Treemmer prunes: 0: the longest leaf\n1: the shortest leaf\n2: random choice (default)',type =int, nargs='?') arguments = parser.parse_args() @@ -294,7 +293,7 @@ while (len(t) > 3): DLIST = Parallel(n_jobs=arguments.cpu)(delayed(parallel_loop)(i) for i in range(0,arguments.cpu)) result = {} - for d in DLIST: #when running in parallel DLIST is updated in a weird way, it is a dict of dicts, this for loop merge them all in one + for d in DLIST: #when running in parallel DLIST is updated in a weird way, it is a dict of dicts, this for loop merge them all in one result.update(d) DLIST=result @@ -314,8 +313,15 @@ while (len(t) > 3): TL=TL-dist DLIST=prune_dist_matrix(DLIST,leaf_to_p) rel_TL=TL/TOT_TL + + + if (arguments.fine_plot): # plot point in rtld after every leaf independently of 
-r + output.append (str(rel_TL) + ' ' + str(len(t))) + length=len(t) + x.append(length) + y.append(rel_TL) - if arguments.stop_at_X_leaves: + if arguments.stop_at_X_leaves: # if stop criterion is met (X) ==> output if arguments.stop_at_X_leaves >= len(t): output1=arguments.INFILE+"_trimmed_tree_X_" + str(arguments.stop_at_X_leaves) output2=arguments.INFILE+"_trimmed_list_X_" + str(arguments.stop_at_X_leaves) @@ -323,8 +329,8 @@ while (len(t) > 3): stop=1 break - if arguments.stop_at_RTL: - if arguments.stop_at_RTL >= rel_TL: + if arguments.stop_at_RTL: # if stop criterion is met (RTL) ==> output + if arguments.stop_at_RTL >= rel_TL: output1=arguments.INFILE+"_trimmed_tree_RTL_" + str(arguments.stop_at_RTL) output2=arguments.INFILE+"_trimmed_list_RTL_" + str(arguments.stop_at_RTL) write_stop(t,output1,output2) @@ -344,10 +350,11 @@ while (len(t) > 3): if (stop ==1): break - output.append (str(rel_TL) + ' ' + str(len(t))) - length=len(t) - x.append(length) - y.append(rel_TL) + if not (arguments.fine_plot): + output.append (str(rel_TL) + ' ' + str(len(t))) + length=len(t) + x.append(length) + y.append(rel_TL) if arguments.verbose==1: @@ -372,69 +379,9 @@ if not arguments.no_plot: plt.ylim(0,1.1) plt.xlabel('Number of leaves') plt.ylabel('Relative tree length') - plt.savefig(arguments.INFILE+'_TLD.png') + plt.savefig(arguments.INFILE+'_res_'+ str(arguments.resolution)+'_TLD.png') - - - -########################################### LOOP THRU ALL LEAVES (replaced by joblib parallel line ############################################# - - -# for leaf in leaves: -# counter1 = counter1 + 1 -# print "iter find N : "+ str(counter1) -# N_list=find_N(t,leaf) -# print N_list -# if N_list: -# DLIST.update(N_list) - - - - -########################################### CALCULATE MATRIX DISTANCE into simple DICTIONARY very slow!! 
dendropy is orders of magnitude faster ############################################# - -#def make_dist_matrix(leaves): -# count=0 -# dlist = {} -# for x in range (0,len(leaves)-1): -# count=count+1 -# -# for y in range (x+1,len(leaves)): -# -# DIS = leaves[x].get_distance(leaves[y]) -# dlist.update({leaves[x].name + "," +leaves[y].name : DIS}) -# -# return dlist - -#print dlist - - -########################################### CALCULATE MATRIX DISTANCE into nested DICTIONARY ############################################## - - -#m = {} -#for x in range (0,len(leaves)-2): -# d = {} -# for y in range (x+1,len(leaves)-1): -# -# DIS = leaves[x].get_distance(leaves[y]) -# #print str(leaves[x]) + str(leaves[y]) + "\n" + str(DIS) -# d.update({leaves[y].name : DIS}) - #print "D"# - #print d -# m.update({leaves[x].name : d}) - #print "M" - #print m - -#print m - - - - - - -