if ((sister_flag==1)and(flag==0)):#collect results at two nodes of distance only if there are no leaves that are closer
dlist.update(temp_dlist)
ifarguments.verbose==3:
printstr(temp_dlist)+" are not sister taxa, but neighbours first is leaf, second is upper neighbor"
if (flag==0):#### this means that the leaf has not neighbors at one node of dist
parent=parent.up#### therefore I climb the tree down towards the root of one more step and look for leaves
multi_flag=0
ifarguments.verbose==3:
print"going down"
print"gran parent"
printparent
temp_dlist={}
forninrange(0,len(parent.get_children())):#this for loop start from grean parent and climb up max one nodes, if it finds leaves calculate the distances,
printleaf.name+","+parent.children[n].name+str(DIS)+" are not sister taxa, but neighbours first is leaf, second is neighbor of downstair (towards root)"
returndlist
########################################## IDENTIFY LEAF TO PRUNE #######################
def find_leaf_to_prune(dlist, tree=None, leaves_pair=None):
    """Select the leaf to prune from the closest pair of neighbouring leaves.

    ``dlist`` maps ``"name1,name2"`` keys to the distance between the two
    named leaves. One of the pairs with the smallest distance is drawn at
    random, then one leaf of that pair is selected according to
    ``leaves_pair``:

    * 0 -- the leaf with the larger ``dist``
    * 1 -- the leaf with the smaller ``dist``
    * 2 -- a random leaf of the pair

    When both leaves have the same ``dist`` the leaf is always drawn at
    random, whatever ``leaves_pair`` is.

    Args:
        dlist: non-empty dict of ``"name1,name2"`` -> distance.
        tree: object providing ``search_nodes(name=...)``. Defaults to the
            module-level tree ``t``.
        leaves_pair: selection mode (0, 1 or 2). Defaults to
            ``arguments.leaves_pair``.

    Returns:
        Tuple ``(leaf_to_prune, dist)``: the name of the selected leaf and
        the ``dist`` attribute of that same leaf.

    Raises:
        ValueError: if ``dlist`` is empty, or if the two leaves differ in
            ``dist`` and ``leaves_pair`` is not 0, 1 or 2.
    """
    # Fall back to the script-level globals so existing one-argument calls
    # keep working unchanged.
    if tree is None:
        tree = t
    if leaves_pair is None:
        leaves_pair = arguments.leaves_pair

    if not dlist:
        raise ValueError("dlist is empty: there is no pair of leaves to choose from")

    # Keep every pair tied for the smallest distance and draw one of them.
    min_val = min(dlist.values())
    closest_pairs = [key for key, value in dlist.items() if value == min_val]

    # NOTE: keys are split on ","; leaf names containing a comma are not
    # supported by this key format.
    pair = str(random.choice(closest_pairs)).split(",")
    leaf1 = tree.search_nodes(name=pair[0])[0]
    leaf2 = tree.search_nodes(name=pair[1])[0]

    if leaf1.dist == leaf2.dist or leaves_pair == 2:
        # Random choice within the pair; report the dist of the leaf that
        # was actually picked (previously leaf1.dist was always returned).
        chosen_name = random.choice(list(pair))
        chosen_leaf = leaf1 if chosen_name == pair[0] else leaf2
        return (chosen_name, chosen_leaf.dist)

    if leaves_pair not in (0, 1):
        raise ValueError(
            "leaves_pair must be 0, 1 or 2, got " + repr(leaves_pair)
        )

    if leaf1.dist < leaf2.dist:
        shorter, longer = leaf1, leaf2
    else:
        shorter, longer = leaf2, leaf1

    chosen_leaf = shorter if leaves_pair == 1 else longer
    return (chosen_leaf.name, chosen_leaf.dist)
########################################## PRUNE LEAF FROM TREE #######################
########################################## write output with stop option #######################
def write_stop(t, output1, output2):
    """Write the tree and the names of its leaves to two files.

    Args:
        t: tree object providing ``write()`` (its return value is stored in
            ``output1``) and ``get_leaves()`` (each leaf exposes ``name``).
        output1: path of the file that receives the string from ``t.write()``.
        output2: path of the file that receives the leaf names, one per
            line, with no newline after the last name.
    """
    # ``with`` closes each file even if serialising or writing raises.
    with open(output1, "w") as tree_file:
        tree_file.write(t.write())

    leaf_names = [leaf.name for leaf in t.get_leaves()]
    with open(output2, "w") as names_file:
        names_file.write("\n".join(leaf_names))
###### SOFTWARE START
# ---- Command-line interface ------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('INFILE', type=str, help='path to the newick tree')
parser.add_argument('-r', '--resolution', metavar='INT', default=1,
                    help='number of leaves top prune at each iteration (default: 1)',
                    type=int, nargs='?')
parser.add_argument('-c', '--cpu', metavar='INT', default=1,
                    help='number of cpu to use (default: 1)',
                    type=int, nargs='?')
# The help text used to advertise "(default: 1)" although the default is 0.
parser.add_argument('-v', '--verbose', metavar='0,1,2', default=0,
                    help='0: silent, 1: show progress, 2: print tree at each iteration, 3: only for testing (findN), 4: only for testing (prune_t) (default: 0)',
                    type=int, nargs='?', choices=[0, 1, 2, 3, 4])
parser.add_argument('-p', '--solve_polytomies',
                    help='resolve polytmies at random (default: FALSE)',
                    action='store_true', default=False)
### yet to implement
parser.add_argument('-X', '--stop_at_X_leaves', metavar='0-n_leaves', default=0,
                    help='stop pruning when the number of leaves fall below X (integer)',
                    type=int, nargs='?')
# The string default is converted by argparse through ``restricted_float``.
parser.add_argument('-RTL', '--stop_at_RTL', metavar='0-1', default='0',
                    help='stop pruning when the relative tree length falls below RTL (decimal number between 0 and 1)',
                    type=restricted_float, nargs='?')
# Only 0, 1 and 2 are meaningful, so other values are rejected at parse time.
parser.add_argument('-lp', '--leaves_pair', metavar='0,1,2', default=2,
                    help='After the pair of leaves with the smallest distance is dentified Treemmer prunes: 0: the longest leaf\n1: the shortest leaf\n2: random choice (default)',
                    type=int, nargs='?', choices=[0, 1, 2])

arguments = parser.parse_args()

# ``nargs='?'`` yields None when -X / -RTL is given without a value, so
# normalise to 0 before comparing. parser.error() prints the usage message
# and exits with status 2 instead of dumping a traceback.
if (arguments.stop_at_RTL or 0) > 0 and (arguments.stop_at_X_leaves or 0) > 0:
    parser.error("-X and -RTL are mutually exclusive arguments")

# ---- Initial state ---------------------------------------------------------
t = Tree(arguments.INFILE, format=1)
counter = 0
output = []
stop = 0
TOT_TL = calculate_TL(t)  # tree length as computed on the tree just loaded
TL = TOT_TL

if arguments.solve_polytomies:
    t.resolve_polytomy()

# ---- Progress report -------------------------------------------------------
# print(...) with a single argument behaves the same under the Python 2
# print statement and the Python 3 print function.
if arguments.verbose > 0:
    print("N of taxa in tree is : " + str(len(t)))

    if arguments.solve_polytomies:
        print("\nPolytomies will be solved at random")
    else:
        print("\nPolytomies will be kept")

    # A space was missing after "to" in the next two messages.
    if arguments.stop_at_X_leaves:
        print("\nTreemmer will reduce the tree to " + str(arguments.stop_at_X_leaves) + " leaves")
    elif arguments.stop_at_RTL:
        print("\nTreemmer will reduce the tree to " + str(arguments.stop_at_RTL) + " of the original tree length")
    else:
        print("\nTreemmer will calculate the tree length decay")

    print("\nTreemmer will prune " + str(arguments.resolution) + " leaves at each iteration")
    print("\nTreemmer will use " + str(arguments.cpu) + " cpu(s)")
########################################### LOOP THROUGH ALL LEAVES (replaced by the joblib parallel line) #############################################
# for leaf in leaves:
# counter1 = counter1 + 1
# print "iter find N : "+ str(counter1)
# N_list=find_N(t,leaf)
# print N_list
# if N_list:
# DLIST.update(N_list)
########################################### CALCULATE MATRIX DISTANCE into simple DICTIONARY very slow!! dendropy is orders of magnitude faster #############################################