Skip to content
Snippets Groups Projects
Commit c8e49628 authored by Menardo Fabrizio's avatar Menardo Fabrizio
Browse files

Delete make_variable_fasta.py

parent 6f5a2095
Branches master
No related tags found
No related merge requests found
from Bio import SeqIO
from joblib import Parallel, delayed
import argparse
import re
############################################################ define arg type float 0 < X > 1 ###############################################################
def restricted_float(x):
x = float(x)
if x < 0.0 or x > 1.0:
raise argparse.ArgumentTypeError("%r not in range [0.0, 1.0]"%(x,))
return x
########################################## find if a position is variable #######################
def find_variable(n):
out={}
#print "nnnnnnnnnnnnnn"
print n
pos=""
for name, seq in fasta.items():
pos += seq[n]
#print pos
seq_length=len(pos)
good_seq_length=len(re.findall('[ATCGatcg]', pos))
ratio= float(good_seq_length)/float(seq_length)
#print ratio
if (ratio >= arguments.missing):
A=len(re.findall('[Aa]', pos))
T=len(re.findall('[Tt]', pos))
G=len(re.findall('[Gg]', pos))
C=len(re.findall('[Cc]', pos))
if (A > 0):
A=1
if (T > 0):
T=1
if (C > 0):
C=1
if (G > 0):
G=1
if ((A+C+G+T) > 1):
#print pos
for name_out, seq_out in fasta.items():
OUT[name_out] += seq_out[n]
#print OUT
#print out
return OUT
########################################## parallel loop #######################
def parallel_loop(i):
n=i
print "iiiiii"
print i
while n < length:
print "n=" + str(n)
print "i=" + str(i)
OUT=find_variable(n)
#print temp_out
n=n+arguments.cpu #n of threads
#if temp_out:
#OUT.update(temp_out)
#for name_out, seq_out in temp_out.items():
#OUT[name_out] += temp_out[name_out]
#tot.update(OUT)
#print OUT
return OUT
parser = argparse.ArgumentParser()
parser.add_argument('INFILE',type=str,help='path to the multi fasta alignment')
parser.add_argument('-m','--missing', metavar='0-1', default='0.5', help='minimum percentage of NOT missing data', type =restricted_float,nargs='?')
parser.add_argument('-c','--cpu', metavar='INT', default=1,help='number of cpu to use (default: 1)',type =int, nargs='?')
arguments = parser.parse_args()
tot={}
fasta={}
OUT={}
for record in SeqIO.parse(arguments.INFILE, "fasta"):
length = len(record.seq)
fasta.update({str(record.id):str(record.seq)})
OUT.update({str(record.id):""})
print "fasta reas"
OUT = Parallel(n_jobs=arguments.cpu)(delayed(find_variable)(i) for i in range(0,length))
#print OUT
result={}
for d in OUT: #when run
result.update(d)
#print OUT
#print result
for name_out, seq_out in result.items():
print ">" + name_out
print seq_out
#scp print len(seq_out)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment