# fragmentation.py (1.02 KiB)
# Authored by Hugo Madge Leon
import random
# Input sequences: each key is a DNA sequence, each value is the number of
# copies of that sequence that are fragmented independently.
dna_seq = {
    "ATAACATGTGGATGGCCAGTGGTCGGTTGTTACACGCCTACCGCGATGCTGAATGACCCGGACTAGAGTGGCGAAATTTATGGCGTGTGACCCGTTATGC": 100,
    "TCCATTTCGGTCAGTGGGTCATTGCTAGTAGTCGATTGCATTGCCATTCTCCGAGTGATTTAGCGTGACAGCCGCAGGGAACCCATAAAATGCAATCGTA": 100,
}

# Size-selection window: a fragment is kept when its length lies in
# [mean_length - std, mean_length + std]. mean_length also determines how
# many cuts are made per copy (len(seq) // mean_length).
mean_length = 12
std = 1


def fragment_sequence(seq, cut_positions):
    """Split *seq* at *cut_positions* and return all fragments in order.

    Args:
        seq: The sequence to split.
        cut_positions: Iterable of indices into *seq* at which to cut; they
            may be given in any order.

    Returns:
        A list of ``len(cut_positions) + 1`` substrings that concatenate
        back to *seq*. The final element runs from the last cut to the end
        of the sequence.
    """
    # Bracket the sorted cuts with 0 and len(seq) so that every fragment,
    # including the last one, is a slice between two consecutive bounds.
    bounds = [0] + sorted(cut_positions) + [len(seq)]
    return [seq[start:end] for start, end in zip(bounds, bounds[1:])]


def select_terminal_fragment(fragments, mean_length, std):
    """Return the last fragment whose length passes size selection.

    Args:
        fragments: Fragments in sequence order.
        mean_length: Centre of the accepted length window.
        std: Half-width of the accepted length window (inclusive).

    Returns:
        The last fragment with
        ``mean_length - std <= len(fragment) <= mean_length + std``,
        or ``""`` when no fragment qualifies.
    """
    # NOTE(review): this keeps the original "last match wins" rule; confirm
    # whether only the final fragment was meant to be eligible.
    for fragment in reversed(fragments):
        if mean_length - std <= len(fragment) <= mean_length + std:
            return fragment
    return ""


# Fragment every copy of every sequence at random positions and collect the
# selected fragment of each copy (copies with no in-window fragment are
# skipped).
term_frags = []
for seq, counts in dna_seq.items():
    # Cut sites are drawn from range(1, len(seq) - 1), i.e. never at index 0.
    cut_sites = range(1, len(seq) - 1)
    # Clamp so random.sample cannot be asked for more sites than exist.
    n_cuts = min(len(seq) // mean_length, len(cut_sites))
    for _ in range(counts):
        cuts = random.sample(cut_sites, n_cuts)
        term_frag = select_terminal_fragment(
            fragment_sequence(seq, cuts), mean_length, std
        )
        if term_frag:
            term_frags.append(term_frag)

# Write one selected fragment per line.
with open('terminal_frags.txt', 'w') as f:
    f.writelines(frag + '\n' for frag in term_frags)