# fragmentation.py
import random


# Input sequences mapped to the number of copies of each to fragment.
dna_seq = {
    "ATAACATGTGGATGGCCAGTGGTCGGTTGTTACACGCCTACCGCGATGCTGAATGACCCGGACTAGAGTGGCGAAATTTATGGCGTGTGACCCGTTATGC": 100,
    "TCCATTTCGGTCAGTGGGTCATTGCTAGTAGTCGATTGCATTGCCATTCTCCGAGTGATTTAGCGTGACAGCCGCAGGGAACCCATAAAATGCAATCGTA": 100
}

# A fragment is kept only if its length lies in
# [mean_length - std, mean_length + std]; mean_length also sets the number
# of cuts made per copy (len(seq) / mean_length, truncated).
mean_length = 12
std = 1


def split_at_cuts(seq, cuts):
    """Split *seq* at the given cut positions.

    Args:
        seq: The sequence (string) to split.
        cuts: Iterable of cut indices into *seq*; order does not matter.

    Returns:
        The list of consecutive pieces, from the start of *seq* up to the
        first cut, between successive cuts, and from the last cut to the
        end of *seq*. Joining the pieces reproduces *seq*.
    """
    # Adding both sequence ends as boundaries guarantees that the piece
    # after the final cut is produced as well.
    bounds = [0] + sorted(cuts) + [len(seq)]
    return [seq[start:end] for start, end in zip(bounds, bounds[1:])]


def select_terminal_fragment(fragments, min_length, max_length):
    """Return the last fragment whose length is within the given bounds.

    Args:
        fragments: Fragments in sequence order.
        min_length: Smallest accepted fragment length (inclusive).
        max_length: Largest accepted fragment length (inclusive).

    Returns:
        The last fragment in *fragments* with
        ``min_length <= len(fragment) <= max_length``, or ``""`` if no
        fragment qualifies.
    """
    for fragment in reversed(fragments):
        if min_length <= len(fragment) <= max_length:
            return fragment
    return ""


term_frags = []
for seq, counts in dna_seq.items():
    # NOTE(review): position len(seq) - 1 is never offered as a cut site;
    # kept as in the original script -- confirm whether that is intended.
    cut_sites = range(1, len(seq) - 1)
    # Clamp so random.sample() cannot be asked for more sites than exist.
    n_cuts = min(int(len(seq) / mean_length), len(cut_sites))
    for _ in range(counts):
        cuts = random.sample(cut_sites, n_cuts)
        term_frag = select_terminal_fragment(
            split_at_cuts(seq, cuts),
            mean_length - std,
            mean_length + std,
        )
        # Copies that yield no fragment of acceptable length are skipped.
        if term_frag:
            term_frags.append(term_frag)

# One selected fragment per line.
with open('terminal_frags.txt', 'w', encoding='utf-8') as f:
    f.writelines(frag + '\n' for frag in term_frags)