Skip to content
Snippets Groups Projects
Commit 9a5a1ca0 authored by Tanya Santosh Nandan's avatar Tanya Santosh Nandan
Browse files

Tanya

parent 79fd2c6a
No related branches found
No related tags found
1 merge request!13Tanya
import pandas as pdimport randomimport numpy as npimport re dna_seq = { "ATAACATGTGGATGGCCAGTGGTCGGTTGTTACACGCCTACCGCGATGCTGAATGACCCGGACTAGAGTGGCGAAATTTATGGCGTGTGACCCGTTATGC": 100, "TCCATTTCGGTCAGTGGGTCATTGCTAGTAGTCGATTGCATTGCCATTCTCCGAGTGATTTAGCGTGACAGCCGCAGGGAACCCATAAAATGCAATCGTA": 100}nucs = ['A','T','G','C']mononuc_freqs = [0.22, 0.25, 0.23, 0.30] # A_dinuc_freqs = {'AA':0.27,'AT':0.25, 'AG':0.22, 'AC':0.26} # T_dinuc_freqs = {'TA':0.27, 'TT':0.24, 'TG':0.24, 'TC':0.25} # C_dinuc_freqs = {'CA':0.23, 'CT':0.21, 'CG':0.35, 'CC':0.21} # G_dinuc_freqs = {'GA':0.25, 'GT':0.25, 'GG':0.23, 'GC':0.27} mean_length = 12 std = 1 term_frags = [] for seq, counts in dna_seq.items(): for _ in range(counts): n_cuts = int(len(seq)/mean_length) # non-random DNA fragmentation implementation based on https://www.nature.com/articles/srep04532#Sec1 # assume fragmentation by sonication for NGS workflow cuts = [] cut_nucs = np.random.choice(nucs, n_cuts, p=mononuc_freqs) for nuc in cut_nucs: nuc_pos = [x.start() for x in re.finditer(nuc, seq)] pos = np.random.choice(nuc_pos) while pos in cuts: pos = np.random.choice(nuc_pos) cuts.append(pos) cuts.sort() cuts.insert(0,0) term_frag = "" for i, val in enumerate(cuts): if i == len(cuts)-1: fragment = seq[val+1:cuts[-1]] else: fragment = seq[val:cuts[i+1]] if mean_length-std <= len(fragment) <= mean_length+std: term_frag = fragment if term_frag == "": continue else: term_frags.append(term_frag) with open('terminal_frags.txt', 'w') as f: for line in term_frags: f.write(line) f.write('\n')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment