Skip to content
Snippets Groups Projects
Commit 7b3bda5d authored by Tanya Santosh Nandan's avatar Tanya Santosh Nandan
Browse files

Merge branch 'tanya' into 'main'

Tanya

See merge request !13
parents 79fd2c6a 9a5a1ca0
No related branches found
No related tags found
1 merge request!13Tanya
import pandas as pdimport randomimport numpy as npimport re dna_seq = { "ATAACATGTGGATGGCCAGTGGTCGGTTGTTACACGCCTACCGCGATGCTGAATGACCCGGACTAGAGTGGCGAAATTTATGGCGTGTGACCCGTTATGC": 100, "TCCATTTCGGTCAGTGGGTCATTGCTAGTAGTCGATTGCATTGCCATTCTCCGAGTGATTTAGCGTGACAGCCGCAGGGAACCCATAAAATGCAATCGTA": 100}nucs = ['A','T','G','C']mononuc_freqs = [0.22, 0.25, 0.23, 0.30] # A_dinuc_freqs = {'AA':0.27,'AT':0.25, 'AG':0.22, 'AC':0.26} # T_dinuc_freqs = {'TA':0.27, 'TT':0.24, 'TG':0.24, 'TC':0.25} # C_dinuc_freqs = {'CA':0.23, 'CT':0.21, 'CG':0.35, 'CC':0.21} # G_dinuc_freqs = {'GA':0.25, 'GT':0.25, 'GG':0.23, 'GC':0.27} mean_length = 12 std = 1 term_frags = [] for seq, counts in dna_seq.items(): for _ in range(counts): n_cuts = int(len(seq)/mean_length) # non-random DNA fragmentation implementation based on https://www.nature.com/articles/srep04532#Sec1 # assume fragmentation by sonication for NGS workflow cuts = [] cut_nucs = np.random.choice(nucs, n_cuts, p=mononuc_freqs) for nuc in cut_nucs: nuc_pos = [x.start() for x in re.finditer(nuc, seq)] pos = np.random.choice(nuc_pos) while pos in cuts: pos = np.random.choice(nuc_pos) cuts.append(pos) cuts.sort() cuts.insert(0,0) term_frag = "" for i, val in enumerate(cuts): if i == len(cuts)-1: fragment = seq[val+1:cuts[-1]] else: fragment = seq[val:cuts[i+1]] if mean_length-std <= len(fragment) <= mean_length+std: term_frag = fragment if term_frag == "": continue else: term_frags.append(term_frag) with open('terminal_frags.txt', 'w') as f: for line in term_frags: f.write(line) f.write('\n')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment