diff --git a/.gitignore b/.gitignore deleted file mode 100644 index e67a40ec0cbb5aa7e77e9607e08a517dc1187296..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.DS_Store -.idea/ -__pycache__/ diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index 9ae3ad477bffeaa0ed46b1887c9cdde30ce6b73f..0000000000000000000000000000000000000000 --- a/LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 Clara Serger, Michael Sandholzer and Christoph Harmel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 3003dfb80d3d09bd805b71f5eb1b1c13bbfea12d..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Read Sequencer - -## Overview - -Read Sequencer is a test python package to simulate sequencing. -It reads fasta files, simulate sequencing with specified read length and writes the resulting sequences into a new fasta file. - - -## Installation from PyPI - -Read Sequencer requires Python 3.9 or later. - -Install Read Sequencer from PyPI using: - -``` -pip install -i https://test.pypi.org/simple/ read-sequencer==0.1.1 -``` - -## Usage - -``` -read_sequencer --input_file_path --output_file_path --read_length -``` -## Contributors and Contact Information - -Christoph Harmel - christoph.harmel@unibas.ch -Michael Sandholzer - michael.sandholzer@unibas.ch -Clara Serger - c.serger@unibas.ch - diff --git a/build/lib/read_sequencer_package/__init__.py b/build/lib/read_sequencer_package/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/build/lib/read_sequencer_package/cli.py b/build/lib/read_sequencer_package/cli.py deleted file mode 100644 index e786d78de5391ea2b035c058f4de2cd16162a4d6..0000000000000000000000000000000000000000 --- a/build/lib/read_sequencer_package/cli.py +++ /dev/null @@ -1,23 +0,0 @@ -import argparse -from modules import read_sequencer as rs - -parser = argparse.ArgumentParser(prog='read_sequencer', - description='Simulates sequencing of DNA sequences specified by an FASTA file.') -parser.add_argument('--input_file_path', - help='path to FASTA file') -parser.add_argument('--output_file_path', - help='path to FASTA file') -parser.add_argument('--read_length', - help='read length for sequencing', - type=int) - -args = parser.parse_args() - -def main(): - read_sequencer = rs() - read_sequencer.read_fasta(args.input_file_path) - read_sequencer.run_sequencing(args.read_length) - read_sequencer.write_fasta(args.output_file_path) - -if __name__ == '__main__': - main() diff --git a/build/lib/read_sequencer_package/modules.py b/build/lib/read_sequencer_package/modules.py deleted file mode 100644 index 39a686616817f6b496328d460fe06931f9670da8..0000000000000000000000000000000000000000 --- a/build/lib/read_sequencer_package/modules.py +++ /dev/null @@ -1,148 +0,0 @@ -def generate_sequences(n, mean, sd): - """ - Generates random sequences. - - Args: - n (int): Amount of sequences to generate. - mean (int): mean length of sequence (gaussian distribution). - sd (float): standard deviation of length of sequence (gaussian distribution). - - Returns: - list: of n sequences - """ - from random import gauss, choice - dict = {} - for i in range(n): - keys = range(n) - seq = "" - nt = ["A", "T", "C", "G"] - for value in range(abs(round(gauss(mean, sd)))): - seq = seq + choice(nt) - dict[keys[i]] = seq - return dict - - -def read_in_fasta(file_path): - ''' - This function reads in FASTA files. - - Args: - file_path (str): A file path directing to the fasta file. - - Returns: - Dict: It returns a dictionary with sequences. - - ''' - sequences = {} - f = open(file_path) - for line in f: - if line[0] == '>': - defline = line.strip() - defline = defline.replace('>', '') - else: - if defline not in sequences: - sequences[defline] = '' - sequences[defline] += line.strip() - f.close() - return sequences - -def read_sequence(seq, read_length): - ''' - This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is - smaller then the requested length or cuts the sequence if its longer. - - Args: - seq (str): the sequence to read - read_length (int): length of reads - - Returns: - str: returns sequenced element - - ''' - from random import choice - bases = ["A", "T", "C", "G"] - sequenced = '' - if read_length >= len(seq): - for nt in range(len(seq)): - sequenced += seq[nt] - for nt in range(len(seq), read_length): - sequenced += choice(bases) - else: - for nt in range(read_length): - sequenced += seq[nt] - - return sequenced - -def simulate_sequencing(sequences, read_length): - """ - Simulates sequencing. - - Args: - sequences (dict): Dictionary of sequences to sequence. - read_length (int): length of reads - - Returns: - dict: of n sequences as values - """ - results = {} - for index, key in enumerate(sequences): - results[key] = read_sequence(sequences[key], read_length=read_length) - - return results - -import random -def generate_sequences(n, mean, sd): - """ - Generates random sequences. - - Args: - n (int): Amount of sequences to generate. - mean (int): mean length of sequence (gaussian distribution). - sd (float): standart deviation of length of sequence (gaussian distribution). - - Returns: - dict: of n sequences - """ - dict1 = {} - for i in range(n): - keys = range(n) - seq = "" - nt = ["A", "T", "C", "G"] - for value in range(round(random.gauss(mean, sd))): - seq = seq + random.choice(nt) - dict1[keys[i]] = seq - return dict1 - -def write_fasta(sequences, file_path): - """ - Takes a dictionary and writes it to a fasta file. - Must specify the filename when calling the function. - - Args: - sequences (dict): Dictionary of sequence. - file_path (str): A file path directing to the output folder. - - """ - from textwrap import wrap - with open(file_path, "w") as outfile: - for key, value in sequences.items(): - outfile.write(key + "\n") - outfile.write("\n".join(wrap(value, 60))) - outfile.write("\n") - -class read_sequencer: - def __init__(self): - self.sequences = {} - self.reads = {} - - def add_random_sequences(self, n, mean, sd): - self.sequences = generate_sequences(n, mean, sd) - - def read_fasta(self, input_file): - self.sequences = read_in_fasta(input_file) - - def run_sequencing(self, read_length): - self.reads = simulate_sequencing(self.sequences, read_length) - - def write_fasta(self, output_file_path): - write_fasta(self.reads, output_file_path) diff --git a/dist/read_sequencer-0.1.1-py3-none-any.whl b/dist/read_sequencer-0.1.1-py3-none-any.whl deleted file mode 100644 index 234b5227d12d9f5e410d26b2e131fed34c2596fc..0000000000000000000000000000000000000000 Binary files a/dist/read_sequencer-0.1.1-py3-none-any.whl and /dev/null differ diff --git a/dist/read_sequencer-0.1.1.tar.gz b/dist/read_sequencer-0.1.1.tar.gz deleted file mode 100644 index 7eb03d4a5f331e1050f6b5f6c8cb5ee356fc2595..0000000000000000000000000000000000000000 Binary files a/dist/read_sequencer-0.1.1.tar.gz and /dev/null differ diff --git a/images/Git_Tutorial_CSerger.png b/images/Git_Tutorial_CSerger.png deleted file mode 100644 index a80ebb2cff393ab5575bbb50804b42f9ba7ff77c..0000000000000000000000000000000000000000 Binary files a/images/Git_Tutorial_CSerger.png and /dev/null differ diff --git a/images/Markdown_Tutorial_CSerger.png b/images/Markdown_Tutorial_CSerger.png deleted file mode 100644 index 1146d19773186c648382ce4aa87681c0001b7a67..0000000000000000000000000000000000000000 Binary files a/images/Markdown_Tutorial_CSerger.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png b/images/Michael_Screenshot 2022-11-07 at 17.38.44.png deleted file mode 100644 index 272acf066d23d8440b02356cfc2ebd579f2f9569..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png b/images/Michael_Screenshot 2022-11-08 at 13.35.22.png deleted file mode 100644 index 2deedd0c2a642d492a959ce85733fbfdf2e508ed..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png b/images/Michael_Screenshot 2022-11-08 at 14.38.02.png deleted file mode 100644 index 4bdbfdf333fcacdc043e990027dd232953d53b20..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png and /dev/null differ diff --git a/images/screenshot_git_tutorial_main_harmel.png b/images/screenshot_git_tutorial_main_harmel.png deleted file mode 100644 index fcbb8fde4d7ee8cb4d61d2cf3ff62ccddf11ff53..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_main_harmel.png and /dev/null differ diff --git a/images/screenshot_git_tutorial_remote_harmel.png b/images/screenshot_git_tutorial_remote_harmel.png deleted file mode 100644 index 76ff324be44e1e54447ae6fc09ecc62d50b48583..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_remote_harmel.png and /dev/null differ diff --git a/images/screenshot_markup_tutorial_harmel.png b/images/screenshot_markup_tutorial_harmel.png deleted file mode 100644 index db3aaf02cb5ec285c3cf5f76fe1d25134df69907..0000000000000000000000000000000000000000 Binary files a/images/screenshot_markup_tutorial_harmel.png and /dev/null differ diff --git a/read_sequencer.egg-info/PKG-INFO b/read_sequencer.egg-info/PKG-INFO deleted file mode 100644 index ed2a2f723157a60010ad224e55406bda28f82255..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/PKG-INFO +++ /dev/null @@ -1,9 +0,0 @@ -Metadata-Version: 2.1 -Name: read-sequencer -Version: 0.1.1 -Summary: Simulates sequencing with a specified read length from sequences specified by a FASTA file. -Home-page: https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer -Author: Clara Serger, Michael Sandholzer and Christoph Harmel -Author-email: christoph.harmel@unibas.ch -License: MIT -License-File: LICENSE.txt diff --git a/read_sequencer.egg-info/SOURCES.txt b/read_sequencer.egg-info/SOURCES.txt deleted file mode 100644 index 9b093f4ab2fab7288a29384742947a5405d68517..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/SOURCES.txt +++ /dev/null @@ -1,12 +0,0 @@ -LICENSE.txt -README.md -setup.py -read_sequencer.egg-info/PKG-INFO -read_sequencer.egg-info/SOURCES.txt -read_sequencer.egg-info/dependency_links.txt -read_sequencer.egg-info/entry_points.txt -read_sequencer.egg-info/requires.txt -read_sequencer.egg-info/top_level.txt -read_sequencer_package/__init__.py -read_sequencer_package/cli.py -read_sequencer_package/modules.py \ No newline at end of file diff --git a/read_sequencer.egg-info/dependency_links.txt b/read_sequencer.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/read_sequencer.egg-info/entry_points.txt b/read_sequencer.egg-info/entry_points.txt deleted file mode 100644 index 569e8154320e35d343a621996abf05109990fec6..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -read_sequencer = read_sequencer_package.cli:main diff --git a/read_sequencer.egg-info/requires.txt b/read_sequencer.egg-info/requires.txt deleted file mode 100644 index d9ca6c64cbd4a8ce3229966dd400911a7f626665..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/requires.txt +++ /dev/null @@ -1,3 +0,0 @@ -random -textwrap -argparse diff --git a/read_sequencer.egg-info/top_level.txt b/read_sequencer.egg-info/top_level.txt deleted file mode 100644 index a5c49290c04dc99c11fd0696381a1287c34af400..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -read_sequencer_package diff --git a/read_sequencer_package/__init__.py b/read_sequencer_package/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/read_sequencer_package/cli.py b/read_sequencer_package/cli.py deleted file mode 100644 index 9a68267c115d328afa32e582d8c03e4aede2a076..0000000000000000000000000000000000000000 --- a/read_sequencer_package/cli.py +++ /dev/null @@ -1,38 +0,0 @@ -import argparse -from modules import ReadSequencer -import logging - -parser = argparse.ArgumentParser(prog='read_sequencer', - description='Simulates sequencing of DNA sequences specified by an FASTA file.') -parser.add_argument('--input_file_path', - help='path to FASTA file') -parser.add_argument('--output_file_path', - help='path to FASTA file') -parser.add_argument('--read_length', - help='read length for sequencing', - type=int) -parser.add_argument('--random', action='store_true', default=False, - help='generate random sequences') -parser.add_argument('--n_random', default=100, type=int, help='n random sequences') -parser.add_argument('--mean_random', default=50, type=int, help='mean random sequences') -parser.add_argument('--sd_random', default=25, type=int, help='standard deviation random sequences') - -args = parser.parse_args() - -def main(): - LOG.info("Read sequencer started.") - read_sequencer = ReadSequencer() - if args.random: - read_sequencer.add_random_sequences(n=args.n_random, mean=args.mean_random, sd=args.sd_random) - else: - read_sequencer.read_fasta(args.input_file_path) - read_sequencer.run_sequencing(args.read_length) - read_sequencer.write_fasta(args.output_file_path) - LOG.info("Read sequencer finished.") - -if __name__ == '__main__': - logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', - level=logging.INFO) - LOG = logging.getLogger(__name__) - main() diff --git a/read_sequencer_package/fasta_testfile/50_seqs_50_1000_bp.fasta b/read_sequencer_package/fasta_testfile/50_seqs_50_1000_bp.fasta deleted file mode 100644 index 12e41cdac4403ac3c51f4f76553889346ab56bdf..0000000000000000000000000000000000000000 --- a/read_sequencer_package/fasta_testfile/50_seqs_50_1000_bp.fasta +++ /dev/null @@ -1 +0,0 @@ ->1|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 481 bp tgagcactcggtgccaagggcggggatacacagatggttggctgatacaaccgggacttaaattccctagactagatctgtgttggaacgcctctctacgagaaggcgaacgaactggcgccgaggcgatcgctaacatcttcgtctcgcttgaaccacacaatggatgattcctccctaggggtttgacaatcaacctggatagcgtttaatatagatggctggttgatttgtaaggccttcacagactactcagagcaataagtgaccccccaacaatcagaggctgatcctctgctccgaaggcagcactcatcatcggtattctgttcgctagaacagatggaatgcatgcgccccgctaagtttgattgagttaaacttattgtcttatagccatagccaaggtatctatggtgtcgtacgtgacagttccgtgtaggcatgccatcccgcctcatgcgtcatgtcatactgaggc >2|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 495 bp ctgaatcaggtgtaggttctttttacgtcgtttaaggagctacacggtatcttgttttcagttaaggtgccacacccccgggtggatcatccgtcagctttcctacaattaggtaactggcgggatcatttagtcttgtattaagacgctcgcgcccggggcggccggcttgtttgtggagagaaacaacaagtctgagtatagattaaatacaactggtttactggcaagtcagcgcgtaacaaccggtgagccgctgcgcatgcttactgcaatgaacatcttggcacgatcctgcgatagcgtgccctgacccgtgcacctcgtcggtgaatttcgtcgaacaagcggatcgccacgccacgtgagatcaagccaaaacacaaaaaccacaggcaatagcgacgctgaagtgtcatttcctacctaaaacttcggttcttcttcttaagagggcattaagtccggataactactgcatggcacctgtgtatg >3|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 193 bp acttcagtactggaaggatctaggaaccattaatgcgagtgtggtgacgccagacgacccccggtgttctgccaccttctttggataggagaaccgtcactcgccccggaggccccacggataagaagggtatcttgtgatcacgcgaatgactcacttgcgtaagtaatctaactttgtttttcgctataaa >4|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 625 bp acgtctggagcgtgggttgacccctgtacatggttctttccggatccttaacgtgccgatacaactcaaaggtaactgtgcttaccacttccgaagctacatgcctctaacaaagtactttcgaggaggcactcaacccccggagatgctttgcgcggaagcagagatcgctgctcaaaatttggaatcactttcgtgcgagacccaaacaatttatggtggattcaagcgaacgagtcatgattacagatctatcaatcgaggagaggacggcttcgccgtttccttttaatgtgaaactagagccttcctcaatagtgaggcccttgcccggtgcccgggattggctcaaaagtaccggctcaggaagtctctcaactgccaagttggttaaagtagcttcggcgtaaggaacccgaccgaccatcagtgtcatacaaggaaatatttccaggaacctagtttatcttgaagttctgaatgagttaggtaatgtcggcccttcatgacagggggacgtgctcgcgagctcaaaccaagggggctaagggcgacggtgcttagtcatctactagatccacccacatgacgattgtgtgggtcctttcagccttttacttctcgc >5|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 845 bp agagcgtacggcgcgcatcgtataccctacgagggcggcgtgtggaggaacgctgggctgacactgtagaagattagatacacttgtccctaaaattaacccttaaccgctattagccgtgaacgcttcctaatatttcaagccgtatagctaagtggagaatgtggagccctggtcaaatcacgagccaattagccctagacggacagcacatctcgtcgcgttaagcggaacactcagcttttattacctagtgctcagcctggtttccatatgctctaaccgaactgatgcatacttgggtctgactaagggccatggttcgcgtcaagcaggccgggttcagaagccctggttgaggggcggatactccagtcgcccgcaggtcgcgatccctgggtttaaactacttcacgacacgtacaaacacttactacctacccctacaacactgagtgaaaagtgctagctaagtatgtccgtcgagataggactcgactttagaggtctgcacgaatgtcctggtaggatgcccaccaaagggaataatccttcgggtacgttttgggcaggtgtgacgtgagaaatccgcacccttatcgccgtaaaggttatttgcggggtgcgtggttacttgagttgtccctctcgagcggggtaacaaaaacctactgtatatctagtctggtccgaaattctttatgctgccgacatgctgcgacccgactacgtgtgtaagagtcattatttcttaatagtttaacaaggctacacccctatatgataaaggctattctccatagtgtaagagtctgttgcaccgacaacccggcgtctggctacat >6|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 703 bp tgcagtcgatgtgctattcgttttaggcagtctacgcgcttagtaactcccacggccatagacttatctcagacatggaccatgtcgatatcggacgccgtcttaccacatttttcatagcccttcataaggcagcgtgctcttactgcccaataaggtggacgattccgaccctaggcgaaccagcgctatagatggaccttctaattgatgcgcaacgtgattgtttccttggtctgggttagcatttcggtagcctaacagtcactccagttcgctaactggcctggatgagggccccatactatatggtgatagcaacgacaccccagtgtattgacctgttgtgtcctggtgatgttgaacgtcaccaagatagtctctatgtgactccatagctaaggagggtgacgtgatgcgccggccccgccccagacactgctacagaaagcttaaggcgagcgtatgaagagcctttgggcatacactctcgtatctagctaggtcaaggtgacggaatgaatctgctatatctagattggcacgcgataatctaggccaattgctggtaaaacacatggtcttattgtatacccaatcccgatttgaatttcctgcaacgaggcagctcgcagaggaacttaagtagagtgaaccctggagccgaatcccagagtcgtcggggacaaagtatatgcaacgg >7|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 243 bp actctttagaatgggtttcactaatagtacgtgcatacaatttcgtcagaaagggcgcttgctaagggacacggatcaatgatgaccagacttatggtgtcaggtctcactatattacatatccggaacccgtgcccgcaccacgcgctgggtctaggcgaccggtgcatcatctccgcgtctctagaggattctctcggtaaatgctgaattgcgtgagatcaaatccgtatgccagtcatg >8|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 863 bp attggcccggtccaggacagagccttatattgctactggtatgagaaccgttctgacgtaaacttgatggctttacgcctgcacgggcttcatacacacatgaccgtggacaaagtcgcccaggccctcgaatagggtgtaatggttaacggttagtgccaccccaatgggtgcgaggcagtaagagtgtcctatggcaaaactctcctcgtttcagaagggtcgctcctctagcctccttatcccccctataatagtactcgccgggtacgagccggagctccctcgagaagtcatcctgctcttaccacaggcagagaacgcgcaaggtttagatactaacttcattcatccaccagctggacaaggaactatagagagatgacattaggttatagctgaggggcgatcccattacccgaggctcgcaatagccttctactctgccaatgatcagtattgtaaacatggctggcgtccctaaatacaaagtcccgctgcaattgatggacttagacgaatcactagcaaagtcgataaatgtttacgctatccaatcccgcggttttaaaggtctgtactatacattcaatacggggggagatgtgtattgagccactagaggtatctaatgggggattgaaaaccgttttatagtcagtctaagcagcgccccttatgtcgctgtgatcggcagggttttttccgagatgtaaggcgaccgatatttcgttcttggcttggaagtagtacccgcgatgacctaccaagtagtccgacccattgatagactatggataccgctcccctccggtcacgaagcaccttagtgaggcatccgccgtgatgtgttgtttggagtg >9|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 494 bp aagcgaaactcctagaacttcccatcaggcaatcgtgtcccacgaagcacggatactacgggcactagttgaatggggggtttttttcgtaggtcgtaataggtactcggatagtcggcccagagttatgcttaagaatgcgctgcttaattcaatgtgactgccgttgtctccgatcagatccaggtgatgattgcgatcgcagcgacatatgtctcgaaagacgtgtcgtgaataagcctgtaagcccaatgcaacatggttccctcaccttgtagctgatgtaccgtgtttcaatctccgcggctatcgatcgccctttcatgcaagctgtaaccagacaggaatctgccctgccatcgttatgtatgcgtattacgactgattcgcgcaggcatcccggctaaggccactgggtataatccacagacattgcacgtcatatagaaaacgacctgtgttcacaatcagcccggggggagtcagagtagtat >10|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 86 bp atcctagcgccaaagatttactgttatggggtcgacgaacactagccgataatgccgtcctgggatctctagcctagtattatgcg >11|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 360 bp cgcctgagggtcctaaatctgacgtatgatcgaagagattggaaggtcccggcgggtcaccccacgttgcgatcatggccaaggccatggtttgctcaaaaatcccacattcgccgtcttacgcgttaggacctcactatcccacagacggtgcgttaccttgtagttgacgcgggatcgtggtgataacagctatttccgagacttcatattcttttacatagcggcttaccgtagtgactccatacattatttgcctattttgtagtgccccgaacagtaaggggaagccaactgccgcggtagctcatagacagacgtggtatacacgctacaaataagcagtggattgagacatga >12|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 140 bp gaattcctggggatttactcacccccgaggcggacaagatttccagctggatcaccgagggttacttaatcccttcgatgctttcaaaggccctaatcagtattgagcaacgaaagcggagtcgttagtgtccaagttgc >13|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 832 bp aatactctcgttgaagcgtcggacagtaaagtgagagatttcggcccacggtagtcggacattctcagtggggagcgaagagttgcgcttagagccgacgtacacgatataacctcaattgaaaatcgctatgtgcatcgttagggcctccggcgtgctgtttcggcagctgagtgtgagggtataacttaccttcgacccgaattgtctcgcggaaatcctaggcaagtaatccacttttggtacgggggagctagttcctctaagacgaacaagtgcactcttcacgtatagtgccctacagttgcgctgttcatggaatccgactaatagaccagtcccgaccccagtgcttcgactgttacaacagttatcgtcgcgcttcgggacgaaatctcggcattactatactcgacatacacagaaagctatggaggtcgccgtaatattcacctcgtcgagtctgtaggcgtagtaaacgttacataatagctaatgggactttcgaacggaacattatactcatcgtgaaacgtttggtcaccacactgtagaatccacctggatcggtgctagttctagtcattatgccctctaatactggtcgcgtagcagggcgcaaggtacagtgcgataccggaataggggtccacacctcacgtccacgtacctatagtcacgccgtatcgaattcattgcactccttttaagtattgagcgacgactggcacccggaatagttagtttgttcggaacgcccccacgcacgagtcgtgcgaattgaatgccgttagaacttggtgcatgcgccgtcgctactattaacggacag >14|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 296 bp atcggggtgcgaaatcccctgagctggttgactacatacgtaaccacgttccgtgcgtcatctaagcgtatcggctcatactggtggtaactagacttggtgaaccctaggtgccggcatatcgaggtccgcatccaaaataactatcgctatagctacatagacatttactcgcaatattacacgaaccgtacgtccctcggtattaacgtaatggttaaagtctctaattccgctgcagagcggcgggataaagacgccggtgtggcctgaatggtggatctgtccgtagtacc >15|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 515 bp accttcaatttgttcgcccgggacaagtagaaattactgtaaactaaacttaacctattccttgttaaagtccgcaccaagtgtactgtaagaatggtcgctcgtaataataacgagaagatcctcgagccgtggtctgctgcaactaccttgagcggtacatcgatgtcccactctgggcggggatcaggggcgagacttgtggtgaggccaaagaatggcgcatatgtaggcaccatacgtcgatacgttccaggagtagaggcctcgaacatacaccacgataagtctacagacgcatagatgacgtggaactcgcatctaacccctggaattctctgactgtcaccctgagccttggtgggccaccgttagggacgtgcaactaccctttctggtgcgaatctcgtttccttagtgccttcacacaaggaggccaattgtttgctaccccatccagacatttgggcattcttgtgagtcccgtaagtcggtcatgatgggtttaagttagt >16|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 820 bp ccggctcaatcctgtagaaccgcgtacaacacacccaagctataccgcacacggcgccttagcaaccactgcttatctgcgtattatacctttacaatcattacatttgatctatctgtgtaccggttttttttgattcaattcgctggattacgacctcccggccaaaaattctcaattcatcgttaacagacgtatttgaagataatcattcaacgtgaactagcacttggtcacttggtacgccaaccaagctgtgctttggggcaaccctttataactcacatgccgtcctaggactttacctagtccacctagcgtgttacagataccgattgcatcaagtcctcgacggaccgcactcgtcgcagttaaaggcaggtctatcagggagataccgtgtgttgcgccaattaatcttagaaaattatgcttgcaatagagcaggaaccctgaaagaatatggttctgtgaaaaagcgtgcactcccgtgtgtgctcgttggttactagcaccgacgttgggtgggcggaggggtcatatgcctgcgccgatcgatatagcgacgctcgtatcaagttgtactgcggcaacacgggtggctcggtgtagaaataaaacgggtgccctcgggtcgaagcgaagcgcaaatgctcgtgggccggagcggccgggggcgggccactatgggcattacaagccagacaggtaagagggagtgctttgatgtgaacagcaccccttccgctggacggggttatgacaagttcgccaagattttacaatatccttaacagtggaatgtccttaccgggtttac >17|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 791 bp attgttagggcctgtccggaaaagatcaacggaagatattcaccagcacctatgctgactcacgtagttcccgacgttcagtcccctccaacgtggaaggtaggacccatctccttaacgggatcgatcggtcttcctgtgaaagttgctcagagtcctcaaggacgtttttgggtgcgtgtacggtatggttatggtacgtgtctgtgacagagggtattcttactggttaagtgacccatatgaccacctgacgcccgagcatagacctgtaggggtcgacgcgagagatggcagcttttgtcatatcatcggttcatgtcaaggttggaggaattcaggcatacacaatctcggcttagtctgcgctgctcctgtccatacctggcacttggagtcaatggattcccaaaaccgacaaatgagtcaacgctctactttttgtttgctggaacgaggcaatatccattgattcccttctcaacaaatgttatcgcggcaggaggacacaccggggccgcccgggcatcgattcgtaaccgcctgaatgtgatacaaccgataatccacttgtaagaaaatgtattctagggacttggcaccgtacggtatagtagctaatctatctaccgctagctcccggcgaaacatctggcggctaacgaccgcacacgtccaagtaattatcggcactgcgatctgagggatcatctcgcggacggagattaactagttaagaatacctatctccatcgcaatgcgactgtagccaatagctatttaaggcgtgt >18|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 328 bp accgattacaggcagtcggccttgtccgctcgtatatccagggatgttccaccgaaagtgggagtgtggcacttattggtaaaaggcatttttacgaacgacactgataggattgatcactcaagaaatgttctcgaccctgaggtaggagtcttaacagacggacatcctccgtagatacgtgagaattaagggacgcatgtcgaaaacgcttggaatctactgtagtggcccaccttacgcttcttccaataactcccttcatagtccggcaacctcggtgggggtttcccttaggcctcggtgattgctagaacctccgcgcaaa >19|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp ggagtggaaaattctgtagtccgttggcggcgaccgcaaaccagaataatatggtcacgttaggccctcgggccccttcatatgtacggagtcattgaattagcattatactaccgttacgcaagaccctatcccatccgcgactgtcaccactgctgtaaggttgcaaggctgtttcaatgtaaagtaggcgaattctgacgtgggctgataacgaatcccccgggttatctagtgcaagtgctatcc >20|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 440 bp atcttaaacagcccaatcggctcgccgaccaatttcccgcttcacagtacgcggaagaatctgcagatagaagtcagccctctcacgtcaataggaatgctgcccgtcatgtttaactactcaagttttaaggtgtcccttatcggttccaggatcatgtctgaaggaagatggtcgcaacgaaatctggagtggcatacatcgttcggtcgaagcataatctcagacgttatctataaagttagggcgctgtatggattgggattcaagctcgaagcctgttcctgccatacagcgccttagttaggatcacgcctgaaacgtcacgacggtgctaagcatcatggggcgtggcccggacgattatccatccgctacttgcgtatggtggtgtcacccaaaatatatgtcgcgaagagtgtccgtgtcatgctaccgag >21|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 840 bp cataactcgtgagtggccctgtacaagtcattgcatcacaatccttgcaatttgctcctttggccaagcgtacaagaccccggacccatacgctcccggctgataaactgctacagcatggtatatccggatgatgcccctgaaaactgcggaagtcaatttgttgatgaatccccgactttccgctgttcctgtggatggtcgaatgccaaatgaagagctgctccccccttctttaatatcaagcactacaaagataaagcctgtttggctgacggcgagccctcccctatcgtacgcaggatagatctggccaagtccgctgacgatggggtccacactgccagaagcgtagatctttgttgagtcggaccggaagagctaacctagctaagggtgtagagttttcaggagcttagagtcatgtcggattatggttggcgttacggacgggctccaaacgatcaaactctagtggccactttcatggccagaacggaaagagcggcgatgtctgccaagtaagaccttcactaccttccgttgattacagacgtcggtttgacagcttggggtcttatccggcgtttcagagaacttttggagcactgagcgcagacaccgacaagcttagctagacagctgaaccgtatcacttttgaaaccagagaaaacgcatagggtggttgaggtaccagaaggtgtggtttctaagttggaaaccacgtacatcactccttagatctccgaaagcgtcttcgcgtgttcggactccacatctatgcgtttactagcaagcggtttctgaccaatatgcctatgatatatcttaggtcggga >22|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 234 bp caccgcgaaagtgactcagttttcccggtcttatcacggtcgttgtcgtccagattccggttgttaactgcgggagctataacacttattccttactgcgacggctgatccactaagaacagttcatagagctcggctatataatttgaagacatagattccacggtacttgtagcccataaccgctgaggaggaacgtccaacggttcgcgcggagcatgtgacgcttaaagg >23|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 917 bp atcaagtgattacctggtaacccgccgctcttgcagtgttcaccctttgtgtcgtcttagtgtttgtacacgttaaggaaaagcgttagcttaaccattacgccccccaaagcccggtgtgtagttatctacatgccgtgtcaaagcggtgactaaatgtttatcaagttctgatgacaacgtgagctcttaaagccattgactagtataagcacggaacaatgataccaggcaagcttgaatataggataaggcctctaagctcgaagcggatcttacggaggtgtgaatcaacagcactcgagtagtacaccgtggatggttagtgaagttggtggtaaaagagtaaagggttctaacaccttaacaatgcgctacacttcaccatagccgagagtcagtatgtggtaccgttagttctttcaatcccaagagcgcataactgcttgccgccgcttagtttagggacattaatgtatatgatgaggggatgctcccttcattcggaccgaccccgacacatcgtatcctaatggctaaccgctcgcagccccctgcctgcatgcggtccgccgagcagtcgaccaagcactgtgaaagaatttgggaaatcgaacccagactaccggaacttaactcacgaccttcgatcttctgatcccagtttccatacttatatatactgcgcgaactgcagcggactttcctccggcccagcctatcgagctttgcgattgtattcgaggggcgtcggtatcttttatccaacagctgtcatcagttcggaccggggggataaaagcgccagactggatctggggtggtgtcaaccatgtgagtcaatcgtccccgagtgccgagggcctagggttcctacatggtgggtgctcctgtcgaaagaatcgagtgacgactcg >24|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 676 bp cacacggcatcgcaaagcgagctatccagagatgatacatgtggttgaaggtgattgcgtcaacatgggggttgctcagtttggttggtcaatcaacggtggcagaccatgcgataacgatgatggtaagactgtaaggtaagttaaatactctcgtctgccagttgggtcgtcaacgctgcagagacgccattcttcccagaaggtccgagctttctacagtgccgcggcgtcatgaccaaaggggtccaacctcgcagtaaaatgtctatgcttctggtttggaatgagaccgggccatcccgtgataaagagcttcatttatcagggaaagcgtcgcgtagctctagaatttatttatcttgagtcaaatgccatcatctaatgaatccactgagctggtaaggcctaggcaggcacggaggactttatagtccacgaattcgggcatccgcattatcttgttcgtccgcacttaacgactccatacccgaccctgttactctatcgagtacactacggttaaccgggcgtcattgtccacaggttcacagcaacattgggcgaagaggagtgctaactaagcgccatgccccattctaagtttacaacaaagtatttccaagcggacggtccgtgtttcagcctcatcttgcccccctggacattccgatgg >25|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 870 bp ctgatcaccaatagcttgcgcttaacacacgcgccttacaattatatgacgcccttgccaatgacagatagagccattaatcgtggaaaccaggcatttatacttgtccgatgtatcgattctcctctatctacagagcccggacatgcgaaatatcaaaattccatgtatactgaataaatacattgggcaagccgggctcatgcagcaatcccagcgttgccttacgcaaagatatcttacggagttgcctttagattaacagcacgtgttcaaaaacctagccaactctgtcggtctagggcggaacgaagtagccagagtcgccccacgcagttcacgattacagtaatccccttatggttggggcatcgggaaattaaccctaagatgcgccccttgagcgccgaaaagggatcagttcagagtttccccccattcattgcaaggcactgttcaggcgctaacatgaggcccaaaaactagctggttacttcctgcgtcgcgcaactgttcatgtgttctttccgtacctgtgccaaagtccatgttgaggtacacccttgggtgtgttagaaagtggcttgccctcatagctgctatgggaaatttgagttgcgaccgtcgggcctcagggcgccaggtttggctagtaggcggcgtcttgtgctgcgtcaactgcgaaatgatgtccggtaggcttttatgggtcgttgcggccatgcaagagcatgcggccggttgtcggttacagagtcttagatactgtcaaactcgtacacaataaagagaggtactaatgaatcatgggcagcgcgttcatagtatgtgaaacttggcaatcagtgcacggttggccccacggtccta >26|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 751 bp gggtgcgttatggggactaaagactgttactaccggtactccgccttatagagccgtcacgtattaatcagctatcaacagatactatcgtcacagccctccttctggcgaaggatctgagcatttgcaaagctataagttggtacgcaacggtagagggcttcgtagtcggggaaagggcttgcagtagtataggccgtaacttatctgttgcaacctcaaccgcacgaatcgattactctataactgccctcaatacagtatggttaccagtcaccttcacactgaagattaattcgcctacagaaggagaacatctaggtctccgtagaatagcagtcgtgacaacacgccgaaacttgaggcaagctcaggcgtgtgtagcgagctttcagcttaggcgggcattacctaattgttacggaccccccaaaaattgtcgactggtttcttctatgcgactataaaacaggaataggaaagtgggtgcatgcaacttgttcgtgtaccgttatgatcgattcctatgtgggagtttgcgcccacctcgtctgtgctgtcccggcgtactgcaccacgctgatttatcttgtagtaaggatggggtcaatacgagggctgaggcgtagagcccgacgggaaacacattcgtcacgccgcaaatcgcgtcgtcctcagacctcagcaagaccttttggtcagaatatggcggggctctaattgcctttacttccactccgtgaacttccgc >27|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 574 bp gtactgcaccttgcactgctatctacaatgccgagggtcgccctagtgctttgcatgtttggcctctacctacgagtctacgcgggcgtttttaagcaagctacgatcatcttgatccaagggtacgaggccccgcagaccaatggaggtcgtgaccaccctcgtgtatgcctcgcactaagcgagcattctggtatactgtctctctcctgtgataataacagtcggctcgatattcagttcacatgaaacagtatgttatataggtgggatggttataacacggaaaggtgaaaaagagtgcggaagttacttaggagtgccgtccttgatcaagcatgcgtagcaacaagcgcccgtaacaaccggatggaggttctgggtgaacaagggcgcccctacaggatatacaggacttgccctatggtccatttatagtatggtggtataccccggctcacctgtgaattagattgcgaaccaaataaaggatcatcgggttcacatttaggttagagccctcatacgttaacctgccggtacaccacttctttcgccgccgcatagtacatgc >28|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 169 bp agctccctaaacaacacccgcgtaaaaccttcagttatggtgccgactaaccctgtggatgtcttagcgctctcgttccgatgggtgctgatactagtaaatgagactcgagaccgagaacacgcaacggctacaacctggtcggttgttggggtttttataatcagtg >29|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 408 bp tgcagtgatgcatcgataagaccgcatagttacctccttacaggtgacgctaggctaattgggagtgctggcacttgtgccctacagtcaagcgctcacgcggtgttctcctcccgcaatcttagatattaggctctgtaccgcacgaaggatgaattttcttgactattggtccctgtttacgagggcttacctagagtgaggatgaacataaacaaggcctacttgacttaaggcttccaaatcacttgagggcaaatgactcctcaaacgcgagtgccagtactatccgtgagggaagaaaatctgccaaaaggccttggccgagatagattccgccgcccagcttacggggcatggtctataagttccgttttagcatgtactgtcaacaagcctgcgggatcc >30|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 52 bp caaagcgattcgggttaacgcacttaagagttcgacgtaggttagtcccctc >31|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 581 bp tgctctgacgtgtaagcgccttcgataacgtctttgcagcgccccacaaagtaaggaccggtctaacagggcttccgaatcaatagactgatagtaatgggatcctgaggctgggacccgacacacggcatattttactagaaacgctgatttaaactccaattatccttgacgcactgagccacagtcttagacgcagaatgtccgcaggagccctgtctttcccctaaatcattcgcggcatttgtttacgggttaagtcctgcggatcctagagtctgggccccgtacaaccaggaagagactgatactccgcgtattacggcccataagaacggtgggcctcgtttgtatttgactactgtacactcctgcctactgctgaacttaatgatgcgagatgaaagtcacagggggtgtagatcaagttgcacttaggtttcttccgatagattactatgcaagctatccacatgtgagaagctatagccacctctcttaacttctggtaagggcgcattgcggagcggcgagtacatatggtcttaatggacgccggtcgccgtccagatggagatagc >32|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 249 bp gcggaactacctctctaagaccgcacaacaagtgtagtagatgaagatcacgcagagtgctcggcactgcatttttatacgtcgaatcagaaacgaggttcctcctctaggcttgttaaaaatccgggcgcgatgggctggtaatctgtggccatgggagcctcgccatttaaagattttggttaaggctcctctgttgtgtccatcacccttgaacgagcccgtacaaaccgtgtacgatgttgacac >33|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 297 bp gccctcgcgccagcttacttttagaaaacatcgaccggtaagagatacctgggtgagctgggcttcacgacatgttcttaaatcaatactctaaatctgctttgtagcatgcctcaagtaaaaaaatgtgctggttccgcacaggtgtgacgattaacgttgcgcccgtttgcgtcagtccagatcaccgatcttccacaccaccggtgggctgccggactgcaggtaatgactcctggctgcattctctgacataaaggttgaatagaacggcgtccttgagaaggttatggaacg >34|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 573 bp gcctaggggtcttgaccacagggagtacgagcattgatcattggagcaggtggctaatattgatagtggttagaccaccggcgcatcatcgtacgagcgcgggcgatacgtgtctttcaccggcgcactaatcttatcttacttctcaagccccgacagcatgtacgccaagtgttgttctgatgaaactttcgaaatagcaactgttagtcagttatagttggggagggcagtgaatacctcaaatacacccaagaaataacttcgaagcggcgcctatatcacacccctgtttcttatgactggtttgcgtgtgctaacagcaatcaagtacctgaccgtatgtccttgaagcttgaggatagtacccggatccagaggactgaaaaccgtgtctacgctgttctcacgccgatgtttgaaataatgagtgtagcgtctgccaaactggcttaagcactcagcgtgaggcgagattctatggccttgcgttttcgtttcgcgcgaacggtgacaatccagaaccccgaccttaaatatgcgacgtaaccctcctggccccgtccgagtgaa >35|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 559 bp gaaaaagtcgccccattcagttacaatcgtcttcagaagccagctcggttggggctatctgcggggtaatgcaacagggggctaccagacggtaaaccagggtcttgctattggtgttacgaaacaaaggagctatgcgacctcattagatcgagattactctcacaggcagctccggccatagcacaactaatttcgggtgtggagctcaccacaggaacatcttgtgcgtcctttgttatttaattgtgcattgtaatgcaccggaccccgggaacatacagccattatctgtgttgccgctcatccgttgtacttcttaatacaatcagaattgtactcaaccgattgccaagcacgtacgcgtcagatacacatccggagtcagtcctcgtcctgctttgactcatgccaaggagtgcgcttcgcgcgggtgaatctcgttatcgatttatagtatttatcttatcgcggaagaccacgctagtagactgggtaacgtcgcgattgtcccaaagccagagtgaagataggcgacatcctctgtgagaggggtacc >36|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 187 bp ctaagtccttatctatgatgcatctttcgttactgcgacaatatccgagacgagcagagttacacgccgaggtgtaaacgaatacgattgctatatgcaacgagttggttacacgcgtgaaggcgaatgtggatgctgcacttggagtcccattttaccggccgcacgtgctagctcactcaccttg >37|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 549 bp ttcatatggggatttggaatcgggtttgtgcggaatatgcccacgagactgcttatgtcaacgagacgacccattgtcacgttgtaaggccaccaataacacacaggtcttcgtttgctgtctcagggcaatcgcatcgacaacatcgtatggataccgttttttatcagcttacggcgcatcatactaataaggtgtttgagagggcgcagactcgaagcagtgtgatcttcccggttcgaagatgcaaaaacggtcctatttcgatccaaaactcagcgcactagtccaatgcttttttggagggttttgtagaacaatcgaggcgcggagcagcgaaatagaaaacgggccagtgaacgacggatccacacggaggtttcactcgaggacgtgtgccccaacaaccggttatctccttacttattcaagtgtgcctgcacctcgataagtctaaactccgcctatcccagcgtaggtcatagtcgtaactcccaacaggtgccctgcgacttgttcctcgccggtgctgcaagaaggtagtgttct >38|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 916 bp gtggcctaccataaatcaatttgggttaacgctctttgatctacgcactatgttgattcacttaccccttgtcaccgggcagaagagagccagtttaggtgtggttgtatttgccaaaccgcaaaccgcctaatgagctggatccggccatggaattaatcccgtcgtttgactcgaggtgttcaaagactgtgcaacacgacgtgcattcatcactagaacttaatctagaccaggccttgtggccaggagaggcgacgtgatattgccctatacacagataattatatacccctcgcgcgcaaaccatctcggtctctttccaaggtgccagcacgcgataactcgtatctgggctggatgtgcgtttcccttagcccactcccccttttaagtactagcgtactcgggttctacggagtgcatggagtttccacaaagggacgcaacataatttaaagaaccgagccttacgaggagcttttgcaggcttccgtcgctatccgtcgtcatggagtgaggctttgaggaacgagcacttgggactctatataccccggagtaagtatctacagccggggtctgacgccaaccatttgttactttgttgcgagggctactcccgctagtagtagaactgctgtcaggcaacgacactaattaagtggccttgacccgtacgacttgagaatcttcggttcaatattccccgtctcgaaaggctgcttcaagtgcgctacacattacatacaactaggcgggagggctccaaaccggcggagctacaaggagtctaatagtgcgaaaaaagggccgcgcgacaaatgagtctggtgtggcattcggaacgagtgggaacgatccgaatccgctttgacaattaaccgtcctaattaggatttcgtgaatagtagtg >39|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 848 bp accccggtcgctttggccggtcgtagccctaatcaattctgttcgtatcactaaagtaacggtttgaaatcctttgcaaacttgatctgggtatatgaaccggtatgcggggatagtggtaaataagtagtttacgagctgagcgtggattatcccagagaagttgccttaggtccagagcccgcacctacaatcactcgaggccggtcgagcgttgcgtggcaaggaaacccagccggtcaccctaccctcaaactcacgtcattgatccaatcatacatggcgtctctcacggtggtgttgtttgctgtttcttgcggcccgtttattcgtgaacacgacgcaagccctaccgacctcgctagccgatctagacgactgggtgggttacccttcccagaggagtgactatggatatgtagtccttataggcatccagggcaccggatgcactagtcacacccctgctcagatagcgccaaaaagtgaattcaagcgttcagctggacacccattaaacacgagtgctactgggcttacataatacgagagaagattggccgattgttgcccttagaacttatgtgaggtaagtctgagacgccgattgcggcctagacattagtaaaaataagataagaactactcccactgactcgttcgggcctcctagagctagggcccccctgagcatgttcagttatatcctacgggctagcgtaaggttttttcgtttatgcgaggcttgagacgcgacatacgagcattccgttgctggggcgatagaccacgatacgctcagaagagggaatactaaattgataaaatgctttcattgtctagcacct >40|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 289 bp agagcaaaagaaagtctgctccgcgtgacacacttgctcgttgtagtaactgcacgcgccgtctactcgacagggaccccccgtcggttcctctctatagcaatcgcggaagtggttccctgcctcccgcgcagaagttcaaactagtaatccttaatgacttgtggggggggagatcagtttcttccacaatggagtaaacttatgcgagaatcaagatcgcagaggccattttttgatgatactgtcagatatgtggttagccgtatcacgttaccgacgcagaatt >41|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 642 bp ctggggaatgaccgtaccgatctaattccccgtcgaaaaacttatgacgcgcagttgtccttatgcttgagacatgaatccttgccccatattggcgatcttggccaatgagatctgtcgaaagtactggaggccggtaaattgggggctctagaggtccgcccctgaaggactaacgtgtgtgtgtgtctacgtgtcgggttatcagcgtgttggacgatggccgtggattcaacgcatgctagagagctaatgatcctccgaagtcaaaagcctcagtgcttcgatttatgagcgcgtggcgagtacgtctagtgatactctaactactataaacaaggcctcgtcgcagaatccttcaatacattgggccccgggagataagtcggaccaggactaaattacacatgggggccctaaccctaggtcttaacggatggcttgataaagcacgcgctaatcttccctatcgtgcacatatctcttgctaccccctgctaaatcgctcgggccttggcctacacaatatgcaactggaagtgtggtttcgcaagggtataaataactgaactgtgaagttcccctgttacaaagccattagccgctaatgaacatagacctaacggactcgctccttgtgct >42|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 993 bp gcaagaagccaaaaaccttgcaggaggtcatttaagtttacccgcgcataagcagagacggacctctctgagatctcgcaccgcgcgcccggccggcactatcgatgctagactagggttggtgactagcccgtcaaaaccagcctaaacgcaaagattgtaggcgctagtccggaactgactgcttcgtgtcggtgggagcctagtatgtttccgggtctatgacccctaaaatcatagacgtgtcttaatagctatacctgacttactttgaagtacttgccacgacgagtttatgagatattagtattctcagccttgtgctcctcctacgaccgggatgagtcatctggtcaccttgatccgtacggaactatagatagtcacttcgaggcatgcgcgtttggacacctactgcttctaagtcatagcgctcgcgtaatgcagcctcgcatgttctttacacgacacgagggattttgattgttataggtgaacgtacagacaaaatcactgtttcagaatacttggcttgtacgatctccagtactccccgtgccggctcggcgaacgggataagacatccacggcattctgtagtggttgaccgggtttgacagactcccttatctggatggggcccgataacgatgagcatagaaccgttgtaagtctcgattgtcacccgaggacaaaattttctcatcctaggttcactcagcgtcgttagagcatcagagttccgtctttaggttactttaaagatcgaaagaaaccttcgtgctggtaggaagtctcatataagtagccagcgtggaccgaggaatagattgttatgttgcatgtacttctgggatcgtgtccagccaacttcaccgcggcacacgtcgatggacccaggaaatgcctcggggtcagaatgagccttgtgcggcccgactgctgaacgacgtataaaatagagcgtcgtggaccccatacaacgcacataac >43|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 473 bp ggttgtccaggcgcgagcaagtagctgactcgctaatcttaacgagtattgcttaggacttccaaatactccaagacgtcaatacgctttatctttgtgaagtcatcccggaccgagcgcttgggtcgtgatttaaaatcccctgtgatgtggctacaggtgcggcctatacagccgagaagaaggccgtctttaggcgtccaatgaaccgttacagggacacaccaaactgcgccaactgatcccacgggtcacggtacgctctaagaccagtcgggattctgacttaacatcgcagcatctgatcgagtggcttctccagcgagcctagggcattacaccgtgcgttcgcaaactctgtatgcttgtcggaaggtatagcttgagcccctttggttcatatcgttaatacttaagtaagtggggtcaatctttttgaactaatcaaatgactcactggcgaaaggcagacg >44|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 272 bp gtagaacttgttccccatggacaatgctagttccgttaatgccaggtattcatgtgccaagcgcctgcctggggaatacgagcctctctacaaacttacggccaccatgcttaaagattcggtgacttcactaatgacctatacaagtaatgcggaggacgctgtcgcttattgctctttgctaaggccagttatgtccgtcagtcaacgatacgctgcggcggtgggtgacggcactagaccggaagcctgatgacaagttcgaatcaata >45|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 860 bp aaaagcatcactctaacgacgctaccgtctgaatagatcaagattgctatcggttcgaccttgatcgcatgtgaacccgcccaaaaacccgtctcgacaaaagttacgtcgcatgggctgcgccaccggatagctcctagcttatcttataaatcaggtagagctacaacatggtgctatgacaactggagtgtcatcgctttggcgaaaccgtaaagggtgggaattgctgcattctcaactgggccgaactattccgcattcggctgctcacaaatcgtggaatgtgtccttgaacgtcctgcttaaccatggtcattgccacgaaggccctggtcggttcagaagtgtatcagacttacaagggtccgaggaggttccggcggggggagaacaagcatcgaacacgactcactgacctgtaggggtattacctatcactgtgacccacatctgaggtactggtccattccataaagatcgagtcgtcttcctaaactgggcactcatacgtacaggaccaaaaaagaggttggttggtgaccgtgccacgcctggcggtcagaatgagttaatgggtcgaataggcgatcttgataacaatagagattcacaacgtgggctcaagccgcttcctgcgaccactattagcgaactagggctatcgccacggaaaagtacttctaacatatacccctatggagtttcagtgtgaagccactgagcagtggcgaggttgtcgcgtcttttgttcaattgttccctgtactttaatgttcgtatcggatttgctttccgttgatagcgaatctctaccctgtcgctgtgtctagcatccgtccgcgaagccggtgacacat >46|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 884 bp aagcctctacaggctctgcggtttggctttacttaacggtgagtcaggaaaacattactgctacgttcaccgtgttcagagatagagagtacattagggaccaatcacaacgttcgccagggcaccgcctaatccgcgttgttagcaagagtacaggctctcgtatactttcagacccttcaatactagacgacaaattgcagcccggggtcatcggtcgactcagatacgtgctaacgagtaccaggtctaccgttgcaacgttggatgcgttatactcggcataaggcgatgccctttgacatactgcactacgctttggcgatagcgctacagttgatgaccgggctaactgcacgcgcgcgtagacgggagcccaattttttgaaattggcgacgaccgacttacgcacctatgcctcgcacgaccagtagctgtaagctctggtcgttggcattagattggcgaaccgctgaccacgatttaagccttccaaatcgttctactatacgagctcagcgtggcggtatgctagttgataaagtaaggacgttctcccgggtccagaagcccgatccacttttaaaagggctgatgcataaactaagactgtatgtctggttcaagtcttcagtaacttcagcctatgtttccaagtgacaacttggagaggggatgctgcggattctgatctaagctgttatatatctgttatcacctcgaaactatcactctggatgaccctcctatgtgcgctcagcgtagaccttccgctgtactttcataaacatagcatggagtgactcagaagagctttaaaggcgggagatcataggtcgcggtcgagcgaatgggaccgaggggaatgccaccaattcccgct >47|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 888 bp gcgccttgaagaggcgaggtctaaaggcaaaaatttagatccgccctatgagacggccgacgcggagaattccctaaccactattgtcctctgcatcgatatcaggaataggcttacctgcaatctcttatggtgatagactgtttgggagctgaacctgagacgcgcacgaaatttggaaggatcaaataggccccgcagtctctggtagacttctgccgagcggactagcttggctaaggtgtacaagcctaaatcgtttttcacatcaattttatagctgattatagaggaacgacgcgatcgtgcagagtgatggtcaaagggtcggtacgtcggatgcccagaaatatggtctgaggggtagcctgttcagaggcgcttaacttgctccttgctcacaggagcgatatgcgctagggttctggacgatcgaccatcattgtaacccatccaatccgtccttattgatggcccactcccgcatgctggtccgaaggcgatcccgatatcccgagcactagaattcgacacagtctgtaccgtgcctaattcttatcagaccctttatgcccgtctcggccttagagttaaatggactatctccacggaatgggcagtgcatcgctaccaagggtcgcccgatcccggggtttccacttcggatcatttttgtggatagtacatatcccacttcaacaagatagcaaaagtccaagacgcagtagagcacgtttctcaaacaacggaccatgcacggttcccggtaacccagtccagggaacaatttgtggttcttctttgaactagttgggagcaacagactcaggggctggccagtagtattgaaccaagcgttttttacaattactttgctggcgctaactg >48|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 588 bp catcaagatgggttacgtaggaccgagattcagtctctgggttagagccgacagcggggccgctacatagtacacggcgaggaatgcggggttgggctgaaccgtacacagtgggctagctgcggtacctgccaccggcatgcgtttaaatcctttcctttggcgaagccaactgccgacgtccgcaacagagactcgttttccgaccccgttactaaatcagctaactggcgcctgaatcctcttacgtcggatgttaattagtgtatagaatatcggagggttgagtgcgacgcgcttcctgttctccgctacttcttgtattatgatttggtcaaatacagtcgacaatagtgctcgacaggatataacgctatggcaccccatagtatcagctaaacgttcatatcctagtagcttagaaagaaatttaatatgcagtggagcctggaaccttacttattgtcggctcatcccgcaatactcgcaaataatcacgcatggtgccggacggtacgggccacgtcaattaggtaaacgaaacacgttacgtggactgctcacccaaccttgagggactgtctactt >49|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 626 bp taacctcagtctcgttcccccctcggtagttcggacccttattcgcttatctcacattcatcactgtagaccaaggaccgggcatacttgcggatatctaccaggactaggcacttagggatacgctgttgaatacgggtttcgtcccgtgtactcaagtgtagtttaagataggtacgagtgctagtacatcgtacaatttacaactgacttaaacgagagtttattatgtcttgttcacttgttgacacgcctgggaaaataataaaaggcaacgtctaatctcagacccgttgattaactaagcagcgtgacgtggagtcatacctgctatatttgggaggtgggaagtattggtgaaccgagcccctcctagccgtggcggtaatgacattaagaaggcgcagttagtcagcactcgaggcaggtgcgcctctgcacgtctgctgatatcgtcggaacgagttaacgctcccgcccacccatcagtcagggaccactttcgacacctagttcgagatttctcctttcggtaaaatcgggctcaattaaggcttgggtaccccggccagggaatatgcacatgagggaactatgagactgcacctaccccgcacggatggagg >50|random sequence|A: 0.25|C: 0.25|G: 0.25|T: 0.25|length: 214 bp taactgtcggtcactgctcatcccgactagttcggctcactagacttactcgcggaagcgagaagtaggacgtcgtgtaatactccaacgtcgttacgcaatgttgtaaaacttcatcgcattccgtgcatggcctaaacgtgcagcattatataacgctctttggtcttaatatccatcgcgggagtaacgcgaaggggagacgtgtgcctga \ No newline at end of file diff --git a/read_sequencer_package/fasta_testfile/result.fasta b/read_sequencer_package/fasta_testfile/result.fasta deleted file mode 100644 index 32258d65e587dd9e7501d585c137d799103c60f8..0000000000000000000000000000000000000000 --- a/read_sequencer_package/fasta_testfile/result.fasta +++ /dev/null @@ -1,200 +0,0 @@ -0: length 103 nt -ACGCCCTATGTCAGAGTGGTGTTAGTGCACAATTCATTCCGATATGCGAA -1: length 35 nt -ATAACGAGCGCGAGCCCTTAACGGGCAGGACTGCCGACTCACACCTTAAT -2: length 61 nt -GGTCTCATAAGCACTTCGGGTACATCCTCCTAAATGGGCCGCGACTTAGG -3: length 54 nt -TAGGTCTCGGATACGAGATATAACCGTAGCATGGGAGTGACACCCATGCC -4: length 64 nt -TCTGCGATACCCAATTTAGGGCATGGCATAAGGCCCGGCGGGTATTTCCG -5: length 85 nt -TTGTTAAGCGTGTTGGTAGCGCCCCCAAATTGTCCCAACTGGGCCACACC -6: length 53 nt -ATAGTACAAGTTAGACCTCATGCATTGAGCTGCCCGGTTCGGACCTTAAT -7: length 27 nt -TAAAATATTAAGGATAGCCGGAACGCGCTGGCGACTGACGACACTACACG -8: length 25 nt -TAGATTCCAGGGAATGACGTCACACCGTTCCGTATATCAGCGGCGCGGAA -9: length 52 nt -GGTGCACCTATTAACAGAGTGACTTTGGGGGAATCACTTATTCGCATATA -10: length 41 nt -AACGACAAGCTGAACGAGCTCTGGCGGCTACGCAGTGTTCGCGCAAGCGA -11: length 106 nt -GTAATAAACTAAACATCAGCTGAGGCGTCCGACCACTGCATCTTTTGTTG -12: length 5 nt -GTAATATGGTATAGCTCGACCCTCGTACAACGGGGGTCAGACAGCGTTGC -13: length 49 nt -TTACTTTGCACGCATGCATGGAGGCACCATGAAAGACGGGAAGCGGCTAA -14: length 22 nt -GCAAGGGGTTTGAGGGAGCAACGTAAGATCAGAGATAACAGTCTCAAGAG -15: length 109 nt -AATTGAAGTCAGCAAAGAATTGGGATTGCTGGAAGCAACACGGACACGTT -16: length 43 nt -GAACCATTCTAAACAGCACCACCCGCATCCCGCTTTGCGCAGCAGCCACT -17: length 85 nt -ATGTCTCACAGGGGAGGTCTCCAGGCTTCTACCAAGTTCCCAATTCCATG -18: length 16 nt -ATCTCATCAATTATGTTGTACACCTGCTCGTTCTAACTTGCAGACTGCAT -19: length 52 nt -GAGTATCGCAGTATACTGTTGGCGGACATTCCAGAATATTGGTATTCAGC -20: length 2 nt -ATATAGCTACCCCAGATTGTCTTAACGAGTCCCGCACCCCCCACCAAGAT -21: length 13 nt -GTATTTACCTGCCCACTAGTACAAGTCCCCGAACCTTAGCAATCCTAAGT -22: length 64 nt -TGGCCGTTTCAGAAGGATTCCTGCCCCTGTGATGAGTCACGTGGAGACAG -23: length 59 nt -CGAGGAGAGCTGTAAAATACTGCTGCCTGCTACATTGGGGCTTGGGTCTC -24: length 82 nt -GCACAGTAGCGAGTGACTCGGTTACTATGTGTTGCTGACTTTATATGTGC -25: length 6 nt -GAGTAAAGCTGAAACACCGAACCGGCAGTTAGAGCTAGTGGATTTTCTTA -26: length 87 nt -AGACATTTTCGGAACCAATGATCTTGTTGGAGTTGAGAGGCCGAACCAGG -27: length 136 nt -AACACGGTAATATCAGGTCGGCCTTAGTCACTTCGGAATTGGAGTTAGTA -28: length 68 nt -CACCGGTAGCTGAATGTAACGAATCTACGTTGACACGGTTAAAGGGATAT -29: length 84 nt -AGATTGTCATGGACTGACGGAGAACACCGTGCTTAAAGTGGAATAATGTG -30: length 51 nt -CGGCAAGGAGGAGGCTACTGACCAATCAGACAAACGTTCATGGCTAGAGA -31: length 51 nt -AGCAGCCGCTCAACTGTGACGTCGGCTGAGTCTACCAGCCCTCAGACTAC -32: length 73 nt -TCCGATCTTATAGTAGTGATCCTGATGGAACAATAACTGACCCAGTAAAG -33: length 58 nt -ACGTTTTATTGTACCTAGGTGTGGCACCATGAAAGGCAGGAGGCCCACCG -34: length 28 nt -AATCCGCAATCGTGCTACGCATTTTCCAGACAATATTTCGAGGTTACCGC -35: length 13 nt -AACACGAGGCCCTCTCGACGCTGCTATTTAACAGCGCGTGCCTATTGAAA -36: length 2 nt -ATACCGAAACTTACTAGTTGTGACAGCGGATTCCAAACTGCGGGTGAATA -37: length 71 nt -GTGAGGAGTACTCGGCACCGGGAACGTAGTAATGTGCTCTGATCCACTCT -38: length 46 nt -CGAACGTAAATAAGTCTGCTGCTGCCTCAGCAGAGGGATGGGAGGGGTCT -39: length 48 nt -CGCAAGAGTGTCGAGCTGGCAGACGCGTATTGCCTTTATATCCGTGAAAC -40: length 63 nt -TGACAAACATAAGTTTGTCGCCCGTTTCCACATAGTAGAATTCATTAGAA -41: length 11 nt -AGAAATAGTACGTAGTGTTGGCTGTGAATATTCGGAGATAAGGACTCATT -42: length 68 nt -GGTTCACAAATTGTCATAGATCATCGTGGTACGACGGGCGATCTTATTTC -43: length 7 nt -CGAGCGTGATGACGTAATGGAGTGAATAAGGAGTGCGAAATGGTGCGTTT -44: length 84 nt -TGGGACGATCATTCCCCCTGACCTTACGTACAATGCAATTGCGAGTGGCT -45: length 23 nt -TTGTACTTACGGCTTAAATTGCACAGAAGGGGTTCTCCGGAAGATAACTA -46: length 58 nt -GGCCCTTCGGTGCAGTCGGCCTAGGTATCAAGCTCGCCTGCCCGTGTTAT -47: length 50 nt -TATCGTGGTTATGTGCGGACCGCCTGATATTACAGCTGATGTGAGGGCAA -48: length 63 nt -TCTGCTAGGGATCCACTCCTGGGTCATTATTAAGCACTTACGGCCTGAGA -49: length 48 nt -GGAGAATTCGCGGCATAAGATCTCCATACGGGTAGTATGTGCCACTATTA -50: length 64 nt -AATGCGTACGGCCCAAAAACCTTCAGGATTACCATGAACCCGTAACTCAA -51: length 41 nt -AGTCATATAATGATTGAGTTCCCTCAACCCTCCGGACGTAAACACTCCGG -52: length 44 nt -TCCAGATGGCCTGGCAATCAGGTGTTGACGGGAACCGCACCCCTGCTGCT -53: length 62 nt -AGGTTGTCTGAATCACTCGGGAGACTTCCCCGCACGATATCGGCACCTGA -54: length 73 nt -CCGTACGGAACATCCTGCAGCTTCGCCCGGTCGAGAAGAAGGTAGAGATA -55: length 39 nt -TGCGCGGCGGACTCTGTCATTTATGACCTGCTTGAACGATGAGTGGGCCA -56: length 88 nt -CGATCCATAGGGCACGCTAGTTTGCATTTTAAGGAGTGCCAATTACGAGG -57: length 117 nt -TATTGCGTCCGTTGCCTGCTCCATCCGTTGCACTATGCCATAAGGCAACA -58: length 67 nt -TTAACGCACGATCAACGGTTCATAATCTGCGGTGATCCCAGAGCTCTACA -59: length 84 nt -CTGACCCTCGATGTGTTTAATAGTTGTATTTCGGCCCCAGTAGGCTCGAC -60: length 71 nt -CTGTCAGATGCGCATGTTCGTGGTCCATTGTCAGCCAAATTAAAGGTTGC -61: length 102 nt -TACATGCTCATATGAACTGACATTGAGTGGAAGGTAGCGACCGCGAGGTT -62: length 66 nt -ACATGATCTTTGTCGAATACTCAAAATGCACAGTGCCTAACTATTTGGGT -63: length 22 nt -GTCACGACACGCATCTACACTAAACAGCACCACATGTTAGAGATGAGATC -64: length 46 nt -CGTGGTCGTGAAGACCCACTATATTAGCAAGAGTCGCGATAGATGGGTGT -65: length 78 nt -CTAGGTAATTTTGAGAGACACCCCGACGCACGGATGTTCCGTACAAAGAT -66: length 32 nt -CAGTCTGAATGTATCAACTAAGAGAATCGTACGAAAATAGACCTAATCGA -67: length 42 nt -GCTCCGCACACTAGATAGCTGATGCCCACATACTCATTCATTATTCGATG -68: length 49 nt -GTCTGAACCAGACACGCACTACGGCTAGCACCTATCCTTATCCTAAAACG -69: length 41 nt -GAGTTACGACCGGACGGGTTGTTAGGTCACCCCGGAATCAGACGATTTAC -70: length 29 nt -GTAGGCCAACGCGAACTACTCCTCCCGCCCACAGTCTAGACTAAGCGTTG -71: length 94 nt -GGTTTAGACTGTCTGCAACTATGTTACTGGATATACCTAATGACAGGGGT -72: length 48 nt -TAAGAATGTTATTGGGCGGTAGCTTACTCTATTGGAATTGAGAAAATGTA -73: length 63 nt -CTAGTATGTTTTATTGGATGCTTGAGGTAGGAATAATCACATCGGGCCGA -74: length 70 nt -ACAAACCTCCTCCCCGGCGACGGGCAACGCCCGCATTGCAACATGCAAGA -75: length 58 nt -GGCTCCATTTCGCTACCCTCTCCCCTAAATCAGGTCCGCGGAGTTGTCGT -76: length 98 nt -ACATACCTATGCCTCAGATCGGTTCGGCTCTTGAGGCGACACCCGTGATA -77: length 34 nt -TTAGAAAGGAACTCATTTTAAGCAAGTTATGCCAGGTTAGCCCGCCCTGG -78: length 48 nt -AAGGCTCGCCGTGACCAGTAGTGCGCAACCTTTCAAGCCGTGTAATATTT -79: length 51 nt -CCTTAAGGGTGGTTTGCACCGTAAAGACCCTCTCTTCCACTTTCCCCGAA -80: length 30 nt -GCCTCGACGTGCCGGACGATGGGCGCTGATCGAAAGTCCATTGTCCCCCT -81: length 14 nt -AGGGCAGTGTACACACACGGCAAGGGTTGCGATGGGTAGAGGCTAAGACG -82: length 39 nt -AATGGGACTTAGTAAACAGGCTGGCCGCCTAGACTGTGTGTGAATGCCCA -83: length 29 nt -AGCTTGGACTTAGAGAAAATCAGTCTACAAGACATAGGCTTTAAAGAGGA -84: length 15 nt -ACGGAAAGTAAGTCTCTCGTCCCTGGTTAAAGGGTCAGTCCCTCCCACCT -85: length 20 nt -CCCAGAGCAAACTGGACCTCCCACCGGCAGATCATCGTTTATTATTATAC -86: length 64 nt -CGTGGCCAGTACATGCTGATGATCCCTATTACAGACCTCGCGTGTTGAGA -87: length 52 nt -GTTAGGCTGTACTGAATACCATGAGCATGTGGGTAGGTTGTACTGGAACC -88: length 60 nt -CGCCGGGCTCCCCGAAGTATAGAGCCAGCGGTTAAATAACTTATCTGATC -89: length 32 nt -CTTGTACGTCGAGTATGAGAGGGGTTACACTTTCTGCTTGCTAAGTTCAG -90: length 14 nt -AAACAGATTAATAACGTAATACCCCCGCTCTACGCCCCTGCATTACGGTT -91: length 21 nt -GTAGTTCGTAGCAGGGGATTGTCATAAATCAGCGCCTGATGGGAAGCGTT -92: length 47 nt -TGCCCAGATGTAACACATCCCGGCCCATTCTGGTAACTCCGTTTCTGGGT -93: length 47 nt -AATTAAGGGGTTGTCAAACCGGCCACTTTTGAACAATGGGCTTCGCCGCC -94: length 42 nt -TTCAACCTTTACGATGCAAACATAGACAAACTTGGACGTATTCGGACCCT -95: length 35 nt -GGCGCTTAGTGGAATGCCCCCCCGTATGCAGCATCGAGTATTCCGTAGCG -96: length 54 nt -TTGCTGATCTTTCGAATTTGGATCTGGGATTTTGCCAGATAGCGCCCGTA -97: length 34 nt -TACGTAGATATTTTCAGGAGGCACTGAACAGCCCAGAGCCTCCATCGGGC -98: length 15 nt -ATTGCGCAGGTATGGCTCAGAGACACAGTATACTATTACCTTCCCGCAAC -99: length 42 nt -CGATCTGGCATCGGCATCCGAGTAGCCCCTACGTATGGCTTTGCCCAACA diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py deleted file mode 100644 index f92458982cf1d57721279b44344fad2f262f80ee..0000000000000000000000000000000000000000 --- a/read_sequencer_package/modules.py +++ /dev/null @@ -1,130 +0,0 @@ -import logging -LOG = logging.getLogger(__name__) - -def read_in_fasta(file_path: str) -> dict[str,str]: - """ - This function reads in FASTA files. - - Args: - file_path (str): A file path directing to the fasta file. - - Returns: - Dict: It returns a dictionary with sequences. - """ - LOG.info("Reading in FASTA files from destination.") - sequences: dict[str,str] = {} - f = open(file_path) - for line in f: - if line[0] == '>': - def_line = line.strip() - def_line = def_line.replace('>', '') - else: - if def_line not in sequences: - sequences[def_line] = '' - sequences[def_line] += line.strip() - f.close() - return sequences - -def read_sequence(seq:str, read_length:int) -> str: - """ - This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is - smaller than the requested length or cuts the sequence if its longer. - - Args: - seq (str): the sequence to read - read_length (int): length of reads - - Returns: - str: returns sequenced element - - """ - from random import choice - bases: list[str] = ["A", "T", "C", "G"] - sequenced: str = '' - if read_length > len(seq): - for nt in range(len(seq)): - sequenced += seq[nt] - for nt in range(len(seq), read_length): - sequenced += choice(bases) - else: - for nt in range(read_length): - sequenced += seq[nt] - - return sequenced - -def simulate_sequencing(sequences: dict[str,str], read_length: int) -> dict[str,str]: - """ - Simulates sequencing. - - Args: - sequences (dict): Dictionary of sequences to sequence. - read_length (int): length of reads - - Returns: - dict: of n sequences as values - """ - LOG.info("Sequencing in progress....") - results: dict[str,str] = {} - for index, key in enumerate(sequences): - results[key] = read_sequence(sequences[key], read_length=read_length) - LOG.info("Sequencing was successfully executed.") - return results - -def generate_sequences(n: int, mean: int, sd: int) -> dict[str,str]: - """ - Generates random sequences. - - Args: - n (int): Amount of sequences to generate. - mean (int): mean length of sequence (gaussian distribution). - sd (float): standard deviation of length of sequence (gaussian distribution). - - Returns: - dict: of n sequences - """ - from random import choice, gauss - LOG.info("Generating random sequences.") - sequences: dict[str,str] = {} - for i in range(n): - seq: str = "" - bases: list[str] = ["A", "T", "C", "G"] - for nt in range(abs(round(gauss(mean, sd)))): - seq = seq + choice(bases) - key: str = str(i) + ': length ' + str(len(seq)) + ' nt' - sequences[key] = seq - return sequences - -def write_fasta(sequences: dict[str,str], file_path: str): - """ - Takes a dictionary and writes it to a fasta file. - Must specify the filename when calling the function. - - Args: - sequences (dict): Dictionary of sequence. - file_path (str): A file path directing to the output folder. - - """ - LOG.info("Writing FASTA file.") - from textwrap import wrap - with open(file_path, "w") as outfile: - for key, value in sequences.items(): - outfile.write(key + "\n") - outfile.write("\n".join(wrap(value, 60))) - outfile.write("\n") - -class ReadSequencer: - def __init__(self): - self.sequences: dict[str,str] = {} - self.reads: dict[str,str] = {} - - def add_random_sequences(self, n: int, mean: int, sd: int): - self.sequences: dict[str,str] = generate_sequences(n, mean, sd) - - def read_fasta(self, input_file): - self.sequences: dict[str,str] = read_in_fasta(input_file) - - def run_sequencing(self, read_length: int): - self.reads: dict[str,str] = simulate_sequencing(self.sequences, read_length) - - def write_fasta(self, output_file_path: str): - write_fasta(self.reads, output_file_path) diff --git a/setup.py b/setup.py deleted file mode 100644 index 905f8ee96fc23a232cf669563da1389e1228c9e5..0000000000000000000000000000000000000000 --- a/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='read_sequencer', - version='0.1.1', - url='https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer', - license='MIT', - author='Clara Serger, Michael Sandholzer and Christoph Harmel', - author_email='christoph.harmel@unibas.ch', - description='Simulates sequencing with a specified read length from sequences specified by a FASTA file.', - packages=find_packages(), - install_requires=['random','textwrap','argparse','logging'], - entry_points={'console_scripts': ['read_sequencer=read_sequencer_package.cli:main']} -)