DO NOT MERGE: remove all content for review

513450f0 · Alex Kanitz · 536371a0 · 536371a0 · 536371a0 · 536371a0
Commit 513450f0 authored 2 years ago by Alex Kanitz
--- a/.gitignore
+++ b/.gitignore
-.DS_Store
-.idea/
-__pycache__/
--- a/LICENSE.txt
+++ b/LICENSE.txt
-MIT License
-Copyright (c) 2022 Clara Serger, Michael Sandholzer and Christoph Harmel
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
--- a/README.md
+++ b/README.md
-# Read Sequencer
-## Overview
-Read Sequencer is a test Python package that simulates sequencing.
-It reads FASTA files, simulates sequencing with a specified read length, and writes the resulting sequences to a new FASTA file.
-## Installation from PyPI
-Read Sequencer requires Python 3.9 or later.
-Install Read Sequencer from PyPI using:
-```
-pip install -i https://test.pypi.org/simple/ read-sequencer==0.1.1
-```
-## Usage
-```
-read_sequencer --input_file_path <input.fasta> --output_file_path <output.fasta> --read_length <length>
-```
-## Contributors and Contact Information
-Christoph Harmel - christoph.harmel@unibas.ch  
-Michael Sandholzer - michael.sandholzer@unibas.ch  
-Clara Serger - c.serger@unibas.ch  
--- a/build/lib/read_sequencer_package/__init__.py
+++ b/build/lib/read_sequencer_package/__init__.py
--- a/build/lib/read_sequencer_package/cli.py
+++ b/build/lib/read_sequencer_package/cli.py
-import argparse
-from modules import read_sequencer as rs
-# Command-line interface definition. All three options are required because
-# main() hands them to the sequencer without any default values.
-parser = argparse.ArgumentParser(prog='read_sequencer',
-                                 description='Simulates sequencing of DNA sequences specified by an FASTA file.')
-parser.add_argument('--input_file_path',
-                    help='path to the input FASTA file',
-                    required=True)
-parser.add_argument('--output_file_path',
-                    help='path to the output FASTA file',
-                    required=True)
-parser.add_argument('--read_length',
-                    help='read length for sequencing',
-                    type=int,
-                    required=True)
-# NOTE(review): parsing happens at import time, so importing this module reads sys.argv.
-args = parser.parse_args()
-def main():
-    """Run the sequencing pipeline with the parsed command-line arguments.
-    Reads the input FASTA file, simulates sequencing at the requested read
-    length and writes the resulting reads to the output FASTA file.
-    """
-    sequencer = rs()
-    sequencer.read_fasta(args.input_file_path)
-    sequencer.run_sequencing(args.read_length)
-    sequencer.write_fasta(args.output_file_path)
-if __name__ == '__main__':
-    main()
--- a/build/lib/read_sequencer_package/modules.py
+++ b/build/lib/read_sequencer_package/modules.py
-def generate_sequences(n, mean, sd):
-    """
-    Generates random sequences.
-    Args:
-        n (int): Amount of sequences to generate.
-        mean (int): mean length of sequence (gaussian distribution).
-        sd (float): standard deviation of length of sequence (gaussian distribution).
-    Returns:
-        dict: maps the integer indices 0..n-1 to the generated sequences.
-    """
-    from random import gauss, choice
-    nucleotides = ["A", "T", "C", "G"]
-    sequences = {}
-    for index in range(n):
-        # A negative draw is folded to its absolute value, so the length is never negative.
-        length = abs(round(gauss(mean, sd)))
-        sequences[index] = "".join(choice(nucleotides) for _ in range(length))
-    return sequences
-def read_in_fasta(file_path):
-    '''
-    This function reads in FASTA files.
-    Sequence lines that follow a header are concatenated, so records wrapped over
-    several lines are returned in full. Blank lines are skipped.
-    Args:
-        file_path (str): A file path directing to the fasta file.
-    Returns:
-        Dict: It returns a dictionary mapping each header (without '>') to its sequence.
-    Raises:
-        ValueError: if sequence data appears before the first header line.
-    '''
-    sequences = {}
-    defline = None
-    # The context manager closes the file even if parsing fails part-way.
-    with open(file_path) as fasta:
-        for line in fasta:
-            if line.startswith('>'):
-                defline = line.strip().replace('>', '')
-                continue
-            chunk = line.strip()
-            if not chunk:
-                continue
-            if defline is None:
-                raise ValueError('sequence data found before the first FASTA header')
-            # Append every line of the record, not only the first one.
-            sequences[defline] = sequences.get(defline, '') + chunk
-    return sequences
-def read_sequence(seq, read_length):
-    '''
-    This function reads a sequence of a specific read length. The sequence is cut
-    if it is longer than the requested length and padded with random nucleotides
-    if it is shorter.
-    Args:
-        seq (str): the sequence to read
-        read_length (int): length of reads
-    Returns:
-        str: returns sequenced element
-    '''
-    from random import choice
-    bases = ["A", "T", "C", "G"]
-    # Clamp at zero so a negative read length yields an empty read instead of a
-    # slice counted from the end of the sequence.
-    sequenced = seq[:max(read_length, 0)]
-    # The range is empty unless the sequence is shorter than the read length.
-    padding = "".join(choice(bases) for _ in range(len(seq), read_length))
-    return sequenced + padding
-def simulate_sequencing(sequences, read_length):
-    """
-    Simulates sequencing of every sequence in a dictionary.
-    Args:
-        sequences (dict): Dictionary of sequences to sequence.
-        read_length (int): length of reads
-    Returns:
-        dict: the same keys as the input, each mapped to its simulated read.
-    """
-    return {
-        name: read_sequence(sequence, read_length=read_length)
-        for name, sequence in sequences.items()
-    }
-import random
-def generate_sequences(n, mean, sd):
-    """
-    Generates random sequences.
-    NOTE(review): a function with the same name is defined earlier in this file;
-    this later definition is the one that stays bound after import.
-    Args:
-        n (int): Amount of sequences to generate.
-        mean (int): mean length of sequence (gaussian distribution).
-        sd (float): standard deviation of length of sequence (gaussian distribution).
-    Returns:
-        dict: maps the integer indices 0..n-1 to the generated sequences.
-    """
-    nucleotides = ["A", "T", "C", "G"]
-    sequences = {}
-    for index in range(n):
-        # range() of a non-positive length is empty, so such draws give "".
-        length = round(random.gauss(mean, sd))
-        sequences[index] = "".join(random.choice(nucleotides) for _ in range(length))
-    return sequences
-def write_fasta(sequences, file_path):
-    """
-    Takes a dictionary and writes it to a fasta file.
-    Each record is written as a '>' header line followed by the sequence
-    wrapped at 60 characters per line.
-    Args:
-        sequences (dict): Dictionary mapping sequence identifiers to sequences.
-        file_path (str): Path of the output file; it is created or overwritten.
-    """
-    from textwrap import wrap
-    with open(file_path, "w") as outfile:
-        for key, value in sequences.items():
-            # FASTA headers start with '>'; formatting also accepts non-string keys.
-            outfile.write(f">{key}\n")
-            outfile.write("\n".join(wrap(value, 60)))
-            outfile.write("\n")
-class read_sequencer:
-    """Holds a set of input sequences and the reads simulated from them."""
-    def __init__(self):
-        # Both containers start empty and are filled by the methods below.
-        self.reads = {}
-        self.sequences = {}
-    def add_random_sequences(self, n, mean, sd):
-        """Replace the stored sequences with n randomly generated ones."""
-        generated = generate_sequences(n, mean, sd)
-        self.sequences = generated
-    def read_fasta(self, input_file):
-        """Replace the stored sequences with those read from input_file."""
-        loaded = read_in_fasta(input_file)
-        self.sequences = loaded
-    def run_sequencing(self, read_length):
-        """Simulate sequencing of the stored sequences and store the reads."""
-        simulated = simulate_sequencing(self.sequences, read_length)
-        self.reads = simulated
-    def write_fasta(self, output_file_path):
-        """Write the stored reads to output_file_path."""
-        # The bare name resolves to the module-level function, not to this method.
-        write_fasta(self.reads, output_file_path)
--- a/dist/read_sequencer-0.1.1-py3-none-any.whl
+++ b/dist/read_sequencer-0.1.1-py3-none-any.whl
--- a/dist/read_sequencer-0.1.1.tar.gz
+++ b/dist/read_sequencer-0.1.1.tar.gz
--- a/images/Git_Tutorial_CSerger.png
+++ b/images/Git_Tutorial_CSerger.png
--- a/images/Markdown_Tutorial_CSerger.png
+++ b/images/Markdown_Tutorial_CSerger.png
--- a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png
+++ b/images/Michael_Screenshot 2022-11-07 at 17.38.44.png
--- a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png
+++ b/images/Michael_Screenshot 2022-11-08 at 13.35.22.png
--- a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png
+++ b/images/Michael_Screenshot 2022-11-08 at 14.38.02.png
--- a/images/screenshot_git_tutorial_main_harmel.png
+++ b/images/screenshot_git_tutorial_main_harmel.png
--- a/images/screenshot_git_tutorial_remote_harmel.png
+++ b/images/screenshot_git_tutorial_remote_harmel.png
--- a/images/screenshot_markup_tutorial_harmel.png
+++ b/images/screenshot_markup_tutorial_harmel.png
--- a/read_sequencer.egg-info/PKG-INFO
+++ b/read_sequencer.egg-info/PKG-INFO
-Metadata-Version: 2.1
-Name: read-sequencer
-Version: 0.1.1
-Summary: Simulates sequencing with a specified read length from sequences specified by a FASTA file.
-Home-page: https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer
-Author: Clara Serger, Michael Sandholzer and Christoph Harmel
-Author-email: christoph.harmel@unibas.ch
-License: MIT
-License-File: LICENSE.txt
--- a/read_sequencer.egg-info/SOURCES.txt
+++ b/read_sequencer.egg-info/SOURCES.txt
-LICENSE.txt
-README.md
-setup.py
-read_sequencer.egg-info/PKG-INFO
-read_sequencer.egg-info/SOURCES.txt
-read_sequencer.egg-info/dependency_links.txt
-read_sequencer.egg-info/entry_points.txt
-read_sequencer.egg-info/requires.txt
-read_sequencer.egg-info/top_level.txt
-read_sequencer_package/__init__.py
-read_sequencer_package/cli.py
-read_sequencer_package/modules.py
\ No newline at end of file
--- a/read_sequencer.egg-info/dependency_links.txt
+++ b/read_sequencer.egg-info/dependency_links.txt
--- a/read_sequencer.egg-info/entry_points.txt
+++ b/read_sequencer.egg-info/entry_points.txt
-[console_scripts]
-read_sequencer = read_sequencer_package.cli:main