Merge branch 'clean_up' into 'main'

fix: renamed modules.py and .gitignore, deleted unneeded directories, added tests directory. See merge request !25

Merge branch 'clean_up' into 'main'
ea45d048 · Christoph Harmel · 536371a0 · d8328900 · ea45d048 · 536371a0
Commit ea45d048 authored 2 years ago by Christoph Harmel
--- a/.gitignore
+++ b/.gitignore
 .DS_Store
 .idea/
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+### Python ###
+# Byte-compiled / optimized / DLL files
 __pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# End of https://www.toptal.com/developers/gitignore/api/python
--- a/build/lib/read_sequencer_package/__init__.py
+++ b/build/lib/read_sequencer_package/__init__.py
--- a/build/lib/read_sequencer_package/cli.py
+++ b/build/lib/read_sequencer_package/cli.py
-import argparse
-from modules import read_sequencer as rs
-parser = argparse.ArgumentParser(prog='read_sequencer',
-                                 description='Simulates sequencing of DNA sequences specified by an FASTA file.')
-parser.add_argument('--input_file_path',
-                    help='path to FASTA file')
-parser.add_argument('--output_file_path',
-                    help='path to FASTA file')
-parser.add_argument('--read_length',
-                    help='read length for sequencing',
-                    type=int)
-args = parser.parse_args()
-def main():
-    read_sequencer = rs()
-    read_sequencer.read_fasta(args.input_file_path)
-    read_sequencer.run_sequencing(args.read_length)
-    read_sequencer.write_fasta(args.output_file_path)
-if __name__ == '__main__':
-    main()
--- a/build/lib/read_sequencer_package/modules.py
+++ b/build/lib/read_sequencer_package/modules.py
-def generate_sequences(n, mean, sd):
-    """
-    Generates random sequences.
-    Args:
-        n (int): Amount of sequences to generate.
-        mean (int): mean length of sequence (gaussian distribution).
-        sd (float): standard deviation of length of sequence (gaussian distribution).
-    Returns:
-        list: of n sequences
-    """
-    from random import gauss, choice
-    dict = {}
-    for i in range(n):
-        keys = range(n)
-        seq = ""
-        nt = ["A", "T", "C", "G"]
-        for value in range(abs(round(gauss(mean, sd)))):
-            seq = seq + choice(nt)
-        dict[keys[i]] = seq
-    return dict
-def read_in_fasta(file_path):
-    '''
-    This function reads in FASTA files.
-    Args:
-        file_path (str): A file path directing to the fasta file.  
-    Returns:
-        Dict: It returns a dictionary with sequences.
-    '''
-    sequences = {}
-    f = open(file_path)
-    for line in f:
-        if line[0] == '>':
-            defline = line.strip()
-            defline = defline.replace('>', '')
-        else:
-            if defline not in sequences:
-                sequences[defline] = ''
-                sequences[defline] += line.strip()
-    f.close()
-    return sequences
-def read_sequence(seq, read_length):
-    '''
-    This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is 
-    smaller then the requested length or cuts the sequence if its longer.
-    Args:
-        seq (str): the sequence to read 
-        read_length (int): length of reads
-    Returns:
-        str: returns sequenced element
-    '''
-    from random import choice
-    bases = ["A", "T", "C", "G"]
-    sequenced = ''
-    if read_length >= len(seq):
-        for nt in range(len(seq)):
-            sequenced += seq[nt]
-        for nt in range(len(seq), read_length):
-            sequenced += choice(bases)
-    else:
-        for nt in range(read_length):
-            sequenced += seq[nt]
-    return sequenced
-def simulate_sequencing(sequences, read_length):
-    """
-    Simulates sequencing.
-    Args:
-        sequences (dict): Dictionary of sequences to sequence.
-        read_length (int): length of reads
-    Returns:
-        dict: of n sequences as values 
-    """
-    results = {}
-    for index, key in enumerate(sequences):
-        results[key] = read_sequence(sequences[key], read_length=read_length)
-    return results
-import random
-def generate_sequences(n, mean, sd):
-    """
-    Generates random sequences.
-    Args:
-        n (int): Amount of sequences to generate.
-        mean (int): mean length of sequence (gaussian distribution).
-        sd (float): standart deviation of length of sequence (gaussian distribution).
-    Returns:
-        dict: of n sequences
-    """
-    dict1 = {}
-    for i in range(n):
-        keys = range(n)
-        seq = ""
-        nt = ["A", "T", "C", "G"]
-        for value in range(round(random.gauss(mean, sd))):
-            seq = seq + random.choice(nt)
-        dict1[keys[i]] = seq
-    return dict1
-def write_fasta(sequences, file_path):
-    """
-    Takes a dictionary and writes it to a fasta file.
-    Must specify the filename when calling the function.
-    Args:
-        sequences (dict): Dictionary of sequence.
-        file_path (str): A file path directing to the output folder.
-    """
-    from textwrap import wrap
-    with open(file_path, "w") as outfile:
-        for key, value in sequences.items():
-            outfile.write(key + "\n")
-            outfile.write("\n".join(wrap(value, 60)))
-            outfile.write("\n")
-class read_sequencer:
-    def __init__(self):
-        self.sequences = {}
-        self.reads = {}
-    def add_random_sequences(self, n, mean, sd):
-        self.sequences = generate_sequences(n, mean, sd)
-    def read_fasta(self, input_file):
-        self.sequences = read_in_fasta(input_file)
-    def run_sequencing(self, read_length):
-        self.reads = simulate_sequencing(self.sequences, read_length)
-    def write_fasta(self, output_file_path):
-        write_fasta(self.reads, output_file_path)
--- a/dist/read_sequencer-0.1.1-py3-none-any.whl
+++ b/dist/read_sequencer-0.1.1-py3-none-any.whl
--- a/dist/read_sequencer-0.1.1.tar.gz
+++ b/dist/read_sequencer-0.1.1.tar.gz
--- a/images/Git_Tutorial_CSerger.png
+++ b/images/Git_Tutorial_CSerger.png
--- a/images/Markdown_Tutorial_CSerger.png
+++ b/images/Markdown_Tutorial_CSerger.png
--- a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png
+++ b/images/Michael_Screenshot 2022-11-07 at 17.38.44.png
--- a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png
+++ b/images/Michael_Screenshot 2022-11-08 at 13.35.22.png
--- a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png
+++ b/images/Michael_Screenshot 2022-11-08 at 14.38.02.png
--- a/images/screenshot_git_tutorial_main_harmel.png
+++ b/images/screenshot_git_tutorial_main_harmel.png
--- a/images/screenshot_git_tutorial_remote_harmel.png
+++ b/images/screenshot_git_tutorial_remote_harmel.png
--- a/images/screenshot_markup_tutorial_harmel.png
+++ b/images/screenshot_markup_tutorial_harmel.png
--- a/read_sequencer.egg-info/PKG-INFO
+++ b/read_sequencer.egg-info/PKG-INFO
-Metadata-Version: 2.1
-Name: read-sequencer
-Version: 0.1.1
-Summary: Simulates sequencing with a specified read length from sequences specified by a FASTA file.
-Home-page: https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer
-Author: Clara Serger, Michael Sandholzer and Christoph Harmel
-Author-email: christoph.harmel@unibas.ch
-License: MIT
-License-File: LICENSE.txt
--- a/read_sequencer.egg-info/SOURCES.txt
+++ b/read_sequencer.egg-info/SOURCES.txt
-LICENSE.txt
-README.md
-setup.py
-read_sequencer.egg-info/PKG-INFO
-read_sequencer.egg-info/SOURCES.txt
-read_sequencer.egg-info/dependency_links.txt
-read_sequencer.egg-info/entry_points.txt
-read_sequencer.egg-info/requires.txt
-read_sequencer.egg-info/top_level.txt
-read_sequencer_package/__init__.py
-read_sequencer_package/cli.py
-read_sequencer_package/modules.py
\ No newline at end of file
--- a/read_sequencer.egg-info/dependency_links.txt
+++ b/read_sequencer.egg-info/dependency_links.txt
--- a/read_sequencer.egg-info/entry_points.txt
+++ b/read_sequencer.egg-info/entry_points.txt
-[console_scripts]
-read_sequencer = read_sequencer_package.cli:main
--- a/read_sequencer.egg-info/requires.txt
+++ b/read_sequencer.egg-info/requires.txt
-random
-textwrap
-argparse
--- a/read_sequencer.egg-info/top_level.txt
+++ b/read_sequencer.egg-info/top_level.txt
-read_sequencer_package