diff --git a/.gitignore b/.gitignore index e67a40ec0cbb5aa7e77e9607e08a517dc1187296..42770ec5750d8d11f9d2c139003b8307d923f626 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,174 @@ .DS_Store .idea/ + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files __pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv + For a library or package, you might want to ignore these files since the code is + intended to run in multiple environments; otherwise, check them in: +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + + +# End of https://www.toptal.com/developers/gitignore/api/python diff --git a/build/lib/read_sequencer_package/__init__.py b/build/lib/read_sequencer_package/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/build/lib/read_sequencer_package/cli.py b/build/lib/read_sequencer_package/cli.py deleted file mode 100644 index e786d78de5391ea2b035c058f4de2cd16162a4d6..0000000000000000000000000000000000000000 --- a/build/lib/read_sequencer_package/cli.py +++ /dev/null @@ -1,23 +0,0 @@ -import argparse -from modules import read_sequencer as rs - -parser = argparse.ArgumentParser(prog='read_sequencer', - description='Simulates sequencing of DNA sequences specified by an FASTA file.') -parser.add_argument('--input_file_path', - help='path to FASTA file') -parser.add_argument('--output_file_path', - help='path to FASTA file') -parser.add_argument('--read_length', - help='read length for sequencing', - type=int) - -args = parser.parse_args() - -def main(): - read_sequencer = rs() - read_sequencer.read_fasta(args.input_file_path) - read_sequencer.run_sequencing(args.read_length) - read_sequencer.write_fasta(args.output_file_path) - -if __name__ == '__main__': - main() diff --git a/build/lib/read_sequencer_package/modules.py b/build/lib/read_sequencer_package/modules.py deleted file mode 100644 index 39a686616817f6b496328d460fe06931f9670da8..0000000000000000000000000000000000000000 --- a/build/lib/read_sequencer_package/modules.py +++ /dev/null @@ -1,148 +0,0 @@ -def generate_sequences(n, mean, sd): - """ - Generates random sequences. - - Args: - n (int): Amount of sequences to generate. - mean (int): mean length of sequence (gaussian distribution). - sd (float): standard deviation of length of sequence (gaussian distribution). - - Returns: - list: of n sequences - """ - from random import gauss, choice - dict = {} - for i in range(n): - keys = range(n) - seq = "" - nt = ["A", "T", "C", "G"] - for value in range(abs(round(gauss(mean, sd)))): - seq = seq + choice(nt) - dict[keys[i]] = seq - return dict - - -def read_in_fasta(file_path): - ''' - This function reads in FASTA files. - - Args: - file_path (str): A file path directing to the fasta file. - - Returns: - Dict: It returns a dictionary with sequences. - - ''' - sequences = {} - f = open(file_path) - for line in f: - if line[0] == '>': - defline = line.strip() - defline = defline.replace('>', '') - else: - if defline not in sequences: - sequences[defline] = '' - sequences[defline] += line.strip() - f.close() - return sequences - -def read_sequence(seq, read_length): - ''' - This function reads a sequence of a specific read length and adds additional nucleotides if the sequence is - smaller then the requested length or cuts the sequence if its longer. - - Args: - seq (str): the sequence to read - read_length (int): length of reads - - Returns: - str: returns sequenced element - - ''' - from random import choice - bases = ["A", "T", "C", "G"] - sequenced = '' - if read_length >= len(seq): - for nt in range(len(seq)): - sequenced += seq[nt] - for nt in range(len(seq), read_length): - sequenced += choice(bases) - else: - for nt in range(read_length): - sequenced += seq[nt] - - return sequenced - -def simulate_sequencing(sequences, read_length): - """ - Simulates sequencing. - - Args: - sequences (dict): Dictionary of sequences to sequence. - read_length (int): length of reads - - Returns: - dict: of n sequences as values - """ - results = {} - for index, key in enumerate(sequences): - results[key] = read_sequence(sequences[key], read_length=read_length) - - return results - -import random -def generate_sequences(n, mean, sd): - """ - Generates random sequences. - - Args: - n (int): Amount of sequences to generate. - mean (int): mean length of sequence (gaussian distribution). - sd (float): standart deviation of length of sequence (gaussian distribution). - - Returns: - dict: of n sequences - """ - dict1 = {} - for i in range(n): - keys = range(n) - seq = "" - nt = ["A", "T", "C", "G"] - for value in range(round(random.gauss(mean, sd))): - seq = seq + random.choice(nt) - dict1[keys[i]] = seq - return dict1 - -def write_fasta(sequences, file_path): - """ - Takes a dictionary and writes it to a fasta file. - Must specify the filename when calling the function. - - Args: - sequences (dict): Dictionary of sequence. - file_path (str): A file path directing to the output folder. - - """ - from textwrap import wrap - with open(file_path, "w") as outfile: - for key, value in sequences.items(): - outfile.write(key + "\n") - outfile.write("\n".join(wrap(value, 60))) - outfile.write("\n") - -class read_sequencer: - def __init__(self): - self.sequences = {} - self.reads = {} - - def add_random_sequences(self, n, mean, sd): - self.sequences = generate_sequences(n, mean, sd) - - def read_fasta(self, input_file): - self.sequences = read_in_fasta(input_file) - - def run_sequencing(self, read_length): - self.reads = simulate_sequencing(self.sequences, read_length) - - def write_fasta(self, output_file_path): - write_fasta(self.reads, output_file_path) diff --git a/dist/read_sequencer-0.1.1-py3-none-any.whl b/dist/read_sequencer-0.1.1-py3-none-any.whl deleted file mode 100644 index 234b5227d12d9f5e410d26b2e131fed34c2596fc..0000000000000000000000000000000000000000 Binary files a/dist/read_sequencer-0.1.1-py3-none-any.whl and /dev/null differ diff --git a/dist/read_sequencer-0.1.1.tar.gz b/dist/read_sequencer-0.1.1.tar.gz deleted file mode 100644 index 7eb03d4a5f331e1050f6b5f6c8cb5ee356fc2595..0000000000000000000000000000000000000000 Binary files a/dist/read_sequencer-0.1.1.tar.gz and /dev/null differ diff --git a/images/Git_Tutorial_CSerger.png b/images/Git_Tutorial_CSerger.png deleted file mode 100644 index a80ebb2cff393ab5575bbb50804b42f9ba7ff77c..0000000000000000000000000000000000000000 Binary files a/images/Git_Tutorial_CSerger.png and /dev/null differ diff --git a/images/Markdown_Tutorial_CSerger.png b/images/Markdown_Tutorial_CSerger.png deleted file mode 100644 index 1146d19773186c648382ce4aa87681c0001b7a67..0000000000000000000000000000000000000000 Binary files a/images/Markdown_Tutorial_CSerger.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png b/images/Michael_Screenshot 2022-11-07 at 17.38.44.png deleted file mode 100644 index 272acf066d23d8440b02356cfc2ebd579f2f9569..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-07 at 17.38.44.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png b/images/Michael_Screenshot 2022-11-08 at 13.35.22.png deleted file mode 100644 index 2deedd0c2a642d492a959ce85733fbfdf2e508ed..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-08 at 13.35.22.png and /dev/null differ diff --git a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png b/images/Michael_Screenshot 2022-11-08 at 14.38.02.png deleted file mode 100644 index 4bdbfdf333fcacdc043e990027dd232953d53b20..0000000000000000000000000000000000000000 Binary files a/images/Michael_Screenshot 2022-11-08 at 14.38.02.png and /dev/null differ diff --git a/images/screenshot_git_tutorial_main_harmel.png b/images/screenshot_git_tutorial_main_harmel.png deleted file mode 100644 index fcbb8fde4d7ee8cb4d61d2cf3ff62ccddf11ff53..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_main_harmel.png and /dev/null differ diff --git a/images/screenshot_git_tutorial_remote_harmel.png b/images/screenshot_git_tutorial_remote_harmel.png deleted file mode 100644 index 76ff324be44e1e54447ae6fc09ecc62d50b48583..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_remote_harmel.png and /dev/null differ diff --git a/images/screenshot_markup_tutorial_harmel.png b/images/screenshot_markup_tutorial_harmel.png deleted file mode 100644 index db3aaf02cb5ec285c3cf5f76fe1d25134df69907..0000000000000000000000000000000000000000 Binary files a/images/screenshot_markup_tutorial_harmel.png and /dev/null differ diff --git a/read_sequencer.egg-info/PKG-INFO b/read_sequencer.egg-info/PKG-INFO deleted file mode 100644 index ed2a2f723157a60010ad224e55406bda28f82255..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/PKG-INFO +++ /dev/null @@ -1,9 +0,0 @@ -Metadata-Version: 2.1 -Name: read-sequencer -Version: 0.1.1 -Summary: Simulates sequencing with a specified read length from sequences specified by a FASTA file. -Home-page: https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer -Author: Clara Serger, Michael Sandholzer and Christoph Harmel -Author-email: christoph.harmel@unibas.ch -License: MIT -License-File: LICENSE.txt diff --git a/read_sequencer.egg-info/SOURCES.txt b/read_sequencer.egg-info/SOURCES.txt deleted file mode 100644 index 9b093f4ab2fab7288a29384742947a5405d68517..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/SOURCES.txt +++ /dev/null @@ -1,12 +0,0 @@ -LICENSE.txt -README.md -setup.py -read_sequencer.egg-info/PKG-INFO -read_sequencer.egg-info/SOURCES.txt -read_sequencer.egg-info/dependency_links.txt -read_sequencer.egg-info/entry_points.txt -read_sequencer.egg-info/requires.txt -read_sequencer.egg-info/top_level.txt -read_sequencer_package/__init__.py -read_sequencer_package/cli.py -read_sequencer_package/modules.py \ No newline at end of file diff --git a/read_sequencer.egg-info/dependency_links.txt b/read_sequencer.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/read_sequencer.egg-info/entry_points.txt b/read_sequencer.egg-info/entry_points.txt deleted file mode 100644 index 569e8154320e35d343a621996abf05109990fec6..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -read_sequencer = read_sequencer_package.cli:main diff --git a/read_sequencer.egg-info/requires.txt b/read_sequencer.egg-info/requires.txt deleted file mode 100644 index d9ca6c64cbd4a8ce3229966dd400911a7f626665..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/requires.txt +++ /dev/null @@ -1,3 +0,0 @@ -random -textwrap -argparse diff --git a/read_sequencer.egg-info/top_level.txt b/read_sequencer.egg-info/top_level.txt deleted file mode 100644 index a5c49290c04dc99c11fd0696381a1287c34af400..0000000000000000000000000000000000000000 --- a/read_sequencer.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -read_sequencer_package diff --git a/read_sequencer_package/cli.py b/read_sequencer_package/cli.py index 9a68267c115d328afa32e582d8c03e4aede2a076..c6dce09b4e30a35520040220358e868ea99e8081 100644 --- a/read_sequencer_package/cli.py +++ b/read_sequencer_package/cli.py @@ -1,5 +1,5 @@ import argparse -from modules import ReadSequencer +from read_sequencer import ReadSequencer import logging parser = argparse.ArgumentParser(prog='read_sequencer', diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/read_sequencer.py similarity index 100% rename from read_sequencer_package/modules.py rename to read_sequencer_package/read_sequencer.py diff --git a/read_sequencer_package/fasta_testfile/50_seqs_50_1000_bp.fasta b/tests/fasta_testfile/50_seqs_50_1000_bp.fasta similarity index 100% rename from read_sequencer_package/fasta_testfile/50_seqs_50_1000_bp.fasta rename to tests/fasta_testfile/50_seqs_50_1000_bp.fasta diff --git a/read_sequencer_package/fasta_testfile/result.fasta b/tests/fasta_testfile/result.fasta similarity index 100% rename from read_sequencer_package/fasta_testfile/result.fasta rename to tests/fasta_testfile/result.fasta