Skip to content
Snippets Groups Projects
Commit ea45d048 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

Merge branch 'clean_up' into 'main'

fix: renamed modules.py and .gitignore, deleted unneeded directories, added tests directory

See merge request !25
parents 536371a0 d8328900
No related branches found
No related tags found
1 merge request!25fix: renamed modules.py and .gitignore, deleted unneeded directories, added tests directory
Showing
with 171 additions and 199 deletions
.DS_Store .DS_Store
.idea/ .idea/
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# End of https://www.toptal.com/developers/gitignore/api/python
import argparse
from modules import read_sequencer as rs
parser = argparse.ArgumentParser(prog='read_sequencer',
                                 description='Simulates sequencing of DNA sequences specified by an FASTA file.')
# All three options are required: main() hands them directly to read_fasta,
# run_sequencing and write_fasta, and argparse would otherwise supply None.
parser.add_argument('--input_file_path',
                    help='path to FASTA file',
                    required=True)
parser.add_argument('--output_file_path',
                    help='path to FASTA file',
                    required=True)
parser.add_argument('--read_length',
                    help='read length for sequencing',
                    type=int,
                    required=True)


def main():
    """
    Command line entry point.

    Parses the command line, reads the input FASTA file, runs the sequencing
    simulation with the requested read length and writes the reads to the
    output path.
    """
    # Parse here rather than at module level so that importing this module
    # does not consume sys.argv or exit the interpreter.
    args = parser.parse_args()
    read_sequencer = rs()
    read_sequencer.read_fasta(args.input_file_path)
    read_sequencer.run_sequencing(args.read_length)
    read_sequencer.write_fasta(args.output_file_path)


if __name__ == '__main__':
    main()
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index 0 .. n-1 to a random sequence built from A, T, C and G.
    """
    from random import gauss, choice

    nt = ["A", "T", "C", "G"]
    sequences = {}
    for i in range(n):
        # abs() turns a negative draw into a positive length instead of an
        # empty sequence.
        length = abs(round(gauss(mean, sd)))
        sequences[i] = "".join(choice(nt) for _ in range(length))
    return sequences
def read_in_fasta(file_path):
    '''
    This function reads in FASTA files.

    Lines starting with '>' are header lines; the text after the marker
    becomes the dictionary key. All following lines up to the next header are
    stripped and concatenated into the value for that key. A header that is
    not followed by any line is not added to the result.

    Args:
        file_path (str): A file path directing to the fasta file.

    Returns:
        Dict: It returns a dictionary with sequences.

    Raises:
        ValueError: if sequence data appears before the first header line.
    '''
    sequences = {}
    defline = None
    # The context manager closes the file even if parsing raises.
    with open(file_path) as f:
        for line in f:
            if line.startswith('>'):
                # Drop only the leading marker; a '>' inside the header text
                # is part of the identifier and is kept.
                defline = line.strip()[1:]
                continue
            chunk = line.strip()
            if defline is None:
                if chunk:
                    raise ValueError(
                        "sequence data found before the first '>' header line")
                # blank lines ahead of the first header carry no data
                continue
            sequences[defline] = sequences.get(defline, '') + chunk
    return sequences
def read_sequence(seq, read_length):
    '''
    This function reads a sequence with a specific read length. If the
    sequence is shorter than the requested length, random nucleotides are
    appended until the length is reached; if it is longer, it is cut.

    Args:
        seq (str): the sequence to read
        read_length (int): length of reads

    Returns:
        str: returns sequenced element; empty if read_length is zero or negative
    '''
    from random import choice

    # A plain slice with a negative bound would count from the end, so
    # non-positive lengths are handled explicitly.
    if read_length <= 0:
        return ''
    bases = ["A", "T", "C", "G"]
    # range() of a negative number is empty, so no padding is generated when
    # the sequence is already long enough.
    padding = ''.join(choice(bases) for _ in range(read_length - len(seq)))
    return seq[:read_length] + padding
def simulate_sequencing(sequences, read_length):
    """
    Simulates sequencing.

    Every sequence in the input is passed through read_sequence with the
    given read length; the keys of the input are kept for the result.

    Args:
        sequences (dict): Dictionary of sequences to sequence.
        read_length (int): length of reads

    Returns:
        dict: one read per input sequence, stored under the same key
    """
    return {
        key: read_sequence(seq, read_length=read_length)
        for key, seq in sequences.items()
    }
import random
def generate_sequences(n, mean, sd):
    """
    Generates random sequences.

    Args:
        n (int): Amount of sequences to generate.
        mean (int): mean length of sequence (gaussian distribution).
        sd (float): standard deviation of length of sequence (gaussian distribution).

    Returns:
        dict: maps each index 0 .. n-1 to a random sequence built from A, T, C and G.
    """
    nt = ["A", "T", "C", "G"]
    sequences = {}
    for i in range(n):
        # A negative draw makes range() empty, which yields an empty sequence.
        length = round(random.gauss(mean, sd))
        sequences[i] = "".join(random.choice(nt) for _ in range(length))
    return sequences
def write_fasta(sequences, file_path):
    """
    Takes a dictionary and writes it to a fasta file.
    Must specify the filename when calling the function.

    Each entry is written as a header line starting with '>' followed by the
    sequence wrapped to lines of at most 60 characters.

    Args:
        sequences (dict): Dictionary of sequence.
        file_path (str): A file path directing to the output folder.
    """
    from textwrap import wrap

    with open(file_path, "w") as outfile:
        for key, value in sequences.items():
            # str() allows non-string keys such as integer indices.
            header = str(key)
            # FASTA header lines start with '>'; keys that already carry the
            # marker are written unchanged.
            if not header.startswith(">"):
                header = ">" + header
            outfile.write(header + "\n")
            outfile.write("\n".join(wrap(value, 60)))
            outfile.write("\n")
class read_sequencer:
    """
    Stateful wrapper around the module-level sequencing helpers.

    Holds a dictionary of input sequences and a dictionary of reads, and
    delegates the actual work to generate_sequences, read_in_fasta,
    simulate_sequencing and write_fasta.
    """

    def __init__(self):
        """Starts with no sequences and no reads."""
        # input sequences; replaced by add_random_sequences or read_fasta
        self.sequences = {}
        # output of the most recent run_sequencing call
        self.reads = {}

    def add_random_sequences(self, n, mean, sd):
        """
        Replaces the stored sequences with the result of
        generate_sequences(n, mean, sd).
        """
        self.sequences = generate_sequences(n, mean, sd)

    def read_fasta(self, input_file):
        """
        Replaces the stored sequences with the result of
        read_in_fasta(input_file).
        """
        self.sequences = read_in_fasta(input_file)

    def run_sequencing(self, read_length):
        """
        Runs simulate_sequencing on the stored sequences with the given read
        length and stores the result in self.reads.
        """
        self.reads = simulate_sequencing(self.sequences, read_length)

    def write_fasta(self, output_file_path):
        """
        Passes the stored reads and output_file_path to the module-level
        write_fasta function.
        """
        # Inside a method the bare name resolves to the module-level function,
        # not to this method.
        write_fasta(self.reads, output_file_path)
File deleted
File deleted
images/Git_Tutorial_CSerger.png

57.9 KiB

images/Markdown_Tutorial_CSerger.png

267 KiB

images/Michael_Screenshot 2022-11-07 at 17.38.44.png

164 KiB

images/Michael_Screenshot 2022-11-08 at 13.35.22.png

75.8 KiB

images/Michael_Screenshot 2022-11-08 at 14.38.02.png

108 KiB

images/screenshot_git_tutorial_main_harmel.png

1010 KiB

images/screenshot_git_tutorial_remote_harmel.png

1.07 MiB

images/screenshot_markup_tutorial_harmel.png

752 KiB

Metadata-Version: 2.1
Name: read-sequencer
Version: 0.1.1
Summary: Simulates sequencing with a specified read length from sequences specified by a FASTA file.
Home-page: https://git.scicore.unibas.ch/zavolan_group/tools/read-sequencer
Author: Clara Serger, Michael Sandholzer and Christoph Harmel
Author-email: christoph.harmel@unibas.ch
License: MIT
License-File: LICENSE.txt
LICENSE.txt
README.md
setup.py
read_sequencer.egg-info/PKG-INFO
read_sequencer.egg-info/SOURCES.txt
read_sequencer.egg-info/dependency_links.txt
read_sequencer.egg-info/entry_points.txt
read_sequencer.egg-info/requires.txt
read_sequencer.egg-info/top_level.txt
read_sequencer_package/__init__.py
read_sequencer_package/cli.py
read_sequencer_package/modules.py
\ No newline at end of file
[console_scripts]
read_sequencer = read_sequencer_package.cli:main
random
textwrap
argparse
read_sequencer_package
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment