Skip to content
Snippets Groups Projects
Commit d5fa6f95 authored by Christoph Harmel
Browse files

fix: added missing files to commits

parent 3d5456c0
No related branches found
No related tags found
1 merge request: !10 "feature added: Simulate sequencing upper level function, restructuring of package modules and functions"
This commit is part of merge request !10. Comments created here will be created in the context of that merge request.
import argparse
from modules import run_read_sequencer
# Command-line interface of the read sequencer.
#
# The parser is built once at module level so it can be inspected or reused;
# the actual parsing happens inside main() so that importing this module
# (e.g. through a console_scripts entry point) has no side effects.
parser = argparse.ArgumentParser(
    prog='read_sequencer',
    description='Simulates sequencing of a DNA sequences specified by an FASTA file.',
)
parser.add_argument(
    '--input_file_path',
    required=True,
    help='path to FASTA file',
)
parser.add_argument(
    '--output_file_path',
    required=True,
    help='path to output FASTA file',
)
parser.add_argument(
    '--read_length',
    required=True,
    type=int,
    help='read length for sequencing',
)


def main(argv=None):
    """Parse the command line and run the read sequencer.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse falls back to ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    run_read_sequencer(args.input_file_path, args.read_length, args.output_file_path)


if __name__ == '__main__':
    main()
def read_in_fasta(file_path):
    """Read a FASTA file into a dictionary.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A dict mapping each definition line (without its leading '>') to the
        concatenation of the sequence lines that follow it. Headers that are
        not followed by any sequence line are not included.

    Raises:
        ValueError: If sequence data appears before the first '>' header.
    """
    chunks = {}
    defline = None
    # The context manager closes the file even if parsing fails part-way.
    with open(file_path) as fasta:
        for raw_line in fasta:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no sequence data.
                continue
            if line.startswith('>'):
                # Drop only the leading marker; a '>' inside the header text
                # is part of the description and must be kept.
                defline = line[1:]
            elif defline is None:
                raise ValueError(
                    'FASTA file contains sequence data before the first header line'
                )
            else:
                chunks.setdefault(defline, []).append(line)
    # Join once per record instead of growing a string line by line.
    return {name: ''.join(parts) for name, parts in chunks.items()}
def read_sequence(seq, read_length, padding_probabilities=None):
    """Simulate a single read of ``read_length`` bases from one sequence.

    If the sequence is longer than ``read_length`` only its first
    ``read_length`` characters are returned. If it is shorter, the whole
    sequence is returned followed by bases drawn uniformly at random from
    A, T, C and G until the read has ``read_length`` characters.

    Args:
        seq: The sequence to read (a string of bases).
        read_length: Length of the simulated read. Values <= 0 give ''.
        padding_probabilities: Currently unused; padding is always uniform.

    Returns:
        The simulated read as a string.
    """
    from random import choice

    bases = ["A", "T", "C", "G"]
    # Clamp so a negative read length yields an empty prefix rather than
    # slicing from the end of the sequence.
    prefix = ''.join(seq[:max(read_length, 0)])
    # range() is empty when the sequence already covers the read length.
    padding = ''.join(choice(bases) for _ in range(read_length - len(seq)))
    return prefix + padding
def simulate_sequencing(sequences, read_length):
    """Simulate one read per input sequence.

    Args:
        sequences: Dict mapping a sequence name to its sequence.
        read_length: Length passed to read_sequence for every entry.

    Returns:
        A dict with the same keys as ``sequences`` whose values are the
        reads returned by read_sequence.
    """
    return {
        name: read_sequence(sequence, read_length=read_length)
        for name, sequence in sequences.items()
    }
def write_fasta(sequences, file_path, line_width=60):
    """Write a dictionary of sequences to a FASTA file.

    Args:
        sequences: Dict mapping a definition line to its sequence. Keys may
            be given with or without the leading '>'; read_in_fasta returns
            them without it.
        file_path: Path of the file to write (overwritten if it exists).
        line_width: Maximum number of sequence characters per line.

    Raises:
        ValueError: If ``line_width`` is smaller than 1.
    """
    if line_width < 1:
        raise ValueError('line_width must be at least 1')
    with open(file_path, "w") as outfile:
        for name, sequence in sequences.items():
            # A FASTA record must start with '>'; add it when the key lacks it.
            header = name if name.startswith(">") else ">" + name
            outfile.write(header + "\n")
            # Plain slicing keeps every character and breaks exactly at
            # line_width; a prose wrapper could break early at hyphens.
            for start in range(0, len(sequence), line_width):
                outfile.write(sequence[start:start + line_width] + "\n")
def run_read_sequencer(input_file_path, read_length, output_file_path):
    """Run the full pipeline: read a FASTA file, simulate reads, write them out.

    Args:
        input_file_path: Path handed to read_in_fasta.
        read_length: Read length handed to simulate_sequencing.
        output_file_path: Path handed to write_fasta.
    """
    write_fasta(
        simulate_sequencing(read_in_fasta(input_file_path), read_length),
        output_file_path,
    )
from setuptools import setup
from setuptools import setup
# Packaging configuration for the read sequencer.

# Read the long description up front so the file handle is closed promptly.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='awesome_read_sequencer',
    version='0.1.0',
    author='An Awesome Coder',
    author_email='aac@example.com',
    # NOTE(review): package name taken from the entry point below; confirm it
    # matches the directory that holds cli.py and modules.py. The previous
    # values ('random', 'sys') are standard-library modules, not packages of
    # this project.
    packages=['read_sequencer_package'],
    # NOTE(review): this field normally holds a license name rather than a
    # file name -- confirm which license applies.
    license='LICENSE.txt',
    description='An awesome package that simulates sequencing from sequences specified by a FASTA file.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    # 'random' and 'sys' ship with Python and must not be listed as
    # installable requirements.
    install_requires=[],
    entry_points={
        # Entry points use dotted module paths: 'name=package.module:function'.
        'console_scripts': ['read_sequencer=read_sequencer_package.cli:main'],
    },
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment