Skip to content
Snippets Groups Projects
Commit d5fa6f95 authored by Christoph Harmel's avatar Christoph Harmel
Browse files

fix: added missing files to commits

parent 3d5456c0
No related branches found
No related tags found
1 merge request: !10 feature added: Simulate sequencing upper-level function, restructuring of package modules and functions
import argparse
from modules import run_read_sequencer
# Command-line interface of the read sequencer.
#
# The parser is built exactly once at module level. All three options are
# required because main() forwards every one of them to run_read_sequencer.
parser = argparse.ArgumentParser(
    prog='read_sequencer',
    description='Simulates sequencing of a DNA sequences specified by an FASTA file.',
)
parser.add_argument('--input_file_path',
                    required=True,
                    help='path to FASTA file')
parser.add_argument('--output_file_path',
                    required=True,
                    help='path to FASTA file')
parser.add_argument('--read_length',
                    required=True,
                    type=int,
                    help='read length for sequencing')


def main(argv=None):
    """Parse the command line and run the read sequencer.

    Arguments are parsed here rather than at import time, so importing this
    module has no side effects.

    Args:
        argv: Optional list of argument strings. When None, argparse falls
            back to ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    run_read_sequencer(args.input_file_path, args.read_length, args.output_file_path)


if __name__ == '__main__':
    main()
def read_in_fasta(file_path):
    """Read a FASTA file into a dictionary of sequences.

    Lines starting with '>' are definition lines; every other line is
    stripped of surrounding whitespace and appended to the sequence of the
    most recent definition line. A definition line only becomes a key once
    at least one non-header line follows it.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        A dict mapping each definition line (with all '>' characters
        removed) to its concatenated sequence string.

    Raises:
        ValueError: If sequence data appears before the first definition line.
    """
    fragments = {}
    defline = None
    # The context manager closes the file even if parsing raises.
    with open(file_path) as fasta:
        for line in fasta:
            if line.startswith('>'):
                defline = line.strip().replace('>', '')
                continue
            fragment = line.strip()
            if defline is None:
                if not fragment:
                    # Blank lines ahead of the first record are harmless.
                    continue
                raise ValueError(
                    'sequence data found before the first FASTA definition line'
                )
            fragments.setdefault(defline, []).append(fragment)
    # Join once per record instead of growing a string line by line.
    return {name: ''.join(parts) for name, parts in fragments.items()}
def read_sequence(seq, read_length, padding_probabilities=None):
    """Produce a read of ``read_length`` characters from ``seq``.

    When ``seq`` is longer than ``read_length`` the read is the leading
    ``read_length`` characters. Otherwise the whole of ``seq`` is kept and
    the remainder is filled with bases drawn at random from A, T, C and G.

    Args:
        seq: The sequence to read from.
        read_length: Length of the read to produce.
        padding_probabilities: Accepted but not used by this function.

    Returns:
        The read as a string.
    """
    from random import choice

    alphabet = ("A", "T", "C", "G")
    # Clamp to zero so a negative read_length yields an empty prefix.
    kept = max(0, min(read_length, len(seq)))
    prefix = "".join(seq[:kept])
    # The range is empty unless read_length exceeds len(seq).
    filler = "".join(choice(alphabet) for _ in range(len(seq), read_length))
    return prefix + filler
def simulate_sequencing(sequences, read_length):
    """Apply read_sequence to every entry of ``sequences``.

    Args:
        sequences: Mapping of sequence name to sequence.
        read_length: Read length passed on to read_sequence for each entry.

    Returns:
        A dict with the same keys as ``sequences`` whose values are the
        results of read_sequence.
    """
    return {
        name: read_sequence(sequences[name], read_length=read_length)
        for name in sequences
    }
def write_fasta(sequences, file_path, line_width=60):
    """Write a dictionary of sequences to a FASTA file.

    Each entry is written as a definition line followed by the sequence
    split into lines of at most ``line_width`` characters. An existing file
    at ``file_path`` is overwritten.

    Args:
        sequences: Mapping of definition line to sequence string.
        file_path: Path of the file to write.
        line_width: Maximum number of sequence characters per line.
    """
    with open(file_path, "w") as outfile:
        for name, sequence in sequences.items():
            # FASTA definition lines must start with '>'; add it unless the
            # key already carries one.
            header = name if name.startswith(">") else ">" + name
            # Fixed-width slicing keeps every line at exactly line_width
            # characters, whereas a prose wrapper may break at hyphens.
            chunks = [
                sequence[start:start + line_width]
                for start in range(0, len(sequence), line_width)
            ]
            outfile.write(header + "\n")
            outfile.write("\n".join(chunks))
            outfile.write("\n")
def run_read_sequencer(input_file_path, read_length, output_file_path):
    """Run the whole pipeline: read a FASTA file, sequence it, write reads.

    Args:
        input_file_path: Path handed to read_in_fasta.
        read_length: Read length handed to simulate_sequencing.
        output_file_path: Path handed to write_fasta for the resulting reads.
    """
    # Arguments are evaluated inside-out: read, then simulate, then write.
    write_fasta(
        simulate_sequencing(read_in_fasta(input_file_path), read_length),
        output_file_path,
    )
from setuptools import setup
from setuptools import setup
# Read the long description up front so the README handle is closed promptly.
with open('README.md', encoding='utf-8') as readme:
    long_description = readme.read()

setup(
    name='awesome_read_sequencer',
    version='0.1.0',
    author='An Awesome Coder',
    author_email='aac@example.com',
    # NOTE(review): package name taken from the console-script target below;
    # confirm it matches the directory that holds cli.py.
    packages=['read_sequencer_package'],
    # NOTE(review): these paths are relative to setup.py; confirm that they
    # still exist at this location after the package restructuring.
    scripts=['cli.py', 'modules.py'],
    license='LICENSE.txt',
    description='An awesome package that simulates sequencing from sequences specified by a FASTA file.',
    long_description=long_description,
    # 'random' and 'sys' are standard-library modules, not installable
    # distributions, so they must not be listed as requirements.
    install_requires=[],
    entry_points={
        # Console scripts take the form 'name=package.module:function'.
        'console_scripts': ['read_sequencer=read_sequencer_package.cli:main'],
    },
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment