fix: added missing files to commits

d5fa6f95 · Christoph Harmel · 3d5456c0 · d5fa6f95 · d5fa6f95 · d5fa6f95
Commit d5fa6f95 authored 2 years ago by Christoph Harmel
--- a/read_sequencer_package/cli.py
+++ b/read_sequencer_package/cli.py
 import argparse
+# Prefer the installed package layout; fall back to the sibling module when
+# cli.py is executed directly from inside its own directory.
+try:
+    from read_sequencer_package.modules import run_read_sequencer
+except ImportError:
+    from modules import run_read_sequencer
-parser = argparse.ArgumentParser(prog= 'read_sequencer', description='Simulates Sequenceing of a FASTA file.')
-parser.add_argument('--file_path',
-                    help='path to FASTA file', action='store_const')
+parser = argparse.ArgumentParser(prog='read_sequencer',
+                                 description='Simulates sequencing of DNA sequences specified by a FASTA file.')
+# All three options are needed by run_read_sequencer, so argparse reports a
+# clear usage error instead of a TypeError further down when one is missing.
+parser.add_argument('--input_file_path',
+                    help='path to FASTA file',
+                    required=True)
+parser.add_argument('--output_file_path',
+                    help='path of the FASTA file the simulated reads are written to',
+                    required=True)
 parser.add_argument('--read_length',
-                    help='read length for sequencing', action='store_const')
+                    help='read length for sequencing',
+                    type=int,
+                    required=True)
-args = parser.parse_args()
-print(args.file_path, args.read_length)
+def main():
+    """
+    Command line entry point: parse the arguments and run the read sequencer.
+    The command line is parsed here rather than at import time, so importing
+    this module has no side effects.
+    """
+    args = parser.parse_args()
+    run_read_sequencer(args.input_file_path, args.read_length, args.output_file_path)
+if __name__ == '__main__':
+    main()
--- a/read_sequencer_package/modules.py
+++ b/read_sequencer_package/modules.py
+def read_in_fasta(file_path):
+    '''
+    This function reads in FASTA files
+    argument is file_path, the path of the FASTA file to read
+    it returns a dictionary mapping each record name (the header line
+    without its leading '>') to the full sequence of that record
+
+    All sequence lines up to the next header are concatenated, so
+    multi-line records are read completely. Blank lines are ignored,
+    records sharing the same name are merged, and a header that is not
+    followed by any sequence line produces no entry.
+    raises ValueError if sequence data appears before the first header
+    '''
+    chunks = {}
+    defline = None
+    # the context manager closes the file even if parsing fails
+    with open(file_path) as fasta:
+        for line in fasta:
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith('>'):
+                # drop only the leading marker; a '>' inside the name is kept
+                defline = line[1:]
+            elif defline is None:
+                raise ValueError('sequence data found before the first FASTA header in ' + str(file_path))
+            else:
+                # collect the pieces and join once at the end
+                chunks.setdefault(defline, []).append(line)
+    return {name: ''.join(parts) for name, parts in chunks.items()}
+def read_sequence(seq, read_length, padding_probabilities=None):
+    '''
+    This function simulates one read of fixed length from a single sequence
+    arguments: seq is the sequence to read (a string of bases)
+    read_length is the number of bases of the returned read
+    padding_probabilities is accepted but currently not used; padding
+    bases are drawn uniformly from A, T, C and G
+    returns the first read_length bases of seq; if seq is shorter than
+    read_length the read is filled up at the end with random bases.
+    A read_length of zero or less gives an empty string.
+    '''
+    from random import choice
+    bases = ["A", "T", "C", "G"]
+    # clamp so that a negative read_length yields an empty read instead of
+    # slicing from the end of the sequence
+    wanted = max(read_length, 0)
+    sequenced = ''.join(seq[:wanted])
+    # range() of a negative number is empty, so no padding is added when the
+    # sequence is at least as long as the requested read
+    padding = ''.join(choice(bases) for _ in range(wanted - len(seq)))
+    return sequenced + padding
+def simulate_sequencing(sequences, read_length):
+    '''
+    This function simulates sequencing for every entry of a dictionary
+    arguments: sequences is a dictionary mapping names to sequences
+    read_length is handed to read_sequence for each entry
+    returns a dictionary with the same keys whose values are the results
+    of read_sequence
+    '''
+    return {
+        name: read_sequence(template, read_length=read_length)
+        for name, template in sequences.items()
+    }
+def write_fasta(sequences, file_path, line_width=60):
+    """
+    Takes a dictionary and writes it to a fasta file
+    Must specify the filename when calling the function
+
+    sequences: dictionary mapping record name -> sequence string
+    file_path: name of the output file (overwritten if it exists)
+    line_width: maximum number of characters per sequence line,
+    must be positive (default 60)
+
+    Each record is written as a '>' header line followed by the sequence
+    split into lines of at most line_width characters.
+    """
+    with open(file_path, "w") as outfile:
+        for key, value in sequences.items():
+            # FASTA headers must start with '>'; do not double the marker
+            # when the key already carries one
+            header = key if key.startswith(">") else ">" + key
+            outfile.write(header + "\n")
+            # plain slicing keeps every character, whereas textwrap treats
+            # whitespace and hyphens specially
+            for start in range(0, len(value), line_width):
+                outfile.write(value[start:start + line_width] + "\n")
+def run_read_sequencer(input_file_path, read_length, output_file_path):
+    """
+    Runs the whole pipeline: reads the sequences from input_file_path with
+    read_in_fasta, passes them with read_length to simulate_sequencing and
+    hands the result to write_fasta together with output_file_path
+    """
+    write_fasta(
+        simulate_sequencing(read_in_fasta(input_file_path), read_length),
+        output_file_path,
+    )
--- a/setup.py
+++ b/setup.py
- from setuptools import setup
+from setuptools import setup
- setup(
+setup(
-   name='awesome_read_sequencer',
+    name='awesome_read_sequencer',
-   version='0.1.0',
+    version='0.1.0',
-   author='An Awesome Coder',
+    author='An Awesome Coder',
-   author_email='aac@example.com',
+    author_email='aac@example.com',
-   packages=['random', 'sys'],
+    # Ship the project's own package; random and sys are standard library
+    # modules, not packages of this distribution.
+    packages=['read_sequencer_package'],
-   scripts=['read_in_FASTA.py','read_sequence.py']
-   license='LICENSE.txt',
+    license='LICENSE.txt',
-   description='An awesome package that simulates sequencing of a FASTA file.',
+    description='An awesome package that simulates sequencing from sequences specified by a FASTA file.',
-   long_description=open('README.md').read(),
+    long_description=open('README.md').read(),
-   install_requires=[
+    # Only the standard library is used, so nothing has to be installed.
+    install_requires=[],
-       "random",
+    entry_points={
-       "sys"
+        # Format is 'command=dotted.module.path:callable'; this replaces the
+        # former scripts= list, whose paths did not exist next to setup.py.
+        'console_scripts': ['read_sequencer=read_sequencer_package.cli:main']
-   ], 
-    entry_points = {
-        'console_scripts': ['read_sequencer_package/cli.py:parser'],
    }
 )
\ No newline at end of file