diff --git a/read_sequencer_package/cli.py b/read_sequencer_package/cli.py
index 8273b5565d3efdcc95061d7ffa237f39c41aad0e..f242c4c1f4058c5f6baf5f3e7f675f784308c26a 100644
--- a/read_sequencer_package/cli.py
+++ b/read_sequencer_package/cli.py
@@ -1,11 +1,31 @@
 import argparse
-
-parser = argparse.ArgumentParser(prog= 'read_sequencer', description='Simulates Sequenceing of a FASTA file.')
-parser.add_argument('--file_path',
-                    help='path to FASTA file', action='store_const')
-parser.add_argument('--read_length',
-                    help='read length for sequencing', action='store_const')
-
-
-args = parser.parse_args()
-print(args.file_path, args.read_length)
+
+try:
+    from .modules import run_read_sequencer
+except ImportError:  # executed directly as a script, outside the package
+    from modules import run_read_sequencer
+
+parser = argparse.ArgumentParser(
+    prog='read_sequencer',
+    description='Simulates sequencing of DNA sequences specified by a FASTA file.')
+parser.add_argument('--input_file_path',
+                    required=True,
+                    help='path to the input FASTA file')
+parser.add_argument('--output_file_path',
+                    required=True,
+                    help='path to the output FASTA file to write')
+parser.add_argument('--read_length',
+                    required=True,
+                    type=int,
+                    help='read length for sequencing')
+
+
+def main():
+    """Parse the command-line arguments and run the sequencing simulation."""
+    # Parse here rather than at import time so importing this module has no side effects.
+    args = parser.parse_args()
+    run_read_sequencer(args.input_file_path, args.read_length, args.output_file_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/read_sequencer_package/modules.py b/read_sequencer_package/modules.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ca84a645b8a5f88f1d3c083d5d489917b3b90adb 100644
--- a/read_sequencer_package/modules.py
+++ b/read_sequencer_package/modules.py
@@ -0,0 +1,97 @@
+"""Read FASTA files, simulate sequencing reads and write the reads to FASTA."""
+from random import choice
+
+BASES = ('A', 'T', 'C', 'G')
+FASTA_LINE_WIDTH = 60
+
+
+def read_in_fasta(file_path):
+    """Read a FASTA file into a dictionary.
+
+    Args:
+        file_path: path to the FASTA file.
+
+    Returns:
+        A dictionary mapping each definition line (without the leading
+        '>') to its sequence joined into a single string.
+
+    Raises:
+        ValueError: if sequence data appears before the first definition line.
+    """
+    chunks = {}
+    defline = None
+    with open(file_path) as fasta_file:
+        for line in fasta_file:
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith('>'):
+                # Drop only the leading marker; a later '>' belongs to the header text.
+                defline = line[1:]
+            elif defline is None:
+                raise ValueError('sequence data found before the first definition line')
+            else:
+                chunks.setdefault(defline, []).append(line)
+    return {name: ''.join(parts) for name, parts in chunks.items()}
+
+
+def read_sequence(seq, read_length, padding_probabilities=None):
+    """Simulate a single read of ``read_length`` bases from ``seq``.
+
+    Args:
+        seq: the sequence to read, as a string.
+        read_length: number of bases in the returned read.
+        padding_probabilities: currently unused; kept so existing callers
+            that pass it keep working.
+
+    Returns:
+        The first ``read_length`` bases of ``seq``. When ``seq`` is shorter
+        than ``read_length`` the read is padded with randomly chosen bases.
+    """
+    # Clamp so a negative length yields an empty read instead of slicing from the end.
+    read_length = max(read_length, 0)
+    read = seq[:read_length]
+    padding = ''.join(choice(BASES) for _ in range(read_length - len(read)))
+    return read + padding
+
+
+def simulate_sequencing(sequences, read_length):
+    """Simulate one read of ``read_length`` bases for every sequence.
+
+    Args:
+        sequences: dictionary mapping sequence names to sequences.
+        read_length: number of bases in each read.
+
+    Returns:
+        A dictionary mapping each sequence name to its simulated read.
+    """
+    return {name: read_sequence(seq, read_length=read_length)
+            for name, seq in sequences.items()}
+
+
+def write_fasta(sequences, file_path):
+    """Write a dictionary of sequences to a FASTA file.
+
+    Args:
+        sequences: dictionary mapping sequence names to sequences.
+        file_path: path of the FASTA file to write.
+    """
+    with open(file_path, 'w') as outfile:
+        for name, sequence in sequences.items():
+            # read_in_fasta strips the '>' marker, so restore it to emit valid FASTA.
+            outfile.write('>' + name + '\n')
+            for start in range(0, len(sequence), FASTA_LINE_WIDTH):
+                outfile.write(sequence[start:start + FASTA_LINE_WIDTH] + '\n')
+
+
+def run_read_sequencer(input_file_path, read_length, output_file_path):
+    """Read sequences from a FASTA file, simulate reads and write them out.
+
+    Args:
+        input_file_path: path to the input FASTA file.
+        read_length: number of bases in each simulated read.
+        output_file_path: path of the FASTA file the reads are written to.
+    """
+    sequences = read_in_fasta(input_file_path)
+    reads = simulate_sequencing(sequences, read_length)
+    write_fasta(reads, output_file_path)
diff --git a/setup.py b/setup.py
index 528670cb0aba646f293aeb91aedbcdcc5d617d55..1102b5a453b8bd5c032792a7a3058446e70d6fb8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,20 +1,18 @@
- from setuptools import setup
-
- setup(
-     name='awesome_read_sequencer',
-     version='0.1.0',
-     author='An Awesome Coder',
-     author_email='aac@example.com',
-     packages=['random', 'sys'],
-     scripts=['read_in_FASTA.py','read_sequence.py']
-     license='LICENSE.txt',
-     description='An awesome package that simulates sequencing of a FASTA file.',
-     long_description=open('README.md').read(),
-     install_requires=[
-         "random",
-         "sys"
-     ],
-     entry_points = {
-         'console_scripts': ['read_sequencer_package/cli.py:parser'],
+from setuptools import setup
+
+with open('README.md', encoding='utf-8') as readme_file:
+    long_description = readme_file.read()
+
+setup(
+    name='awesome_read_sequencer',
+    version='0.1.0',
+    author='An Awesome Coder',
+    author_email='aac@example.com',
+    packages=['read_sequencer_package'],
+    license='LICENSE.txt',
+    description='An awesome package that simulates sequencing from sequences specified by a FASTA file.',
+    long_description=long_description,
+    entry_points={
+        'console_scripts': ['read_sequencer=read_sequencer_package.cli:main']
     }
-)
\ No newline at end of file
+)