Skip to content
Snippets Groups Projects
Commit 864334ab authored by Mate Balajti
Browse files

refactor: update read_sequencer.py

parent 3e226481
No related branches found
No related tags found
1 merge request: !40 refactor: update tests, CI, main script
Pipeline #17380 failed
...@@ -23,8 +23,8 @@ class ReadSequencer: ...@@ -23,8 +23,8 @@ class ReadSequencer:
def __init__( def __init__(
self, self,
fasta: str = None, fasta: str,
output: str = None, output: str,
read_length: int = 150, read_length: int = 150,
chunk_size: int = 10000, chunk_size: int = 10000,
) -> None: ) -> None:
...@@ -35,7 +35,7 @@ class ReadSequencer: ...@@ -35,7 +35,7 @@ class ReadSequencer:
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.random = False self.random = False
self.bases = ("A", "T", "C", "G") self.bases = ("A", "T", "C", "G")
self.n_sequences = None self.n_sequences: int
def get_n_sequences(self) -> None: def get_n_sequences(self) -> None:
"""Detect number of sequences present in set fasta file. """Detect number of sequences present in set fasta file.
...@@ -135,11 +135,18 @@ class ReadSequencer: ...@@ -135,11 +135,18 @@ class ReadSequencer:
range(self.n_sequences), self.chunk_size range(self.n_sequences), self.chunk_size
) )
for i, batch in enumerate(batch_generator): for i, batch in enumerate(batch_generator):
filename = self.output.replace(".fasta", "") + f"_chunk_{i}.fasta" % (i + 1) filename = (
with open(filename, "w", encoding="utf-8") as output_handle: self.output.replace(".fasta", "") +
for j, k in enumerate(batch): f"_chunk_{i}.fasta" % (i + 1)
)
with open(
filename, "w", encoding="utf-8"
) as output_handle:
for j, _ in enumerate(batch):
record = SeqRecord( record = SeqRecord(
self.generate_random_sequence(self.read_length), self.generate_random_sequence(
self.read_length
),
id="random_seq: " + str(j + 1), id="random_seq: " + str(j + 1),
) )
SeqIO.write(record, output_handle, "fasta") SeqIO.write(record, output_handle, "fasta")
...@@ -153,12 +160,16 @@ class ReadSequencer: ...@@ -153,12 +160,16 @@ class ReadSequencer:
SeqIO.write(record, output_handle, "fasta") SeqIO.write(record, output_handle, "fasta")
else: else:
record_iter = SeqIO.parse(open(self.fasta, encoding="utf-8"), "fasta") with open(self.fasta, encoding="utf-8") as file:
for i, batch in enumerate( record_iter = SeqIO.parse(file, "fasta")
self.batch_iterator(record_iter, self.chunk_size) for i, batch in enumerate(
): self.batch_iterator(record_iter, self.chunk_size)
filename = self.output.replace(".fasta", "") + "_chunk_%i.fasta" % (i + 1) ):
for j, record in enumerate(batch): filename = (
batch[j].seq = self.resize_sequence(record) self.output.replace(".fasta", "") +
with open(filename, "w") as handle: f"_chunk_{i}.fasta" % (i + 1)
SeqIO.write(batch, handle, "fasta") )
for j, record in enumerate(batch):
batch[j].seq = self.resize_sequence(record)
with open(filename, "w", encoding="utf-8") as handle:
SeqIO.write(batch, handle, "fasta")
...@@ -14,8 +14,8 @@ setup( ...@@ -14,8 +14,8 @@ setup(
license='MIT', license='MIT',
author='Clara Serger, Michael Sandholzer and Christoph Harmel', author='Clara Serger, Michael Sandholzer and Christoph Harmel',
author_email='christoph.harmel@unibas.ch', author_email='christoph.harmel@unibas.ch',
description='Simulates sequencing with a specified read length from \ description='Simulates sequencing with a specified read length from'
sequences specified by a FASTA file.', 'sequences specified by a FASTA file.',
packages=find_packages(), packages=find_packages(),
install_requires=INSTALL_REQUIRED, install_requires=INSTALL_REQUIRED,
entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']} entry_points={'console_scripts': ['readsequencer=readsequencer.cli:main']}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment