"""Module containing functionalities to store run parameters.

Class:
    ParamParse: Take as input a file containing the parameters
        and stores them in its attributes.
"""
import logging
from pathlib import Path

LOG = logging.getLogger(__name__)

# Spellings accepted for boolean parameters (compared case-insensitively).
# The empty string is treated as false, as ``bool("")`` is.
_TRUE_STRINGS = frozenset({"true", "t", "yes", "y", "1"})
_FALSE_STRINGS = frozenset({"false", "f", "no", "n", "0", ""})


def _parse_bool(value: str) -> bool:
    """Convert a textual flag such as ``TRUE`` or ``false`` to a bool.

    Args:
        value: Text found after the colon of a boolean parameter.

    Returns:
        The boolean the text stands for.

    Raises:
        ValueError: If the text is not a recognised boolean spelling.
    """
    normalized = value.strip().casefold()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise ValueError(f"not a boolean value: {value!r}")


# Parameter label in the file -> (attribute names to set, converter).
_PARAMETERS = {
    'Csv transcripts file': (('transcripts_file',), Path),
    'Reference genome file': (('genome_ref_file',), Path),
    'Transcripts annotation file': (('annotations_file',), Path),
    'Output folder': (('output_path',), Path),
    'Number of reads': (('n_reads',), int),
    'Number of cells': (('n_cells',), int),
    # ``rna_avg`` is the historical attribute name; ``rna_avg_length`` is the
    # documented one. Both are set so existing callers keep working.
    'Average RNA fragments length': (('rna_avg', 'rna_avg_length'), float),
    'RNA fragment length standard deviation': (('rna_sd_length',), float),
    'Reads length': (('read_length',), int),
    'Intron retaining probability': (('intron_rate',), float),
    'Add poly A tail': (('add_poly_a',), _parse_bool),
    'Function to add poly A tail': (('poly_a_func',), str),
    'Primer sequence': (('primer_seq',), str),
    'Function to evaluate internal priming': (('priming_func',), str),
}


class ParamParse:
    """Class holding the parameters of the run.

    The parameter file is read line by line. Each line of the form
    ``<label>: <value>`` whose label is known sets the corresponding
    attribute; any other line is ignored. An attribute whose label is
    absent from the file is not set.

    Args:
        param_file: Path to file with parameter values.

    Attributes:
        param_file: File with parameter values.
        transcripts_file: File with transcript abundances.
        genome_ref_file: Reference genome file.
        annotations_file: Transcripts annotations.
        output_path: Output folder.
        n_reads: Number of reads to be simulated.
        n_cells: Number of cells to be simulated.
        rna_avg: average RNA fragment length.
        rna_avg_length: Same value as ``rna_avg``.
        rna_sd_length: RNA fragment length standard deviation.
        read_length: Read length.
        intron_rate: Constant probability of retaining an intron.
        add_poly_a: Boolean option to add a poly A tail.
        poly_a_func: Function to add a poly_a tail.
        primer_seq: Sequence of the primer.
        priming_func: Function that evaluates internal priming.

    Raises:
        ValueError: If a known parameter has a value that cannot be
            converted to its expected type.
    """

    param_file: Path
    transcripts_file: Path
    genome_ref_file: Path
    annotations_file: Path
    output_path: Path
    n_reads: int
    n_cells: int
    rna_avg: float
    rna_avg_length: float
    rna_sd_length: float
    read_length: int
    intron_rate: float
    add_poly_a: bool
    poly_a_func: str
    primer_seq: str
    priming_func: str

    def __init__(self, param_file: Path) -> None:
        """Class constructor."""
        self.param_file = Path(param_file)
        with open(self.param_file, encoding="utf-8") as f:
            LOG.info("Loading parameters...")
            for line in f:
                # Split on the first colon only, so values that contain a
                # colon themselves (e.g. ``C:/data/ref.fa``) stay intact.
                label, separator, raw_value = line.partition(':')
                if not separator:
                    continue  # blank line or free text, not a parameter
                entry = _PARAMETERS.get(label.strip())
                if entry is None:
                    continue  # unknown label: ignored
                attributes, convert = entry
                raw_value = raw_value.strip()
                try:
                    value = convert(raw_value)
                except ValueError as err:
                    raise ValueError(
                        f"Invalid value {raw_value!r} for parameter "
                        f"{label.strip()!r} in {self.param_file}"
                    ) from err
                for attribute in attributes:
                    setattr(self, attribute, value)
        LOG.info("Parameters loaded.")
"""Tests the parameter parser class."""

import pytest
from pathlib import Path
from src import parameter_parser as pp
from src import poly_a


def test_parser():
    """Check every attribute parsed from the sample parameter file.

    The expected values mirror the contents of
    ``tests/resources/Param_test.txt``.
    """
    par = pp.ParamParse('./tests/resources/Param_test.txt')
    assert par.param_file == Path('./tests/resources/Param_test.txt')
    assert par.transcripts_file == Path('./transcripts.csv')
    assert par.genome_ref_file == Path('./home/ref.ref')
    assert par.annotations_file == Path('./home/annotations.ann')
    assert par.output_path == Path('./home/output')
    assert par.n_reads == 10023
    assert par.n_cells == 34
    assert par.rna_avg == 150
    assert par.rna_sd_length == 10
    assert par.read_length == 100
    assert par.intron_rate == 0.2
    # The file says "TRUE". Compare against the literal rather than
    # bool('TRUE'), which is True for any non-empty string and so would
    # not check how the flag was parsed.
    assert par.add_poly_a is True
    assert par.poly_a_func == 'generate_poly_a'
    assert par.primer_seq == 'ACCTGATCGTACG'
    assert par.priming_func == 'internal_priming'