Skip to content
Snippets Groups Projects
Commit c154c55a authored by Melvin Alappat's avatar Melvin Alappat
Browse files

Adding linted version of Issue #4 code

parents 9ca598fb 557be05b
No related branches found
No related tags found
1 merge request!16Issue_4
Pipeline #13759 failed
"""Imports.""" """Imports."""
import numpy as np import numpy as np
import scipy.constants import scipy.constants
import argparse
from pathlib import Path
class Probability: class Probability:
"""Calculates the probability of priming and write the gff file.""" """Calculates the probability of priming and write the gff file."""
def InterPara(): # adding parser
parser = argparse.ArgumentParser(
description="Fasta-file input",
add_help=False,
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# add arguments
parser.add_argument(
'input_file',
type=lambda p: Path(p).absolute(),
metavar="PATH",
help="path to fasta-file",
)
args = parser.parse_args()
def InterPara(path):
"""Open the RIblast output file and read only the parameter lines. """Open the RIblast output file and read only the parameter lines.
Args: Args:
None Path to Fasta-file
Returns: Returns:
my_list (list): Contains all the parameter lines from RIblast my_list (list): Contains all the parameter lines from RIblast
""" """
myfile = open("./Docker-Files/Energies.txt", "r") # ouput of RIblast # myfile = open(sys.argv[1], "r") # ouput of RIblast
myfile = open(path, "r")
mylist = [] # all lines of Energies starting with an ID-number mylist = [] # all lines of Energies starting with an ID-number
...@@ -24,11 +43,12 @@ class Probability: ...@@ -24,11 +43,12 @@ class Probability:
mylist.append(myline) mylist.append(myline)
else: else:
continue continue
myfile.close() myfile.close()
return(mylist) return(mylist)
data = InterPara() data = InterPara(args.input_file)
def InterProb(data_list): def InterProb(data_list):
"""Calculate the prob. and make the gff file. """Calculate the prob. and make the gff file.
...@@ -39,6 +59,32 @@ class Probability: ...@@ -39,6 +59,32 @@ class Probability:
Returns: Returns:
gff (file): Gff file contains all the output information gff (file): Gff file contains all the output information
""" """
# count interactions per script through fasta ID (first line of fasta)
mycounter = open("./Docker-Files/transcript.fasta", "r")
mycounter_list = []
for mylinecounter in mycounter:
if mylinecounter.startswith(">"):
a = mylinecounter
a = mylinecounter.replace(">", "")
b = a.replace("\n", "")
mycounter_list.append(b)
else:
continue
counter = 0
counter_list = []
for cc in range(0, len(mycounter_list)):
for dd in range(0, len(data_list)):
if mycounter_list[cc] in data_list[dd]:
counter = counter + 1
else:
continue
counter_list.append(counter)
counter = 0
para_list = [] para_list = []
for i in range(0, len(data_list)): for i in range(0, len(data_list)):
...@@ -60,8 +106,8 @@ class Probability: ...@@ -60,8 +106,8 @@ class Probability:
para_list[d][2] = a[1] para_list[d][2] = a[1]
for k in range(0, len(para_list)): # type-conversion of ID and E for k in range(0, len(para_list)): # type-conversion of ID and E
for l in range(0, 2): for w in range(0, 2):
para_list[k][l] = float(para_list[k][l]) para_list[k][w] = float(para_list[k][w])
for z in range(0, len(para_list)): # from kcal/mol to Joule/mol for z in range(0, len(para_list)): # from kcal/mol to Joule/mol
para_list[z][1] = para_list[z][1] * 4184 para_list[z][1] = para_list[z][1] * 4184
...@@ -78,21 +124,47 @@ class Probability: ...@@ -78,21 +124,47 @@ class Probability:
prob_list.append(probab) prob_list.append(probab)
para_list[h][1] = probab para_list[h][1] = probab
prob_list_sum = sum(prob_list) # Sum of all probabilities count_sum = 0
sum_list = []
prob_list2 = prob_list.copy()
for jj in range(0, len(counter_list)):
for ii in range(0, counter_list[jj]):
count_sum = count_sum + prob_list[ii]
sum_list.append(count_sum)
count_sum = 0
del prob_list[0:counter_list[jj]]
real_prob = [] real_prob = []
for v in range(0, len(para_list)): # Normalized probabilities for jj in range(0, len(sum_list)):
prob_linear = (para_list[v][1])/prob_list_sum for ii in range(0, counter_list[jj]):
real_prob.append(prob_linear) prob_list2[ii] = prob_list2[ii]/sum_list[jj]
para_list[v][1] = prob_linear real_prob.append(prob_list2[ii])
del prob_list2[0:counter_list[jj]] # Normalized probabilities
# real_prob contains all the linearized probabilities
for vv in range(0, len(para_list)):
para_list[vv][1] = real_prob[vv]
final_list = []
for bb in range(0, len(sum_list)): # Insert ID in paralist
for ss in range(0, counter_list[bb]):
para_list[ss][0] = mycounter_list[bb]
final_list.append(para_list[ss])
del para_list[0:counter_list[bb]]
gff = open("./output/Potential_Priming_sites.txt", "w+") # gff file gff = open("./output/Potential_Priming_sites.txt", "w+") # gff file
for i in range(0, len(para_list)): for ll in range(0, len(final_list)):
gff.write("Interactions: "+str(para_list[i][0]) + gff.write(str(final_list[ll][0]) +
"\tRIblast\ttranscript\t"+str(para_list[i][2][1])+"\t" + "\tRIblast\ttranscript\t" +
str(para_list[i][2][0])+"\t" + str(final_list[ll][2][1])+"\t" +
str(para_list[i][1])+"\t.\t.\t.\n") str(final_list[ll][2][0])+"\t" +
str(final_list[ll][1])+"\t.\t.\t.\n")
gff.close gff.close
......
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
params.transcripts = "$baseDir/Docker-Files/transcript3.fasta" params.transcripts = "$baseDir/Docker-Files/transcript2.fasta"
params.primers = "$baseDir/Docker-Files/primer.fasta" params.primers = "$baseDir/Docker-Files/primer.fasta"
log.info """\ log.info """\
...@@ -23,3 +23,4 @@ process RIblast_interaction { ...@@ -23,3 +23,4 @@ process RIblast_interaction {
RIblast ris -i $primer -o /RIblast/Energies.txt -d /RIblast/test_db RIblast ris -i $primer -o /RIblast/Energies.txt -d /RIblast/test_db
""" """
} }
"""Module containing functionalities to store run parameters.
Class:
ParamParse: Takes as input a file containing the parameters
and stores them in its attributes.
"""
import logging
from pathlib import Path
LOG = logging.getLogger(__name__)


class ParamParse:
    """Class holding the parameters of the run.

    The parameter file is read line by line. Each line is expected to have
    the form ``<key>: <value>``; the text before the first colon selects
    the attribute and the text after it is converted to the attribute's
    type. Lines without a colon and lines with an unknown key are ignored.
    An attribute is only set when its key appears in the file.

    Args:
        param_file: Path to file with parameter values.

    Attributes:
        param_file: File with parameter values.
        transcripts_file: File with transcript abundances.
        genome_ref_file: Reference genome file.
        annotations_file: Transcripts annotations.
        output_path: Output folder.
        n_reads: Number of reads to be simulated.
        n_cells: Number of cells to be simulated.
        rna_avg: average RNA fragment length.
        rna_avg_length: read-only alias of ``rna_avg``.
        rna_sd_length: RNA fragment length standard deviation.
        read_length: Read length.
        intron_rate: Constant probability of retaining an intron.
        add_poly_a: Boolean option to add a poly A tail.
        poly_a_func: Function to add a poly_a tail.
        primer_seq: Sequence of the primer.
        priming_func: Function that evaluates internal priming.

    Raises:
        ValueError: If a numeric or boolean value cannot be converted.
    """

    # Declared for type checkers only; each attribute is assigned in
    # __init__ when its key is present in the parameter file.
    param_file: Path
    transcripts_file: Path
    genome_ref_file: Path
    annotations_file: Path
    output_path: Path
    n_reads: int
    n_cells: int
    rna_avg: float
    rna_sd_length: float
    read_length: int
    intron_rate: float
    add_poly_a: bool
    poly_a_func: str
    primer_seq: str
    priming_func: str

    # Spellings accepted for the boolean option (compared case-insensitively).
    _TRUE_WORDS = frozenset({'true', 't', 'yes', 'y', 'on', '1'})
    _FALSE_WORDS = frozenset({'false', 'f', 'no', 'n', 'off', '0', ''})

    def __init__(self, param_file: Path) -> None:
        """Class constructor."""
        self.param_file = Path(param_file)

        # Key in the parameter file -> (attribute name, value converter).
        fields = {
            'Csv transcripts file': ('transcripts_file', Path),
            'Reference genome file': ('genome_ref_file', Path),
            'Transcripts annotation file': ('annotations_file', Path),
            'Output folder': ('output_path', Path),
            'Number of reads': ('n_reads', int),
            'Number of cells': ('n_cells', int),
            'Average RNA fragments length': ('rna_avg', float),
            'RNA fragment length standard deviation': ('rna_sd_length', float),
            'Reads length': ('read_length', int),
            'Intron retaining probability': ('intron_rate', float),
            'Add poly A tail': ('add_poly_a', self._parse_bool),
            'Function to add poly A tail': ('poly_a_func', str),
            'Primer sequence': ('primer_seq', str),
            'Function to evaluate internal priming': ('priming_func', str),
        }

        with open(param_file) as f:
            LOG.info("Loading parameters...")
            for line in f:
                # Split on the FIRST colon only, so values that contain a
                # colon themselves (e.g. "C:/data/out") are kept whole.
                key, sep, value = line.partition(':')
                if not sep:
                    continue  # not a "key: value" line
                entry = fields.get(key.strip())
                if entry is None:
                    continue  # unknown key
                attr_name, convert = entry
                setattr(self, attr_name, convert(value.strip()))
        LOG.info("Parameters loaded.")

    @property
    def rna_avg_length(self) -> float:
        """Average RNA fragment length (alias of ``rna_avg``)."""
        return self.rna_avg

    @classmethod
    def _parse_bool(cls, text: str) -> bool:
        """Convert a textual flag such as 'TRUE' or 'false' to a bool.

        ``bool(text)`` is True for every non-empty string, including
        'FALSE', so the text is matched against known spellings instead.

        Raises:
            ValueError: If the text is not a recognised boolean spelling.
        """
        word = text.strip().lower()
        if word in cls._TRUE_WORDS:
            return True
        if word in cls._FALSE_WORDS:
            return False
        raise ValueError(f"Cannot interpret {text!r} as a boolean value")
Csv transcripts file: ./transcripts.csv
Reference genome file: ./home/ref.ref
Transcripts annotation file: ./home/annotations.ann
Output folder: ./home/output
Number of reads: 10023
Number of cells: 34
Average RNA fragments length: 150
RNA fragment length standard deviation: 10
Reads length: 100
Intron retaining probability: 0.2
Add poly A tail: TRUE
Function to add poly A tail: generate_poly_a
Primer sequence: ACCTGATCGTACG
Function to evaluate internal priming: internal_priming
\ No newline at end of file
"""Tests the parameter parser class."""
import pytest
from pathlib import Path
from src import parameter_parser as pp
from src import poly_a
def test_parser():
    """Tests the attributes of the class.

    Loads the parameter file under ./tests/resources and checks that every
    attribute holds the expected, correctly typed value.
    """
    param_path = './tests/resources/Param_test.txt'
    par = pp.ParamParse(param_path)
    assert par.param_file == Path(param_path)
    assert par.transcripts_file == Path('./transcripts.csv')
    assert par.genome_ref_file == Path('./home/ref.ref')
    assert par.annotations_file == Path('./home/annotations.ann')
    assert par.output_path == Path('./home/output')
    assert par.n_reads == 10023
    assert par.n_cells == 34
    assert par.rna_avg == 150
    assert par.rna_sd_length == 10
    assert par.read_length == 100
    assert par.intron_rate == 0.2
    # Assert the literal expected value: bool('TRUE') is True for any
    # non-empty string, so comparing against it hides what is expected.
    assert par.add_poly_a is True
    assert par.poly_a_func == 'generate_poly_a'
    assert par.primer_seq == 'ACCTGATCGTACG'
    assert par.priming_func == 'internal_priming'
\ No newline at end of file
"""Placeholder test for pipeline.""" """Tests the transcriptome abundance file input reader."""
import pytest import pytest
import pandas as pd import pandas as pd
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment