Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scRNA-seq-simulation
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
pipelines
scRNA-seq-simulation
Commits
0329e263
Commit
0329e263
authored
3 years ago
by
Melvin Alappat
Browse files
Options
Downloads
Patches
Plain Diff
fix: remove unnecessary class
parent
4456ea0f
No related branches found
No related tags found
1 merge request
!16
Issue_4
Pipeline
#13906
passed
3 years ago
Stage: qc
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/primingprob/priming_prob.py
+111
-113
111 additions, 113 deletions
src/primingprob/priming_prob.py
with
111 additions
and
113 deletions
src/primingprob/priming_prob.py
+
111
−
113
View file @
0329e263
"""
Imports
.
"""
"""
Calculates the probability of priming and write the gff file
.
"""
import
numpy
as
np
import
numpy
as
np
import
scipy.constants
# type: ignore
import
scipy.constants
# type: ignore
def inter_para(input_path):
    """Read the parameter lines from a RIblast output file.

    Only lines whose first character is a digit are kept; every other
    line (headers, blank lines, ...) is skipped.

    Args:
        input_path: Path to the RIblast output file (Energies.txt).

    Returns:
        list: The kept lines in file order, each still carrying its
        trailing line terminator.
    """
    # The context manager closes the file even if reading raises.
    with open(input_path, "r") as energies:
        # line[:1] instead of line[0]: identical for lines read from a
        # file, but cannot raise on an empty string.
        return [line for line in energies if line[:1].isdigit()]
_KCAL_TO_JOULE = 4184  # 1 kcal = 4184 joule
_TEMPERATURE_K = 300.15  # room temperature (27 degrees Celsius) in Kelvin


def _read_transcript_ids(fasta_path):
    """Return the first whitespace-separated token of every FASTA header."""
    with open(fasta_path, "r") as fasta_file:
        return [
            line.replace(">", "").split()[0]
            for line in fasta_file
            if line.startswith(">")
        ]


def _parse_interaction(line):
    """Return ``(weight, site)`` for one comma-separated parameter line.

    ``weight`` is exp(-E / (R * T)) for the second-to-last field (taken to
    be the interaction energy in kcal/mol); ``site`` is the part of the
    last field after ":" split into its two coordinates (as strings).
    """
    fields = line.split(",")
    energy_field, site_field = fields[-2], fields[-1]

    # "(0-9:30-21)\n" -> "30-21)\n" -> ["30", "21"]
    site = (
        site_field.split(":")[1]
        .replace(")", "")
        .replace("\n", "")
        .replace("-", " ")
        .split(" ")
    )

    rt = scipy.constants.R * _TEMPERATURE_K  # gas constant R * T
    energy_joule = float(energy_field) * _KCAL_TO_JOULE
    return np.exp(-energy_joule / rt), site


def inter_prob(data_list, fasta_path, output_path):
    """Calculate normalized priming probabilities and write a GFF file.

    For every transcript ID found in the FASTA headers, the parameter
    lines belonging to it are converted to weights exp(-E / RT) and
    normalized so that the weights of one transcript sum to 1.

    ``data_list`` must be grouped by transcript, in the same order as the
    transcripts appear in the FASTA file: lines are assigned to
    transcripts as consecutive runs, sized by how many lines contain each
    ID.

    NOTE(review): a line is counted for an ID when the ID occurs anywhere
    in the line (substring match), so an ID that is a prefix of another
    ID would be over-counted. Kept as-is for backward compatibility;
    confirm against real RIblast output.

    Args:
        data_list (list): Parameter lines of Energies.txt (see
            ``inter_para``).
        fasta_path (path): Path to the FASTA file.
        output_path (path): Path of the GFF file to write.

    Returns:
        list: One ``[transcript_id, probability, [end, start]]`` entry per
        parameter line; the same rows are written to ``output_path``.

    Raises:
        IndexError: If the per-transcript line counts do not add up to
            ``len(data_list)`` (a line matched no ID or several IDs).
    """
    id_list = _read_transcript_ids(fasta_path)

    # Number of interactions per transcript, in FASTA order.
    counter_list = [
        sum(1 for line in data_list if transcript_id in line)
        for transcript_id in id_list
    ]

    interactions = [_parse_interaction(line) for line in data_list]

    if sum(counter_list) != len(interactions):
        raise IndexError(
            "interaction lines do not match the FASTA IDs: "
            f"{sum(counter_list)} matched, {len(interactions)} given"
        )

    final_list = []  # rows to return and to write
    offset = 0
    for transcript_id, count in zip(id_list, counter_list):
        group = interactions[offset:offset + count]
        offset += count
        # Normalization constant for this transcript.
        weight_sum = sum(weight for weight, _ in group)
        for weight, site in group:
            final_list.append([transcript_id, weight / weight_sum, site])

    # The context manager guarantees the GFF file is flushed and closed.
    with open(output_path, "w+") as gff:
        for transcript_id, probability, site in final_list:
            columns = [
                str(transcript_id),  # transcript ID
                "RIblast",
                "Priming Site",
                str(site[1]),  # start
                str(site[0]),  # end
                str(probability),  # probability
                ".",
                ".",
                ".",
            ]
            gff.write("\t".join(columns) + "\n")

    return final_list
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment