Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cDNA generator
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
tools
cDNA generator
Commits
d4e83766
Commit
d4e83766
authored
2 years ago
by
Eric Boittier
Browse files
Options
Downloads
Patches
Plain Diff
clean up
parent
a884c9a7
No related branches found
No related tags found
No related merge requests found
Pipeline
#14905
failed
2 years ago
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitlab-ci.yml
+1
-2
1 addition, 2 deletions
.gitlab-ci.yml
cdna/cdna.py
+30
-27
30 additions, 27 deletions
cdna/cdna.py
cdna/cli.py
+9
-2
9 additions, 2 deletions
cdna/cli.py
with
40 additions
and
31 deletions
.gitlab-ci.yml
+
1
−
2
View file @
d4e83766
...
@@ -6,6 +6,5 @@ default:
...
@@ -6,6 +6,5 @@ default:
my_tests
:
my_tests
:
# Good to put a small description here of what this job does
# Good to put a small description here of what this job does
script
:
script
:
-
pip install -r requirements_dev.txt
-
pip install -e -r requirements_dev.txt
-
black cdna
-
pylint cdna
-
pylint cdna
This diff is collapsed.
Click to expand it.
cdna/cdna.py
+
30
−
27
View file @
d4e83766
...
@@ -45,6 +45,9 @@ def seq_compliment(sequence: str) -> str:
...
@@ -45,6 +45,9 @@ def seq_compliment(sequence: str) -> str:
class
CDNAGen
:
class
CDNAGen
:
"""
Module that performs the cDNA synthesis.
"""
def
__init__
(
self
,
ifasta
:
str
,
igtf
:
str
,
icpn
:
str
,
ofasta
:
str
,
ocsv
:
str
):
def
__init__
(
self
,
ifasta
:
str
,
igtf
:
str
,
icpn
:
str
,
ofasta
:
str
,
ocsv
:
str
):
# inputs
# inputs
self
.
fasta
=
ifasta
self
.
fasta
=
ifasta
...
@@ -56,7 +59,7 @@ class CDNAGen:
...
@@ -56,7 +59,7 @@ class CDNAGen:
# variables
# variables
self
.
fasta_dict
=
None
self
.
fasta_dict
=
None
self
.
fasta_records
=
None
self
.
fasta_records
=
None
self
.
df_input_GTF
=
None
self
.
gtf_df
=
None
self
.
run
()
self
.
run
()
def
run
(
self
)
->
None
:
def
run
(
self
)
->
None
:
...
@@ -77,7 +80,7 @@ class CDNAGen:
...
@@ -77,7 +80,7 @@ class CDNAGen:
def
add_records
(
self
)
->
None
:
def
add_records
(
self
)
->
None
:
self
.
fasta_records
=
[]
self
.
fasta_records
=
[]
for
index
,
row
in
self
.
df_input_GTF
.
iterrows
():
for
index
,
row
in
self
.
gtf_df
.
iterrows
():
if
row
[
"
compliment
"
]
is
not
None
:
if
row
[
"
compliment
"
]
is
not
None
:
copy_number
=
row
[
"
Transcript_Copy_Number
"
]
copy_number
=
row
[
"
Transcript_Copy_Number
"
]
record
=
SeqRecord
(
record
=
SeqRecord
(
...
@@ -94,7 +97,7 @@ class CDNAGen:
...
@@ -94,7 +97,7 @@ class CDNAGen:
Returns: None
Returns: None
"""
"""
self
.
df_input_GTF
[
"
priming_site
"
]
=
self
.
df_input_GTF
.
apply
(
self
.
gtf_df
[
"
priming_site
"
]
=
self
.
gtf_df
.
apply
(
lambda
row
:
self
.
read_primingsite
(
row
[
"
seqname
"
],
row
[
"
start
"
]),
lambda
row
:
self
.
read_primingsite
(
row
[
"
seqname
"
],
row
[
"
start
"
]),
axis
=
1
,
axis
=
1
,
)
)
...
@@ -105,7 +108,7 @@ class CDNAGen:
...
@@ -105,7 +108,7 @@ class CDNAGen:
Returns: None
Returns: None
"""
"""
self
.
df_input_GTF
[
"
compliment
"
]
=
self
.
df_input_GTF
[
"
priming_site
"
].
apply
(
self
.
gtf_df
[
"
compliment
"
]
=
self
.
gtf_df
[
"
priming_site
"
].
apply
(
lambda
x
:
seq_compliment
(
x
)
lambda
x
:
seq_compliment
(
x
)
)
)
...
@@ -147,11 +150,11 @@ class CDNAGen:
...
@@ -147,11 +150,11 @@ class CDNAGen:
Returns: None
Returns: None
"""
"""
df_
input_CSV
=
pd
.
read_csv
(
self
.
cpn
,
index_col
=
False
)
df_
csv
=
pd
.
read_csv
(
self
.
cpn
,
index_col
=
False
)
df_
input_CSV
=
(
df_
csv
=
(
df_
input_CSV
.
reset_index
()
df_
csv
.
reset_index
()
)
# make sure indexes pair with number of rows
)
# make sure indexes pair with number of rows
self
.
df_input_CSV
=
df_input_CSV
self
.
csv_df
=
df_csv
def
read_gtf
(
self
)
->
None
:
def
read_gtf
(
self
)
->
None
:
"""
Read and process the GTF file.
"""
Read and process the GTF file.
...
@@ -163,43 +166,43 @@ class CDNAGen:
...
@@ -163,43 +166,43 @@ class CDNAGen:
"""
"""
# returns GTF with essential columns such as "feature", "seqname", "start", "end"
# returns GTF with essential columns such as "feature", "seqname", "start", "end"
# alongside the names of any optional keys which appeared in the attribute column
# alongside the names of any optional keys which appeared in the attribute column
df_input_GTF
=
read_gtf
(
self
.
gtf
)
gtf_df
=
read_gtf
(
self
.
gtf
)
df_input_GTF
[
"
Binding_Probability
"
]
=
pd
.
to_numeric
(
gtf_df
[
"
Binding_Probability
"
]
=
pd
.
to_numeric
(
df_input_GTF
[
"
Binding_Probability
"
]
gtf_df
[
"
Binding_Probability
"
]
)
# convert to numeric
)
# convert to numeric
df_normalization_bind_probablility
=
df_input_GTF
.
groupby
(
"
seqname
"
)[
df_normalization_bind_probablility
=
gtf_df
.
groupby
(
"
seqname
"
)[
"
Binding_Probability
"
"
Binding_Probability
"
].
sum
()
# extract binding probab
l
ility
].
sum
()
# extract binding probability
count
=
0
count
=
0
prev_id
=
None
prev_id
=
None
# Adds Normalized_Binding_Probability and Transcript_Copy_Number to each transcript in the dataframe
# Adds Normalized_Binding_Probability and Transcript_Copy_Number to each transcript in the dataframe
for
index
,
row
in
df_input_GTF
.
iterrows
():
for
index
,
row
in
gtf_df
.
iterrows
():
# GTF transcript ID
# GTF transcript ID
id_
GTF
=
str
(
row
[
"
seqname
"
])
id_
=
str
(
row
[
"
seqname
"
])
if
id_
GTF
==
prev_id
:
if
id_
==
prev_id
:
count
+=
1
count
+=
1
else
:
else
:
prev_id
=
None
prev_id
=
None
count
=
0
count
=
0
# CSV transcript ID
# CSV transcript ID
id_
CSV
=
str
(
row
[
"
seqname
"
]).
split
(
"
_
"
)[
1
]
id_
csv
=
str
(
row
[
"
seqname
"
]).
split
(
"
_
"
)[
1
]
# Calculate Normalized_Binding_Probability and add to GTF dataframe
# Calculate Normalized_Binding_Probability and add to GTF dataframe
df_input_GTF
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
=
(
gtf_df
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
=
(
row
[
"
Binding_Probability
"
]
/
df_normalization_bind_probablility
[
id_
GTF
]
row
[
"
Binding_Probability
"
]
/
df_normalization_bind_probablility
[
id_
]
)
)
# Calculate Transcript_Copy_Number and add to GTF dataframe
# Calculate Transcript_Copy_Number and add to GTF dataframe
csv_transcript_copy_number
=
self
.
df_input_CSV
.
loc
[
csv_transcript_copy_number
=
self
.
csv_df
.
loc
[
self
.
df_input_CSV
[
"
ID of transcript
"
]
==
int
(
id_
CSV
),
self
.
csv_df
[
"
ID of transcript
"
]
==
int
(
id_
csv
),
"
Transcript copy number
"
,
"
Transcript copy number
"
,
].
iloc
[
0
]
].
iloc
[
0
]
df_input_GTF
.
loc
[
index
,
"
Transcript_Copy_Number
"
]
=
round
(
gtf_df
.
loc
[
index
,
"
Transcript_Copy_Number
"
]
=
round
(
csv_transcript_copy_number
csv_transcript_copy_number
*
df_input_GTF
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
*
gtf_df
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
)
)
df_input_GTF
.
loc
[
index
,
"
cdna_ID
"
]
=
f
"
{
id_
GTF
}
_
{
count
}
"
gtf_df
.
loc
[
index
,
"
cdna_ID
"
]
=
f
"
{
id_
}
_
{
count
}
"
prev_id
=
id_
GTF
prev_id
=
id_
self
.
df_input_GTF
=
df_input_GTF
self
.
gtf_df
=
gtf_df
def
write_fasta
(
self
)
->
None
:
def
write_fasta
(
self
)
->
None
:
"""
Writes cDNA fasta records to file.
"""
Writes cDNA fasta records to file.
...
@@ -220,7 +223,7 @@ class CDNAGen:
...
@@ -220,7 +223,7 @@ class CDNAGen:
Returns: None
Returns: None
"""
"""
self
.
df_input_GTF
[[
"
cdna_ID
"
,
"
Transcript_Copy_Number
"
]].
to_csv
(
self
.
gtf_df
[[
"
cdna_ID
"
,
"
Transcript_Copy_Number
"
]].
to_csv
(
self
.
output_csv
,
index
=
False
self
.
output_csv
,
index
=
False
)
)
print
(
f
"
Copy number csv file successfully written to:
{
self
.
output_csv
}
"
)
print
(
f
"
Copy number csv file successfully written to:
{
self
.
output_csv
}
"
)
This diff is collapsed.
Click to expand it.
cdna/cli.py
+
9
−
2
View file @
d4e83766
...
@@ -4,7 +4,14 @@ import logging
...
@@ -4,7 +4,14 @@ import logging
from
cdna
import
CDNAGen
from
cdna
import
CDNAGen
def
parser
():
def
parser
()
->
None
:
"""
Parser for cDNA generator
Parses command line arguments for cDNA generation.
Returns: None
"""
parser
=
argparse
.
ArgumentParser
(
parser
=
argparse
.
ArgumentParser
(
prog
=
"
cDNA generator
"
,
prog
=
"
cDNA generator
"
,
description
=
"
Generate cDNA sequences based on primer probabilities.
"
,
description
=
"
Generate cDNA sequences based on primer probabilities.
"
,
...
@@ -48,4 +55,4 @@ if __name__ == "__main__":
...
@@ -48,4 +55,4 @@ if __name__ == "__main__":
print
(
"
**********************
"
)
print
(
"
**********************
"
)
print
(
"
Running cDNA generator
"
)
print
(
"
Running cDNA generator
"
)
print
(
"
**********************
"
)
print
(
"
**********************
"
)
cnda_object
=
parser
()
parser
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment