Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cDNA generator
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
tools
cDNA generator
Commits
572246f7
Commit
572246f7
authored
1 year ago
by
Mate Balajti
Browse files
Options
Downloads
Patches
Plain Diff
feat: add lint job to CI
parent
7fd08564
No related branches found
No related tags found
1 merge request
!6
feat: add testing and linting to CI workflow
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitlab-ci.yml
+4
-4
4 additions, 4 deletions
.gitlab-ci.yml
cdna/__init__.py
+1
-0
1 addition, 0 deletions
cdna/__init__.py
cdna/cdna.py
+34
-30
34 additions, 30 deletions
cdna/cdna.py
with
39 additions
and
34 deletions
.gitlab-ci.yml
+
4
−
4
View file @
572246f7
...
@@ -20,7 +20,7 @@ unit-test-job: # This job runs in the test stage.
...
@@ -20,7 +20,7 @@ unit-test-job: # This job runs in the test stage.
-
pip install -r requirements.txt
-
pip install -r requirements.txt
-
pip install -r requirements_dev.txt
-
pip install -r requirements_dev.txt
-
pip install -e .
-
pip install -e .
-
coverage run --source
term_frag_sel
-m pytest
-
coverage run --source
cdna
-m pytest
-
coverage report -m
-
coverage report -m
lint-test-job
:
# This job also runs in the test stage.
lint-test-job
:
# This job also runs in the test stage.
...
@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage.
...
@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage.
-
pip install -r requirements.txt
-
pip install -r requirements.txt
-
pip install -r requirements_dev.txt
-
pip install -r requirements_dev.txt
-
pip install -e .
-
pip install -e .
-
flake8 --docstring-convention google term_frag_sel/ tests/
-
flake8 --docstring-convention google cdna/ tests/
-
pylint term_frag_sel/ tests/
-
pylint cdna/ tests/
-
mypy term_frag_sel/
-
mypy cdna/
\ No newline at end of file
\ No newline at end of file
This diff is collapsed.
Click to expand it.
cdna/__init__.py
+
1
−
0
View file @
572246f7
"""
Initialise package.
"""
This diff is collapsed.
Click to expand it.
cdna/cdna.py
+
34
−
30
View file @
572246f7
"""
cDNA generator.
"""
import
warnings
import
warnings
import
pandas
as
pd
import
pandas
as
pd
from
Bio
import
SeqIO
from
Bio
import
SeqIO
from
Bio.Seq
import
Seq
from
Bio.Seq
import
Seq
...
@@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
...
@@ -11,8 +11,8 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
def
complement
(
res
:
str
)
->
str
:
def
complement
(
res
:
str
)
->
str
:
"""
"""
Return the cDNA complement of a given base pair.
Returns the cDNA complement of a given base pair
Args:
Args:
res: residue code.
res: residue code.
...
@@ -28,9 +28,8 @@ def complement(res: str) -> str:
...
@@ -28,9 +28,8 @@ def complement(res: str) -> str:
def
seq_complement
(
sequence
:
str
)
->
str
or
None
:
def
seq_complement
(
sequence
:
str
)
->
str
or
None
:
"""
"""
Return the corresponding cDNA sequence by finding the complementary
\
Returns the corresponding cDNA sequence by finding the complementary
base pairs and returning the reversed sequence.
base pairs and returning the reversed sequence.
Args:
Args:
sequence: sequence to be converted into cDNA.
sequence: sequence to be converted into cDNA.
...
@@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None:
...
@@ -40,16 +39,17 @@ def seq_complement(sequence: str) -> str or None:
"""
"""
if
sequence
is
None
:
if
sequence
is
None
:
return
None
return
None
_
=
""
.
join
([
complement
(
char
)
for
char
in
str
(
sequence
)])[::
-
1
]
# reverse string
_
=
""
.
join
([
complement
(
char
)
for
char
in
str
(
sequence
)])[::
-
1
]
# reverse string
# noqa: E501
return
_
return
_
class
CDNAGen
:
class
CDNAGen
:
"""
"""
Perform the cDNA synthesis.
"""
Module that performs the cDNA synthesis.
"""
def
__init__
(
self
,
ifasta
:
str
,
igtf
:
str
,
icpn
:
str
,
ofasta
:
str
,
ocsv
:
str
):
def
__init__
(
self
,
ifasta
:
str
,
igtf
:
str
,
icpn
:
str
,
ofasta
:
str
,
ocsv
:
str
):
"""
Initialise function.
"""
# inputs
# inputs
self
.
fasta
=
ifasta
self
.
fasta
=
ifasta
self
.
gtf
=
igtf
self
.
gtf
=
igtf
...
@@ -65,8 +65,8 @@ class CDNAGen:
...
@@ -65,8 +65,8 @@ class CDNAGen:
self
.
run
()
self
.
run
()
def
run
(
self
)
->
None
:
def
run
(
self
)
->
None
:
"""
"""
Execute the cDNA workflow.
Executes the cDNA workflow.
Returns: None
Returns: None
"""
"""
...
@@ -80,7 +80,7 @@ class CDNAGen:
...
@@ -80,7 +80,7 @@ class CDNAGen:
self
.
write_csv
()
self
.
write_csv
()
def
add_records
(
self
)
->
None
:
def
add_records
(
self
)
->
None
:
"""
Add
s
data records to fasta file.
"""
Add data records to fasta file.
Adds the copy number information to the fasta records.
Adds the copy number information to the fasta records.
...
@@ -88,7 +88,7 @@ class CDNAGen:
...
@@ -88,7 +88,7 @@ class CDNAGen:
"""
"""
self
.
fasta_records
=
[]
self
.
fasta_records
=
[]
for
index
,
row
in
self
.
gtf_df
.
iterrows
():
for
_
,
row
in
self
.
gtf_df
.
iterrows
():
if
row
[
"
complement
"
]
is
not
None
:
if
row
[
"
complement
"
]
is
not
None
:
copy_number
=
row
[
"
Transcript_Copy_Number
"
]
copy_number
=
row
[
"
Transcript_Copy_Number
"
]
for
_
in
range
(
int
(
copy_number
)):
for
_
in
range
(
int
(
copy_number
)):
...
@@ -101,8 +101,8 @@ class CDNAGen:
...
@@ -101,8 +101,8 @@ class CDNAGen:
self
.
fasta_records
.
append
(
record
)
self
.
fasta_records
.
append
(
record
)
def
add_sequences
(
self
)
->
None
:
def
add_sequences
(
self
)
->
None
:
"""
"""
Add the sequence for a given priming site.
Adds the sequence for a given priming site.
Returns: None
Returns: None
"""
"""
...
@@ -112,8 +112,8 @@ class CDNAGen:
...
@@ -112,8 +112,8 @@ class CDNAGen:
)
)
def
add_complement
(
self
)
->
None
:
def
add_complement
(
self
)
->
None
:
"""
"""
Add the complementary cDNA sequence.
Adds the complementary cDNA sequence.
Returns: None
Returns: None
"""
"""
...
@@ -122,7 +122,7 @@ class CDNAGen:
...
@@ -122,7 +122,7 @@ class CDNAGen:
)
)
def
read_primingsite
(
self
,
sequence
:
str
,
end
:
int
)
->
None
:
def
read_primingsite
(
self
,
sequence
:
str
,
end
:
int
)
->
None
:
"""
Read a fasta file from a given start character
"""
Read a fasta file from a given start character
.
Reads a fasta sequence with ID (sequence) and returns the
Reads a fasta sequence with ID (sequence) and returns the
sequence starting from the index start.
sequence starting from the index start.
...
@@ -151,7 +151,7 @@ class CDNAGen:
...
@@ -151,7 +151,7 @@ class CDNAGen:
self
.
fasta_dict
=
{
x
.
name
:
x
for
x
in
records
}
self
.
fasta_dict
=
{
x
.
name
:
x
for
x
in
records
}
def
read_csv
(
self
)
->
None
:
def
read_csv
(
self
)
->
None
:
"""
Read
s
a given copy number csv file
"""
Read a given copy number csv file
.
Wrapper for Pandas read_csv.
Wrapper for Pandas read_csv.
...
@@ -159,24 +159,27 @@ class CDNAGen:
...
@@ -159,24 +159,27 @@ class CDNAGen:
"""
"""
df_csv
=
pd
.
read_csv
(
self
.
cpn
,
index_col
=
False
)
df_csv
=
pd
.
read_csv
(
self
.
cpn
,
index_col
=
False
)
df_csv
=
df_csv
.
reset_index
()
# make sure indexes pair with number of rows
df_csv
=
df_csv
.
reset_index
()
# make sure indexes pair with number of rows
# noqa: E501
self
.
csv_df
=
df_csv
self
.
csv_df
=
df_csv
def
read_gtf
(
self
)
->
None
:
def
read_gtf
(
self
)
->
None
:
"""
Read and process the GTF file.
"""
Read and process the GTF file.
Reads a GTF file and determines copy numbers from normalized probabilities.
Reads a GTF file and determines copy numbers from
\
normalized probabilities.
Returns: None
Returns: None
"""
"""
# returns GTF with essential columns such as "feature", "seqname", "start", "end"
# returns GTF with essential columns such as \
# alongside the names of any optional keys which appeared in the attribute column
# "feature", "seqname", "start", "end"
# alongside the names of any optional keys \
# which appeared in the attribute column
gtf_df
=
read_gtf
(
self
.
gtf
)
gtf_df
=
read_gtf
(
self
.
gtf
)
gtf_df
[
"
Binding_Probability
"
]
=
pd
.
to_numeric
(
gtf_df
[
"
Binding_Probability
"
]
=
pd
.
to_numeric
(
gtf_df
[
"
Binding_Probability
"
]
gtf_df
[
"
Binding_Probability
"
]
)
# convert to numeric
)
# convert to numeric
df_norm
alization
_bind_prob
ablility
=
gtf_df
.
groupby
(
"
seqname
"
)[
df_norm_bind_prob
=
gtf_df
.
groupby
(
"
seqname
"
)[
"
Binding_Probability
"
"
Binding_Probability
"
].
sum
()
# extract binding probability
].
sum
()
# extract binding probability
count
=
0
count
=
0
...
@@ -194,7 +197,7 @@ class CDNAGen:
...
@@ -194,7 +197,7 @@ class CDNAGen:
id_csv
=
str
(
row
[
"
seqname
"
]).
split
(
"
_
"
)[
1
]
id_csv
=
str
(
row
[
"
seqname
"
]).
split
(
"
_
"
)[
1
]
# Calculate Normalized_Binding_Probability and add to GTF dataframe
# Calculate Normalized_Binding_Probability and add to GTF dataframe
gtf_df
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
=
(
gtf_df
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
=
(
row
[
"
Binding_Probability
"
]
/
df_norm
alization
_bind_prob
ablility
[
id_
]
row
[
"
Binding_Probability
"
]
/
df_norm_bind_prob
[
id_
]
)
)
# Calculate Normalized_Binding_Probability and add to GTF dataframe
# Calculate Normalized_Binding_Probability and add to GTF dataframe
csv_transcript_copy_number
=
self
.
csv_df
.
loc
[
csv_transcript_copy_number
=
self
.
csv_df
.
loc
[
...
@@ -211,7 +214,7 @@ class CDNAGen:
...
@@ -211,7 +214,7 @@ class CDNAGen:
self
.
gtf_df
=
gtf_df
self
.
gtf_df
=
gtf_df
def
write_fasta
(
self
)
->
None
:
def
write_fasta
(
self
)
->
None
:
"""
Write
s
cDNA fasta records to file.
"""
Write cDNA fasta records to file.
Wrapper for SeqIO.write.
Wrapper for SeqIO.write.
...
@@ -222,7 +225,7 @@ class CDNAGen:
...
@@ -222,7 +225,7 @@ class CDNAGen:
print
(
f
"
Fasta file successfully written to:
{
self
.
output_fasta
}
"
)
print
(
f
"
Fasta file successfully written to:
{
self
.
output_fasta
}
"
)
def
write_csv
(
self
)
->
None
:
def
write_csv
(
self
)
->
None
:
"""
Write
s
the copy number information to a csv file.
"""
Write the copy number information to a csv file.
Wrapper for Pandas to_csv.
Wrapper for Pandas to_csv.
...
@@ -232,4 +235,5 @@ class CDNAGen:
...
@@ -232,4 +235,5 @@ class CDNAGen:
self
.
gtf_df
[[
"
cdna_ID
"
,
"
Transcript_Copy_Number
"
]].
to_csv
(
self
.
gtf_df
[[
"
cdna_ID
"
,
"
Transcript_Copy_Number
"
]].
to_csv
(
self
.
output_csv
,
index
=
False
self
.
output_csv
,
index
=
False
)
)
print
(
f
"
Copy number csv file successfully written to:
{
self
.
output_csv
}
"
)
print
(
f
"
Copy number csv file successfully written to:
\
{
self
.
output_csv
}
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment