Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cDNA generator
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
tools
cDNA generator
Commits
0db28afa
Commit
0db28afa
authored
2 years ago
by
Eric Boittier
Browse files
Options
Downloads
Patches
Plain Diff
setup.py uses requirements.txt
parent
31edb603
No related branches found
No related tags found
No related merge requests found
Pipeline
#14900
passed
2 years ago
Stage: test
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
Dockerfile
+10
-0
10 additions, 0 deletions
Dockerfile
cdna/cdna.py
+88
-24
88 additions, 24 deletions
cdna/cdna.py
requirements.txt
+2
-1
2 additions, 1 deletion
requirements.txt
setup.py
+4
-1
4 additions, 1 deletion
setup.py
with
104 additions
and
26 deletions
Dockerfile
0 → 100644
+
10
−
0
View file @
0db28afa
FROM
python:3.8-slim-buster
MAINTAINER
EricBoittier
ENV
PATH=$PATH:/usr/games/
COPY
. .
This diff is collapsed.
Click to expand it.
cdna/cdna.py
+
88
−
24
View file @
0db28afa
...
...
@@ -13,10 +13,12 @@ warnings.filterwarnings(action="ignore", category=FutureWarning)
def
compliment
(
res
:
str
)
->
str
:
"""
Returns the compliment of a given DNA residue.
:param res: DNA residue
:return:
Returns the cDNA compliment of a given base pair
Args:
res: residue code.
Returns: corresponding cDNA residue.
"""
translate_dict
=
{
"
A
"
:
"
T
"
,
"
T
"
:
"
A
"
,
"
U
"
:
"
A
"
,
"
G
"
:
"
C
"
,
"
C
"
:
"
G
"
}
if
res
not
in
translate_dict
.
keys
():
...
...
@@ -27,11 +29,14 @@ def compliment(res: str) -> str:
def
seq_compliment
(
sequence
:
str
)
->
str
:
"""
Returns the corresponding cDNA sequence for a given input by finding the
corresponding compliment base pair and reversing the input.
Returns the corresponding cDNA sequence by finding the complimentary
base pairs and returning the reversed sequence.
Args:
sequence: sequence to be converted into cDNA.
Returns: corresponding cDNA sequence.
:param sequence: DNA sequence
:return: cDNA sequence
"""
if
sequence
is
None
:
return
"
None
"
...
...
@@ -51,21 +56,26 @@ class CDNAGen:
# variables
self
.
fasta_dict
=
None
self
.
fasta_records
=
None
self
.
df_input_GTF
=
None
self
.
run
()
def
run
(
self
):
def
run
(
self
)
->
None
:
"""
Executes the cDNA workflow.
Returns: None
"""
self
.
read_csv
()
self
.
read_fasta
()
self
.
read_gtf
()
self
.
add_sequences
()
self
.
add_compliment
()
self
.
add_records
()
print
()
print
()
# blank line for pretty printing
self
.
write_fasta
()
self
.
write_csv
()
def
add_records
(
self
):
def
add_records
(
self
)
->
None
:
self
.
fasta_records
=
[]
for
index
,
row
in
self
.
df_input_GTF
.
iterrows
():
if
row
[
"
compliment
"
]
is
not
None
:
...
...
@@ -78,36 +88,79 @@ class CDNAGen:
)
self
.
fasta_records
.
append
(
record
)
def
add_sequences
(
self
):
def
add_sequences
(
self
)
->
None
:
"""
Adds the sequence for a given priming site.
Returns: None
"""
self
.
df_input_GTF
[
"
priming_site
"
]
=
self
.
df_input_GTF
.
apply
(
lambda
row
:
self
.
read_primingsite
(
row
[
"
seqname
"
],
row
[
"
start
"
]),
axis
=
1
,
)
def
add_compliment
(
self
):
def
add_compliment
(
self
)
->
None
:
"""
Adds the complimentary cDNA sequence.
Returns: None
"""
self
.
df_input_GTF
[
"
compliment
"
]
=
self
.
df_input_GTF
[
"
priming_site
"
].
apply
(
lambda
x
:
seq_compliment
(
x
)
)
def
read_primingsite
(
self
,
sequence
,
start
):
def
read_primingsite
(
self
,
sequence
:
str
,
start
:
int
)
->
None
:
"""
Read a fasta file from a given start character
Reads a fasta sequence with ID (sequence) and returns the
sequence starting from the index start.
Args:
sequence: sequence ID to be read.
start: start of the sequence.
Returns: None
"""
if
sequence
not
in
self
.
fasta_dict
.
keys
():
return
None
_
=
self
.
fasta_dict
[
sequence
].
seq
[
start
:]
return
_
def
read_fasta
(
self
):
def
read_fasta
(
self
)
->
None
:
"""
Read a given fasta file.
Wrapper for SeqIO.parse.
Returns: None
"""
record
=
SeqIO
.
parse
(
self
.
fasta
,
"
fasta
"
)
records
=
list
(
record
)
self
.
fasta_dict
=
{
x
.
name
:
x
for
x
in
records
}
def
read_csv
(
self
):
def
read_csv
(
self
)
->
None
:
"""
Reads a given copy number csv file
Wrapper for Pandas read_csv.
Returns: None
"""
df_input_CSV
=
pd
.
read_csv
(
self
.
cpn
,
index_col
=
False
)
df_input_CSV
=
(
df_input_CSV
.
reset_index
()
)
# make sure indexes pair with number of rows
self
.
df_input_CSV
=
df_input_CSV
def
read_gtf
(
self
):
def
read_gtf
(
self
)
->
None
:
"""
Read and process the GTF file.
Reads a GTF file and determines copy numbers from normalized probabilities.
Returns: None
"""
# returns GTF with essential columns such as "feature", "seqname", "start", "end"
# alongside the names of any optional keys which appeared in the attribute column
df_input_GTF
=
read_gtf
(
self
.
gtf
)
...
...
@@ -132,7 +185,7 @@ class CDNAGen:
id_CSV
=
str
(
row
[
"
seqname
"
]).
split
(
"
_
"
)[
1
]
# Calculate Normalized_Binding_Probability and add to GTF dataframe
df_input_GTF
.
loc
[
index
,
"
Normalized_Binding_Probability
"
]
=
(
row
[
"
Binding_Probability
"
]
/
df_normalization_bind_probablility
[
id_GTF
]
row
[
"
Binding_Probability
"
]
/
df_normalization_bind_probablility
[
id_GTF
]
)
# Calculate Normalized_Binding_Probability and add to GTF dataframe
csv_transcript_copy_number
=
self
.
df_input_CSV
.
loc
[
...
...
@@ -148,15 +201,26 @@ class CDNAGen:
self
.
df_input_GTF
=
df_input_GTF
def
write_fasta
(
self
):
def
write_fasta
(
self
)
->
None
:
"""
Writes cDNA fasta records to file.
Wrapper for SeqIO.write.
Returns: None
"""
SeqIO
.
write
(
self
.
fasta_records
,
self
.
output_fasta
,
"
fasta
"
)
print
(
f
"
Fasta file successfully written to:
{
self
.
output_fasta
}
"
)
def
write_csv
(
self
):
def
write_csv
(
self
)
->
None
:
"""
Writes the copy number information to a csv file.
Wrapper for Pandas to_csv.
Returns: None
"""
self
.
df_input_GTF
[[
"
cdna_ID
"
,
"
Transcript_Copy_Number
"
]].
to_csv
(
self
.
output_csv
,
index
=
False
)
print
(
f
"
Copy number csv file successfully written to:
{
self
.
output_csv
}
"
)
This diff is collapsed.
Click to expand it.
requirements.txt
+
2
−
1
View file @
0db28afa
gtfparse
biopython
\ No newline at end of file
biopython
pandas
This diff is collapsed.
Click to expand it.
setup.py
+
4
−
1
View file @
0db28afa
from
setuptools
import
setup
,
find_packages
with
open
(
'
requirements.txt
'
)
as
f
:
required
=
f
.
read
().
splitlines
()
setup
(
name
=
'
cdna
'
,
url
=
'
https://gitlab.com/my_user_name/my_package.git
'
,
...
...
@@ -9,5 +12,5 @@ setup(
license
=
'
MIT
'
,
version
=
'
1.0.0
'
,
packages
=
find_packages
(),
# this will autodetect Python packages from the directory tree, e.g., in `code/`
install_requires
=
[]
,
# add here packages that are required for your package to run, including version or range of versions
install_requires
=
required
,
# add here packages that are required for your package to run, including version or range of versions
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment