Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scRNA-seq-simulation
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
pipelines
scRNA-seq-simulation
Commits
88665b17
Commit
88665b17
authored
3 years ago
by
Timon Baltisberger
Browse files
Options
Downloads
Patches
Plain Diff
fix: flake8 fixes
parent
968f449c
Branches
Branches containing commit
No related tags found
1 merge request
!15
add: generate transcript structure
Pipeline
#13641
passed
3 years ago
Stage: qc
Stage: test
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
transcript_structure/Generate_transcript_structure.py
+8
-14
8 additions, 14 deletions
transcript_structure/Generate_transcript_structure.py
with
8 additions
and
14 deletions
transcript_structure/Generate_transcript_structure.py
+
8
−
14
View file @
88665b17
"""
Creates differently spliced transcripts.
"""
import
random
import
random
import
csv
import
csv
import
copy
import
copy
class
BuildTranscriptStructure
:
class
BuildTranscriptStructure
:
"""
Creates differently spliced transcripts.
"""
Creates differently spliced transcripts.
Args:
Args:
...
@@ -34,7 +35,6 @@ class BuildTranscriptStructure:
...
@@ -34,7 +35,6 @@ class BuildTranscriptStructure:
gtf_lines(list): List with all newly created gtf lines.
gtf_lines(list): List with all newly created gtf lines.
_transcripts_generated(bool): Indicates whether splicing was conducted or not yet.
_transcripts_generated(bool): Indicates whether splicing was conducted or not yet.
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
input_gene_count
:
str
,
input_gene_count
:
str
,
input_coordinates
:
str
,
input_coordinates
:
str
,
...
@@ -110,7 +110,7 @@ class BuildTranscriptStructure:
...
@@ -110,7 +110,7 @@ class BuildTranscriptStructure:
self
.
gene_sequences_dict
[
gene_name
]
=
gene_info
self
.
gene_sequences_dict
[
gene_name
]
=
gene_info
def
_make_new_transcripts
(
self
)
->
None
:
def
_make_new_transcripts
(
self
)
->
None
:
"""
Generates the differently spliced transcripts.
"""
"""
Generates the differently spliced transcripts.
"""
for
gene
in
self
.
gene_count_dict
:
for
gene
in
self
.
gene_count_dict
:
# Computes the intron splicing for each transcript.
# Computes the intron splicing for each transcript.
...
@@ -136,7 +136,7 @@ class BuildTranscriptStructure:
...
@@ -136,7 +136,7 @@ class BuildTranscriptStructure:
self
.
gene_transcript_dict
[
gene
]
=
transcript_numbers
self
.
gene_transcript_dict
[
gene
]
=
transcript_numbers
def
_make_gtf_info
(
self
)
->
None
:
def
_make_gtf_info
(
self
)
->
None
:
"""
Writes the lines of the new gtf file for the differently spliced transcripts.
"""
"""
Writes the lines of the new gtf file for the differently spliced transcripts.
"""
for
gene
in
self
.
gene_transcript_dict
:
# Iterates over all genes required.
for
gene
in
self
.
gene_transcript_dict
:
# Iterates over all genes required.
self
.
gtf_lines
.
append
(
self
.
gene_sequences_dict
[
gene
][
'
gene_line
'
])
# Add gene line to list.
self
.
gtf_lines
.
append
(
self
.
gene_sequences_dict
[
gene
][
'
gene_line
'
])
# Add gene line to list.
sense
=
self
.
gene_sequences_dict
[
gene
][
'
strand_sense
'
]
sense
=
self
.
gene_sequences_dict
[
gene
][
'
strand_sense
'
]
...
@@ -189,12 +189,9 @@ class BuildTranscriptStructure:
...
@@ -189,12 +189,9 @@ class BuildTranscriptStructure:
self
.
gtf_lines
.
extend
(
exon_lines
)
self
.
gtf_lines
.
extend
(
exon_lines
)
def
_sort_gtf_lines
(
self
)
->
None
:
def
_sort_gtf_lines
(
self
)
->
None
:
"""
Sorts the gtf lines by the position of the genes (increasing) and returns it.
"""
"""
Sorts the gtf lines by the position of the genes (increasing) and returns it.
"""
# Builds and uses a dictionary with the start of the gene as key, and all lines related to this gene as value:
# Builds and uses a dictionary with the start of the gene as key, and all lines related to this gene as value:
# {start_gene(int): [[gene_line],[transcript_line],[exon_line1],[exon_line2],...]}
# {start_gene(int): [[gene_line],[transcript_line],[exon_line1],[exon_line2],...]}
gene_lines_dict
=
{}
gene_lines_dict
=
{}
gene_start
=
0
# Validation: This key should remain unused, as every gtf file starts with a gene.
gene_start
=
0
# Validation: This key should remain unused, as every gtf file starts with a gene.
for
index
,
line
in
enumerate
(
self
.
gtf_lines
):
for
index
,
line
in
enumerate
(
self
.
gtf_lines
):
...
@@ -214,13 +211,11 @@ class BuildTranscriptStructure:
...
@@ -214,13 +211,11 @@ class BuildTranscriptStructure:
def
write_csv
(
self
,
def
write_csv
(
self
,
csv_output
:
str
csv_output
:
str
)
->
None
:
)
->
None
:
"""
Writes a csv file containing the number of differently spliced transcripts.
"""
Writes a csv file containing the number of differently spliced transcripts.
Args:
Args:
csv_output(str): Path and name of the output csv file:
"
transcript_ID
"
,
"
gene_ID
"
, count.
csv_output(str): Path and name of the output csv file:
"
transcript_ID
"
,
"
gene_ID
"
, count.
"""
"""
with
open
(
csv_output
,
'
w
'
,
newline
=
''
)
as
file
:
with
open
(
csv_output
,
'
w
'
,
newline
=
''
)
as
file
:
writer
=
csv
.
writer
(
file
)
writer
=
csv
.
writer
(
file
)
writer
.
writerow
([
'
Transcript_ID
'
,
'
Gene_ID
'
,
'
count
'
])
writer
.
writerow
([
'
Transcript_ID
'
,
'
Gene_ID
'
,
'
count
'
])
...
@@ -231,8 +226,7 @@ class BuildTranscriptStructure:
...
@@ -231,8 +226,7 @@ class BuildTranscriptStructure:
def
write_gtf
(
self
,
def
write_gtf
(
self
,
gtf_output
:
str
gtf_output
:
str
)
->
None
:
)
->
None
:
"""
Writes a gtf file with the information about the differently spliced transcripts.
"""
Writes a gtf file with the information about the differently spliced transcripts.
Args:
Args:
gtf_output(str): Path and name of the output gtf file with the information of all relevant transcripts.
gtf_output(str): Path and name of the output gtf file with the information of all relevant transcripts.
...
@@ -242,7 +236,7 @@ class BuildTranscriptStructure:
...
@@ -242,7 +236,7 @@ class BuildTranscriptStructure:
def
main
():
def
main
():
"""
Main Function.
"""
"""
Main Function.
"""
# Inputs
# Inputs
# gene_count = 'gene_count/Rik_5.csv' # Strand with + sense
# gene_count = 'gene_count/Rik_5.csv' # Strand with + sense
# gene_count = 'gene_count/Rp1_5.csv' # Strand with - sense.
# gene_count = 'gene_count/Rp1_5.csv' # Strand with - sense.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment