Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scRNA-seq-simulation
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
pipelines
scRNA-seq-simulation
Commits
de2d344c
Commit
de2d344c
authored
3 years ago
by
Timon Baltisberger
Browse files
Options
Downloads
Patches
Plain Diff
add: command line interface
parent
88665b17
No related branches found
No related tags found
1 merge request
!15
add: generate transcript structure
Pipeline
#13680
passed
3 years ago
Stage: qc
Stage: test
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
transcript_structure/Outputs/csv_new.csv
+2
-7
2 additions, 7 deletions
transcript_structure/Outputs/csv_new.csv
transcript_structure/Outputs/gtf_new.gtf
+2
-26
2 additions, 26 deletions
transcript_structure/Outputs/gtf_new.gtf
transcript_structure/cli.py
+129
-0
129 additions, 0 deletions
transcript_structure/cli.py
with
133 additions
and
33 deletions
transcript_structure/Outputs/csv_new.csv
+
2
−
7
View file @
de2d344c
Transcript_ID,Gene_ID,count
1700034P13Rik-010,1700034P13Rik,3
1700034P13Rik-100,1700034P13Rik,1
1700034P13Rik-000,1700034P13Rik,1
Rp1-111,Rp1,1
Rp1-000,Rp1,2
Rp1-010,Rp1,1
Rp1-001,Rp1,1
1700034P13Rik-111,1700034P13Rik,5
Rp1-111,Rp1,5
This diff is collapsed.
Click to expand it.
transcript_structure/Outputs/gtf_new.gtf
+
2
−
26
View file @
de2d344c
1 ensembl_havana gene 3999557 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4351910 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "4"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4351910 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana gene 9747648 9791924 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9748604 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9752449 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9789656 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9748604 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9752449 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9789656 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "4"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
This diff is collapsed.
Click to expand it.
transcript_structure/cli.py
0 → 100644
+
129
−
0
View file @
de2d344c
"""
Command Line Interface for the transcript structure generator.
"""
import
argparse
import
logging
import
sys
from
enum
import
Enum
from
transcript_structure
import
__version__
from
transcript_structure.Generate_transcript_structure
import
BuildTranscriptStructure
as
bts
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
class LogLevels(Enum):
    """Named verbosity levels mapped onto the ``logging`` level constants.

    ``WARN`` and ``WARNING`` share the same value, which makes ``WARNING``
    an alias of ``WARN``: both names can be looked up, but iterating over
    the enumeration yields ``WARN`` only.
    """

    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARN = logging.WARNING  # canonical member for the warning level
    WARNING = logging.WARNING  # alias of WARN (identical value)
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments.

    Positional arguments, in order: ``input_gene_count``,
    ``input_coordinates``, ``csv_output_name`` and ``gtf_output_name``.
    Optional arguments: ``--p_intron`` (float, default 0) and
    ``--verbosity`` (name of a ``LogLevels`` member, default ``INFO``).

    Returns:
        Parsed CLI arguments.
    """
    # set metadata
    description = f"{sys.modules[__name__].__doc__}\n\n"
    epilog = (
        f"%(prog)s v{__version__}, (c) 2021 by Zavolab "
        "(zavolab-biozentrum@unibas.ch)"
    )

    # instantiate parser
    # The built-in -h/--help option stays enabled: no custom help argument
    # is registered below, so disabling it would leave the description,
    # epilog and per-argument help unreachable.
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # add arguments
    parser.add_argument(
        'input_gene_count',
        type=str,
        help="path and name of comma-separated file with gene abundances "
             "per gene",
    )
    parser.add_argument(
        'input_coordinates',
        type=str,
        help="path and name of gtf file with exon information about all "
             "genes",
    )
    parser.add_argument(
        "--p_intron",
        default=0,
        type=float,
        help="probability to include an intron",
    )
    parser.add_argument(
        "csv_output_name",
        type=str,
        help="path and name of comma-separated file with count of "
             "differently spliced transcripts",
    )
    parser.add_argument(
        "gtf_output_name",
        type=str,
        help="path and name of gtf file with intron coordinates of "
             "differently spliced transcripts",
    )
    parser.add_argument(
        "--verbosity",
        # __members__ also lists aliases; plain iteration over the enum
        # skips them, which would reject "WARNING" (an alias of "WARN")
        # even though LogLevels["WARNING"] is a valid lookup.
        choices=list(LogLevels.__members__),
        default=LogLevels.INFO.name,
        type=str,
        help="logging verbosity level",
    )

    # return parsed arguments
    return parser.parse_args()
def setup_logging(verbosity: str = 'INFO') -> None:
    """Set up the root logger for the command line run.

    Args:
        verbosity: Name of a ``LogLevels`` member; its value is used as
            the logging level.
    """
    logging.basicConfig(
        datefmt='%Y-%m-%d %H:%M:%S',
        format="[%(asctime)s %(levelname)s] %(message)s",
        level=LogLevels[verbosity].value,
    )
def main() -> None:
    """Entry point for CLI executable.

    Parses the command line arguments, configures logging, then creates
    the transcript structure builder, runs its
    ``generate_transcript_structure`` step and hands the requested output
    paths to its ``write_csv`` and ``write_gtf`` methods.

    Raises:
        SystemExit: With status ``128 + signal.SIGINT`` when execution is
            interrupted from the keyboard.
    """
    # Imported here because the module-level imports do not provide
    # `signal`; without it the interrupt handler below failed with a
    # NameError instead of exiting with the intended status.
    import signal

    try:
        # handle CLI args
        args = parse_args()

        # set up logging
        setup_logging(verbosity=args.verbosity)
        LOG.info("Started transcript structure builder...")
        LOG.debug("CLI arguments: %s", args)

        builder = bts(
            input_gene_count=args.input_gene_count,
            input_coordinates=args.input_coordinates,
            p_intron=args.p_intron,
        )
        builder.generate_transcript_structure()
        builder.write_csv(args.csv_output_name)
        builder.write_gtf(args.gtf_output_name)

    except KeyboardInterrupt:
        LOG.error('Execution interrupted.')
        sys.exit(128 + signal.SIGINT)

    # conclude execution (only reached when no interrupt occurred)
    LOG.info("Done.")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment