Skip to content
Snippets Groups Projects
Commit de2d344c authored by Timon Baltisberger's avatar Timon Baltisberger
Browse files

add: command line interface

parent 88665b17
No related branches found
No related tags found
1 merge request: !15 add: generate transcript structure
Pipeline #13680 passed
Transcript_ID,Gene_ID,count
1700034P13Rik-010,1700034P13Rik,3
1700034P13Rik-100,1700034P13Rik,1
1700034P13Rik-000,1700034P13Rik,1
Rp1-111,Rp1,1
Rp1-000,Rp1,2
Rp1-010,Rp1,1
Rp1-001,Rp1,1
1700034P13Rik-111,1700034P13Rik,5
Rp1-111,Rp1,5
1 ensembl_havana gene 3999557 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4351910 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "4"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana exon 4351910 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1";
1 havana gene 9747648 9791924 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9748604 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9752449 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9789656 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9748604 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9752449 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9789656 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "4"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1";
1 havana exon 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
"""Command Line Interface for the transcript structure generator."""
import argparse
import logging
import signal
import sys
from enum import Enum

from transcript_structure import __version__
from transcript_structure.Generate_transcript_structure import BuildTranscriptStructure as bts
# Module-level logger, named after this module; used by main() for status messages.
LOG = logging.getLogger(__name__)
class LogLevels(Enum):
    """Map verbosity names to the numeric levels of the ``logging`` module.

    ``WARN`` and ``WARNING`` share the same value, so ``WARNING`` is an enum
    alias of ``WARN``: lookups by either name succeed, but iterating over the
    class yields only ``WARN``.
    """

    # Ordered from most to least verbose.
    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARN = logging.WARNING
    WARNING = logging.WARNING  # alias of WARN (same value)
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL
def _probability(value: str) -> float:
    """Convert a CLI string to a float and check it lies within [0, 1].

    Args:
        value: Raw command-line string.

    Returns:
        The parsed value as a float.

    Raises:
        argparse.ArgumentTypeError: If the value is not a number or is
            outside the closed interval [0, 1].
    """
    try:
        number = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid float value: {value!r}"
        ) from None
    # The chained comparison is also False for NaN, which is rejected too.
    if not 0.0 <= number <= 1.0:
        raise argparse.ArgumentTypeError(
            f"probability must be between 0 and 1, got {value!r}"
        )
    return number


def parse_args() -> argparse.Namespace:
    """Parse CLI arguments.

    Positional arguments are taken in this order: ``input_gene_count``,
    ``input_coordinates``, ``csv_output_name``, ``gtf_output_name``.
    Optional arguments are ``--p_intron`` (default 0) and ``--verbosity``
    (default ``INFO``).

    Returns:
        Parsed CLI arguments.
    """
    # set metadata
    description = f"{sys.modules[__name__].__doc__}\n\n"
    epilog = (
        f"%(prog)s v{__version__}, (c) 2021 by Zavolab "
        "(zavolab-biozentrum@unibas.ch)"
    )

    # instantiate parser; the default -h/--help option is kept so that the
    # description, epilog and argument help are reachable by the user
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # add arguments
    parser.add_argument(
        'input_gene_count',
        type=str,
        help="path and name of comma-separated file with gene abundances "
             "per gene",
    )
    parser.add_argument(
        'input_coordinates',
        type=str,
        help="path and name of gtf file with exon information about all "
             "genes",
    )
    parser.add_argument(
        "--p_intron",
        default=0,
        type=_probability,
        help="probability to include an intron",
    )
    parser.add_argument(
        "csv_output_name",
        type=str,
        help="path and name of comma-separated file with count of "
             "differently spliced transcripts",
    )
    parser.add_argument(
        "gtf_output_name",
        type=str,
        help="path and name of gtf file with intron coordinates of "
             "differently spliced transcripts",
    )
    parser.add_argument(
        "--verbosity",
        # __members__ includes enum aliases (WARNING), which plain iteration
        # over the enum class would skip
        choices=list(LogLevels.__members__),
        default=LogLevels.INFO.name,
        type=str,
        help="logging verbosity level",
    )

    # return parsed arguments
    return parser.parse_args()
def setup_logging(verbosity: str = 'INFO') -> None:
    """Set up the root logger via ``logging.basicConfig``.

    Args:
        verbosity: Name of a ``LogLevels`` member selecting the log level.
    """
    logging.basicConfig(
        format="[%(asctime)s %(levelname)s] %(message)s",
        datefmt='%Y-%m-%d %H:%M:%S',
        # translate the level name into the numeric logging level
        level=LogLevels[verbosity].value,
    )
def main() -> None:
    """Entry point for CLI executable.

    Parses the command-line arguments, configures logging, builds the
    transcript structure builder from the two input paths and the intron
    probability, then calls its ``generate_transcript_structure``,
    ``write_csv`` and ``write_gtf`` methods in that order.

    Raises:
        SystemExit: With status ``128 + SIGINT`` when execution is
            interrupted by the user (``KeyboardInterrupt``).
    """
    try:
        # handle CLI args
        args = parse_args()

        # set up logging
        setup_logging(verbosity=args.verbosity)
        LOG.info("Started transcript structure builder...")
        # lazy %-formatting: the message is only built if DEBUG is enabled
        LOG.debug("CLI arguments: %s", args)

        builder = bts(
            input_gene_count=args.input_gene_count,
            input_coordinates=args.input_coordinates,
            p_intron=args.p_intron,
        )
        builder.generate_transcript_structure()
        builder.write_csv(args.csv_output_name)
        builder.write_gtf(args.gtf_output_name)

    except KeyboardInterrupt:
        LOG.error('Execution interrupted.')
        # conventional exit status for termination by a signal: 128 + signum
        sys.exit(128 + signal.SIGINT)

    # conclude execution
    LOG.info("Done.")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment