diff --git a/transcript_structure/Outputs/csv_new.csv b/transcript_structure/Outputs/csv_new.csv index 88a301674c9709792b1fa39ba3b721fb76dcf010..3dcf4bda7b86b89378ea50b4e2dd065194c8cb6a 100644 --- a/transcript_structure/Outputs/csv_new.csv +++ b/transcript_structure/Outputs/csv_new.csv @@ -1,8 +1,3 @@ Transcript_ID,Gene_ID,count -1700034P13Rik-010,1700034P13Rik,3 -1700034P13Rik-100,1700034P13Rik,1 -1700034P13Rik-000,1700034P13Rik,1 -Rp1-111,Rp1,1 -Rp1-000,Rp1,2 -Rp1-010,Rp1,1 -Rp1-001,Rp1,1 +1700034P13Rik-111,1700034P13Rik,5 +Rp1-111,Rp1,5 diff --git a/transcript_structure/Outputs/gtf_new.gtf b/transcript_structure/Outputs/gtf_new.gtf index e80579574e8088396d34b4d736efcbfaf8322d88..b710cb49d9579682c794ccb2b78d43cd3c1442b2 100644 --- a/transcript_structure/Outputs/gtf_new.gtf +++ b/transcript_structure/Outputs/gtf_new.gtf @@ -1,30 +1,6 @@ 1 ensembl_havana gene 3999557 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; 1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1"; 1 havana exon 4409170 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-111"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana transcript 4290846 4409241 . - . 
gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1"; -1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4351910 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "4"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-000"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana transcript 4290846 4409241 . 
- . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1"; -1 havana exon 4409170 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4352202 4352081 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4290846 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-010"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana transcript 4290846 4409241 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1"; -1 havana exon 4409170 4409241 . - . 
gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "1"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4352202 4352837 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "2"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; -1 havana exon 4351910 4293012 . - . gene_id "ENSMUSG00000025900"; gene_version "12"; transcript_id "ENSMUST00000208793"; transcript_version "1"; exon_number "3"; gene_name "Rp1"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "Rp1-001"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSMUSE00001379779"; exon_version "1"; transcript_support_level "1"; 1 havana gene 9747648 9791924 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; -1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1"; -1 havana exon 9747648 9748604 . + . 
gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9752449 9789780 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-010"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1"; -1 havana exon 9747648 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9789656 9789780 . + . 
gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-100"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1"; -1 havana exon 9747648 9748604 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9752449 9752564 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "2"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9789656 9789780 . + . 
gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "3"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; -1 havana exon 9791125 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "4"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-000"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1"; +1 havana transcript 9747648 9791922 . + . gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "1"; +1 havana exon 9747648 9791922 . + . 
gene_id "ENSMUSG00000097893"; gene_version "8"; transcript_id "ENSMUST00000181821"; transcript_version "7"; exon_number "1"; gene_name "1700034P13Rik"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "1700034P13Rik-111"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSMUSE00001104564"; exon_version "1"; transcript_support_level "1";
diff --git a/transcript_structure/cli.py b/transcript_structure/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfdc5c2d81f7a095482eb2c264f6ce58112c89d3
--- /dev/null
+++ b/transcript_structure/cli.py
@@ -0,0 +1,149 @@
+"""Command Line Interface for the transcript structure generator."""
+
+import argparse
+import logging
+import signal
+import sys
+from enum import Enum
+
+from transcript_structure import __version__
+from transcript_structure.Generate_transcript_structure import BuildTranscriptStructure as bts
+
+LOG = logging.getLogger(__name__)
+
+
+class LogLevels(Enum):
+    """Log level enumerator.
+
+    Member names are the values accepted by ``--verbosity``; member values
+    are the corresponding ``logging`` level constants. ``WARN`` and
+    ``WARNING`` share a value, so ``WARNING`` is an alias of ``WARN``.
+    """
+
+    DEBUG = logging.DEBUG
+    INFO = logging.INFO
+    WARN = logging.WARNING
+    WARNING = logging.WARNING
+    ERROR = logging.ERROR
+    CRITICAL = logging.CRITICAL
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse CLI arguments.
+
+    Returns:
+        Parsed CLI arguments.
+    """
+    # set metadata
+    description = (
+        f"{sys.modules[__name__].__doc__}\n\n"
+        ""
+    )
+    epilog = (
+        f"%(prog)s v{__version__}, (c) 2021 by Zavolab "
+        "(zavolab-biozentrum@unibas.ch)"
+    )
+
+    # instantiate parser; the default -h/--help option stays enabled so the
+    # description, epilog and argument defaults can actually be displayed
+    parser = argparse.ArgumentParser(
+        description=description,
+        epilog=epilog,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # add arguments
+    parser.add_argument(
+        'input_gene_count',
+        type=str,
+        help="path and name of comma-separated file with gene abundances per gene",
+    )
+    parser.add_argument(
+        'input_coordinates',
+        type=str,
+        help="path and name of gtf file with exon information about all genes",
+    )
+    parser.add_argument(
+        "--p_intron",
+        default=0,
+        type=float,
+        help="probability to include an intron",
+    )
+    parser.add_argument(
+        "csv_output_name",
+        type=str,
+        help="path and name of comma-separated file with count of differently spliced transcripts",
+    )
+    parser.add_argument(
+        "gtf_output_name",
+        type=str,
+        help="path and name of gtf file with intron coordinates of differently spliced transcripts",
+    )
+    parser.add_argument(
+        "--verbosity",
+        # __members__ also lists aliases, so "WARNING" is accepted too;
+        # plain iteration over the Enum would skip it
+        choices=list(LogLevels.__members__),
+        default=LogLevels.INFO.name,
+        type=str,
+        help="logging verbosity level",
+    )
+
+    # return parsed arguments
+    return parser.parse_args()
+
+
+def setup_logging(verbosity: str = 'INFO') -> None:
+    """Configure logging.
+
+    Args:
+        verbosity: Level of logging verbosity; must be the name of a
+            ``LogLevels`` member.
+
+    Raises:
+        KeyError: If ``verbosity`` is not the name of a ``LogLevels`` member.
+    """
+    level = LogLevels[verbosity].value
+    logging.basicConfig(
+        level=level,
+        format="[%(asctime)s %(levelname)s] %(message)s",
+        datefmt='%Y-%m-%d %H:%M:%S',
+    )
+
+
+def main() -> None:
+    """Entry point for CLI executable.
+
+    Parses the CLI arguments, configures logging, generates the transcript
+    structures and writes the CSV and GTF outputs. Exits with status
+    ``128 + SIGINT`` if execution is interrupted from the keyboard.
+    """
+    try:
+        # handle CLI args
+        args = parse_args()
+
+        # set up logging
+        setup_logging(verbosity=args.verbosity)
+        LOG.info("Started transcript structure builder...")
+        # lazy %-formatting: args are only rendered if DEBUG is enabled
+        LOG.debug("CLI arguments: %s", args)
+
+        builder = bts(
+            input_gene_count=args.input_gene_count,
+            input_coordinates=args.input_coordinates,
+            p_intron=args.p_intron,
+        )
+        builder.generate_transcript_structure()
+        builder.write_csv(args.csv_output_name)
+        builder.write_gtf(args.gtf_output_name)
+
+    except KeyboardInterrupt:
+        LOG.error('Execution interrupted.')
+        sys.exit(128 + signal.SIGINT)
+
+    # conclude execution
+    LOG.info("Done.")
+
+
+if __name__ == '__main__':
+    main()