diff --git a/README.md b/README.md index 2d8c3ba9f2149b4cd0b9e01f67a9b0060e3ebf3d..09e3801da678be7d9ca5820f8a47c0ace0fdb9f6 100644 --- a/README.md +++ b/README.md @@ -32,10 +32,10 @@ To generate the sampled transcripts, open a new shell, activate your environment ``` conda activate transcript-structure-generator -transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> +transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> [--log "INFO"] ``` -where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. +where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. The log parameter is optional and can be one of `["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]`. The default is `INFO`. # Development diff --git a/pyproject.toml b/pyproject.toml index de522e9e6d4e73b1852e910bcc02eed53e09653d..1214e21f73e7f024bf4d0b19a7b7b4ce41661201 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,9 @@ dependencies = [ "tqdm", ] +[tool.setuptools] +packages = ["tsg"] + [project.urls] "Homepage" = "https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator" diff --git a/tsg/cli.py b/tsg/cli.py index f448fcc305b765b311a1233ce45bddaa01968e03..57b6e0e9e95b796def7a7952875de1dafcf9e643 100644 --- a/tsg/cli.py +++ b/tsg/cli.py @@ -5,13 +5,19 @@ from pathlib import Path from tsg.main import sample_transcripts -def setup_logging(loglevel: str=None) -> None: +def setup_logging(loglevel: str = None) -> None: # Set up logging if loglevel: numeric_level = getattr(logging, loglevel.upper()) if not isinstance(numeric_level, int): raise ValueError("Invalid log level: %s" % loglevel) - logging.basicConfig(level=numeric_level) + else: + numeric_level = getattr(logging, "INFO") + + logging.basicConfig( + format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', + level=numeric_level, + ) def build_arg_parser(): @@ -42,6 +48,7 @@ def output_filename(filename: str) -> str: raise NotImplementedError() return outfile + def app(): args = get_args() diff --git a/tsg/main.py b/tsg/main.py index 29e1420e0045d559d61b024d8a691a4f3218152c..9f506bf67e9efe623b29a930deb3385d4f377b2c 100644 --- a/tsg/main.py +++ b/tsg/main.py @@ -229,12 +229,16 @@ class TranscriptGenerator: if self.strand == "+": origninal_end = df_generated["end"] df_generated["end"] = np.where( - inclusions, df_generated["start"].shift(periods=-1, fill_value=-1) - 1, origninal_end + inclusions, + df_generated["start"].shift(periods=-1, fill_value=-1) - 1, + origninal_end, ) if self.strand == "-": origninal_start = df_generated["start"] df_generated["start"] = np.where( - inclusions, df_generated["end"].shift(periods=-1, fill_value=-1) + 1, origninal_start + inclusions, + df_generated["end"].shift(periods=-1, fill_value=-1) + 1, + origninal_start, ) original_id = df_generated["exon_id"] @@ -269,7 +273,7 @@ class TranscriptGenerator: df = reverse_parse_free_text(df) write_gtf(df, filename) - LOG.info(f"Transcript {self.id} sampled") + LOG.debug(f"Transcript {self.id} sampled") except ValueError: LOG.error(f"Transcript {self.id} could not be sampled.") @@ -283,10 +287,13 @@ def sample_transcripts( ): transcripts = read_abundances(input_transcripts_file) + LOG.info("Parsing annotations...") annotations = Gtf() annotations.read_file(input_annotations_file) annotations.parse_free_text() + LOG.info("Done parsing...") + LOG.info("Start sampling transcripts...") # Set up output file, write header once and append data in loop write_header(output_annotations_file) @@ -303,3 +310,4 @@ def sample_transcripts( ) transcripts.generate_annotations(output_annotations_file) transcripts.generate_transcripts(output_transcripts_file) + LOG.info("Done.")