Resolve "Formatting and logging"

aa1f69ac · Larissa Glass · b8eef73a · aa1f69ac · aa1f69ac · aa1f69ac
Commit aa1f69ac authored 2 years ago by Larissa Glass
--- a/README.md
+++ b/README.md
@@ -32,10 +32,10 @@ To generate the sampled transcripts, open a new shell, activate your environment
 ```
 conda activate transcript-structure-generator

-transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion>
+transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> [--log "INFO"]
 ```

-where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1].
+where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. The `--log` option is optional and sets the logging level; it can be one of `["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]`. The default is `INFO`.


 # Development

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,9 @@ dependencies = [
    "tqdm",
 ]

+[tool.setuptools]
+packages = ["tsg"]
+
 [project.urls]
 "Homepage" = "https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator"


--- a/tsg/cli.py
+++ b/tsg/cli.py
@@ -5,13 +5,19 @@ from pathlib import Path
 from tsg.main import sample_transcripts


-def setup_logging(loglevel: str=None) -> None:
+def setup_logging(loglevel: str = None) -> None:
    # Set up logging
    if loglevel:
        numeric_level = getattr(logging, loglevel.upper())
        if not isinstance(numeric_level, int):
            raise ValueError("Invalid log level: %s" % loglevel)
-        logging.basicConfig(level=numeric_level)
+    else:
+        numeric_level = getattr(logging, "INFO")
+
+    logging.basicConfig(
+        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
+        level=numeric_level,
+    )


 def build_arg_parser():
@@ -42,6 +48,7 @@ def output_filename(filename: str) -> str:
        raise NotImplementedError()
    return outfile

+
 def app():
    args = get_args()


--- a/tsg/main.py
+++ b/tsg/main.py
@@ -229,12 +229,16 @@ class TranscriptGenerator:
        if self.strand == "+":
            origninal_end = df_generated["end"]
            df_generated["end"] = np.where(
-                inclusions, df_generated["start"].shift(periods=-1, fill_value=-1) - 1, origninal_end
+                inclusions,
+                df_generated["start"].shift(periods=-1, fill_value=-1) - 1,
+                origninal_end,
            )
        if self.strand == "-":
            origninal_start = df_generated["start"]
            df_generated["start"] = np.where(
-                inclusions, df_generated["end"].shift(periods=-1, fill_value=-1) + 1, origninal_start
+                inclusions,
+                df_generated["end"].shift(periods=-1, fill_value=-1) + 1,
+                origninal_start,
            )

        original_id = df_generated["exon_id"]
@@ -269,7 +273,7 @@ class TranscriptGenerator:
            df = reverse_parse_free_text(df)

            write_gtf(df, filename)
-            LOG.info(f"Transcript {self.id} sampled")
+            LOG.debug(f"Transcript {self.id} sampled")
        except ValueError:
            LOG.error(f"Transcript {self.id} could not be sampled.")

@@ -283,10 +287,13 @@ def sample_transcripts(
 ):
    transcripts = read_abundances(input_transcripts_file)

+    LOG.info("Parsing annotations...")
    annotations = Gtf()
    annotations.read_file(input_annotations_file)
    annotations.parse_free_text()
+    LOG.info("Done parsing...")

+    LOG.info("Start sampling transcripts...")
    # Set up output file, write header once and append data in loop
    write_header(output_annotations_file)

@@ -303,3 +310,4 @@ def sample_transcripts(
        )
        transcripts.generate_annotations(output_annotations_file)
        transcripts.generate_transcripts(output_transcripts_file)
+    LOG.info("Done.")