Skip to content
Snippets Groups Projects
Commit aa1f69ac authored by Larissa Glass
Browse files

Resolve "Formatting and logging"

parent b8eef73a
No related branches found
No related tags found
1 merge request: !10 Resolve "Formatting and logging"
...@@ -32,10 +32,10 @@ To generate the sampled transcripts, open a new shell, activate your environment ...@@ -32,10 +32,10 @@ To generate the sampled transcripts, open a new shell, activate your environment
``` ```
conda activate transcript-structure-generator conda activate transcript-structure-generator
transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> [--log "INFO"]
``` ```
where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. The log parameter is optional and can be one of `["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]`. The default is `INFO`.
# Development # Development
......
...@@ -15,6 +15,9 @@ dependencies = [ ...@@ -15,6 +15,9 @@ dependencies = [
"tqdm", "tqdm",
] ]
[tool.setuptools]
packages = ["tsg"]
[project.urls] [project.urls]
"Homepage" = "https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator" "Homepage" = "https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator"
......
...@@ -5,13 +5,19 @@ from pathlib import Path ...@@ -5,13 +5,19 @@ from pathlib import Path
from tsg.main import sample_transcripts from tsg.main import sample_transcripts
def setup_logging(loglevel: str = None) -> None:
    """Configure the root logger for the command-line application.

    Args:
        loglevel: Name of a standard logging level, case-insensitive
            (e.g. "DEBUG" or "info"). INFO is used when the argument is
            omitted or empty.

    Raises:
        ValueError: If ``loglevel`` does not name an integer level constant
            of the ``logging`` module.
    """
    if loglevel:
        # Pass a default so an unknown name reaches the ValueError below
        # instead of escaping from getattr as an AttributeError.
        numeric_level = getattr(logging, loglevel.upper(), None)
        if not isinstance(numeric_level, int):
            raise ValueError("Invalid log level: %s" % loglevel)
    else:
        numeric_level = logging.INFO

    logging.basicConfig(
        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
        level=numeric_level,
    )
def build_arg_parser(): def build_arg_parser():
...@@ -42,6 +48,7 @@ def output_filename(filename: str) -> str: ...@@ -42,6 +48,7 @@ def output_filename(filename: str) -> str:
raise NotImplementedError() raise NotImplementedError()
return outfile return outfile
def app(): def app():
args = get_args() args = get_args()
......
...@@ -229,12 +229,16 @@ class TranscriptGenerator: ...@@ -229,12 +229,16 @@ class TranscriptGenerator:
if self.strand == "+": if self.strand == "+":
origninal_end = df_generated["end"] origninal_end = df_generated["end"]
df_generated["end"] = np.where( df_generated["end"] = np.where(
inclusions, df_generated["start"].shift(periods=-1, fill_value=-1) - 1, origninal_end inclusions,
df_generated["start"].shift(periods=-1, fill_value=-1) - 1,
origninal_end,
) )
if self.strand == "-": if self.strand == "-":
origninal_start = df_generated["start"] origninal_start = df_generated["start"]
df_generated["start"] = np.where( df_generated["start"] = np.where(
inclusions, df_generated["end"].shift(periods=-1, fill_value=-1) + 1, origninal_start inclusions,
df_generated["end"].shift(periods=-1, fill_value=-1) + 1,
origninal_start,
) )
original_id = df_generated["exon_id"] original_id = df_generated["exon_id"]
...@@ -269,7 +273,7 @@ class TranscriptGenerator: ...@@ -269,7 +273,7 @@ class TranscriptGenerator:
df = reverse_parse_free_text(df) df = reverse_parse_free_text(df)
write_gtf(df, filename) write_gtf(df, filename)
LOG.info(f"Transcript {self.id} sampled") LOG.debug(f"Transcript {self.id} sampled")
except ValueError: except ValueError:
LOG.error(f"Transcript {self.id} could not be sampled.") LOG.error(f"Transcript {self.id} could not be sampled.")
...@@ -283,10 +287,13 @@ def sample_transcripts( ...@@ -283,10 +287,13 @@ def sample_transcripts(
): ):
transcripts = read_abundances(input_transcripts_file) transcripts = read_abundances(input_transcripts_file)
LOG.info("Parsing annotations...")
annotations = Gtf() annotations = Gtf()
annotations.read_file(input_annotations_file) annotations.read_file(input_annotations_file)
annotations.parse_free_text() annotations.parse_free_text()
LOG.info("Done parsing...")
LOG.info("Start sampling transcripts...")
# Set up output file, write header once and append data in loop # Set up output file, write header once and append data in loop
write_header(output_annotations_file) write_header(output_annotations_file)
...@@ -303,3 +310,4 @@ def sample_transcripts( ...@@ -303,3 +310,4 @@ def sample_transcripts(
) )
transcripts.generate_annotations(output_annotations_file) transcripts.generate_annotations(output_annotations_file)
transcripts.generate_transcripts(output_transcripts_file) transcripts.generate_transcripts(output_transcripts_file)
LOG.info("Done.")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment