From 4b19dd953f635654cb8f696012f39db9b8f64b40 Mon Sep 17 00:00:00 2001 From: Larissa Glass <larissa.glass@unibas.ch> Date: Thu, 3 Nov 2022 15:42:26 +0000 Subject: [PATCH] Feature/homework 2022 11 02 --- .gitignore | 4 ++- LICENSE.md | 21 +++++++++++++++ README.md | 26 +++++++++++++++--- environment.yml | 1 + setup.py | 18 +++++++++++++ src/main.py | 17 ------------ {src => tsg}/__init__.py | 4 ++- tsg/cli.py | 57 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 126 insertions(+), 22 deletions(-) create mode 100644 LICENSE.md create mode 100644 setup.py delete mode 100644 src/main.py rename {src => tsg}/__init__.py (50%) create mode 100644 tsg/cli.py diff --git a/.gitignore b/.gitignore index bfe0573..dc92f04 100644 --- a/.gitignore +++ b/.gitignore @@ -156,4 +156,6 @@ dmypy.json # Cython debug symbols cython_debug/ -# End of https://www.toptal.com/developers/gitignore/api/python,git \ No newline at end of file +# End of https://www.toptal.com/developers/gitignore/api/python,git + +data \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..2313fb3 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 58cfa03..0a814af 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ conda activate transcript-structure-generator # Usage Input: -- Csv-formatted file ("ID,Count") with counts for individual transcripts -- Probability of intron inclusion (float in range [0,1]) +- csv-formatted file ("ID,Count") with counts for individual transcripts +- probability of intron inclusion (float in range [0,1]) - gtf-formatted file with exon coordinates of the transcripts included in the csv file Output: @@ -19,4 +19,24 @@ Output: - csv-formatted file ("NewTranscriptID,ID,Count") with - id of generated transcript - id of original transcript (without intron inclusions) - - count \ No newline at end of file + - count + +To install package, run + +``` +pip install . +``` + +Afterwards, it can be imported using + +```python +import tsg +``` + +To generate the sampled transcripts, run + +``` +transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> +``` + +where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. 
\ No newline at end of file diff --git a/environment.yml b/environment.yml index 2199378..3287b74 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ dependencies: - python=3.9 - matplotlib - pandas + - pip - flake8-docstrings - mypy - flake8 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3352b1f --- /dev/null +++ b/setup.py @@ -0,0 +1,18 @@ +"""Setup the package.""" + +from setuptools import setup + +setup( + name='tsg', + author='Zimmermann, M; Fraenkl, A;Glass, L', + url='https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator', + license='MIT', + version='0.0.1', + packages=['tsg'], + install_requires=['pandas'], + entry_points={ + 'console_scripts': [ + 'transcript-generator = tsg:cli', + ] + } +) diff --git a/src/main.py b/src/main.py deleted file mode 100644 index ea1a4d9..0000000 --- a/src/main.py +++ /dev/null @@ -1,17 +0,0 @@ -import argparse -from pathlib import Path - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--transcripts", type=str) - parser.add_argument("--annotation", type=str) - parser.add_argument("--prob_inclusion", type=float) - args = parser.parse_args() - - input_transcripts_file = args.transcripts - input_annotations_file = args.annotation - prob_inclusion = args.prob_inclusion - input_transcripts_path = Path(input_transcripts_file) - input_annotations_path = Path(input_annotations_file) - output_transcripts_file = "generated_" + input_transcripts_path.stem + ".csv" - output_annotations_file = "generated_" + input_annotations_path.name diff --git a/src/__init__.py b/tsg/__init__.py similarity index 50% rename from src/__init__.py rename to tsg/__init__.py index d9744d3..b9b9505 100644 --- a/src/__init__.py +++ b/tsg/__init__.py @@ -1,3 +1,5 @@ """Transcript structure generator package.""" -__version__ = '0.0.0' \ No newline at end of file +from tsg.cli import cli + +__version__ = '0.0.0' diff --git a/tsg/cli.py b/tsg/cli.py new file 
"""Command line interface of the transcript structure generator.

Contents of ``tsg/cli.py``: argument parsing, logging setup and the
``cli`` entry point.
"""

import argparse
import logging
from pathlib import Path
from typing import Optional, Sequence


def setup_logging(loglevel: Optional[str] = None) -> None:
    """Configure the root logger with the requested level.

    Args:
        loglevel: Name of a level constant of the ``logging`` module
            (case-insensitive, e.g. ``"debug"`` or ``"INFO"``). If it is
            ``None`` or empty, logging is left unconfigured.

    Raises:
        ValueError: If ``loglevel`` does not name an integer level constant
            of the ``logging`` module.
    """
    if not loglevel:
        return

    # The ``None`` default makes an unknown name reach the ValueError below
    # instead of escaping as an AttributeError from ``getattr``.
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {loglevel}")
    logging.basicConfig(level=numeric_level)


def _probability(value: str) -> float:
    """Convert a command line string to a probability in the range [0,1].

    Args:
        value: Raw command line token.

    Returns:
        The parsed float.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a float or lies
            outside of [0,1] (this also rejects NaN).
    """
    try:
        prob = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid probability: {value!r}"
        ) from None
    if not 0.0 <= prob <= 1.0:
        raise argparse.ArgumentTypeError(
            f"probability must be in the range [0,1], got {value!r}"
        )
    return prob


def build_arg_parser() -> argparse.ArgumentParser:
    """Build the argument parser of the command line interface.

    Returns:
        Parser knowing ``--transcripts``, ``--annotation``,
        ``--prob_inclusion`` and ``--log``.
    """
    parser = argparse.ArgumentParser()
    # Both input files are mandatory: the output file names are derived
    # from them, so a missing value can only fail later and less clearly.
    parser.add_argument(
        "--transcripts",
        type=str,
        required=True,
        help="csv-formatted file with transcript counts",
    )
    parser.add_argument(
        "--annotation",
        type=str,
        required=True,
        help="gtf-formatted file with exon coordinates",
    )
    parser.add_argument(
        "--prob_inclusion",
        type=_probability,
        help="probability of intron inclusion, float in range [0,1]",
    )
    parser.add_argument(
        "--log",
        type=str,
        help="name of the logging level, e.g. INFO or DEBUG",
    )

    return parser


def get_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command line arguments.

    Args:
        argv: Argument strings to parse. If ``None``, ``sys.argv[1:]`` is
            parsed (the default behaviour of ``argparse``).

    Returns:
        Namespace with the attributes ``transcripts``, ``annotation``,
        ``prob_inclusion`` and ``log``.
    """
    return build_arg_parser().parse_args(argv)


def output_filename(filename: str) -> str:
    """Derive the name of the output file from the name of an input file.

    ``.csv`` and ``.tsv`` inputs map to ``generated_<stem>.csv``, ``.gtf``
    inputs map to ``generated_<name>``. Directory components of the input
    are dropped.

    Args:
        filename: Path of the input file.

    Returns:
        File name (without directory) of the corresponding output file.

    Raises:
        ValueError: If ``filename`` has none of the supported extensions.
    """
    filepath = Path(filename)
    if filename.endswith((".csv", ".tsv")):
        return "generated_" + filepath.stem + ".csv"
    if filename.endswith(".gtf"):
        return "generated_" + filepath.name
    # Previously this fell through and returned None, which was then handed
    # on as an output path.
    raise ValueError(
        f"Unsupported file type: {filename} (expected .csv, .tsv or .gtf)"
    )


def cli() -> None:
    """Run the transcript sampling from the command line."""
    args = get_args()
    setup_logging(args.log)

    # Derive both names first so that an unsupported extension is reported
    # before any sampling work starts.
    output_transcripts = output_filename(args.transcripts)
    output_annotation = output_filename(args.annotation)

    # Imported here rather than at module level so that ``--help`` and
    # argument errors are reported without first loading the sampling code.
    from .main import sample_transcripts

    sample_transcripts(
        args.transcripts,
        args.annotation,
        args.prob_inclusion,
        output_transcripts,
        output_annotation,
    )


if __name__ == "__main__":
    cli()