diff --git a/.gitignore b/.gitignore index bfe05739d4822fa33b071bdd21d39ac3cb90940e..dc92f048e63319c243f77db6273f57ed8cd93931 100644 --- a/.gitignore +++ b/.gitignore @@ -156,4 +156,6 @@ dmypy.json # Cython debug symbols cython_debug/ -# End of https://www.toptal.com/developers/gitignore/api/python,git \ No newline at end of file +# End of https://www.toptal.com/developers/gitignore/api/python,git + +data \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..2313fb3329fb7fe0c110628879967409dd7d3105 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md index 58cfa033adb314cd8add28f69dc37ea581cbb0b8..0a814af155e0c27171d1ff8568b5f2c15de05ae4 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ conda activate transcript-structure-generator # Usage Input: -- Csv-formatted file ("ID,Count") with counts for individual transcripts -- Probability of intron inclusion (float in range [0,1]) +- csv-formatted file ("ID,Count") with counts for individual transcripts +- probability of intron inclusion (float in range [0,1]) - gtf-formatted file with exon coordinates of the transcripts included in the csv file Output: @@ -19,4 +19,24 @@ Output: - csv-formatted file ("NewTranscriptID,ID,Count") with - id of generated transcript - id of original transcript (without intron inclusions) - - count \ No newline at end of file + - count + +To install the package, run + +``` +pip install . +``` + +Afterwards, it can be imported using + +```python +import tsg +``` + +To generate the sampled transcripts, run + +``` +transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> +``` + +where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. 
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 2199378c8f19bb33d873e2e095caece8417dc8af..3287b74c3d5dda04a146e078b53e86c512da9806 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,6 +6,7 @@ dependencies:
   - python=3.9
   - matplotlib
   - pandas
+  - pip
   - flake8-docstrings
   - mypy
   - flake8
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..3352b1f711749b9a9816c794b818be70ca5677b9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,20 @@
+"""Setup the package."""
+
+from setuptools import setup
+
+setup(
+    name='tsg',
+    author='Zimmermann, M; Fraenkl, A;Glass, L',
+    url='https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator',
+    license='MIT',
+    version='0.0.1',
+    packages=['tsg'],
+    install_requires=['pandas'],
+    entry_points={
+        'console_scripts': [
+            # Target the defining module directly so the script does not
+            # depend on the re-export in tsg/__init__.py.
+            'transcript-generator = tsg.cli:cli',
+        ]
+    }
+)
diff --git a/src/main.py b/src/main.py
deleted file mode 100644
index ea1a4d93fbc05e94153cc1e612826a096ec138cd..0000000000000000000000000000000000000000
--- a/src/main.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import argparse
-from pathlib import Path
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--transcripts", type=str)
-    parser.add_argument("--annotation", type=str)
-    parser.add_argument("--prob_inclusion", type=float)
-    args = parser.parse_args()
-
-    input_transcripts_file = args.transcripts
-    input_annotations_file = args.annotation
-    prob_inclusion = args.prob_inclusion
-    input_transcripts_path = Path(input_transcripts_file)
-    input_annotations_path = Path(input_annotations_file)
-    output_transcripts_file = "generated_" + input_transcripts_path.stem + ".csv"
-    output_annotations_file = "generated_" + input_annotations_path.name
diff --git a/src/__init__.py b/tsg/__init__.py
similarity index 50%
rename from src/__init__.py
rename to tsg/__init__.py
index d9744d3ffce1f105fb23f0129578fbeb2cc6b9da..b9b9505644f3871476ec18738d8b1ee7b37f32fe
100644
--- a/src/__init__.py
+++ b/tsg/__init__.py
@@ -1,3 +1,6 @@
 """Transcript structure generator package."""
 
-__version__ = '0.0.0'
\ No newline at end of file
+from tsg.cli import cli
+
+# Keep in sync with `version` in setup.py.
+__version__ = '0.0.1'
diff --git a/tsg/cli.py b/tsg/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..72da7ba229bea7448d5ec66beba9ee26d8a20763
--- /dev/null
+++ b/tsg/cli.py
@@ -0,0 +1,124 @@
+"""Command line interface of the transcript structure generator."""
+
+import argparse
+import logging
+from pathlib import Path
+from typing import List, Optional
+
+from .main import sample_transcripts
+
+
+def setup_logging(loglevel: Optional[str] = None) -> None:
+    """Configure logging with the requested level.
+
+    Args:
+        loglevel: Case-insensitive name of a logging level, e.g. "info".
+            If None or empty, logging is left unconfigured.
+
+    Raises:
+        ValueError: If loglevel does not name a numeric logging level.
+    """
+    if not loglevel:
+        return
+    # Default to None so that an unknown name reaches the ValueError below
+    # instead of escaping from getattr as an AttributeError.
+    numeric_level = getattr(logging, loglevel.upper(), None)
+    if not isinstance(numeric_level, int):
+        raise ValueError("Invalid log level: %s" % loglevel)
+    logging.basicConfig(level=numeric_level)
+
+
+def _probability(value: str) -> float:
+    """Convert a command line value to a float in the range [0, 1]."""
+    number = float(value)
+    if not 0.0 <= number <= 1.0:
+        raise argparse.ArgumentTypeError(
+            "%r is not in the range [0, 1]" % value
+        )
+    return number
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    """Build the parser for the transcript-generator command line."""
+    parser = argparse.ArgumentParser()
+    # The inputs are mandatory: cli() derives the output names from them,
+    # so a missing one is rejected here rather than crashing later on None.
+    parser.add_argument(
+        "--transcripts",
+        type=str,
+        required=True,
+        help='csv-formatted file ("ID,Count") with transcript counts',
+    )
+    parser.add_argument(
+        "--annotation",
+        type=str,
+        required=True,
+        help="gtf-formatted file with exon coordinates",
+    )
+    parser.add_argument(
+        "--prob_inclusion",
+        type=_probability,
+        required=True,
+        help="probability of intron inclusion, float in range [0,1]",
+    )
+    parser.add_argument(
+        "--log",
+        type=str,
+        help="name of the logging level, e.g. INFO",
+    )
+
+    return parser
+
+
+def get_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
+    """Parse the command line arguments.
+
+    Args:
+        argv: Arguments to parse. If None, argparse reads sys.argv.
+
+    Returns:
+        Namespace with transcripts, annotation, prob_inclusion and log.
+    """
+    return build_arg_parser().parse_args(argv)
+
+
+def output_filename(filename: str) -> str:
+    """Derive the name of the generated file from an input file name.
+
+    Only the final path component is used, so the result has no directory.
+
+    Args:
+        filename: Name of a .csv/.tsv or .gtf input file.
+
+    Returns:
+        "generated_<stem>.csv" for .csv and .tsv input,
+        "generated_<name>" for .gtf input.
+
+    Raises:
+        ValueError: If filename has none of the supported extensions.
+    """
+    filepath = Path(filename)
+    if filename.endswith((".csv", ".tsv")):
+        return "generated_" + filepath.stem + ".csv"
+    if filename.endswith(".gtf"):
+        return "generated_" + filepath.name
+    # Falling through would return None and hand it on as an output name.
+    raise ValueError("Unsupported file type: %s" % filename)
+
+
+def cli() -> None:
+    """Run the transcript-generator command."""
+    args = get_args()
+
+    setup_logging(args.log)
+    sample_transcripts(
+        args.transcripts,
+        args.annotation,
+        args.prob_inclusion,
+        output_filename(args.transcripts),
+        output_filename(args.annotation),
+    )
+
+
+if __name__ == "__main__":
+    cli()