Feature/homework 2022 11 02

4b19dd95 · Larissa Glass · Michael Zimmermann · 24d35332 · 4b19dd95 · 4b19dd95
Commit 4b19dd95 authored 2 years ago by Larissa Glass Committed by Michael Zimmermann 2 years ago
--- a/.gitignore
+++ b/.gitignore
@@ -156,4 +156,6 @@ dmypy.json
 # Cython debug symbols
 cython_debug/

-# End of https://www.toptal.com/developers/gitignore/api/python,git
\ No newline at end of file
+# End of https://www.toptal.com/developers/gitignore/api/python,git
+
+data
\ No newline at end of file
--- a/LICENSE.md
+++ b/LICENSE.md
+MIT License
+
+Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -10,8 +10,8 @@ conda activate transcript-structure-generator
 # Usage

 Input:
- Csv-formatted file ("ID,Count") with counts for individual transcripts
- Probability of intron inclusion (float in range [0,1])
+- csv-formatted file ("ID,Count") with counts for individual transcripts
+- probability of intron inclusion (float in range [0,1])
 - gtf-formatted file with exon coordinates of the transcripts included in the csv file

 Output:
@@ -19,4 +19,24 @@ Output:
 - csv-formatted file ("NewTranscriptID,ID,Count") with
 	- id of generated transcript
 	- id of original transcript (without intron inclusions)
-	- count
\ No newline at end of file
+	- count
+
+To install the package, run
+
+```
+pip install .
+```
+
+Afterwards, it can be imported using
+
+```python
+import tsg
+```
+
+To generate the sampled transcripts, run
+
+```
+transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion>
+```
+
+where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1].
\ No newline at end of file
--- a/environment.yml
+++ b/environment.yml
@@ -6,6 +6,7 @@ dependencies:
  - python=3.9
  - matplotlib
  - pandas
+  - pip
  - flake8-docstrings
  - mypy
  - flake8

--- a/setup.py
+++ b/setup.py
+"""Setup the package."""
+
+from setuptools import setup
+
+# NOTE(review): the version below ('0.0.1') differs from
+# `__version__ = '0.0.0'` in the package's __init__.py in this same commit;
+# confirm which one is intended and keep the two in sync.
+setup(
+    name='tsg',
+    author='Zimmermann, M; Fraenkl, A;Glass, L',
+    url='https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator',
+    license='MIT',
+    version='0.0.1',
+    packages=['tsg'],
+    install_requires=['pandas'],
+    entry_points={
+        # Registers a `transcript-generator` console command that invokes
+        # the `cli` object exposed at the top level of the `tsg` package.
+        'console_scripts': [
+            'transcript-generator = tsg:cli',
+        ]
+    }
+)
--- a/src/main.py
+++ b/src/main.py
-import argparse
-from pathlib import Path
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--transcripts", type=str)
-    parser.add_argument("--annotation", type=str)
-    parser.add_argument("--prob_inclusion", type=float)
-    args = parser.parse_args()
-
-    input_transcripts_file = args.transcripts
-    input_annotations_file = args.annotation
-    prob_inclusion = args.prob_inclusion
-    input_transcripts_path = Path(input_transcripts_file)
-    input_annotations_path = Path(input_annotations_file)
-    output_transcripts_file = "generated_" + input_transcripts_path.stem + ".csv"
-    output_annotations_file = "generated_" + input_annotations_path.name
--- a/src/__init__.py
+++ b/src/__init__.py
 """Transcript structure generator package."""

-__version__ = '0.0.0'
\ No newline at end of file
+from tsg.cli import cli
+
+__version__ = '0.0.0'
--- a/tsg/cli.py
+++ b/tsg/cli.py
+import argparse
+import logging
+from pathlib import Path
+
+from .main import sample_transcripts
+
+
+def setup_logging(loglevel: str = None) -> None:
+    """Configure the root logger from a level name.
+
+    Args:
+        loglevel: Case-insensitive name of a ``logging`` level such as
+            "info" or "DEBUG". If None or empty, logging is left
+            unconfigured.
+
+    Raises:
+        ValueError: If ``loglevel`` does not name an integer level
+            constant of the ``logging`` module.
+    """
+    if not loglevel:
+        return
+    # Supply a default so an unknown name reaches the ValueError below
+    # instead of escaping from getattr() as an AttributeError.
+    numeric_level = getattr(logging, loglevel.upper(), None)
+    if not isinstance(numeric_level, int):
+        raise ValueError("Invalid log level: %s" % loglevel)
+    logging.basicConfig(level=numeric_level)
+
+
+def build_arg_parser():
+    """Create the command-line parser for the transcript generator.
+
+    Returns:
+        An ``argparse.ArgumentParser`` accepting the optional arguments
+        ``--transcripts``, ``--annotation`` and ``--log`` (strings) and
+        ``--prob_inclusion`` (float).
+    """
+    # Option flag -> converter, registered in declaration order.
+    option_types = {
+        "--transcripts": str,
+        "--annotation": str,
+        "--prob_inclusion": float,
+        "--log": str,
+    }
+
+    arg_parser = argparse.ArgumentParser()
+    for flag, converter in option_types.items():
+        arg_parser.add_argument(flag, type=converter)
+    return arg_parser
+
+
+def get_args():
+    """Parse the process's command-line arguments.
+
+    Returns:
+        The ``argparse.Namespace`` produced by the parser from
+        ``build_arg_parser``.
+    """
+    return build_arg_parser().parse_args()
+
+
+def output_filename(filename: str) -> str:
+    """Derive the output file name for an input file.
+
+    The result is a bare file name prefixed with "generated_"; any
+    directory part of ``filename`` is dropped. Inputs ending in .csv or
+    .tsv yield a name ending in .csv, inputs ending in .gtf keep their
+    name.
+
+    Args:
+        filename: Path of the input file.
+
+    Returns:
+        The derived output file name.
+
+    Raises:
+        ValueError: If ``filename`` does not end in .csv, .tsv or .gtf.
+    """
+    filepath = Path(filename)
+    if filename.endswith((".csv", ".tsv")):
+        return "generated_" + filepath.stem + ".csv"
+    if filename.endswith(".gtf"):
+        return "generated_" + filepath.name
+    # Fail loudly: silently returning None here would hand an invalid
+    # output path to the caller despite the `-> str` contract.
+    raise ValueError("Unsupported input file extension: %s" % filename)
+
+
+def cli():
+    """Run the command-line entry point.
+
+    Parses the command-line arguments, configures logging from ``--log``,
+    derives the two output file names with ``output_filename`` and passes
+    the inputs, the inclusion probability and the output names to
+    ``sample_transcripts``.
+    """
+    args = get_args()
+    setup_logging(args.log)
+
+    transcripts_in = args.transcripts
+    annotation_in = args.annotation
+    transcripts_out = output_filename(transcripts_in)
+    annotation_out = output_filename(annotation_in)
+
+    sample_transcripts(
+        transcripts_in,
+        annotation_in,
+        args.prob_inclusion,
+        transcripts_out,
+        annotation_out,
+    )
+
+
+# Script entry point. Because this module uses a relative import
+# (`from .main import ...`), it must be run with package context,
+# e.g. `python -m tsg.cli`, rather than as a bare file.
+if __name__ == "__main__":
+    cli()