From 4b19dd953f635654cb8f696012f39db9b8f64b40 Mon Sep 17 00:00:00 2001
From: Larissa Glass <larissa.glass@unibas.ch>
Date: Thu, 3 Nov 2022 15:42:26 +0000
Subject: [PATCH] Feature/homework 2022 11 02

---
 .gitignore               |  4 ++-
 LICENSE.md               | 21 +++++++++++++++
 README.md                | 26 +++++++++++++++---
 environment.yml          |  1 +
 setup.py                 | 18 +++++++++++++
 src/main.py              | 17 ------------
 {src => tsg}/__init__.py |  4 ++-
 tsg/cli.py               | 57 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 126 insertions(+), 22 deletions(-)
 create mode 100644 LICENSE.md
 create mode 100644 setup.py
 delete mode 100644 src/main.py
 rename {src => tsg}/__init__.py (50%)
 create mode 100644 tsg/cli.py

diff --git a/.gitignore b/.gitignore
index bfe0573..dc92f04 100644
--- a/.gitignore
+++ b/.gitignore
@@ -156,4 +156,6 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
-# End of https://www.toptal.com/developers/gitignore/api/python,git
\ No newline at end of file
+# End of https://www.toptal.com/developers/gitignore/api/python,git
+
+data
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..2313fb3
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index 58cfa03..0a814af 100644
--- a/README.md
+++ b/README.md
@@ -10,8 +10,8 @@ conda activate transcript-structure-generator
 # Usage
 
 Input:
-- Csv-formatted file ("ID,Count") with counts for individual transcripts
-- Probability of intron inclusion (float in range [0,1])
+- csv-formatted file ("ID,Count") with counts for individual transcripts
+- probability of intron inclusion (float in range [0,1])
 - gtf-formatted file with exon coordinates of the transcripts included in the csv file
 
 Output:
@@ -19,4 +19,24 @@ Output:
 - csv-formatted file ("NewTranscriptID,ID,Count") with
 	- id of generated transcript
 	- id of original transcript (without intron inclusions)
-	- count
\ No newline at end of file
+	- count
+
+To install the package, run
+
+```
+pip install .
+```
+
+Afterwards, it can be imported using
+
+```python
+import tsg
+```
+
+To generate the sampled transcripts, run
+
+```
+transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion>
+```
+
+where the transcripts file must be csv-formatted, the annotation file must be gtf-formatted, and the intron inclusion probability must be a float in the range [0,1].
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 2199378..3287b74 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,6 +6,7 @@ dependencies:
   - python=3.9
   - matplotlib
   - pandas
+  - pip
   - flake8-docstrings
   - mypy
   - flake8
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..3352b1f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,18 @@
+"""Setup the package."""
+
+from setuptools import setup
+
+setup(
+    name='tsg',
+    author='Zimmermann, M; Fraenkl, A;Glass, L',
+    url='https://git.scicore.unibas.ch/zavolan_group/tools/transcript-structure-generator',
+    license='MIT',
+    version='0.0.1',  # NOTE(review): tsg/__init__.py says '0.0.0' - sync
+    packages=['tsg'],
+    install_requires=['pandas'],
+    entry_points={
+        'console_scripts': [
+            'transcript-generator = tsg.cli:cli',  # module:callable
+        ]
+    }
+)
diff --git a/src/main.py b/src/main.py
deleted file mode 100644
index ea1a4d9..0000000
--- a/src/main.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import argparse
-from pathlib import Path
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--transcripts", type=str)
-    parser.add_argument("--annotation", type=str)
-    parser.add_argument("--prob_inclusion", type=float)
-    args = parser.parse_args()
-
-    input_transcripts_file = args.transcripts
-    input_annotations_file = args.annotation
-    prob_inclusion = args.prob_inclusion
-    input_transcripts_path = Path(input_transcripts_file)
-    input_annotations_path = Path(input_annotations_file)
-    output_transcripts_file = "generated_" + input_transcripts_path.stem + ".csv"
-    output_annotations_file = "generated_" + input_annotations_path.name
diff --git a/src/__init__.py b/tsg/__init__.py
similarity index 50%
rename from src/__init__.py
rename to tsg/__init__.py
index d9744d3..b9b9505 100644
--- a/src/__init__.py
+++ b/tsg/__init__.py
@@ -1,3 +1,5 @@
 """Transcript structure generator package."""
 
-__version__ = '0.0.0'
\ No newline at end of file
+from tsg.cli import cli  # tsg.cli now names the function, not the module
+
+__version__ = '0.0.0'  # NOTE(review): setup.py says 0.0.1; keep in sync
diff --git a/tsg/cli.py b/tsg/cli.py
new file mode 100644
index 0000000..72da7ba
--- /dev/null
+++ b/tsg/cli.py
@@ -0,0 +1,57 @@
+import argparse
+import logging
+from pathlib import Path
+
+from .main import sample_transcripts
+
+
+def setup_logging(loglevel: str = None) -> None:
+    """Set the root log level by name; raise ValueError if it is unknown."""
+    if loglevel:  # falsy (no --log given): leave logging unconfigured
+        numeric_level = getattr(logging, loglevel.upper(), None)
+        if not isinstance(numeric_level, int):
+            raise ValueError("Invalid log level: %s" % loglevel)
+        logging.basicConfig(level=numeric_level)
+
+
+def build_arg_parser():
+    """Build the CLI parser; both input file paths are mandatory."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--transcripts", type=str, required=True)
+    parser.add_argument("--annotation", type=str, required=True)
+    parser.add_argument("--prob_inclusion", type=float)
+    parser.add_argument("--log", type=str)
+    return parser
+
+
+def get_args():
+    """Parse the command line with the parser from build_arg_parser()."""
+    parser = build_arg_parser()
+    return parser.parse_args()
+
+
+def output_filename(filename: str) -> str:
+    """Map a csv/tsv/gtf input path to its "generated_" output file name."""
+    if filename.endswith((".csv", ".tsv")):
+        return "generated_" + Path(filename).stem + ".csv"
+    if filename.endswith(".gtf"):
+        return "generated_" + Path(filename).name
+    # Any other extension used to fall through and silently return None.
+    raise ValueError("Unsupported file type: %s" % filename)
+
+
+def cli():
+    """Run the command-line tool.
+
+    Parses the options, configures logging and calls sample_transcripts
+    with both input paths, the probability and the derived output names.
+    """
+    opts = get_args()
+    setup_logging(opts.log)
+    inputs = (opts.transcripts, opts.annotation)
+    outputs = [output_filename(path) for path in inputs]
+    sample_transcripts(*inputs, opts.prob_inclusion, *outputs)
+
+
+if __name__ == "__main__":  # e.g. when started as "python -m tsg.cli"
+    cli()
-- 
GitLab