From 77ae9776f7e4f4ea652f8aa03077dbdb4723e737 Mon Sep 17 00:00:00 2001
From: Larissa Glass <larissa.glass@unibas.ch>
Date: Tue, 29 Nov 2022 15:00:17 +0000
Subject: [PATCH] Milestone Review

---
 .flake8            |   3 +
 .pylintrc          |   5 ++
 Dockerfile         |   6 +-
 README.md          |  14 ++++-
 environment.yml    |   1 +
 pyproject.toml     |   2 +-
 tests/test_main.py |   2 +-
 tsg/__init__.py    |   5 +-
 tsg/__main__.py    |   3 +-
 tsg/cli.py         | 104 ++++++++++++++++--------------
 tsg/main.py        | 154 ++++++++++++++++++++++++++------------------
 11 files changed, 181 insertions(+), 118 deletions(-)
 create mode 100644 .flake8
 create mode 100644 .pylintrc

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..1d48b94
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 120
+docstring-convention = google
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..f6b4eac
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,5 @@
+[FORMAT]
+max-line-length=120
+
+[BASIC]
+good-names=df, i, fh, id, s, d
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index be953d5..4d99edb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,11 +1,11 @@
-FROM python:3-slim
+FROM python:3.9-slim-buster
 
 WORKDIR /usr/src/app
 
-COPY README.md requirements.txt pyproject.toml ./
+COPY README.md pyproject.toml ./
 ADD tsg ./tsg
 ADD data ./data
 
 RUN pip install .
 
-CMD [ "transcript-generator", "--transcripts", "data/transcripts_short.csv", "--annotation", "data/Homo_sapiens.GRCh38.103.chr.gtf", "--prob_inclusion", "0.05" ]
\ No newline at end of file
+ENTRYPOINT [ "transcript-generator" ]
\ No newline at end of file
diff --git a/README.md b/README.md
index b336096..d520651 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ To generate the sampled transcripts, open a new shell, activate your environmen
 
 ```
 conda activate transcript-structure-generator
-transcript-generator --transcripts <transcripts_file> --annotation <annotations_file> --prob_inclusion=<probability_inclusion> [--log "INFO"]
+transcript-generator --prob-inclusion <probability_inclusion> [--log "INFO"] <transcripts_file> <annotations_file>
 ```
 
 where the transcripts file should be csv-formatted, the annotation file gtf-formatted and the inclusion probability for introns a float in the range [0,1]. The log parameter is optional and can be one of `["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]`. The default is `INFO`.
@@ -53,6 +53,18 @@ To perform all tests, make sure your environment corresponds to the `environmen
 pytest tests
 ```
 
+To build the Docker image, run
+
+```
+docker build -t transcript-app .
+```
+
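+As a quick smoke test, asking the container for its help text should print the usage message; the image's entry point is `transcript-generator`, and `argparse` provides `--help` automatically:
+
+```
+docker run transcript-app --help
+```
+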
+Afterwards, you can use the Docker image like so
+
+```
+docker run transcript-app --prob-inclusion <probability_inclusion> [--log "INFO"] <transcripts_file> <annotations_file>
+```
+
 # License
 
 MIT license, Copyright (c) 2021 Zavolan Lab, Biozentrum, University of Basel
diff --git a/environment.yml b/environment.yml
index ac26257..bf74421 100644
--- a/environment.yml
+++ b/environment.yml
@@ -12,4 +12,5 @@ dependencies:
   - mypy
   - flake8
   - pytest
+  - pylint
   - coverage
diff --git a/pyproject.toml b/pyproject.toml
index 1214e21..6cb72f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "tsg"
-version = "0.1.0"
+version = "0.2.0"
 authors = [
   { name="Michael Zimmermann", email="michael.zimmermann@unibas.ch" },
   { name="Andri Fraenkl", email="andri.fraenkl@unibas.ch" },
diff --git a/tests/test_main.py b/tests/test_main.py
index 48b9b55..49aaeea 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -62,7 +62,7 @@ class TestGtf:
     def test_parsed(self):
         annotations = Gtf()
         annotations.read_file("tests/resources/Annotation1.gtf")
-        annotations.parse_free_text()
+        annotations.parse_key_value()
 
         assert annotations.parsed == True
         assert set(annotations.free_text_columns) == set(
diff --git a/tsg/__init__.py b/tsg/__init__.py
index 229aaf5..75a11c4 100644
--- a/tsg/__init__.py
+++ b/tsg/__init__.py
@@ -1,4 +1,7 @@
 """Transcript structure generator package."""
+import importlib.metadata
 
+from tsg.main import Gtf, TranscriptGenerator, sample_transcripts
 
-__version__ = '0.0.0'
+__version__ = importlib.metadata.version("tsg")
+__all__ = ["Gtf", "TranscriptGenerator", "sample_transcripts"]
diff --git a/tsg/__main__.py b/tsg/__main__.py
index cd76d30..b4fac98 100644
--- a/tsg/__main__.py
+++ b/tsg/__main__.py
@@ -1,4 +1,5 @@
+"""Main module."""
 from tsg.cli import app
 
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
diff --git a/tsg/cli.py b/tsg/cli.py
index ee82182..63996d9 100644
--- a/tsg/cli.py
+++ b/tsg/cli.py
@@ -1,3 +1,4 @@
+"""Command line interface."""
 import argparse
 import logging
 from pathlib import Path
@@ -9,7 +10,7 @@ def setup_logging(loglevel: str = None) -> None:
     """Set up logging. Loglevel can be one of ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"].
 
     Args:
-        loglevel (str, optional): Level of log output. Defaults to None.
+        loglevel: Level of log output.
 
     Raises:
         ValueError: If string that is not a log level is passed, raise error.
@@ -17,12 +18,15 @@ def setup_logging(loglevel: str = None) -> None:
     Returns:
         None
     """
+    # set default log level
+    numeric_level = getattr(logging, "INFO")
+
     if loglevel:
-        numeric_level = getattr(logging, loglevel.upper())
-        if not isinstance(numeric_level, int):
-            raise ValueError("Invalid log level: %s" % loglevel)
-    else:
-        numeric_level = getattr(logging, "INFO")
+        try:
+            numeric_level = getattr(logging, loglevel.upper())
+        except AttributeError as err:
+            # re-raise as the ValueError documented above
+            raise ValueError(f"Invalid log level: {loglevel}") from err
 
     logging.basicConfig(
         format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
@@ -31,44 +35,50 @@ def setup_logging(loglevel: str = None) -> None:
 
 
-def build_arg_parser() -> argparse.ArgumentParser:
-    """ Builds the argument parser.
+def build_arg_parser() -> argparse.Namespace:
+    """Builds the argument parser and parses the command line.
     Args:
-        1) path to the csv-file with the number of transcripts (str)
-        2) path to the gtf-file with the annotations for each transcript (str)
-        3) a value for the probability of intron inclusion (float)
-        4) a log message (str)
-
+        1) path to the csv-file with the number of transcripts
+        2) path to the gtf-file with the annotations for each transcript
+        3) a value for the probability of intron inclusion
+        4) a log message
+
     Raises:
-        None
+        None
 
     Returns:
-        parser
+        Parsed command-line arguments.
     """
     parser = argparse.ArgumentParser()
-    parser.add_argument("--transcripts", type=str)
-    parser.add_argument("--annotation", type=str)
-    parser.add_argument("--prob_inclusion", type=float)
-    parser.add_argument("--log", type=str)
-
-    return parser
-
-
-def get_args() -> argparse.Namespace:
-    """Builds a parser and returns its arguments.
-
-    Args:
-        None
-
-    Raises:
-        None
+    parser.add_argument(
+        "transcripts",
+        type=str,
+        help="Path to csv file with number of transcripts (ID,Count).",
+    )
+    parser.add_argument(
+        "annotation",
+        type=str,
+        help="Path to gtf-file with exon annotation.",
+    )
+    parser.add_argument(
+        "-p",
+        "--prob-inclusion",
+        type=float,
+        default=0.05,
+        help="Probability of intron inclusion.",
+    )
+    parser.add_argument(
+        "--log",
+        type=str,
+        default="INFO",
+        help='Level of logging. Can be one of ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]',
+    )
 
-    Returns:
-        arguments for parser
-    """
-    parser = build_arg_parser()
     args = parser.parse_args()
 
+    assert 0 <= args.prob_inclusion <= 1, "prob_inclusion must be in [0, 1]"
+
     return args
 
 
@@ -76,7 +86,7 @@ def output_filename(filename: str) -> str:
     """Generate output filename for given input filename.
 
     Args:
-        filename (str): Input filename
+        filename: Input filename
 
     Raises:
         NotImplementedError: Only accept filetypes .csv, .tsv and .gtf.
@@ -86,7 +96,7 @@ def output_filename(filename: str) -> str:
         str: Output filename
     """
     filepath = Path(filename)
-    if filepath.suffix == ".csv" or filepath.suffix == ".tsv":
+    if filepath.suffix in (".csv", ".tsv"):
         outfile = "generated_" + filepath.stem + ".csv"
     elif filepath.suffix == ".gtf":
         outfile = "generated_" + filepath.name
@@ -95,26 +105,26 @@ def output_filename(filename: str) -> str:
 
     if Path(outfile).exists():
         raise FileExistsError(f"The output file {outfile} already exists.")
-    
+
     return outfile
 
 
 def app():
-    """Gets the args, sets up the logging and starts the programm with the provided parameters.
+    """Gets the args, sets up the logging and starts the program with the provided parameters.
 
-    Args: 
+    Args:
         1) path to the csv-file with the number of transcripts
         2) path to the gtf-file with the annotations for each transcript
         3) a value for the probability of intron inclusion
         4) a log message
+
     Raises:
-        None 
+        None
 
     Returns:
-        None 
+        None
     """
-    args = get_args()
+    args = build_arg_parser()
     setup_logging(args.log)
 
     sample_transcripts(
         args.transcripts,
         args.annotation,
         args.prob_inclusion,
         output_filename(args.transcripts),
         output_filename(args.annotation),
-    )
\ No newline at end of file
+    )
diff --git a/tsg/main.py b/tsg/main.py
index e7bd9db..4ad774f 100644
--- a/tsg/main.py
+++ b/tsg/main.py
@@ -14,10 +14,10 @@ def read_abundances(transcripts_file: str) -> pd.DataFrame:
     """Read transcript-abundance file into dataframe.
     Args:
-        transcripts_file (str): Input filename
+        transcripts_file: Input filename
 
     Returns:
-        pd.DataFrame: Transcript abundances ("id", "count")
+        A pd.DataFrame with the transcript abundances ("id", "count").
 
     Raises:
         ValueError: When the input file is neither csv or tsv
     """
     cols: list = ["id", "count"]
     if transcripts_file.endswith(".tsv"):
         return pd.read_table(transcripts_file, header=None, names=cols)
-    elif transcripts_file.endswith(".csv"):
+    if transcripts_file.endswith(".csv"):
         return pd.read_csv(transcripts_file, header=None, names=cols)
-    else:
-        raise ValueError("File type needs to be either csv or tsv")
+    raise ValueError("File type needs to be either csv or tsv")
 
 
-def filter_df(df: pd.DataFrame, transcripts: list = []) -> pd.DataFrame:
+def filter_df(df: pd.DataFrame, transcripts: list = None) -> pd.DataFrame:
     """Filter annotations to include only exons with the highest transcript support level, i.e. TSL1.
 
     `feature` column is filtered on value "exon" and
@@ -48,6 +47,8 @@ def filter_df(df: pd.DataFrame, transcripts: list = None) -> pd.DataFrame:
         A pd.DataFrame containing only rows with exon annotations of highest transcript support level and,
         if provided, belonging to one of the given transcripts
     """
+    if transcripts is None:
+        transcripts = []
     df_filter = df[
         (df["feature"] == "exon")
         & (df["free_text"].str.contains('transcript_support_level "1"'))
@@ -63,8 +64,8 @@ def str_to_dict(s: str) -> dict:
     """Split between key/value pairs.
 
-    Split string based on delimiter ';' into items, remove empty items and split items on delimiter ' ' into key/value pairs.
-    Remove quotes from value strings and create a dictionary.
+    Split string based on delimiter ';' into items, remove empty items and split items on delimiter ' ' into
+    key/value pairs. Remove quotes from value strings and create a dictionary.
 
     Args:
         s: A string of the form 'gene_id "GENE1"; transcript_id "TRANSCRIPT1";'
@@ -97,7 +98,7 @@ def dict_to_str(d: dict) -> str:
     # join key, value pairs from dictionary with a space in a list,
     # then join items in list by ;
     # end on ;
-    # check if value is nan
+    # value == value checks that value is not nan
     s: str = (
         "; ".join([f'{key} "{value}"' for key, value in d.items() if value == value])
         + ";"
     )
 
 
 def reverse_parse_free_text(df_all: pd.DataFrame) -> pd.DataFrame:
-    """Reverse parsing of gtf based pd.DataFrame to include only columns that are well defnined by gtf file standards.
+    """Reverse parsing of gtf based pd.DataFrame to include only columns that are well defined by gtf-file standards.
 
-    The first 8 defined columns are constant as defined by gtf file standards.
-    Further columns are assumed to be parsed free-text columns (see Gtf.parse_free_text()).
-    The parsed free-text columns are aggregated as a dictionary and the dictionry is parsed as a string in gtf format.
+    The first 8 defined columns are constant as defined by gtf-file standards.
+    Further columns are assumed to be parsed free-text columns (see Gtf.parse_key_value()).
+    The parsed free-text columns are aggregated as a dictionary and the dictionary is parsed as a string in gtf format.
 
     Args:
-        df_all: A pd.DataFrame containing a parsed gtf file.
+        df_all: A pd.DataFrame containing a parsed gtf-file.
 
     Returns:
-        A DataFrame with the columns as defined by gtf file standards.
+        A pd.DataFrame with the columns as defined by gtf-file standards.
""" # Define pd.DataFrame containing only parsed free-text columns df_free_text = df_all.iloc[:, 8:] @@ -133,8 +134,8 @@ def write_gtf(df: pd.DataFrame, filename: str) -> None: Makes sure data types are correct and saves object in gtf format. Args: - df: A pd.DataFrame containing a gtf file. - Filename: File to save to. + df: A pd.DataFrame containing a gtf-file. + filename: File to save to. """ # Make sure the data types are correct. df = df.astype(Gtf.dtypes) @@ -156,7 +157,7 @@ def write_header(annotations_file: str) -> None: Args: annotations_file: Filename to write header to. """ - with open(annotations_file, "w") as fh: + with open(annotations_file, "w", encoding="utf_8") as fh: fh.write("\t".join(Gtf.dtypes.keys()) + "\n") @@ -184,14 +185,15 @@ class Gtf: def __init__(self): """Initialize Gtf object.""" + self.df = None self.parsed = False self.original_columns = list(self.dtypes.keys()) self.free_text_columns = [] def read_file(self, annotations_file: str) -> None: - """Read gtf file. + """Read gtf-file. - Iterate over chunks of the gtf file reading 100000 rows at a time. Filter chunks for exon annotations of + Iterate over chunks of the gtf-file reading 100000 rows at a time. Filter chunks for exon annotations of the highest transcript support level. Concatenate chunks to get resulting pd.DataFrame. Args: @@ -219,11 +221,11 @@ class Gtf: Part of initialization is: Set dataframe attribute - Check which columns belong to the free-text part of the GTF-file. + Check which columns belong to the free-text part of the gtf-file. Check if there are no columns called free-text and if so, sets the value of parsed attribute to TRUE. Args: - df: pd.DataFrame + df: A pd.DataFrame containing a gtf-file. """ self.free_text_columns = [ col for col in df.columns if col not in self.original_columns @@ -232,13 +234,13 @@ class Gtf: if "free_text" not in df.columns: self.parsed = True - def parse_free_text(self): + def parse_key_value(self): """Parse key/value pairs from `free_text` column into column `key` with row entry `value`. Creates a dataframe with columns for keys in the free-text column instead of `free_text` column. Saves it to Gtf.df attribute. """ - assert self.parsed == False + assert self.parsed is False # create dataframe with columns for values in free_text column df_free_text = self.df["free_text"].map(str_to_dict).apply(pd.Series) # remember which columns come from free_text @@ -256,7 +258,7 @@ class Gtf: Creates a data frame that can be written in gtf format to file. Parsed free-text columns are aggregated into `free_text` column according to gtf format specification. """ - assert self.parsed == True + assert self.parsed is True # create dataframe with only free_text columns df_free_text = self.df[self.free_text_columns] # filter current dataframe to only original columns, except "free_text" column @@ -276,6 +278,32 @@ class Gtf: class TranscriptGenerator: """Class to sample a transcript.""" + def __new__( + cls, + transcript_id: str, + transcript_count: int, + transcript_df: pd.DataFrame, + prob_inclusion: float, + ): + """Initialize TranscriptGenerator object.""" + strands = transcript_df["strand"].unique() + if len(transcript_df) == 0: + LOG.warning( + "Transcript %s can't be sampled. Annotation is missing", transcript_id + ) + instance = None + elif len(strands) > 1: + LOG.warning( + "Transcript %s can't be sampled. 
+        else:
+            instance = super().__new__(cls)
+
+        return instance
+
     def __init__(
         self,
         transcript_id: str,
         transcript_count: int,
         transcript_df: pd.DataFrame,
         prob_inclusion: float,
     ):
         """Initialize TranscriptGenerator object."""
-        assert len(transcript_df) > 0
-        assert transcript_count > 0
-        assert (prob_inclusion >= 0) and (prob_inclusion <= 1)
-
         self.id = transcript_id
         self.count = transcript_count
         self.df = transcript_df
@@ -301,7 +325,7 @@ class TranscriptGenerator:
 
         Each column corresponds to one sample and the number of columns corresponds to the number of samples.
 
         Returns:
-            inclusion_arr: A boolean np.array, where True means intron inclusion.
+            A boolean np.array, where True means intron inclusion.
         """
         inclusion_arr = np.random.rand(self.no_exons, self.count) < self.prob_inclusion
         if self.strand == "+":
@@ -317,10 +341,9 @@ class TranscriptGenerator:
 
         Args:
 
         Returns:
-            names: List of names for generated exons.
-            inclusion_arr_unique: A boolean np.array where columns correspond to generated transcripts and rows to
-                intron inclusion.
-            counts: A np.array containing sample number per generated inclusions, i.e. transcript.
+            - List of names for generated transcripts.
+            - A boolean np.array where columns correspond to generated transcripts and rows to intron inclusion.
+            - A np.array containing the sample count per generated transcript.
         """
         inclusion_arr = self._get_inclusions()
         # Unique intron inclusion arrays and counts
@@ -330,7 +353,7 @@
         # Name for each generated transcript
         names = []
         for i in range(inclusion_arr_unique.shape[1]):
-            if np.all(inclusion_arr_unique[:, i] == False, axis=0):
+            if not inclusion_arr_unique[:, i].any():
                 names.append(self.id)
             else:
                 names.append(f"{self.id}_{i}")
@@ -341,26 +364,26 @@ class TranscriptGenerator:
         """Take as input a dataframe filtered to one transcript and a boolean vector denoting intron inclusions.
 
         Args:
-            inclusions (np.array): A boolean vector denoting intron inclusion.
-            transcript_id (str): The transcript id.
+            inclusions: A boolean vector denoting intron inclusion.
+            transcript_id: The transcript id.
 
         Returns:
             The generated transcript as a pd.DataFrame.
         """
         df_generated = self.df.copy()
         if self.strand == "+":
-            origninal_end = df_generated["end"]
+            original_end = df_generated["end"]
             df_generated["end"] = np.where(
                 inclusions,
                 df_generated["start"].shift(periods=-1, fill_value=-1) - 1,
-                origninal_end,
+                original_end,
             )
         if self.strand == "-":
-            origninal_start = df_generated["start"]
+            original_start = df_generated["start"]
             df_generated["start"] = np.where(
                 inclusions,
                 df_generated["end"].shift(periods=-1, fill_value=-1) + 1,
-                origninal_start,
+                original_start,
             )
 
         original_id = df_generated["exon_id"]
@@ -373,39 +396,36 @@
         df_generated["transcript_id"] = transcript_id
         return df_generated
 
-    def generate_transcripts(self, filename: str) -> None:
+    def write_sequences(self, filename: str) -> None:
         """Write transcripts to file.
 
         Args:
-            filename (str): Output csv filename.
+            filename: Output csv filename.
""" - ids, inclusions, counts = self._get_unique_inclusions() - with open(filename, "a") as fh: + ids, _, counts = self._get_unique_inclusions() + with open(filename, "a", encoding="utf_8") as fh: for transcript_id, transcript_count in zip(ids, counts): fh.write(f"{transcript_id},{self.id},{transcript_count}\n") - def generate_annotations(self, filename: str) -> None: + def write_annotations(self, filename: str) -> None: """Generate a annotations in gtf format for sampled transcript. Args: - Filename: Output gtf filename. + Filename: Output gtf-filename. Raises: ValueError: If given transcript ID could not be sampled. """ - ids, inclusions, counts = self._get_unique_inclusions() + ids, inclusions, _ = self._get_unique_inclusions() n_unique = len(ids) - try: - df = pd.concat( - [self._get_df(inclusions[:, i], ids[i]) for i in range(n_unique)] - ) - df = reverse_parse_free_text(df) + df = pd.concat( + [self._get_df(inclusions[:, i], ids[i]) for i in range(n_unique)] + ) + df = reverse_parse_free_text(df) - write_gtf(df, filename) - LOG.debug(f"Transcript {self.id} sampled") - except ValueError: - LOG.error(f"Transcript {self.id} could not be sampled.") + write_gtf(df, filename) + LOG.debug("Transcript %s sampled", self.id) def sample_transcripts( @@ -418,18 +438,23 @@ def sample_transcripts( """Read input files, iterate over transcript IDs, sample each transcript and save results. Args: - input_transcripts_file (str): Filename of transcript abundances, needs to be csv or tsv. - input_annotations_file (str): Filename of annotations, needs to be gtf. - prob_inclusion (float): Probability of intron inclusion, needs to be float in range [0,1]. - output_transcripts_file (str): Filename of file to write sampled transcripts to. - output_annotations_file (str): Filename of file to write generated annotations to. + input_transcripts_file: Filename of transcript abundances, needs to be csv or tsv. + input_annotations_file: Filename of annotations, needs to be gtf. + prob_inclusion: Probability of intron inclusion, needs to be float in range [0,1]. + output_transcripts_file: Filename of file to write sampled transcripts to. + output_annotations_file: Filename of file to write generated annotations to. """ + LOG.info("Probability of intron inclusion: %s", str(prob_inclusion)) + LOG.info("Parsing transcript abundances...") transcripts = read_abundances(input_transcripts_file) + # Make sure that abundance is positive + transcripts = transcripts[transcripts["count"] > 0] + LOG.info("Done parsing...") LOG.info("Parsing annotations...") annotations = Gtf() annotations.read_file(input_annotations_file) - annotations.parse_free_text() + annotations.parse_key_value() LOG.info("Done parsing...") LOG.info("Start sampling transcripts...") @@ -447,6 +472,9 @@ def sample_transcripts( transcript_df, prob_inclusion=prob_inclusion, ) - transcripts.generate_annotations(output_annotations_file) - transcripts.generate_transcripts(output_transcripts_file) + try: + transcripts.write_annotations(output_annotations_file) + transcripts.write_sequences(output_transcripts_file) + except AttributeError: + pass LOG.info("Done.") -- GitLab