diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 704ad66d4bb39fc389e8464f80374132ead95d01..906cfa26c05a62a1c0b8ceac7c8b1146d85c5fff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,12 +15,21 @@ build-job: # First stage deployment and installation of dependencies. - pip install -e . - echo "Dependencies successfully deployed." +unit-test-job: # This job runs in the test stage. + stage: test # It only starts when the job in the build stage completes successfully. + script: + - pip install -r requirements.txt + - pip install -r requirements_dev.txt + - pip install -e . + - coverage run --source sequence_extractor -m pytest + - coverage report -m + lint-test-job: # Test Stage stage: test # Deploys and runs all 3 linters. script: - pip install -r requirements.txt - pip install -r requirements_dev.txt - pip install -e . - - flake8 --docstring-convention google sequence_extractor/ gtf_processing/ --ignore=D104,F821 - - pylint sequence_extractor/ gtf_processing/ - - mypy sequence_extractor/ gtf_processing/ + - flake8 --docstring-convention google sequence_extractor/ + - pylint sequence_extractor/ + - mypy sequence_extractor/ diff --git a/gtf_processing/pre_bedtools.py b/gtf_processing/pre_bedtools.py deleted file mode 100644 index b5ccc5d2b27228d18700fc631c4fb6bcfaeb7fd6..0000000000000000000000000000000000000000 --- a/gtf_processing/pre_bedtools.py +++ /dev/null @@ -1,47 +0,0 @@ -"""This script defines a BED from exon annotation in a GTF, to get exon coordinates for use in bedtools. It also ensures that the concatenation happens in the correct order, regardless of the strandedness of the transcript. 
- -Args: - GTF file - -Returns: - BED file with the format: chr, start, end, transcript_id, score, strand, gene_id -""" - -import argparse -import pandas as pd -from gtfparse import read_gtf - -parser = argparse.ArgumentParser( - prog="pre_bedtools", - description="extracts ordered information from gtf file and for transcripts in the negative strand, flips the order in which exons are ordered.", -) -parser.add_argument("--input_gtf_file", help="ordered and processed gtf file") -parser.add_argument( - "--output_bed_file", - help="bed file with only exons with strandedness taken into account", -) -args = parser.parse_args() - -gtf = read_gtf(args.input_gtf_file) -gtf_exons = gtf[gtf["feature"] == "exon"] -gtf_exons = gtf_exons[ - ["seqname", "start", "end", "transcript_id", "score", "strand", "gene_id"] -] - -gtf_df_neg = gtf_exons[gtf_exons["strand"] == "-"] -gtf_df_neg = ( - gtf_df_neg.sort_values(["transcript_id", "start"], ascending=False) - .groupby("transcript_id") - .head(len(gtf_df_neg.transcript_id)) -) - -gtf_df_pos = gtf_exons[gtf_exons["strand"] == "+"] -gtf_df_pos = ( - gtf_df_pos.sort_values(["transcript_id", "start"], ascending=True) - .groupby("transcript_id") - .head(len(gtf_df_pos.transcript_id)) -) - -pd.concat([gtf_df_pos, gtf_df_neg]).to_csv( - args.output_bed_file, sep="\t", index=False -) # gtf_df_pos and gtf_df_neg must be dataframes diff --git a/images/Ahmed_mahmoud_git_tutorial.PNG b/images/Ahmed_mahmoud_git_tutorial.PNG deleted file mode 100644 index de3936f17494e54ec1e2899e0cd1f2a7b557619b..0000000000000000000000000000000000000000 Binary files a/images/Ahmed_mahmoud_git_tutorial.PNG and /dev/null differ diff --git a/images/Gina_Homework2Image.png b/images/Gina_Homework2Image.png deleted file mode 100644 index 44b9a978228bcb08415ad42ec69320f578cbdefd..0000000000000000000000000000000000000000 Binary files a/images/Gina_Homework2Image.png and /dev/null differ diff --git a/images/Markdown_Homework_GinaBoot.png 
b/images/Markdown_Homework_GinaBoot.png deleted file mode 100644 index 0fd7e57c9add525dfab1de345f8bcbfe127cad6c..0000000000000000000000000000000000000000 Binary files a/images/Markdown_Homework_GinaBoot.png and /dev/null differ diff --git a/images/Samuel_Mondal_Markdown_tutorial_completion_page.png b/images/Samuel_Mondal_Markdown_tutorial_completion_page.png deleted file mode 100644 index 58d025d997164ae4c7e04af178e1413fe9c1232d..0000000000000000000000000000000000000000 Binary files a/images/Samuel_Mondal_Markdown_tutorial_completion_page.png and /dev/null differ diff --git a/images/Samuel_Mondal_git_tutorial_completion_page.png b/images/Samuel_Mondal_git_tutorial_completion_page.png deleted file mode 100644 index af33f4a586aeefff0071ca592f5831a139d0effc..0000000000000000000000000000000000000000 Binary files a/images/Samuel_Mondal_git_tutorial_completion_page.png and /dev/null differ diff --git a/images/markdown-tutorial-AM.PNG b/images/markdown-tutorial-AM.PNG deleted file mode 100644 index f23c78a1c79d99e264298cb4fdf6850781fbd63e..0000000000000000000000000000000000000000 Binary files a/images/markdown-tutorial-AM.PNG and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 1e4ef6d4d11827251de76f3b5bbada4240ecb8bd..c3cba872576efe22404bdd67a4dfc6da9ff47aeb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ - pandas~=1.5 - numpy~=1.23 - gtfparse~=1.2 +numpy>=1.23.3 +pandas>=1.4.4 +gtfparse +polars==0.16.17 diff --git a/requirements_dev.txt b/requirements_dev.txt index ab4c4964f11ad764a889bccac4d6a17fdf1cd12e..4588ded2ab2d4143c8d8088dbccdfdc7146570d0 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,7 +1,7 @@ -pytest~=7.2 -coverage~=6.5 -black~=22.10 -flake8~=6.0 -flake8-docstrings~=1.6 -mypy~=0.991 -pylint~=2.15 +pytest +coverage +black>=22.10 +flake8 +flake8-docstrings +mypy +pylint diff --git a/sequence_extractor/__init__.py b/sequence_extractor/__init__.py index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d0d8d56bdb15401f4ebe8c00041039d15c0ffec3 100644 --- a/sequence_extractor/__init__.py +++ b/sequence_extractor/__init__.py @@ -0,0 +1 @@ +"""Initialise package.""" diff --git a/sequence_extractor/cli.py b/sequence_extractor/cli.py index 1fc4949133d15189bd19335e85b134aecd5bf897..1b2a12051962fb895f9491bf6c82f274de45d419 100644 --- a/sequence_extractor/cli.py +++ b/sequence_extractor/cli.py @@ -1,40 +1,106 @@ -"""command line script to be run on output fasta file from bedtools getfasta.""" +"""CLI to be run on output fasta file from bedtools getfasta.""" import argparse import logging -from exon_concatenation import exon_concatenation -from poly_a import poly_a_addition_to_fasta_list +from sequence_extractor.pre_bedtools import pre_bedtools_mode +from sequence_extractor.exon_concatenation import exon_concatenation +from sequence_extractor.poly_a import poly_a_addition_to_fasta_list -parser = argparse.ArgumentParser( - prog="transcript_sequence_extractor", - description="extracts transcript sequences from genome sequence and ouputs transcripts with PolyA tail added to them", -) -parser.add_argument("--input_fasta_file", help="fasta file obtained from bedtools") -parser.add_argument("--output_file_name", help="Name of the output fasta file") - -args = parser.parse_args() +LOG = logging.getLogger(__name__) def main(): - """Runs on the output from bedtools and concatenates the exons together and adds a polyA tail and outputs a fasta file. + """Use CLI arguments to extract sequences. + + Runs on the output from bedtools and concatenates the exons together + and adds a polyA tail and outputs a fasta file. 
Args: None: this will run on its own by taking the information from argparse Returns: - A fasta file with a single entry for each transcript ID with polyA tail being added onto the sequence at 3'end + A fasta file with a single entry for each transcript ID with + polyA tail being added onto the sequence at 3'end """ - LOG.info("sequence_extractor begins") - fasta_list = exon_concatenation(args.input_fasta_file) - final_list = poly_a_addition_to_fasta_list(fasta_list) - with open(args.output_file_name, "w", encoding="utf-8") as fasta_out: - fasta_out.write("\n".join("%s\n%s" % x for x in final_list)) - LOG.info("sequence_extractor ends") + args = parse_args() + setup_logging() + + if args.mode == "pre_bedtools": + pre_bedtools_mode(args) + elif args.mode == "post_bedtools": + post_bedtools_mode(args) + else: + LOG.error( + "Invalid mode specified." + "Please choose 'pre_bedtools' or 'post_bedtools'.") -if ___name__ == "main": +def setup_logging() -> None: + """Configure logging.""" logging.basicConfig( - format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")', + format='[%(asctime)s: %(levelname)s] %(message)s ' + '(module "%(module)s")', level=logging.INFO, ) - LOG = logging.getLogger(__name__) + + +def parse_args(): + """Parse arguments for CLI.""" + parser = argparse.ArgumentParser( + description="extracts transcript sequences from genome sequence and" + "ouputs transcripts with PolyA tail added to them", + ) + parser.add_argument( + "--mode", + choices=["pre_bedtools", "post_bedtools"], + required=True, + help="Select the mode of operation" + "('pre_bedtools' or 'post_bedtools')." 
+ ) + parser.add_argument( + "-i", "--input-fasta-file", + dest="input_fasta_file", + help="Fasta-formatted file obtained from bedtools" + ) + parser.add_argument( + "-o", "--output-file-name", + dest="output_file_name", + help="Name of the output fasta file" + ) + parser.add_argument( + "-p", "--polyA-length", + dest="poly_a_length", + type=int, + help="Length of the polyA tail to be added (def: 250)", + default=250 + ) + parser.add_argument( + "--input-gtf-file", + dest="input_gtf_file", + help="Ordered and processed gtf file for 'pre_bedtools' mode.") + parser.add_argument( + "--output-bed-file", + dest="output_bed_file", + help="Bed file with only exons with strandedness" + "taken into account for 'pre_bedtools' mode.") + + args = parser.parse_args() + return args + + +def post_bedtools_mode(args): + """Execute the 'post_bedtools' mode.""" + LOG.info("Starting 'post_bedtools' mode...") + + fasta_list = exon_concatenation(args.input_fasta_file) + final_list = poly_a_addition_to_fasta_list(fasta_list, args.poly_a_length) + + if args.output_file_name is None: + args.output_file_name = "default_output.fasta" + + with open(args.output_file_name, "w", encoding="utf-8") as fasta_out: + fasta_out.write("\n".join(f"{x[0]}\n{x[1]}" for x in final_list)) + LOG.info("Transcript sequence extractor finished in 'post_bedtools' mode.") + + +if __name__ == '__main__': main() diff --git a/sequence_extractor/exon_concatenation.py b/sequence_extractor/exon_concatenation.py index 194e693c9603deb0195bc0db2bc80bf257c73ba7..c909878e55e874ee39c7cca9415001639c3bcb73 100644 --- a/sequence_extractor/exon_concatenation.py +++ b/sequence_extractor/exon_concatenation.py @@ -1,19 +1,24 @@ -"""Script containing the function to concatenate exons and output the results in a list of tuples.""" +"""Concatenate exons and output the results in a list of tuples.""" def exon_concatenation( post_bedtools_fasta: str, ) -> list: - """Concatenate all sequences starting with identical transcripit ID and 
outputs it as a list with sequence header (Transcript ID) and concatenated sequences as tuples. + """Concatenate exons. + + Concatenate all sequences starting with identical transcript ID and + output it as a list with sequence header (Transcript ID) and + concatenated sequences as tuples. Args: - post_bedtools_fasta: The name of the fasta file obtained after bedtools has been run + post_bedtools_fasta: The name of the fasta file obtained after + bedtools has been run Returns: A list containing transcript ID and concatenated exons in tuples. """ with open(post_bedtools_fasta, "r", encoding="utf-8") as fasta: - annotation = [] + annotation: list = [] fasta_format_list = [] for line1, line2 in zip(fasta, fasta): if len(annotation) == 0: diff --git a/sequence_extractor/poly_a.py b/sequence_extractor/poly_a.py index 73913713a0298e4a292120f7afc7bab41039bddd..f2cdf51a93a2c2910c36d260904fd54503543192 100644 --- a/sequence_extractor/poly_a.py +++ b/sequence_extractor/poly_a.py @@ -1,39 +1,47 @@ -"""This script contains two functions and the first function is called by the second function and used to add poly A tail to the concatenated exon.""" - - +"""Add poly A tail to the concatenated exon.""" import numpy as np -# To do: Taking probabilities of nucleotides from user and raising error if sum != 1 +# To do: Taking probabilities of nucleotides from user +# and raising error if sum != 1 def poly_a_generator( exon: str, + poly_a_length: int = 250, ) -> str: - """Adds a PolyA tail to an exon sequence input into the function. + """Add a PolyA tail to an exon sequence input into the function. Args: - exon: RNA sequence, obtained from concatenation of exons, that needs polyA to be added to its 3' end. + exon: RNA sequence, obtained from concatenation of exons, + that needs polyA to be added to its 3' end. Returns: - RNA with polyA tail added to its 3' end. + RNA with polyA tail added to its 3' end. 
""" list_of_nucleotides = ["A", "T", "G", "C"] poly_a_string = "".join( - np.random.choice(list_of_nucleotides, 250, p=[0.914, 0.028, 0.025, 0.033]) + np.random.choice( + list_of_nucleotides, poly_a_length, p=[0.914, 0.028, 0.025, 0.033] + ) ) return exon + poly_a_string def poly_a_addition_to_fasta_list( fasta_list: list, + poly_a_length: int = 250, ) -> list: - """Takes in a list of tuples with annotations and exons and outputs a list where polyA tail has been added to all the exon 3' ends. + """Add polyA tail to all the exon 3' ends. + + Takes in a list of tuples with annotations and exons and outputs a list. Args: - fasta_list: List contaning tuples of annotations and exons + fasta_list: List containing tuples of annotations and exons Returns: - A list like the initial list, this time with polyA tail added onto it. + A list like the initial list, this time with polyA tail added onto it. """ - mature_rna_list = [(i[0], poly_a_generator(i[1])) for i in fasta_list] + mature_rna_list = [ + (i[0], poly_a_generator(i[1], poly_a_length)) for i in fasta_list + ] return mature_rna_list diff --git a/sequence_extractor/pre_bedtools.py b/sequence_extractor/pre_bedtools.py new file mode 100644 index 0000000000000000000000000000000000000000..98142f94b7670576d7efe866dcf9a5ee3a3833d9 --- /dev/null +++ b/sequence_extractor/pre_bedtools.py @@ -0,0 +1,47 @@ +"""Prepare gtf file for bedtools. + +This script defines a BED from exon annotation in a GTF, to get exon +coordinates for use in bedtools. It also ensures that the concatenation happens +in the correct order, regardless of the strandedness of the transcript. 
+ +Args: + GTF file + +Returns: + BED file with the format: + chr, start, end, transcript_id, score, strand, gene_id +""" +import pandas as pd # type: ignore +from gtfparse import read_gtf # type: ignore + + +def pre_bedtools_mode(args): + """Execute the 'pre_bedtools' mode.""" + gtf = read_gtf(args.input_gtf_file, result_type="pandas") + gtf_exons = gtf[gtf["feature"] == "exon"] + gtf_exons = gtf_exons[ + ["seqname", "start", "end", + "transcript_id", "score", + "strand", "gene_id"] + ] + + gtf_df_neg = gtf_exons[gtf_exons["strand"] == "-"] + gtf_df_neg = ( + gtf_df_neg.sort_values(["transcript_id", "start"], ascending=False) + .groupby("transcript_id") + .head(len(gtf_df_neg.transcript_id)) + ) + + gtf_df_pos = gtf_exons[gtf_exons["strand"] == "+"] + gtf_df_pos = ( + gtf_df_pos.sort_values(["transcript_id", "start"], ascending=True) + .groupby("transcript_id") + .head(len(gtf_df_pos.transcript_id)) + ) + + if args.output_bed_file is None: + args.output_bed_file = "default_output.bed" + + pd.concat([gtf_df_pos, gtf_df_neg]).to_csv( + args.output_bed_file, sep="\t", index=False + ) # gtf_df_pos and gtf_df_neg must be dataframes diff --git a/setup.py b/setup.py index e6b45b580ea92a073bba154ec8ad7365ece02f1c..900b047e7b1602ad39d253eafb94cd85c31eaf69 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,29 @@ -from setuptools import setup, find_packages +"""Set up project.""" from pathlib import Path +from setuptools import setup, find_packages project_root_dir = Path(__file__).parent.resolve() +with open(project_root_dir / "requirements.txt", + "r", encoding="utf-8") as file: + INSTALL_REQUIRES = file.read().splitlines() -with open(project_root_dir / "requirements.txt", "r", encoding="utf-8") as _file: - INSTALL_REQUIRES = _file.read().splitlines() +URL = ('https://git.scicore.unibas.ch/zavolan_group/' + 'tools/transcript-sequence-extractor') setup( - name='sequence_extractor', + name='transcript-sequence-extractor', + version='0.1.0', + url=URL, + license='MIT', 
author='Samuel Mondal', author_email='samuel.mondal@unibas.ch', - url='https://git.scicore.unibas.ch/zavolan_group/tools/transcript-sequence-extractor', - license='MIT', - version='0.0.1', - description='Extracts transcript sequences from gtf file and adds polyA tail to the output sequence', + description=('Extracts transcript sequences from gtf file' + 'and adds polyA tail to the output sequence'), packages=find_packages(), install_requires=INSTALL_REQUIRES, - entrypoints={ - 'console_scripts': ['sequence_extractor=sequence_extractor.cli:main'] + entry_points={ + 'console_scripts': [ + 'sequence-extractor=sequence_extractor.cli:main' + ] } ) diff --git a/tests/test_exon_concatenation.py b/tests/test_exon_concatenation.py index 01cbd49eeb0c0ee5acd4e07bef6a4a75bd13e7af..9c8c44b77b1a2996548f3cf24ae6b25428e06c57 100644 --- a/tests/test_exon_concatenation.py +++ b/tests/test_exon_concatenation.py @@ -1,9 +1,28 @@ -import pytest -import exon_concatenation from exon_concatenation +"""Test exon_concatenation.py.""" +from pathlib import Path +from sequence_extractor.exon_concatenation import exon_concatenation + +test_dir = Path(__file__).parent.resolve() + +test_fasta_1 = test_dir / "test_files" / "test_1.fa" +test_fasta_2 = test_dir / "test_files" / "test_2.fa" -test_fasta_1 = "test_files/test_1.fa" -test_fasta_2 = "test_files/test_2.fa" def test_exon_concatenation(): - assert exon_concatenation(test_fasta_1) == expected_list_of_tuples - assert exon_concatenation(test_fasta_2) == expected + """Test exon_concatenation function.""" + # Test for test_fasta_1 + expected_fasta_1 = [ + (">ENST00000673477", "TTTCGCCTGCGCAGTGGTCCTGGCCACCGGCTCGCGGCGCGTGGAGGCTGCTCCCAGCCGCGCCCGAGTCAGACTCGGGTGGGGGTCCCGGTTACGCCAAGGAGGCCCTGAATCTGGCGCAGATGCAGGAGCAGACGCTGCAGTTGGAGCAACAGTCCAAGCTCAAA"), + (">ENST00000378391", "AAATACTGACGGACGTGGAAGTGTCGCCCCAGGAAGGCTGCATCACAAAGTCTCCGAAGACCTGGGCAGTGAGAAGTTCTGCGTGGATGCAAATCAGGCGGGGG"), + ] + + result_fasta_1 = exon_concatenation(test_fasta_1) + assert 
result_fasta_1 == expected_fasta_1 + + # Test for test_fasta_2 + expected_fasta_2 = [ + (">ENST00000673477", "ACGGCTGGCACCTTGTTTGGGGAAGGATTCCGTGCCTTTGTGACAGACCGGGACAAAGTGACTGGCTGGGCTGACGCTGCTGGCTGTCGGGGTCTACTCAGCCAAGAATGCGATCAGCCGGCGGCTCCTCAGTCGACCCCAGGACGTGCTGGAGGGTGTTGTGCTTAGT"), + ] + + result_fasta_2 = exon_concatenation(test_fasta_2) + assert result_fasta_2 == expected_fasta_2 diff --git a/tests/test_files/test.gtf b/tests/test_files/test.gtf new file mode 100644 index 0000000000000000000000000000000000000000..7342b86ed26a4ce8e0f657faf49c5f7080ecf16f --- /dev/null +++ b/tests/test_files/test.gtf @@ -0,0 +1,144 @@ +seqname source feature start end score strand frame free_text +1 havana exon 1980 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "1"; exon_id "ENSE00001890219"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1500 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "6"; exon_id "ENSE00003502542"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1200 1250 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "9"; exon_id "ENSE00002030414"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_0"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1980 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "1"; exon_id "ENSE00001890219"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1400 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "7"; exon_id "ENSE00003553898"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . 
- . gene_id "GENE2"; transcript_id "TRANSCRIPT2_1"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1980 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "1"; exon_id "ENSE00001890219"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1600 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "5"; exon_id "ENSE00003475637"; transcript_support_level "1"; +1 havana exon 1500 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "6"; exon_id "ENSE00003502542"; transcript_support_level "1"; +1 havana exon 1400 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "7"; exon_id "ENSE00003553898"; transcript_support_level "1"; +1 havana exon 1300 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "8"; exon_id "ENSE00003621279"; transcript_support_level "1"; +1 havana exon 1200 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "9"; exon_id "ENSE00002030414"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_2"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1980 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "1"; exon_id "ENSE00001890219"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1200 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "9"; exon_id "ENSE00002030414"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_3"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1980 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "1"; exon_id "ENSE00001890219"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1600 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "5"; exon_id "ENSE00003475637"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1300 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "8"; exon_id "ENSE00003621279"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_4"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1600 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "5"; exon_id "ENSE00003475637"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1400 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "7"; exon_id "ENSE00003553898"; transcript_support_level "1"; +1 havana exon 1300 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "8"; exon_id "ENSE00003621279"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_5"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1900 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "2"; exon_id "ENSE00003507205"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1400 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "7"; exon_id "ENSE00003553898"; transcript_support_level "1"; +1 havana exon 1300 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "8"; exon_id "ENSE00003621279"; transcript_support_level "1"; +1 havana exon 1200 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "9"; exon_id "ENSE00002030414"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_6"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1200 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "9"; exon_id "ENSE00002030414"; transcript_support_level "1"; +1 havana exon 1100 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "10"; exon_id "ENSE00001935574"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_7"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1800 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "3"; exon_id "ENSE00003477500"; transcript_support_level "1"; +1 havana exon 1700 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "4"; exon_id "ENSE00003565697"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_8"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1600 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "5"; exon_id "ENSE00003475637"; transcript_support_level "1"; +1 havana exon 1500 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "6"; exon_id "ENSE00003502542"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1100 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "10"; exon_id "ENSE00001935574"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_9"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1600 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "5"; exon_id "ENSE00003475637"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1051 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "10"; exon_id "ENSE00001935574_9"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_10"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1500 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "6"; exon_id "ENSE00003502542"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1100 1150 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "10"; exon_id "ENSE00001935574"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_11"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; +1 havana exon 1951 2000 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "1"; exon_id "ENSE00001890219_0"; transcript_support_level "1"; +1 havana exon 1851 1950 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "2"; exon_id "ENSE00003507205_1"; transcript_support_level "1"; +1 havana exon 1751 1850 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "3"; exon_id "ENSE00003477500_2"; transcript_support_level "1"; +1 havana exon 1651 1750 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "4"; exon_id "ENSE00003565697_3"; transcript_support_level "1"; +1 havana exon 1551 1650 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "5"; exon_id "ENSE00003475637_4"; transcript_support_level "1"; +1 havana exon 1451 1550 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "6"; exon_id "ENSE00003502542_5"; transcript_support_level "1"; +1 havana exon 1351 1450 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "7"; exon_id "ENSE00003553898_6"; transcript_support_level "1"; +1 havana exon 1251 1350 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "8"; exon_id "ENSE00003621279_7"; transcript_support_level "1"; +1 havana exon 1151 1250 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "9"; exon_id "ENSE00002030414_8"; transcript_support_level "1"; +1 havana exon 1100 1150 . - . gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "10"; exon_id "ENSE00001935574"; transcript_support_level "1"; +1 havana exon 1000 1050 . - . 
gene_id "GENE2"; transcript_id "TRANSCRIPT2_12"; exon_number "11"; exon_id "ENSE00001843071"; transcript_support_level "1"; diff --git a/tests/test_files/test_1.fa b/tests/test_files/test_1.fa index f92bb2b84f79e445dcda17c45ae0e6f34aa64c82..e8a3b1eb26ed5ca5cc9a0ea92272b03c04c9aac9 100644 --- a/tests/test_files/test_1.fa +++ b/tests/test_files/test_1.fa @@ -1,8 +1,8 @@ >ENST00000673477::1:1471765-1472089 -TTTCGCCTGCGCAGTGGTCCTGGCCACCGGCTCGCGGCGCGTGGAGGCTGCTCCCAGCCGCGCCCGAGTCAGACTCGGGTGGGGGTCCCGGCGGCGGTAGCGGCGGCGGCGGTGCGAGCATGTCGTGGCTCTTCGGCGTTAACAAGGGCCCCAAGGGTGAAGGCGCGGGGCCGCCGCCGCCTTTGCCGCCCGCGCAGCCCGGGGCCGAGGGCGGCGGGGACCGCGGTTTGGGAGACCGGCCGGCGCCCAAGGACAAATGGAGCAACTTCGACCCCACCGGCCTGGAGCGCGCCGCCAAGGCGGCGCGCGAGCTGGAGCACTCGC +TTTCGCCTGCGCAGTGGTCCTGGCCACCGGCTCGCGGCGCGTGGAGGCTGCTCCCAGCCGCGCCCGAGTCAGACTCGGGTGGGGGTCCCGG >ENST00000673477::1:1477274-1477350 TTACGCCAAGGAGGCCCTGAATCTGGCGCAGATGCAGGAGCAGACGCTGCAGTTGGAGCAACAGTCCAAGCTCAAA >ENST00000378391::1:3244087-3244137 AAATACTGACGGACGTGGAAGTGTCGCCCCAGGAAGGCTGCATCACAAAG >ENST00000378391::1:3385152-3385286 -TCTCCGAAGACCTGGGCAGTGAGAAGTTCTGCGTGGATGCAAATCAGGCGGGGGCTGGCAGCTGGCTCAAGTACATCCGTGTGGCGTGCTCCTGCGATGACCAGAACCTCACCATGTGTCAGATCAGTGAGCAG +TCTCCGAAGACCTGGGCAGTGAGAAGTTCTGCGTGGATGCAAATCAGGCGGGGG diff --git a/tests/test_files/test_2.fa b/tests/test_files/test_2.fa index 7b8ebb26c52072dc4431535d5a0eae56fee268ee..6acd1f1bf50641b3eb5bddc4ce164edaae77ca05 100644 --- a/tests/test_files/test_2.fa +++ b/tests/test_files/test_2.fa @@ -1,7 +1,7 @@ >ENST00000673477::1:1482545-1482614 -ACGGCTGGCACCTTGTTTGGGGAAGGATTCCGTGCCTTTGTGACAGACCGGGACAAAGTGACAGCCACG +ACGGCTGGCACCTTGTTTGGGGAAGGATTCCGTGCCTTTGTGACAGACCGGGACAAAGTGAC >ENST00000673477::1:1485016-1485171 -TGGCTGGGCTGACGCTGCTGGCTGTCGGGGTCTACTCAGCCAAGAATGCGACAGCCGTCACTGGCCGCTTCATCGAGGCTCGGCTGGGGAAGCCGTCCCTAGTGAGGGAGACGTCCCGCATCACGGTGCTGGAGGCGCTGCGGCACCCCATCCAG +TGGCTGGGCTGACGCTGCTGGCTGTCGGGGTCTACTCAGCCAAGAATGCGA >ENST00000673477::1:1485782-1485838 
TCAGCCGGCGGCTCCTCAGTCGACCCCAGGACGTGCTGGAGGGTGTTGTGCTTAGT >ENST00000673477::1:1486110-1486235 diff --git a/tests/test_poly_a.py b/tests/test_poly_a.py index 91f90c19937d45778c56b8b3fdeafecd31fe9ea5..c22e83322afaa3b551d1aa0e10512b0d81622211 100644 --- a/tests/test_poly_a.py +++ b/tests/test_poly_a.py @@ -1,12 +1,26 @@ -import pytest -from poly_a import poly_a_generator, poly_a_addition_to_fasta_list - +"""Test poly_a.py script.""" +import numpy as np +from sequence_extractor.poly_a import ( + poly_a_generator, poly_a_addition_to_fasta_list +) def test_poly_a_generator(): - assert poly_a_generator(exon_string) == exon_and_polya + """Test poly_a_generator function.""" + exon_string = 'ACGGCTGGCACCTTGTTTGGGGAAGGATTCCGTGCCTTTG' + poly_a_length = np.random.randint(100, 250) + test_string = poly_a_generator(exon_string, poly_a_length) + + assert len(test_string) == sum([len(exon_string), poly_a_length]) def test_poly_a_addition_to_fasta_list(): - assert poly_a_addition_to_fasta_list(list_of_tuples) == manipulated_list_of_tuples + """Test poly_a_addition_to_fasta_list function.""" + exon_list = [('>ENST00000673477', 'ACGGCTGGCACCTTGTTTGGGGAAG'), + ('>ENST00000378391', 'AAATACTGACGGACGTGG')] + poly_a_length = np.random.randint(100, 250) + test_list = poly_a_addition_to_fasta_list(exon_list, poly_a_length) + + assert len(test_list[0][1]) == sum([len(exon_list[0][1]), poly_a_length]) + assert len(test_list[1][1]) == sum([len(exon_list[1][1]), poly_a_length])