diff --git a/.gitignore b/.gitignore
index 2e906cc871537e0816ae7fd4437ade59afa2986c..7a8baad3fbb9055363fc60eda6a2113c091f85f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 
 # ignore ALL files in ANY directory named temp
 temp/ 
+__pycache__
+output_files
\ No newline at end of file
diff --git a/README.md b/README.md
index 05f0a157c6478ab0f3b4dda1e979ed88b339850d..fee84f77ae53f93f3ef3466c1261a323e7f336a4 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Transcript Sampler
 
-This workflow sample representative transcripts per gene, in proportion to their relative abundance levels. Sampling is done by poisson sampling. 
+This workflow samples representative transcripts per gene, in proportion to their relative abundance levels. Transcripts are drawn using Poisson sampling.
 
 **This workflow takes as input:**
  - Path to genome annotation file in gtf format
@@ -15,10 +15,8 @@ This workflow sample representative transcripts per gene, in proportion to their
  
  **The workflow can be run via the command line as**
  
- `python scripts/new-exe.py --annotation {gtf input file} --output_csv {output csv file} --transcript_number {number of transcripts} --output_gtf {output gtf file} --input_csv {input csv file}`
-
- Exemple : 
-
- `python scripts\new_exe.py --annotation "input_files\test.gtf" --output_csv "output_files\output_csv.txt" --transcript_number 50  --output_gtf "output_files\output_gtf.gtf" --input_csv "input_files/expression.csv"`
+ `python transcript_sampler/new_exe.py --input_gtf={gtf input file} --input_csv={input csv file} --output_gtf={output gtf file} --output_csv={output csv file} --n_to_sample={number of transcripts}`
 
+ Example:
 
+ `python transcript_sampler/new_exe.py --input_gtf="input_files/test.gtf" --input_csv="input_files/expression.csv" --output_gtf="output_files/output.gtf" --output_csv="output_files/output.csv" --n_to_sample=100`
diff --git a/images/Transcript_sampling__architecture.png b/images/Transcript_sampling__architecture.png
deleted file mode 100644
index 59dab4f65555d376d09385f55461e879d7b27527..0000000000000000000000000000000000000000
Binary files a/images/Transcript_sampling__architecture.png and /dev/null differ
diff --git a/input_files/test.gtf b/input_files/test.gtf
index bac42f56831fc4495d5f7680933fa3dc842b77e4..36d3fb5bce32177b4ac2512149b25571b43a7acf 100644
--- a/input_files/test.gtf
+++ b/input_files/test.gtf
@@ -1,183 +1,183 @@
-#!genome-build GRCh38.p13
-#!genome-version GRCh38
-#!genome-date 2013-12
-#!genome-build-accession GCA_000001405.28
-#!genebuild-last-updated 2022-04
-1	ensembl_havana	gene	1471765	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
-1	ensembl_havana	transcript	1471765	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
-1	ensembl_havana	exon	1471765	14720800009	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003889014"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1471885	1472089	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	start_codon	1471885	1471887	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
-1	ensembl_havana	exon	1477274	1477350	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003467707"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1477274	1477350	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1478644	1478745	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003569130"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1478644	1478745	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003608502"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1479049	1479108	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003474888"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1480867	1480936	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003654064"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1482138	1482303	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003510521"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1482545	1482614	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1485016	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003459370"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1485016	1485171	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003655926"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1485782	1485838	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003594545"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1486110	1486235	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003892109"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1486544	1486668	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003689846"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1487863	1487914	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003670332"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1489204	1489274	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003505365"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1490257	1490424	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "15"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003497242"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1490563	1490671	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "15"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	exon	1495485	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003892597"; exon_version "1"; tag "basic";
-1	ensembl_havana	CDS	1495485	1495814	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
-1	ensembl_havana	stop_codon	1495815	1495817	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
-1	ensembl_havana	five_prime_utr	1471765	1471884	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
-1	ensembl_havana	three_prime_utr	1495818	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
-1	havana	transcript	1478026	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
-1	havana	exon	1478026	1478745	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001943609"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003589422"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003672769"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003661157"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003517812"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1485016	1485170001	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003542737"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1485782	14858380000	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003479480"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1486110	1486235000	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003503434"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003528975"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003611023"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003653402"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003486643"; exon_version "1"; transcript_support_level "1";
-1	havana	exon	1495485	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001863816"; exon_version "1"; transcript_support_level "1";
-1	havana	transcript	1479049	1482662	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "5";
-1	havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003589422"; exon_version "1"; transcript_support_level "5";
-1	havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003672769"; exon_version "1"; transcript_support_level "5";
-1	havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003661157"; exon_version "1"; transcript_support_level "5";
-1	havana	exon	1482545	1482662	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003614529"; exon_version "1"; transcript_support_level "5";
-1	havana	transcript	1483485	1496202	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
-1	havana	exon	1483485	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001893282"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003479480"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003503434"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003528975"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003611023"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1489692	1489811	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001885858"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003653402"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003486643"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1495485	1496202	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003689276"; exon_version "1"; transcript_support_level "2";
-1	havana	transcript	1484569	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
-1	havana	exon	1484569	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001844843"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1485782	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001818637"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1489204	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001832340"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001844973"; exon_version "1"; transcript_support_level "2";
-1	ensembl	transcript	1471784	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1471784	1472089	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00001833190"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1471885	1472089	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	start_codon	1471885	1471887	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1477274	1477350	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003467707"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1477274	1477350	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1480867	1480908	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003889337"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1480867	1480908	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1482266	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003889634"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1482266	1482303	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003510521"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1482545	1482614	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1485016	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003459370"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1485016	1485171	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003655926"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1485782	1485838	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003594545"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1486110	1486235	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003662125"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	CDS	1486544	1486666	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003528975"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003611023"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003653402"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003486643"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	exon	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00001844973"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	five_prime_utr	1471784	1471884	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1486667	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	ensembl	three_prime_utr	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-1	havana	gene	2212523	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; gene_source "havana"; gene_biotype "lncRNA";
-1	havana	transcript	2212523	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "3";
-1	havana	exon	2212523	2212644	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; exon_number "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001603085"; exon_version "2"; tag "basic"; transcript_support_level "3";
-1	havana	exon	2220535	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; exon_number "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001607640"; exon_version "2"; tag "basic"; transcript_support_level "3";
-1	havana	gene	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
-1	havana	transcript	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; transcript_id "ENST00000416931"; transcript_version "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTND1P23-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; transcript_id "ENST00000416931"; transcript_version "1"; exon_number "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTND1P23-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001797039"; exon_version "1"; tag "basic"; transcript_support_level "NA";
-1	havana	gene	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene";
-1	havana	transcript	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; transcript_id "ENST00000428803"; transcript_version "2"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RPL7P7-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; transcript_id "ENST00000428803"; transcript_version "2"; exon_number "1"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RPL7P7-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001776158"; exon_version "2"; tag "basic"; transcript_support_level "NA";
-1	havana	gene	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
-1	havana	transcript	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; transcript_id "ENST00000416718"; transcript_version "2"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTCO3P12-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; transcript_id "ENST00000416718"; transcript_version "2"; exon_number "1"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTCO3P12-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001720008"; exon_version "2"; tag "basic"; transcript_support_level "NA";
-1	havana	gene	182696	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
-1	havana	transcript	182696	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	182696	182746	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "1"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003759020"; exon_version "2"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	183132	183216	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003759581"; exon_version "2"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	183494	183571	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "3"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003804405"; exon_version "1"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	183740	183901	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "4"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003807458"; exon_version "1"; tag "basic"; transcript_support_level "NA";
-1	havana	exon	183981	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "5"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003760199"; exon_version "2"; tag "basic"; transcript_support_level "NA";
-1	havana	gene	2581560	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; gene_source "havana"; gene_biotype "lncRNA";
-1	havana	transcript	2581560	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "5";
-1	havana	exon	2581560	25816500000	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001795368"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	2583370	2583495	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001694676"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	2584125	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001601095"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	ensembl_havana	gene	3069168	3438621	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
-1	havana	transcript	3069168	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3069168	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002048533"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3069260	3069296	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	start_codon	3069260	3069262	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3186125	3186474	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00001754112"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3186125	3186474	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3244087	3244137	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "3"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003480863"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3244087	3244137	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "3"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3385149	3385286	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "4"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002034212"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3385149	3385286	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "4"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3396491	3396593	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700221"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3396491	3396593	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3402791	3402998	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003696962"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3402791	3402998	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3404739	3404886	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "7"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700688"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3404739	3404886	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "7"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3405495	3405648	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "8"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700645"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3405495	3405648	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "8"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3411384	3412800	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "9"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003695658"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3411384	3412800	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "9"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3414560	3414647	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "10"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003701451"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3414560	3414647	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "10"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3417828	3417997	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "11"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003699052"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3417828	3417997	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "11"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3418667	3418744	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "12"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003698430"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3418667	3418744	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "12"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3425581	3425750	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "13"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003699796"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3425581	3425750	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "13"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3426051	3426225	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "14"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003701891"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3426051	3426225	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "14"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3430872	3431108	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "15"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003698226"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3430872	3431108	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "15"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	exon	3433677	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002081080"; exon_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	CDS	3433677	3433686	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
-1	havana	stop_codon	3433687	3433689	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
-1	havana	five_prime_utr	3069168	3069259	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
-1	havana	three_prime_utr	3433690	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
-1	havana	transcript	3069183	3186591	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
-1	havana	exon	3069183	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003700259"; exon_version "1"; transcript_support_level "2";
-1	havana	exon	3186125	3186591	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003695128"; exon_version "1"; transcript_support_level "2";
-1	havana	transcript	3069197	3435421	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; tag "basic"; transcript_support_level "1";
-1	havana	exon	3069197	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; exon_id "ENSE00001222906"; exon_version "5"; tag "basic"; transcript_support_level "1";
-1	havana	CDS	3069260	3069296	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; protein_id "ENSP00000367643"; protein_version "2"; tag "basic"; transcript_support_level "1";
-1	havana	start_codon	3069260	3069262	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; tag "basic"; transcript_support_level "1";
-1	havana	exon	3186125	31864740000	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; exon_id "ENSE00001754112"; exon_version "1"; tag "basic"; transcript_support_level "1";
\ No newline at end of file
+#!genome-build GRCh38.p13
+#!genome-version GRCh38
+#!genome-date 2013-12
+#!genome-build-accession GCA_000001405.28
+#!genebuild-last-updated 2022-04
+1	ensembl_havana	gene	1471765	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
+1	ensembl_havana	transcript	1471765	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
+1	ensembl_havana	exon	1471765	14720800009	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003889014"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1471885	1472089	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	start_codon	1471885	1471887	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
+1	ensembl_havana	exon	1477274	1477350	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003467707"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1477274	1477350	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1478644	1478745	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003569130"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1478644	1478745	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003608502"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1479049	1479108	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003474888"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1480867	1480936	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003654064"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1482138	1482303	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003510521"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1482545	1482614	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1485016	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003459370"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1485016	1485171	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003655926"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1485782	1485838	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003594545"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1486110	1486235	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003892109"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1486544	1486668	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003689846"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1487863	1487914	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003670332"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1489204	1489274	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003505365"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1490257	1490424	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "15"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003497242"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1490563	1490671	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "15"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	exon	1495485	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; exon_id "ENSE00003892597"; exon_version "1"; tag "basic";
+1	ensembl_havana	CDS	1495485	1495814	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; protein_id "ENSP00000500094"; protein_version "1"; tag "basic";
+1	ensembl_havana	stop_codon	1495815	1495817	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; exon_number "16"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
+1	ensembl_havana	five_prime_utr	1471765	1471884	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
+1	ensembl_havana	three_prime_utr	1495818	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000673477"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-206"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30"; tag "basic";
+1	havana	transcript	1478026	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "1";
+1	havana	exon	1478026	1478745	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001943609"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003589422"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003672769"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003661157"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003517812"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1485016	1485170001	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003542737"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1485782	14858380000	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003479480"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1486110	1486235000	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003503434"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003528975"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003611023"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003653402"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003486643"; exon_version "1"; transcript_support_level "1";
+1	havana	exon	1495485	1497848	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000472194"; transcript_version "6"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-203"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001863816"; exon_version "1"; transcript_support_level "1";
+1	havana	transcript	1479049	1482662	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "5";
+1	havana	exon	1479049	1479108	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003589422"; exon_version "1"; transcript_support_level "5";
+1	havana	exon	1480867	1480936	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003672769"; exon_version "1"; transcript_support_level "5";
+1	havana	exon	1482138	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003661157"; exon_version "1"; transcript_support_level "5";
+1	havana	exon	1482545	1482662	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000378736"; transcript_version "3"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003614529"; exon_version "1"; transcript_support_level "5";
+1	havana	transcript	1483485	1496202	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
+1	havana	exon	1483485	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001893282"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003479480"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003503434"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003528975"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003611023"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1489692	1489811	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001885858"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003653402"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003486643"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1495485	1496202	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000485748"; transcript_version "5"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-205"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003689276"; exon_version "1"; transcript_support_level "2";
+1	havana	transcript	1484569	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
+1	havana	exon	1484569	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001844843"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1485782	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001818637"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003513162"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1489204	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001832340"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000474481"; transcript_version "1"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-204"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00001844973"; exon_version "1"; transcript_support_level "2";
+1	ensembl	transcript	1471784	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1471784	1472089	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00001833190"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1471885	1472089	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	start_codon	1471885	1471887	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "1"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1477274	1477350	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003467707"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1477274	1477350	.	+	2	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "2"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1480867	1480908	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003889337"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1480867	1480908	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "3"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1482266	1482303	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003889634"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1482266	1482303	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "4"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1482545	1482614	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003510521"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1482545	1482614	.	+	1	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "5"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1485016	1485171	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003459370"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1485016	1485171	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "6"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1485782	1485838	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003655926"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1485782	1485838	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "7"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1486110	1486235	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003594545"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1486110	1486235	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1486544	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003662125"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	CDS	1486544	1486666	.	+	0	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "9"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; protein_id "ENSP00000311766"; protein_version "8"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "10"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003528975"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "11"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003611023"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "12"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003653402"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "13"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00003486643"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	exon	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; exon_number "14"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; exon_id "ENSE00001844973"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	five_prime_utr	1471784	1471884	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1486667	1486668	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1487863	1487914	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1489204	1489274	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1490257	1490424	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1490563	1490671	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	ensembl	three_prime_utr	1495485	1496201	.	+	.	gene_id "ENSG00000160072"; gene_version "20"; transcript_id "ENST00000308647"; transcript_version "8"; gene_name "ATAD3B"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "ATAD3B-201"; transcript_source "ensembl"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+1	havana	gene	2212523	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; gene_source "havana"; gene_biotype "lncRNA";
+1	havana	transcript	2212523	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "3";
+1	havana	exon	2212523	2212644	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; exon_number "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001603085"; exon_version "2"; tag "basic"; transcript_support_level "3";
+1	havana	exon	2220535	2220738	.	+	.	gene_id "ENSG00000234396"; gene_version "3"; transcript_id "ENST00000442483"; transcript_version "2"; exon_number "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001607640"; exon_version "2"; tag "basic"; transcript_support_level "3";
+1	havana	gene	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
+1	havana	transcript	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; transcript_id "ENST00000416931"; transcript_version "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTND1P23-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	629062	629433	.	+	.	gene_id "ENSG00000225972"; gene_version "1"; transcript_id "ENST00000416931"; transcript_version "1"; exon_number "1"; gene_name "MTND1P23"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTND1P23-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001797039"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1	havana	gene	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene";
+1	havana	transcript	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; transcript_id "ENST00000428803"; transcript_version "2"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RPL7P7-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	8786211	8786913	.	-	.	gene_id "ENSG00000224315"; gene_version "2"; transcript_id "ENST00000428803"; transcript_version "2"; exon_number "1"; gene_name "RPL7P7"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RPL7P7-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001776158"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+1	havana	gene	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
+1	havana	transcript	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; transcript_id "ENST00000416718"; transcript_version "2"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTCO3P12-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	634376	634922	.	+	.	gene_id "ENSG00000198744"; gene_version "5"; transcript_id "ENST00000416718"; transcript_version "2"; exon_number "1"; gene_name "MTCO3P12"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "MTCO3P12-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001720008"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+1	havana	gene	182696	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
+1	havana	transcript	182696	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	182696	182746	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "1"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003759020"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	183132	183216	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "2"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003759581"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	183494	183571	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "3"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003804405"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	183740	183901	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "4"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003807458"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1	havana	exon	183981	184174	.	+	.	gene_id "ENSG00000279928"; gene_version "2"; transcript_id "ENST00000624431"; transcript_version "2"; exon_number "5"; gene_name "DDX11L17"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "DDX11L17-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003760199"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+1	havana	gene	2581560	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; gene_source "havana"; gene_biotype "lncRNA";
+1	havana	transcript	2581560	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "5";
+1	havana	exon	2581560	25816500000	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001795368"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	2583370	2583495	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "2"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001694676"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	2584125	2584533	.	+	.	gene_id "ENSG00000228037"; gene_version "1"; transcript_id "ENST00000424215"; transcript_version "1"; exon_number "3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001601095"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	ensembl_havana	gene	3069168	3438621	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
+1	havana	transcript	3069168	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3069168	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002048533"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3069260	3069296	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	start_codon	3069260	3069262	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3186125	3186474	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00001754112"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3186125	3186474	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3244087	3244137	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "3"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003480863"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3244087	3244137	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "3"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3385149	3385286	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "4"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002034212"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3385149	3385286	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "4"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3396491	3396593	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700221"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3396491	3396593	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3402791	3402998	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003696962"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3402791	3402998	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3404739	3404886	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "7"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700688"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3404739	3404886	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "7"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3405495	3405648	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "8"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003700645"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3405495	3405648	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "8"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3411384	3412800	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "9"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003695658"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3411384	3412800	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "9"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3414560	3414647	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "10"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003701451"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3414560	3414647	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "10"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3417828	3417997	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "11"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003699052"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3417828	3417997	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "11"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3418667	3418744	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "12"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003698430"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3418667	3418744	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "12"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3425581	3425750	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "13"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003699796"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3425581	3425750	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "13"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3426051	3426225	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "14"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003701891"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3426051	3426225	.	+	2	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "14"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3430872	3431108	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "15"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003698226"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3430872	3431108	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "15"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	exon	3433677	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002081080"; exon_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	CDS	3433677	3433686	.	+	1	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000426975"; protein_version "1"; tag "basic"; transcript_support_level "5";
+1	havana	stop_codon	3433687	3433689	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; exon_number "16"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
+1	havana	five_prime_utr	3069168	3069259	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
+1	havana	three_prime_utr	3433690	3434342	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000511072"; transcript_version "5"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-206"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5";
+1	havana	transcript	3069183	3186591	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; transcript_support_level "2";
+1	havana	exon	3069183	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003700259"; exon_version "1"; transcript_support_level "2";
+1	havana	exon	3186125	3186591	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000607632"; transcript_version "1"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-210"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003695128"; exon_version "1"; transcript_support_level "2";
+1	havana	transcript	3069197	3435421	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; tag "basic"; transcript_support_level "1";
+1	havana	exon	3069197	3069296	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; exon_id "ENSE00001222906"; exon_version "5"; tag "basic"; transcript_support_level "1";
+1	havana	CDS	3069260	3069296	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; protein_id "ENSP00000367643"; protein_version "2"; tag "basic"; transcript_support_level "1";
+1	havana	start_codon	3069260	3069262	.	+	0	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "1"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; tag "basic"; transcript_support_level "1";
+1	havana	exon	3186125	31864740000	.	+	.	gene_id "ENSG00000142611"; gene_version "17"; transcript_id "ENST00000378391"; transcript_version "6"; exon_number "2"; gene_name "PRDM16"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "PRDM16-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS44048"; exon_id "ENSE00001754112"; exon_version "1"; tag "basic"; transcript_support_level "1";
diff --git a/scripts/exon_length_filter.py b/scripts/exon_length_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..2aeb302af55566a5292f7283012bf21ce064d5e7
--- /dev/null
+++ b/scripts/exon_length_filter.py
@@ -0,0 +1,201 @@
+# Exon length filter #
+"""Exon length filter
+Version 2.1.0"""
+# Called Packages #
+import re
+import os
+import transcript_extractor as te
+
+python_version = "3.7.13"  # only referenced by the commented-out te.version_control call in the __main__ guard
+module_list = [re, os]  # the imported module objects; only referenced by that same commented-out call
+modul_name_list = ["re", "os"]  # names of the modules in module_list, in the same order
+
+# Functions #
+
+
+def exon_length_calculator(entry):
+    """Return the difference between the end and start coordinate of an entry.
+
+    The first two tab-delimited, purely numeric fields (1-15 digits) of the
+    entry are taken as start and end coordinate.
+
+    Expected inputs:
+        entry: str #one line of a gtf file
+    Output:
+        end - start as int, or the string "NA" (after printing a warning)
+        if fewer than two such fields are found or entry is not a str
+    """
+    find_exon_coordinates = re.compile(r"\t\d{1,15}\t")
+    try:
+        start_match = find_exon_coordinates.search(entry)
+        # The tab closing the start field also opens the end field, so the
+        # second search resumes on that tab instead of behind it.
+        end_match = find_exon_coordinates.search(entry, start_match.end()-1)
+        # int() ignores the tabs that surround the matched digits
+        start_coordinates = int(start_match.group())
+        end_coordinates = int(end_match.group())
+    except (AttributeError, TypeError):
+        # AttributeError: a search returned None; TypeError: entry is no str
+        print("\n\nIn the following enty only one or no valid coordinates could be found:\n", entry, "the value will be set to NA")
+        return "NA"
+    # NOTE(review): gtf coordinates are inclusive, so end-start is one base short of the exon length - confirm intent
+    return end_coordinates-start_coordinates
+
+
+def exon_fider(entry):
+    """This function determines if a given entry belongs to an exon
+    Expected inputs:
+        entry: str #any entry of a gtf file
+    Output:
+        bool #True if the entry contains "exon" enclosed by tab characters,
+        which is how the feature column of an exon entry reads"""
+    # The search term must be a normal string literal: inside the raw string
+    # used before, \t meant a backslash followed by the letter t instead of
+    # a tab, so the term never occurred in a tab-separated gtf line and
+    # every entry was reported as not being an exon.
+    return "\texon\t" in entry
+
+def __longest_transcript_finder(current_exon_length, longest_transcript,
+                                longest_transcript_ID, old_transcript_ID):
+    """Compare a finished transcript with the longest one seen so far.
+
+    Expected inputs:
+        current_exon_length: summed exon length of the finished transcript
+        longest_transcript: summed exon length of the current record holder
+        longest_transcript_ID: ID of the current record holder
+        old_transcript_ID: ID of the finished transcript
+    Output:
+        tuple (0, longest length, ID belonging to it); the 0 is the reset
+        value for the exon length counter of the caller"""
+    if current_exon_length > longest_transcript:
+        # strictly longer, so the finished transcript takes over the record
+        return 0, current_exon_length, old_transcript_ID
+    return 0, longest_transcript, longest_transcript_ID
+
+
+def _representative_transcript_csv(
+        representative_transcript, file_name = "test", deposit_pathway_name =os.getcwd()
+        ):
+    """Write the {gene ID: transcript ID} mapping as "gene,transcript" rows
+    to <deposit_pathway_name>/<file_name>_representative_transcripts.csv."""
+    csv_name = file_name+"_"+"representative_transcripts"+".csv"
+    csv_path = os.path.join(deposit_pathway_name, csv_name)
+    with open(csv_path, "w", encoding="utf-8") as csv_file:
+        for gene_id, transcript_id in representative_transcript.items():
+            csv_file.write(str(gene_id)+","+transcript_id+"\n")
+
+
+def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]}):
+    """Select, for each gene in gen_dict, the candidate transcript with the largest summed exon length.
+    Reads <source_pathway_name>/<file_name>.gtf line by line and returns a dict {gene_ID : transcript_ID}.
+    deposit_pathway_name is not used here. gen_dict and its lists are modified in place (entries are removed).
+    NOTE(review): the change detection below presumably relies on the entries of one gene / one transcript
+    being consecutive in the file - confirm."""
+    bar,start_time = te.bar_builder(length_multiplyer = 3)
+    total_genes = len(gen_dict)
+    gens_done = 0
+    with open(os.path.join(source_pathway_name,file_name+".gtf"), 'r') as f:
+        # State carried from one entry to the next; the empty strings mean "nothing seen yet"
+        old_gen = str()
+        old_transcript_ID = str()
+        representative_transcript = dict()
+        representative_trasnscript_not_found = True
+        longest_transcript_ID = str()
+        current_exon_length = 0
+        longest_transcript = 0
+        percentage_done = 0
+
+        for entry in f:
+            # If te.gene_ID_finder raises, the entry is treated as belonging to the previous gene
+            try:
+                corrent_gen = te.gene_ID_finder(entry)
+            except:
+                corrent_gen = old_gen
+            # A new gene re-enables the search that the single-transcript shortcut below can switch off
+            if corrent_gen != old_gen:
+                representative_trasnscript_not_found = True
+            # Entries are skipped while no gene has been seen yet or while the
+            # search for the current gene is switched off
+            if representative_trasnscript_not_found and corrent_gen != str():
+                if corrent_gen != old_gen:
+                    # The gene changed: evaluate the last transcript of the previous gene and
+                    # store the longest transcript that was found for that gene
+                    current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID)
+                    representative_transcript[old_gen] = longest_transcript_ID
+                    # Remove the finished gene from gen_dict and update the progress bar; if anything in the
+                    # try block raises (e.g. old_gen is not a key of gen_dict) only the gene switch is performed
+                    try:
+                        del gen_dict[old_gen]
+                        old_gen = corrent_gen
+                        gens_done += 1
+                        corrent_percentage_done = (gens_done/total_genes)*100
+                        if corrent_percentage_done > percentage_done+10:
+                            bar,start_time = te.bar_builder(percentage=percentage_done+10,length_multiplyer = 3,start_time=start_time,bar =bar)
+                            percentage_done = int(corrent_percentage_done)
+                    except:
+                        old_gen = corrent_gen
+                    longest_transcript = 0
+                    try:
+                        # A gene that is not a key of gen_dict raises here and the entry is skipped
+                        transcript_IDs = gen_dict[corrent_gen]
+                        if len(gen_dict[corrent_gen]) == 1:
+                            # Intended shortcut for genes with a single candidate transcript.
+                            # NOTE(review): corrent_gen[0] indexes the gene ID, not the transcript list, and the
+                            # assignment rebinds the whole result dict; a failing lookup ends in the except below.
+                            # Probably meant: representative_transcript[corrent_gen] = gen_dict[corrent_gen][0] - confirm
+                            representative_transcript=gen_dict[corrent_gen[0]]
+                            representative_trasnscript_not_found = False
+                            continue
+                    except:
+                        continue
+                # Entries for which te.transcript_ID_finder raises are skipped
+                try:
+                    current_transcript_ID = te.transcript_ID_finder(entry)
+                except:
+                    continue
+                # NOTE(review): for a gene that is not a key of gen_dict, transcript_IDs still holds the
+                # list of an earlier gene, or is unbound if no listed gene was seen before - confirm
+                if current_transcript_ID in transcript_IDs:
+                    if current_transcript_ID != old_transcript_ID:
+                        # First entry of another transcript: evaluate the transcript that just
+                        # ended and drop it from the candidate list (the except covers an absent ID)
+                        current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID)
+                        try:
+                            transcript_IDs.remove(old_transcript_ID)
+                            old_transcript_ID = current_transcript_ID
+                        except:
+                            old_transcript_ID = current_transcript_ID
+                    # Only entries recognised by exon_fider add to the length of the transcript
+                    if exon_fider(entry):
+                        exon_length = exon_length_calculator(entry)
+                        current_exon_length += exon_length
+                    else:
+                        continue
+        # End of file: evaluate the last transcript and store the result of the last gene
+        current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID)
+        representative_transcript[old_gen] = longest_transcript_ID
+    # Drop the entry that was stored under the empty initial gene name
+    del representative_transcript[str()]
+    te.bar_builder(100,length_multiplyer = 3,start_time=start_time,bar =bar)
+    return(representative_transcript)
+
+def exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]}):
+    """This function filters a dictionary of genes and their transcripts by the length of their exons, selects the longest transcript for each gene and returns a dictionary {gene_ID : transcript_ID}.
+    Expected inputs:
+        file_name: str ; default = test #the name of the gtf file you want to look at, without the ".gtf" ending
+        source_pathway_name: str ; default = current work directory #path of the gtf file
+        deposit_pathway_name: str ; default = current work directory #path for files
+        gen_dict:dict{key == gene ID:[transcript IDs that belong to that gene]} #left unmodified"""
+    print("Representative trascipts are filterd based on exon length please wait...")
+    source_pathway_name,deposit_pathway_name = te.__do_pathways_exist__(source_pathway_name,deposit_pathway_name)
+    # _exon_length_filter removes entries from the dictionary it is given; a copy keeps the shared default (and the caller's dictionary) intact for later calls
+    representative_transcript = _exon_length_filter(file_name,source_pathway_name,deposit_pathway_name,{gene: list(transcripts) for gene, transcripts in gen_dict.items()})
+    print("\nRepresentative transcripts collected")
+    return representative_transcript
+
+
+if __name__ == "__main__":
+    # te.version_control(module_list,modul_name_list,python_version)
+    exon_length_filter()
+
+# The guard above allows the file to be executed on its own as a script
diff --git a/scripts/find_representative_transcripts.py b/scripts/find_representative_transcripts.py
deleted file mode 100644
index ee0dbc62de1d64c33d49184d7310cde8509bbdb2..0000000000000000000000000000000000000000
--- a/scripts/find_representative_transcripts.py
+++ /dev/null
@@ -1,246 +0,0 @@
-#### Find representative transcripts ####
-"""Version 1.1.1"""
-
-### Imports ### 
-import argparse
-
-### Functions ###
-    
-def attributs_converter(attributs):
-    """
-    This funtion converts the "unstrucktured" ;-seperated part of he line into a list of identifyers and coresponding data the struckture of
-    which can be used ot find the data easyly e.g the index of the identifier transcrip_id + 1 will give the trasncript id of the current gene
-    Input: 
-        attributs = str() #the unstrucktured part of the entry
-    Output:
-        attributs = list() # cleand list with the characterritsics discribed above
-    """
-    attributs = attributs.replace("\"","")
-    attributs = attributs.replace(";","")
-    attributs = attributs.replace("\\n","")
-    attributs =attributs.split(" ")
-    
-    return(attributs)
-
-def find_in_attributs (attributs,look_for):
-    """
-    This function finds a key word and used that to lokat the value of that key word e.g key = gene_id, value = 'ENSMUSG00002074970',
-    this works as they are next to each other in the attributs list. 
-    Inputs:
-        sub_enty = list() 
-        look_fore = str() #string of with the name of the key to look for
-    Output: 
-        attributs[index] or NA = str() #NA is returned if the key was not found in the attributs
-    """
-    try:
-        index = attributs.index(look_for)+1
-        return attributs[index]
-    except: 
-        #print("No",look_for,"in the entry the return was set to NA\n",attributs)
-        return "NA"
-
-def _re_format(rep_trans_dict):
-    """
-    This function is ment to reformat dictionary of the representatice transcripts into an dictionary with only one entry per key
-    Input:
-        rep_trans_dict = {gene_id : [transcript_id , transcript_support_level , transcript_length]}
-    Output: 
-        rep_transcripts = {gene_id : transcript_id}
-    """
-    rep_transcripts = dict()
-    for gene_id in rep_trans_dict: 
-        rep_transcripts[gene_id] = rep_trans_dict[gene_id][0]
-    
-    return rep_transcripts
-        
-    
-
-def get_rep_trans(file_name = "test"):
-    """ 
-    This is the main function of this script it selects one representative transcrip per gene based on a gtf annotation file. 
-    It does so be two criteria: first the transcript support level and it there are several transcript 
-    of one gene that have the same trasncript_support_level it chooses the one that corresponds to the longest mRNA.
-    Input: 
-        file_name = str() # name of the annotation file with or without the .gtf part
-    Output: 
-        rep_transcripts = {gene_id : transcript_id}
-    """
-    
-    #setting defoult variables
-    rep_trans = dict()
-    cur_gID = str()
-    cur_best_trans = [str(),100,0] # [transcript_id , transcript_support_level , transcript_length]
-    pot_best_trans = False
-    cur_tID = str()
-    ignor_trans = False
-    
-    with open (file_name,"r") as f: 
-        for line in f: 
-            entry = line.split("\t")
-            
-            #removes expected but unneeded entrys
-            exp_unneed = ["CDS","stop_codon","five_prime_utr","three_prime_utr","start_codon",'Selenocysteine']
-            if len(entry) == 1 or entry[2] in exp_unneed:
-                continue
-            
-            #this function turns the less organized part of the entry into a reable list
-            attributs = attributs_converter(entry[8])
-            #looking for and processing exons entrys
-            if entry[2] == "exon": 
-                
-                #dicide if to contiune or not
-                if ignor_trans: 
-                    continue
-                elif cur_gID != attributs[1]:
-                    raise ValueError("ERROR exon from an unexpected Gen")
-                    continue
-                elif find_in_attributs (attributs,"transcript_id") != cur_tID:
-                    raise ValueError("exon from an unexpected transcript")
-                    continue
-                
-                #adding the length of the exon to the appropriat list and chacking for changes in best transcript
-                if pot_best_trans: 
-                    pot_best_trans[2]+= int(entry[4])-int(entry[3])
-                    if pot_best_trans[2] > cur_best_trans[2]: 
-                        cur_best_trans = pot_best_trans
-                        pot_best_trans = False
-                else:
-                    cur_best_trans[2]+= int(entry[4])-int(entry[3])
-
-                                       
-                
-            #looking for and processing transcript entrys
-            elif entry[2] == "transcript":
-                    
-                #varryfi that the gen is correct
-                if cur_gID != attributs[1]:
-                    raise ValueError("ERROR transcript from an unexpected Gen")
-                    continue
-                
-                #finding the transcript id and the support level
-                cur_tID = find_in_attributs (attributs,"transcript_id")       
-                t_supp_lvl = find_in_attributs (attributs,"transcript_support_level")    
-                
-                #If there is no transcript support level or the level is given as NA it is nomed as 100. else the transcript support level is tunrn into int
-                if t_supp_lvl == "NA": 
-                    t_supp_lvl = 100
-                else:
-                    try:
-                        t_supp_lvl = int(t_supp_lvl)
-                    except: 
-                        t_supp_lvl = 100
-                
-                
-                #decides if the transcript has potential to become the representative transcript
-                if t_supp_lvl < cur_best_trans[1] or cur_best_trans[0] == "":
-                    cur_best_trans = [cur_tID,t_supp_lvl,0]
-                    pot_best_trans = False
-                    ignor_trans = False
-                     
-                elif t_supp_lvl == cur_best_trans[1]:
-                    pot_best_trans = [cur_tID,t_supp_lvl,0] 
-                else:
-                    ignor_trans = True
-                
-                  
-            #looking for and processing gene entrys
-            elif entry[2] == "gene":
-                
-                #updating rep_trans dict
-                if cur_gID not in rep_trans: 
-                    rep_trans[cur_gID] = cur_best_trans
-                else: 
-                    if rep_trans[cur_gID][1] > cur_best_trans[1]: 
-                        rep_trans[cur_gID] = cur_best_trans
-                    elif rep_trans[cur_gID][1] == cur_best_trans[1] and rep_trans[cur_gID][2] < cur_best_trans[2]: 
-                        rep_trans[cur_gID] = cur_best_trans
-                
-                #updating cur_gID and resetting cur_best_trans
-                cur_gID = attributs[1]
-                cur_best_trans = [str(),100,0]
-                    
-            #raises an error for unidentifyable entrys
-            else: 
-                raise ValueError("This entry could not be identified\n",entry)
-        
-        #addding the final gene to the dictionary
-        if cur_gID not in rep_trans: 
-            rep_trans[cur_gID] = cur_best_trans
-        else: 
-            if rep_trans[cur_gID][1] > cur_best_trans[1]: 
-                rep_trans[cur_gID] = cur_best_trans
-            elif rep_trans[cur_gID][1] == cur_best_trans[1] and rep_trans[cur_gID][2] < cur_best_trans[2]: 
-                rep_trans[cur_gID] = cur_best_trans        
-        
-        del rep_trans[""]
-        rep_transcripts = _re_format(rep_trans)
-        return(rep_transcripts )
-
-def gtf_file_writer (original_file, output_file): 
-    """
-    this function writes the output GTF file
-    """
-    output = []
-    rep_transcript_dict = get_rep_trans(original_file)
-
-    with open(original_file, 'r') as f:
-            for entry in f: 
-                if entry[0] != '#':
-                    attributes = attributs_converter(entry)
-                    type_ = attributes[2]
-                    if type_ == 'gene':
-                        gene_id = find_in_attributs(attributes, 'gene_id')
-                        output.append(entry)
-                    else:
-                        transcript_id = find_in_attributs(attributes, 'transcript_id')
-                        try:
-                            if rep_transcript_dict[gene_id] == transcript_id:
-                                output.append(entry)
-                        except:
-                            print("error")
-
-    with open(output_file, 'w') as last_file:
-        last_file.write(output)
-
-def _test(): 
-    """
-    This funtion is ment to be run for test
-    Output: 
-        file with the dictionary generated based on the test file 
-    """
-    file_name = "test.gtf"
-    rt = get_rep_trans(file_name)
-    expected_result = {"ENSG00000160072":"ENST00000472194","ENSG00000234396":"ENST00000442483",
-                       "ENSG00000225972":"ENST00000416931","ENSG00000224315":"ENST00000428803",
-                       "ENSG00000198744":"ENST00000416718","ENSG00000279928":"ENST00000624431",
-                       "ENSG00000228037":"ENST00000424215",'ENSG00000142611':'ENST00000378391'}
-    if rt != expected_result: 
-        print("The test fail due to not yieding the same results")
-        print("The results the program got\n",rt)
-        print("The expected results\n",expected_result)
-    else: 
-        print("The test was succses full")
-            
-### Execution part ###
-if __name__ == "__main__":   
-    parser = argparse.ArgumentParser(description="find_representativ_transcripts",formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("-file_name", required=True, help="gtf file with genome annotation")
-    parser.add_argument("-t", required=False,default = False,help="to run the test input -t True")
-    args = parser.parse_args()
-    
-    #standadize the file_name inlude .gtf#
-
-    file_name = args.file_name
-    i_gtf = file_name.find(".gtf")
-    if i_gtf == -1:
-        file_name += ".gtf"  
-    
-    if args.t: 
-        _test()
-    else:
-        get_rep_trans(file_name)
-   
-
-    
-                
-            
\ No newline at end of file
diff --git a/scripts/match_reprtranscript_expressionlevel.py b/scripts/match_reprtranscript_expressionlevel.py
deleted file mode 100644
index f7f3277c806b09131b79c2ad8f23781053a65881..0000000000000000000000000000000000000000
--- a/scripts/match_reprtranscript_expressionlevel.py
+++ /dev/null
@@ -1,182 +0,0 @@
-### Made by Hugo Gillet ###
-import pandas as pd
-from gtfparse import read_gtf
-
-
-def dict_reprTrans_to_df(dict_reprTrans: dict[str, str]) -> pd.DataFrame:
-
-    """Convert a dictionary of genes and their representative transcript into a dataframe 
-
-        Args:
-            dict_reprTrans (dict) : {'Gene':['transcriptA', 'transcriptB'], ...}
-
-        Returns:
-            Pandas dataframe having Gene and transcript as columns
-      
-        Raises:
-            Only dict are allowed
-            Key should be strings
-            Value should be strings
-          
-    """
-    pass
-    if not type(dict_reprTrans) is dict:
-        raise TypeError("Only dict are allowed")
-    if type(list(dict_reprTrans.keys())[0]) is not str:
-        raise TypeError("Key should be strings")
-    if type(list(dict_reprTrans.values())[0]) is not str:
-        raise TypeError("Values should be strings")
-
-    df_reprTrans = pd.DataFrame.from_dict(
-        dict_reprTrans, orient="index", columns=["reprTranscript"]
-    )
-    df_reprTrans = df_reprTrans.reset_index(level=0)
-    df_reprTrans.columns = ["Gene", "reprTrans"]
-    df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(
-        r"\.[1-9]", "", regex=True
-    )
-    return df_reprTrans
-
-def gene_and_transcript(gtf_file:str)-> pd.DataFrame: 
-    """
-    This function take a .gtf file and convert it into a 
-    dataframe containing gene_id and their transcripts_id.
-        Args:
-            gtf_file (str) : path to the .gtf file
-
-        Returns:
-            df_gtf (pd.DataFrame) : pandas dataframe containing having has columns
-            gene_id and their transcripts_id.
-        Raises : 
-            None 
-    
-    """
-    df_gtf = read_gtf(gtf_file)
-    df_gtf = df_gtf.loc[df_gtf["feature"]=="transcript"]
-    df_gtf = df_gtf[["gene_id","transcript_id"]]
-    df_gtf = df_gtf.rename(columns={"gene_id":"Gene","transcript_id":"Transcript"})
-    return df_gtf
-
-
-
-def tsv_or_csv_to_df(input_txt: str) -> pd.DataFrame:
-    """Convert tsv or csv file into a pandas dataframe
-
-        Args:
-            input_txt (str): csv or tsv file containing transcript expression level
-
-        Returns:
-            df_gene (str): Pandas dataframe having transcript and expression level
-            as columns  
-      
-        Raises:
-            None          
-    """
-    pass
-    df_input = pd.read_csv(
-        input_txt,
-        sep=r"[\t,]",
-        lineterminator="\n",
-        names=["Transcript", "Expression_level"],
-        engine="python",
-    )
-    return df_input
-
-
-def exprLevel_byGene(
-    df_exprTrasncript: pd.DataFrame, df_output_gtf_selection: pd.DataFrame
-) -> pd.DataFrame:
-    """find the gene of each transcipt given by the expression level csv/tsv file,
-       and summ expression level of all transcipts from the same gene. 
-
-        Args:
-            df_exprTranscript : pandas Dataframe containing transcript and their expression level,
-            generated by "tsv_or_csv_to_df" function
-            df_output_gtf_selection : pandas Dataframe containing genes and transcripts,
-            generated by "transcripts_by_gene_inDf" function 
-
-        Returns:
-            Pandas dataframe having gene and sum of its transcript expression level
-      
-        Raises:
-            None          
-    """
-    pass
-    df_merged = pd.merge(
-        df_output_gtf_selection, df_exprTrasncript, how="inner", on="Transcript"
-    )
-    df_sum = df_merged.groupby("Gene").sum(
-        "Expression_level"
-    ) 
-    return df_sum
-
-
-def match_byGene(
-    df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene: pd.DataFrame
-) -> pd.DataFrame:
-    """Find matching genes bewteen the 2 args 
-
-        Args:
-            df_reprTranscript : pandas Dataframe containing genes 
-            and their representative transcript, generated by
-            "dict_reprTrans_to_df()" 
-            df_expressionLevel_byGene : pandas Dataframe containing 
-            genes and their expression level generated by 
-            "transcript_by_gene_inDf()"
-
-        Returns:
-            Pandas dataframe having representative trasncripts 
-            and their expression level
-      
-        Raises:
-            None          
-    """
-    pass
-    df_merged = pd.merge(
-        df_reprTranscript, df_expressionLevel_byGene, how="outer", on="Gene"
-    )
-    df_clean = df_merged.dropna(axis=0)
-    df_clean = df_clean.loc[:, ["reprTrans", "Expression_level"]]
-    return df_clean
-
-
-
-
-
-### functions to run this part of the programm
-
-
-def match_reprTranscript_expressionLevel(
-    exprTrans: str, dict_reprTrans: dict, gtf_file: str,
-):
-    """Combine functions to replace transcripts from an expression level csv/tsv file 
-       with representative transcripts 
-
-        Args:
-            exprTrans (str): csv or tsv file containing transcripts
-            and their expression level 
-            dict_reprTrans (dict) : dict of genes and their 
-            representative transcipt
-            intemediate_file (str) : txt file containing genes, transcript 
-            and their expression level from the transkript_extractor function
-            output_path : path indicating were the tsv file should be written
-
-        Returns:
-            tsv file of representative trasncripts and their expression level
-      
-        Raises:
-            None          
-    """
-    df_gene_transcript = gene_and_transcript(gtf_file)
-    df_exprTrans = tsv_or_csv_to_df(exprTrans)
-    df_reprTrans = dict_reprTrans_to_df(dict_reprTrans)
-    df_exprLevel_byGene = exprLevel_byGene(df_exprTrans, df_gene_transcript) # error here
-    df_match = match_byGene(df_reprTrans, df_exprLevel_byGene)
-    df_match.rename(columns = {'reprTrans':'id', 'Expression_level':'level'}, inplace = True)
-    return df_match
-
-
-# run the programm 
-
-if __name__ == "__main__":
-    match_reprTranscript_expressionLevel()
diff --git a/scripts/new_exe.py b/scripts/new_exe.py
deleted file mode 100644
index ade521ceebe76d7bff2c424006c9d4624cf72199..0000000000000000000000000000000000000000
--- a/scripts/new_exe.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import argparse
-import time
-import transcript_sampler as ts
-
-# exemple execution : python C:\...\final_exe.py  --input_gtf  "C:\...\input_files\test.gtf" --input_csv "C:\...\input_files\expression.csv"  --output_gtf "C:\...\output\output_gtf.gtf"  --output_csv "C:\...\ouput\output_gtf.gtf" --n_to_sample 100
-
-
-def exe(input_gtf, input_csv, output_gtf, output_csv, transcript_nr, input_free=True):
-    start = time.time()
-    dict_repr_trans = ts.get_rep_trans(input_gtf)
-    df_repr = ts.match_reprTranscript_expressionLevel(
-        dict_reprTrans=dict_repr_trans, exprTrans=input_csv, gtf_file=input_gtf
-    )
-    print("Finiding match between representative transcripts and expression level file")
-    print("Poisson sampling of transcripts")
-    ts.transcript_sampling(transcript_nr, df_repr, output_csv)
-    print("output csv file ready")
-    print("writing output gtf file")
-    ts.gtf_file_writer(input_gtf, dict_repr_trans, output_gtf)
-    end = time.time()
-    print("\nScript executed in {} sec\n".format(end - start))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="transcript sampler",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--input_gtf", required=True, help="gtf file with genome annotation"
-    )
-    parser.add_argument(
-        "--input_csv",
-        required=True,
-        help="csv or tsv file with transcript and their expression level ",
-    )
-    parser.add_argument(
-        "--output_gtf",
-        required=True,
-        help="output path for the new gtf file of representative transcripts",
-    )
-    parser.add_argument(
-        "--output_csv",
-        required=True,
-        help="output path for the new csv file of representative transcript and their sampled number",
-    )
-    parser.add_argument(
-        "--n_to_sample", required=True, help="total number of transcripts to sample"
-    )
-    args = parser.parse_args()
-    exe(
-        args.input_gtf,
-        args.input_csv,
-        args.output_gtf,
-        args.output_csv,
-        args.n_to_sample,
-    )
diff --git a/scripts/new_gtf_writer.py b/scripts/new_gtf_writer.py
deleted file mode 100644
index 43e7ca63a607a8e789e32e64138126b6f556ed6b..0000000000000000000000000000000000000000
--- a/scripts/new_gtf_writer.py
+++ /dev/null
@@ -1,20 +0,0 @@
-
-def gtf_file_writer (original_file, output_file): 
-    output = []
-    rep_transcript_dict = get_rep_trans(original_file)
-
-    with open(original_file, 'r') as f:
-            for entry in f: 
-                if entry[0] != '#':
-                    attributes = attributs_converter(entry)
-                    type_ = attributes[2]
-                    if type_ == 'gene':
-                        gene_id = find_in_attributs(attributes, 'gene_id')
-                        output.append(entry)
-                    if type_ != 'gene':
-                        transcript_id = find_in_attributs(attributes, 'transcript_id')
-                        if rep_transcript_dict[gene_id] == transcript_id:
-                            output.append(entry)
-
-    with open(output_file, 'w') as last_file:
-        last_file.write(output)
\ No newline at end of file
diff --git a/scripts/poisson_sampling.py b/scripts/poisson_sampling.py
deleted file mode 100644
index fedd8e8fb18eac8352b437308e0e925e8a317630..0000000000000000000000000000000000000000
--- a/scripts/poisson_sampling.py
+++ /dev/null
@@ -1,57 +0,0 @@
-### Called Packages ###
-import pandas as pd
-import numpy as np
-import argparse
-
-import transcript_extractor as te
-
-python_version = "3.7.13"
-module_list =[pd,np,argparse]
-modul_name_list = ["pd","np","argparse"]
-### Functions ###
-
-'''
-Sample transcript 
-
-This part of the code does Poisson sampling proportionally to gene expression levels for each gene. 
- 
-input:  total transcript number (int) 
-        csv file with gene id and  gene expression levels (columns named 'id' and 'level')
-
-output: csv file with gene id and count
-        gtf file with transcript samples
-'''
-
-
-def transcript_sampling(total_transcript_number, df_repr, output_csv):
-    #df = pd.read_csv(csv_file, sep="\t", lineterminator="\n",  names=["id", "level"])
-    df = df_repr # the function "match_reprTranscript_expressionLevel()" now directly output a dataframe  
-    levels = []
-    sums = df['level'].tolist()
-    total = sum(sums)
-    total_transcript_number=int(total_transcript_number) # I added this because writting a number in the terminal inputed a string 
-    normalized = total_transcript_number/total
-    for expression_level in df['level']:
-        poisson_sampled = np.random.poisson(expression_level*normalized)
-        levels.append(poisson_sampled)
-
-    transcript_numbers = pd.DataFrame({'id': df['id'],'count': levels})
-    pd.DataFrame.to_csv(transcript_numbers, output_csv)
-
-if __name__ == '__main__':
-    #te.version_control(module_list,modul_name_list,python_version)
-    parser = argparse.ArgumentParser(
-        description="Transcript Poisson sampler, csv output",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-
-    parser.add_argument("--expression_level", required=True, help="csv file with expression level")
-    parser.add_argument("--output_csv", required=True, help="output csv file")
-    parser.add_argument("--input_csv", required=True, help="input csv file")
-    parser.add_argument("--transcript_number", required=True, help="total number of transcripts to sample")
-    args = parser.parse_args()
-
-
-    transcript_sampling(args.transcript_number, args.input_csv, args.output_csv, args.transcript_number)
-
-
diff --git a/scripts/representative.py b/scripts/representative.py
index 1228e9a5438ca0cd89b6e176e1f78f181895198c..589f4b4c473ea4787efbd589a1c932bc0bfb87da 100644
--- a/scripts/representative.py
+++ b/scripts/representative.py
@@ -1,91 +1,91 @@
-import pandas as pd
-import os 
-
 '''
-This part of the code take as input a gtf modified file 
+This part of the code take as input a gtf modified file
 and return a dictionary of transcripts with best
 support level for each gene of the input
-
 '''
+import pandas as pd
+# import os
 
 
-
-
-def import_gtfSelection_to_df(gtf_modified_file: str) -> pd.DataFrame:
+def import_gtf_selection_to_df(gtf_modified_file: str) -> pd.DataFrame:
     """Import intermediate file from gtf and create a df
 
         Args:
             gtf_modified_file (str) : path to the intermediate file
 
         Returns:
-            Pandas dataframe having Gene, transcript 
+            Pandas dataframe having Gene, transcript
             and support level as columns
-      
+
         Raises:
             TypeError : Only str path is allowed
-          
+
     """
-    pass
-    if not type(gtf_modified_file) is str:
-      raise TypeError("Only str path is allowed")
-    df_input = pd.read_csv(gtf_modified_file, sep = '\t', lineterminator = '\n', 
-names = ["Gene_mixed", "Transcript", "Support_level", "Na1", "Na2"] )
+    if not isinstance(gtf_modified_file, str):
+        raise TypeError("Only str path is allowed")
+    df_input = pd.read_csv(
+        gtf_modified_file, sep='\t', lineterminator='\n',
+        names=["Gene_mixed", "Transcript", "Support_level", "Na1", "Na2"]
+        )
     df_input["Support_level"] = df_input["Support_level"].replace(" ", "")
-    df_input["Gene"] = df_input["Gene_mixed"].str.extract('([A-Z]\w{0,})', expand=True)
-    df_input["Transcript_number"] = df_input["Gene_mixed"].str.extract('(^\d)', expand=True)
-    df_clean = df_input.loc[:, ["Gene", "Transcript","Support_level"]]
-    df_clean["Gene"] = df_clean["Gene"].fillna(method = 'ffill')
-    df_clean = df_clean.dropna(axis = 0)
+    df_input["Gene"] = df_input["Gene_mixed"].str.extract(
+        r'([A-Z]\w{0,})', expand=True  # noqa: W605
+        )
+    df_input["Transcript_number"] = df_input["Gene_mixed"].str.extract(
+        r'(^\d)', expand=True  # noqa: W605
+        )
+    df_clean = df_input.loc[:, ["Gene", "Transcript", "Support_level"]]
+    df_clean["Gene"] = df_clean["Gene"].fillna(method='ffill')
+    df_clean = df_clean.dropna(axis=0)
     return df_clean
 
 
-
-
-def representative_transcripts_inDict(df_gtfSelection: pd.DataFrame) -> pd.DataFrame:
-    """Return a dict containing for each gene transcripts 
+def representative_transcripts_in_dict(
+        df_gtf_selection: pd.DataFrame) -> pd.DataFrame:
+    """Return a dict containing for each gene transcripts
         with highest confidence level
 
         Args:
-            df_gtfSelection (str): Pandas dataframe having Gene,
+            df_gtf_selection (pd.DataFrame): Pandas dataframe having Gene,
             transcript and support level as columns
 
         Returns:
             Dict {'Gene':['transcriptA', 'transcriptB'], ...}
-      
+
         Raises:
             TypeError : Only pandas DataFrame is allowed
     """
-    pass 
-
-    if not type(df_gtfSelection) is pd.DataFrame:
+    if not isinstance(df_gtf_selection, pd.DataFrame):
         raise TypeError("Only pandas DataFrame is allowed")
-    df_min = df_gtfSelection[df_gtfSelection["Support_level"]==df_gtfSelection.groupby("Gene")["Support_level"].transform(min)]
-    df_final = df_min.drop(columns = ["Support_level"])
-    dict_representative_transcripts = df_final.groupby("Gene")["Transcript"].apply(list).to_dict()
-    return dict_representative_transcripts  
+    df_min = df_gtf_selection[
+        df_gtf_selection["Support_level"] ==
+        df_gtf_selection.groupby("Gene")["Support_level"].transform(min)
+        ]
+    df_final = df_min.drop(columns=["Support_level"])
+    dict_representative_transcripts = df_final.groupby("Gene")[
+        "Transcript"].apply(list).to_dict()
+    return dict_representative_transcripts
 
 
-
-def find_repr_by_SupportLevel(intermediate_file: str) -> dict[str,str]: 
-    """Combine functions import_gtfSelection_to_df() 
-        and representative_transcripts_inDict()
+def find_repr_by_support_level(intermediate_file: str) -> dict[str, str]:
+    """Combine functions import_gtf_selection_to_df()
+        and representative_transcripts_in_dict()
 
         Args:
             intermediate_file : path to the intermediate file
 
         Returns:
             Dict {'Gene':['transcriptA', 'transcriptB'], ...}
-      
+
         Raises:
             None
 
-          
+
     """
-    pass 
-    df_gtf = import_gtfSelection_to_df(intermediate_file)
-    dict_reprTrans = representative_transcripts_inDict(df_gtf)
-    return dict_reprTrans
+    df_gtf = import_gtf_selection_to_df(intermediate_file)
+    dict_repr_trans = representative_transcripts_in_dict(df_gtf)
+    return dict_repr_trans
 
 
-if __name__ == "__main__":  
-    find_repr_by_SupportLevel()
+# if __name__ == "__main__":
+#     find_repr_by_support_level()
diff --git a/scripts/transcript_extractor.py b/scripts/transcript_extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c81bcd838d4dff57c44989ea07edc0a4ee2e3a7
--- /dev/null
+++ b/scripts/transcript_extractor.py
@@ -0,0 +1,329 @@
+#### Transcript extractor #####
+"""Transcript extractor 
+Version 1.2.0"""
+### Called Packages ###
+import re
+import os
+import time
+
+python_version = "3.7.13"
+module_list =[re,os,time]
+modul_name_list = ["re","os","time"]
+
+### Functions ###
+def version_control(module_list,modul_name_list,python_version):
+    """Append one "name<TAB>version" line per module to required.txt.
+
+    Args:
+        module_list: imported module objects to inspect.
+        modul_name_list: names to write, in the same order as module_list.
+        python_version: version written for modules without __version__.
+    """
+    with open("required.txt","a") as req:
+        for i, module in enumerate(module_list):
+            # Only a missing attribute triggers the fallback; I/O errors propagate.
+            version = getattr(module, "__version__", python_version)
+            req.write(modul_name_list[i]+"\t"+str(version)+"\n")
+
+def __parameter_editor(file_name,source_pathway_name,deposit_pathway_name):
+    """Interactively show the run parameters and let the user confirm or edit them; returns the tuple (file_name, source_pathway_name, deposit_pathway_name)."""
+    while True:
+        print("The program will run with the following parameters:\nFile name:\t\t",file_name,"\nSource pathway:\t",source_pathway_name,"\nDeposit pathway:\t",deposit_pathway_name,"\n")
+        parameter_conformation = input("To continue with these parameters input [continue or c] to change them input [edit]\n>")
+        if parameter_conformation == "continue"or parameter_conformation =="c":
+            break
+        elif parameter_conformation == "edit":
+            #Edit the parameters; leaving this inner loop shows the parameter summary again
+            while True: 
+                change_question = input("select the parameter you want to change [nfile/spath/dpath] or input [b] to go back\n>")
+                if change_question == "nfile":
+                    #This condition allows the user to change the file name (the new name is not validated here)
+                    file_name = input("Please input the new file name\n>")
+                    break
+                elif  change_question == "spath":
+                    #This condition allows the user to change the source path
+                    source_pathway_name = input("Please input the new source path\n>")
+                    
+                    does_source_pathway_exist = os.path.exists(source_pathway_name)
+                    if does_source_pathway_exist:
+                        break
+                    else: 
+                        print("The new source pathway:",source_pathway_name,"does not exist\nThe source pathway was returned to default:",os.getcwd())
+                        source_pathway_name = os.getcwd() #no break: the user is asked again which parameter to change
+                elif  change_question == "dpath":
+                    #This condition allows the user to change the output file location
+                    deposit_pathway_name = input("Please input the new output file path name\n>")
+                    does_deposit_pathway_exist = os.path.exists(deposit_pathway_name)
+                    if does_deposit_pathway_exist:
+                        break
+                    else:
+                        print("The new deposit pathway:",deposit_pathway_name,"does not existe\nThe deposit pathway was returnt to default:",source_pathway_name)
+                        deposit_pathway_name = source_pathway_name
+                    #The block above tests whether the new deposit pathway exists and falls back to the source pathway if not
+                elif  change_question == "b":
+                    #This condition allows the user to return to the main loop
+                    break             
+                else:
+                    #This condition covers all invalid inputs in the second (inner) loop
+                    print("The input",change_question,"is not valid. Please use one of the specified commands") 
+                    
+        else: 
+            #This condition covers all invalid inputs in the main loop
+           print("The input",parameter_conformation,"is not valide please use one of the specified comands\n") 
+    return(file_name,source_pathway_name,deposit_pathway_name)    
+    
+    
+    
+    
+    
+    
+    
+def __searche_for_preexisting_files(file_name,deposit_pathway_name = None):
+    """Ask whether an already existing result file should be reused.
+
+    Args:
+        file_name: name of the result file to look for.
+        deposit_pathway_name: directory to search; defaults to the working
+            directory at call time (it used to be frozen at import time).
+
+    Returns:
+        True if the file exists and the user chose to keep it, else False.
+    """
+    if deposit_pathway_name is None:
+        deposit_pathway_name = os.getcwd()
+    if file_name not in os.listdir(deposit_pathway_name):
+        print("No pre-existing file of the relevant type has been found.\nA new file will be generated please wait...\n")
+        return(False)
+    # The file exists: keep prompting until the answer is either y or n.
+    while True:
+        File_found_input = input (file_name+" has allready been generated\nDo you want to generate a new one [y/n] \n>")
+        if File_found_input == "n":
+            print("No new file will be generated, the program can continue")
+            return(True)
+        elif File_found_input == "y":
+            print("A new file will be generated please wait...\n")
+            return(False)
+        else:
+            print("Invalid input\nPlease press [y] if you want to generate a new file or [n] if you want to use the preexisting file")
+
+def bar_builder(percentage = 0,length_multiplyer = 2,start_time = None,bar = str()):
+    """Print a loading bar advancing in 10% steps; returns (bar, start_time) to pass to the next call.
+    Expected inputs:
+        percentage: int between 0 and 100 in steps of 10; default = 0 #defines the current loading increment
+        length_multiplyer: int > 0 ; default = 2 #determines the amount of symbols per loading increment
+        start_time: time.time() timestamp ; default = None #None starts the timer at this call, not at import
+        bar: str ; default = str() #the bar returned by the previous call; not needed for the 0% increment"""
+    start_time = time.time() if start_time is None else start_time
+    if percentage == 100:
+        bar = bar.replace("-","#")
+        print("\r"+bar+"\t"+"100%\t\t"+str(int(time.time()-start_time)))
+    elif percentage > 0:
+        bar = bar.replace("-","#",length_multiplyer)
+        print("\r"+bar+"\t"+str(percentage)+"%", end='',flush=True)
+    elif percentage == 0:
+        bar = "["+"-"*length_multiplyer*10+"]"
+        print(bar+"\t", end='',flush=True)
+    return(bar,start_time)
+
+def __test_file_name(file_name,source_pathway_name = None):
+    """Validate that the source .gtf file exists in the source directory.
+
+    A missing extension, or a purely numeric one such as ".105" (a release
+    number rather than a file type), is completed with ".gtf". Any other
+    extension is rejected.
+
+    Args:
+        file_name: name of the annotation file, with or without ".gtf".
+        source_pathway_name: directory to search; defaults to the working
+            directory at call time.
+
+    Returns:
+        (valide_source_file, file_name): whether the file was found, and the
+        file name without its trailing ".gtf".
+    """
+    if source_pathway_name is None:
+        source_pathway_name = os.getcwd()
+    valide_source_file = False
+    validate_source_file = True
+    index_of_the_dot = file_name.rfind(".")
+    source_file_typ = file_name[index_of_the_dot:] if index_of_the_dot != -1 else ""
+    # The dot is escaped and the whole suffix must be digits, so that e.g.
+    # ".mp3" is reported as an unsupported type instead of becoming ".mp3.gtf".
+    if source_file_typ == "" or re.fullmatch(r"\.\d+", source_file_typ):
+        file_name += ".gtf"
+    elif source_file_typ != ".gtf":
+        print("This program can not handle",source_file_typ,"files. \nplease use a .gtf file" )
+        validate_source_file = False
+
+    if validate_source_file:
+        valide_source_file = file_name in os.listdir(source_pathway_name)
+    if valide_source_file:
+        print("The file:",file_name,"has been found.\n")
+    else:
+        print("No .gtf file of the name",file_name,"has been found in this pathway")
+    # Strip only the trailing extension; replace() would also cut inner ".gtf".
+    if file_name.endswith(".gtf"):
+        file_name = file_name[:-len(".gtf")]
+    return(valide_source_file,file_name)
+
+def __do_pathways_exist__(source_pathway_name,deposit_pathway_name):
+    """This funtion tests that the entered pathways actualy exist"""
+    does_source_pathway_exist = os.path.exists(source_pathway_name)
+    does_deposit_pathway_exist = os.path.exists(deposit_pathway_name)
+    #The Block above does the actual testing
+    if does_source_pathway_exist:
+        source_pathway_name = source_pathway_name
+    else: 
+        print("The source pathway:",source_pathway_name,"has not been found\nThe source pathway was set to the default")
+        source_pathway_name = os.getcwd()
+    #The block above details the possible reactions for the source path existing or not existing
+    if does_deposit_pathway_exist: 
+        deposit_pathway_name = deposit_pathway_name
+    else: 
+        print("The deposit pathway:",deposit_pathway_name,"has not been found\nThe deposit pathway was set to the default")
+        deposit_pathway_name = source_pathway_name
+    #The block above details the possible reactions for the deposit pathway existing or not existing 
+    return(source_pathway_name,deposit_pathway_name)
+        
+def gene_ID_finder(entry):
+    """This function is supposed to find the gene ID of a known gene entry
+    Expected inputs:
+        entry: str #a line from a gtf file that contains a gene ID"""
+    index_gene_id = entry.find("gene_id")
+    find_gene_id_name = re.compile("\"\S{1,25}\"")
+    sub_entry = entry[index_gene_id:]
+    try_find_gene_id_name = find_gene_id_name.search(sub_entry)   
+    gene_ID = try_find_gene_id_name[0].replace("\"","")
+    return (gene_ID)
+       
+def transcript_ID_finder (entry):
+    """This function is supposed to find the transcript ID in a known transcript entry
+    Expected inputs:
+        entry: str #a line from a gtf file that contains a transcript ID"""
+    index_transcript_id = entry.find("transcript_id")
+    find_transcript_id_name = re.compile("\"\S{1,25}\"")
+    sub_entry = entry[index_transcript_id:]
+    try_find_transcript_id_name = find_transcript_id_name.search(sub_entry)   
+    
+    try: 
+        transcript_ID = try_find_transcript_id_name[0].replace("\"","")
+    except:
+        transcript_ID = ""
+    return (transcript_ID)
+        
+def transcript_support_level_finder(entry):
+    """This function is supposed to find the transcript support level in a known transcript entry
+    Expected input: 
+        entry: str #a line from a gtf file that belongs to a transcript"""
+    transcript_support_level_start_ID = entry.find("transcript_support_level")
+    sub_entry = entry[transcript_support_level_start_ID:]
+    
+    try:
+        score_finder = re.compile("\W\w{1,16}\W{2}")
+        try_score_finder = score_finder.search(sub_entry)              
+        Pre_score_1 = try_score_finder[0]
+        Pre_score_2 = Pre_score_1.replace("\"","")
+        Pre_score_2 = Pre_score_2.replace("(","")
+        transcript_support_level = Pre_score_2.replace(";","")
+        if "NA" in transcript_support_level:
+            transcript_support_level = 100
+        #I changed This tell laura
+        
+
+    except:
+        transcript_support_level = 100
+    return (transcript_support_level)
+
+
+
+    
+def _transcript_extractor (file_name,source_pathway_name,deposit_pathway_name): 
+    """This function extracts the transcript number, transcript ID, the transcript support level, the transcript length and the line index from a gtf file of a given name and saves them as a new file named given_name_intermediate_file.txt.
+    Expected input:
+        file_name: str #the name of the gtf file you want to look at without the .gtf part
+        source_pathway_name: str #path of the gtf file
+        deposit_pathway_name: str #path for saving the intermediate file"""
+        
+    with open(os.path.join(source_pathway_name,file_name+".gtf"), 'r') as f:      
+        total_entrys =len(f.readlines())
+    with open(os.path.join(source_pathway_name,file_name+".gtf"), 'r') as f:
+        current_entry = 0 
+        percentage_done = 0 
+        bar,start_time = bar_builder(length_multiplyer = 3)
+        
+        
+        Old_gen_ID = str() 
+        #stand-in as the first couple of entries are not genes
+        with open(os.path.join(deposit_pathway_name,file_name+"_"+"intermediate_file"+".txt"),"w") as IMF:
+            transcript_number = 0
+            for entry in f: 
+
+                
+                current_entry += 1
+                current_percentage_done = 100* current_entry/total_entrys
+                if current_percentage_done > percentage_done +10: 
+                    bar,start_time = bar_builder(percentage=percentage_done+10,length_multiplyer = 3,start_time=start_time,bar =bar)
+                    percentage_done = int(current_percentage_done)  
+                
+                if "gene_id" in entry:
+                    Gen_ID = gene_ID_finder(entry)
+                else:
+                    Gen_ID = Old_gen_ID
+  
+                if Gen_ID != Old_gen_ID:
+                    Gen_entry = ">"+ Gen_ID +"\n"
+                    IMF.write(Gen_entry)
+                    transcript_number = 0
+                    Old_gen_ID = Gen_ID
+                
+                if "\ttranscript\t" in entry:
+                    transcript_number += 1
+                    Transcript_ID  = transcript_ID_finder(entry)
+                    #the function that determines the transcript ID is called
+                    transcript_support_level = transcript_support_level_finder(entry)
+                    #the function that determines the transcript support level is called
+                    New_entry = str(transcript_number)+"\t"+str(Transcript_ID)+"\t"+str(transcript_support_level)+"\t"+"\t\n"
+                    IMF.write(New_entry)
+        bar_builder(100,length_multiplyer = 3,start_time=start_time,bar =bar)
+        print("The transcripts have been collected") 
+        
+        
+def extract_transcript(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = False,Input_free = False): 
+   """ This is the overall executable function that will execute the transcript extraction process for a given file with all checks.
+    Expected input:
+        file_name: str ; default = test #the name of the gtf file you want to look at
+        source_pathway_name: str ; default = current work directory #path of the gtf file
+        deposit_pathway_name: str ; default = source_pathway_name #path for saving the intermediate file
+    Outputs: 
+        file_name: str 
+        source_pathway_name: str
+        deposit_pathway_name: str 
+   """
+        
+        
+   if deposit_pathway_name == False: 
+       deposit_pathway_name = source_pathway_name
+   if Input_free:
+       validated_file_name = __test_file_name(file_name,source_pathway_name)
+       file_name = validated_file_name[1]
+       _transcript_extractor (file_name,source_pathway_name,deposit_pathway_name)
+   else:
+       file_name,source_pathway_name,deposit_pathway_name = __parameter_editor(file_name,source_pathway_name,deposit_pathway_name)
+       source_pathway_name,deposit_pathway_name =__do_pathways_exist__(source_pathway_name,deposit_pathway_name)
+       validated_file_name = __test_file_name(file_name,source_pathway_name)
+       file_name = validated_file_name[1]
+       if validated_file_name[0]:
+           if __searche_for_preexisting_files(file_name+"_intermediate_file.txt",deposit_pathway_name):
+               print("The transcripts has been collected\n")
+           else:
+               _transcript_extractor (file_name,source_pathway_name,deposit_pathway_name)
+   return(file_name,source_pathway_name,deposit_pathway_name)
+
+#### Dev part ####
+
+if __name__ == "__main__":
+    #version_control(module_list,modul_name_list,python_version)
+    extract_transcript()
+#This block allows the file to be executed on its own
+
+
diff --git a/scripts/transcript_sampler.py b/scripts/transcript_sampler.py
deleted file mode 100644
index 7ee155eb551aaf688e82269cb4bfc65aba4e9882..0000000000000000000000000000000000000000
--- a/scripts/transcript_sampler.py
+++ /dev/null
@@ -1,426 +0,0 @@
-import pandas as pd
-import numpy as np
-from gtfparse import read_gtf
-
-
-def attributs_converter(attributs):
-    """
-    This funtion converts the "unstrucktured" ;-seperated part of he line into a list of identifyers and coresponding data the struckture of
-    which can be used ot find the data easyly e.g the index of the identifier transcrip_id + 1 will give the trasncript id of the current gene
-    Input: 
-        attributs = str() #the unstrucktured part of the entry
-    Output:
-        attributs = list() # cleand list with the characterritsics discribed above
-    """
-    attributs = attributs.replace('"', "")
-    attributs = attributs.replace(";", "")
-    attributs = attributs.replace("\\n", "")
-    attributs = attributs.split(" ")
-
-    return attributs
-
-
-def find_in_attributs(attributs, look_for):
-    """
-    This function finds a key word and used that to lokat the value of that key word e.g key = gene_id, value = 'ENSMUSG00002074970',
-    this works as they are next to each other in the attributs list. 
-    Inputs:
-        sub_enty = list() 
-        look_fore = str() #string of with the name of the key to look for
-    Output: 
-        attributs[index] or NA = str() #NA is returned if the key was not found in the attributs
-    """
-    try:
-        index = attributs.index(look_for) + 1
-        return attributs[index]
-    except:
-        # print("No",look_for,"in the entry the return was set to NA\n",attributs)
-        return "NA"
-
-
-def _re_format(rep_trans_dict):
-    """
-    This function is ment to reformat dictionary of the representatice transcripts into an dictionary with only one entry per key
-    Input:
-        rep_trans_dict = {gene_id : [transcript_id , transcript_support_level , transcript_length]}
-    Output: 
-        rep_transcripts = {gene_id : transcript_id}
-    """
-    rep_transcripts = dict()
-    for gene_id in rep_trans_dict:
-        rep_transcripts[gene_id] = rep_trans_dict[gene_id][0]
-
-    return rep_transcripts
-
-
-def get_rep_trans(file_name="test"):
-    """ 
-    This is the main function of this script it selects one representative transcrip per gene based on a gtf annotation file. 
-    It does so be two criteria: first the transcript support level and it there are several transcript 
-    of one gene that have the same trasncript_support_level it chooses the one that corresponds to the longest mRNA.
-    Input: 
-        file_name = str() # name of the annotation file with or without the .gtf part
-    Output: 
-        rep_transcripts = {gene_id : transcript_id}
-    """
-
-    # setting defoult variables
-    rep_trans = dict()
-    cur_gID = str()
-    cur_best_trans = [
-        str(),
-        100,
-        0,
-    ]  # [transcript_id , transcript_support_level , transcript_length]
-    pot_best_trans = False
-    cur_tID = str()
-    ignor_trans = False
-
-    with open(file_name, "r") as f:
-        for line in f:
-            entry = line.split("\t")
-
-            # removes expected but unneeded entrys
-            exp_unneed = [
-                "CDS",
-                "stop_codon",
-                "five_prime_utr",
-                "three_prime_utr",
-                "start_codon",
-                "Selenocysteine",
-            ]
-            if len(entry) == 1 or entry[2] in exp_unneed:
-                continue
-
-            # this function turns the less organized part of the entry into a reable list
-            attributs = attributs_converter(entry[8])
-            # looking for and processing exons entrys
-            if entry[2] == "exon":
-
-                # dicide if to contiune or not
-                if ignor_trans:
-                    continue
-                elif cur_gID != attributs[1]:
-                    raise ValueError("ERROR exon from an unexpected Gen")
-                    continue
-                elif find_in_attributs(attributs, "transcript_id") != cur_tID:
-                    raise ValueError("exon from an unexpected transcript")
-                    continue
-
-                # adding the length of the exon to the appropriat list and chacking for changes in best transcript
-                if pot_best_trans:
-                    pot_best_trans[2] += int(entry[4]) - int(entry[3])
-                    if pot_best_trans[2] > cur_best_trans[2]:
-                        cur_best_trans = pot_best_trans
-                        pot_best_trans = False
-                else:
-                    cur_best_trans[2] += int(entry[4]) - int(entry[3])
-
-            # looking for and processing transcript entrys
-            elif entry[2] == "transcript":
-
-                # varryfi that the gen is correct
-                if cur_gID != attributs[1]:
-                    raise ValueError("ERROR transcript from an unexpected Gen")
-                    continue
-
-                # finding the transcript id and the support level
-                cur_tID = find_in_attributs(attributs, "transcript_id")
-                t_supp_lvl = find_in_attributs(attributs, "transcript_support_level")
-
-                # If there is no transcript support level or the level is given as NA it is nomed as 100. else the transcript support level is tunrn into int
-                if t_supp_lvl == "NA":
-                    t_supp_lvl = 100
-                else:
-                    try:
-                        t_supp_lvl = int(t_supp_lvl)
-                    except:
-                        t_supp_lvl = 100
-
-                # decides if the transcript has potential to become the representative transcript
-                if t_supp_lvl < cur_best_trans[1] or cur_best_trans[0] == "":
-                    cur_best_trans = [cur_tID, t_supp_lvl, 0]
-                    pot_best_trans = False
-                    ignor_trans = False
-
-                elif t_supp_lvl == cur_best_trans[1]:
-                    pot_best_trans = [cur_tID, t_supp_lvl, 0]
-                else:
-                    ignor_trans = True
-
-            # looking for and processing gene entrys
-            elif entry[2] == "gene":
-
-                # updating rep_trans dict
-                if cur_gID not in rep_trans:
-                    rep_trans[cur_gID] = cur_best_trans
-                else:
-                    if rep_trans[cur_gID][1] > cur_best_trans[1]:
-                        rep_trans[cur_gID] = cur_best_trans
-                    elif (
-                        rep_trans[cur_gID][1] == cur_best_trans[1]
-                        and rep_trans[cur_gID][2] < cur_best_trans[2]
-                    ):
-                        rep_trans[cur_gID] = cur_best_trans
-
-                # updating cur_gID and resetting cur_best_trans
-                cur_gID = attributs[1]
-                cur_best_trans = [str(), 100, 0]
-
-            # raises an error for unidentifyable entrys
-            else:
-                raise ValueError("This entry could not be identified\n", entry)
-
-        # addding the final gene to the dictionary
-        if cur_gID not in rep_trans:
-            rep_trans[cur_gID] = cur_best_trans
-        else:
-            if rep_trans[cur_gID][1] > cur_best_trans[1]:
-                rep_trans[cur_gID] = cur_best_trans
-            elif (
-                rep_trans[cur_gID][1] == cur_best_trans[1]
-                and rep_trans[cur_gID][2] < cur_best_trans[2]
-            ):
-                rep_trans[cur_gID] = cur_best_trans
-
-        del rep_trans[""]
-        rep_transcripts = _re_format(rep_trans)
-        return rep_transcripts
-
-
-def _test():
-    """
-    This funtion is ment to be run for test
-    Output: 
-        file with the dictionary generated based on the test file 
-    """
-    file_name = "test.gtf"
-    rt = get_rep_trans(file_name)
-    expected_result = {
-        "ENSG00000160072": "ENST00000472194",
-        "ENSG00000234396": "ENST00000442483",
-        "ENSG00000225972": "ENST00000416931",
-        "ENSG00000224315": "ENST00000428803",
-        "ENSG00000198744": "ENST00000416718",
-        "ENSG00000279928": "ENST00000624431",
-        "ENSG00000228037": "ENST00000424215",
-        "ENSG00000142611": "ENST00000378391",
-    }
-    if rt != expected_result:
-        print("The test fail due to not yieding the same results")
-        print("The results the program got\n", rt)
-        print("The expected results\n", expected_result)
-    else:
-        print("The test was succses full")
-
-
-def gtf_file_writer(original_file, rep_transcript_dict, output_file):
-    """
-    this function writes the output GTF file
-    """
-    output = []
-
-    with open(original_file, "r") as f:
-        for line in f:
-            entry = line.split("\t")
-            if line[0] != "#":
-                attributes = attributs_converter(entry[8])
-                type_ = entry[2]
-            else:
-                continue
-            if type_ == "gene":
-                gene_id = find_in_attributs(attributes, "gene_id")
-                output.append(line)
-            else:
-                transcript_id = find_in_attributs(attributes, "transcript_id")
-                if rep_transcript_dict[gene_id] == transcript_id:
-                    output.append(line)
-
-    with open(output_file, "w") as last_file:
-        for item in output:
-            last_file.write(item)
-
-
-def gtf_to_df(gtf_file: str) -> pd.DataFrame:
-    """
-    This function take a .gtf file and convert it into a 
-    dataframe containing gene_id and their transcripts_id.
-        Args:
-            gtf_file (str) : path to the .gtf file
-
-        Returns:
-            df_gtf (pd.DataFrame) : pandas dataframe containing columns
-            gene_id and their transcripts_id.
-        Raises : 
-            None 
-    
-    """
-    df_gtf = read_gtf(gtf_file)
-    df_gtf = df_gtf.loc[df_gtf["feature"] == "transcript"]
-    df_gtf = df_gtf[["gene_id", "transcript_id"]]
-    df_gtf = df_gtf.rename(columns={"gene_id": "Gene", "transcript_id": "Transcript"})
-    return df_gtf
-
-
-def dict_reprTrans_to_df(dict_reprTrans: dict[str, str]) -> pd.DataFrame:
-
-    """Convert a dictionary of genes and their representative transcript into a dataframe 
-
-        Args:
-            dict_reprTrans (dict) : {'Gene':['transcriptA', 'transcriptB'], ...}
-
-        Returns:
-            Pandas dataframe having Gene and transcript as columns
-      
-        Raises:
-            Only dict are allowed
-            Key should be strings
-            Value should be strings
-          
-    """
-    pass
-    if not type(dict_reprTrans) is dict:
-        raise TypeError("Only dict are allowed")
-    if type(list(dict_reprTrans.keys())[0]) is not str:
-        raise TypeError("Key should be strings")
-    if type(list(dict_reprTrans.values())[0]) is not str:
-        raise TypeError("Values should be strings")
-
-    df_reprTrans = pd.DataFrame.from_dict(
-        dict_reprTrans, orient="index", columns=["reprTranscript"]
-    )
-    df_reprTrans = df_reprTrans.reset_index(level=0)
-    df_reprTrans.columns = ["Gene", "reprTrans"]
-    df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(
-        r"\.[1-9]", "", regex=True
-    )
-    return df_reprTrans
-
-
-def tsv_or_csv_to_df(input_txt: str) -> pd.DataFrame:
-    """Convert tsv or csv file into a pandas dataframe
-
-        Args:
-            input_txt (str): csv or tsv file containing transcript expression level
-
-        Returns:
-            df_gene (str): Pandas dataframe having transcript and expression level
-            as columns  
-      
-        Raises:
-            None          
-    """
-    pass
-    df_input = pd.read_csv(
-        input_txt,
-        sep=r"[\t,]",
-        lineterminator="\n",
-        names=["Transcript", "Expression_level"],
-        engine="python",
-    )
-    return df_input
-
-
-def exprLevel_byGene(
-    df_exprTrasncript: pd.DataFrame, df_output_gtf_selection: pd.DataFrame
-) -> pd.DataFrame:
-    """find the gene of each transcipt given by the expression level csv/tsv file,
-       and summ expression level of all transcipts from the same gene. 
-
-        Args:
-            df_exprTranscript : pandas Dataframe containing transcript and their expression level,
-            generated by "tsv_or_csv_to_df" function
-            df_output_gtf_selection : pandas Dataframe containing genes and transcripts,
-            generated by "transcripts_by_gene_inDf" function 
-
-        Returns:
-            Pandas dataframe having gene and sum of its transcript expression level
-      
-        Raises:
-            None          
-    """
-    pass
-    df_merged = pd.merge(
-        df_output_gtf_selection, df_exprTrasncript, how="inner", on="Transcript"
-    )
-    df_sum = df_merged.groupby("Gene").sum("Expression_level")
-    return df_sum
-
-
-def match_byGene(
-    df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene: pd.DataFrame
-) -> pd.DataFrame:
-    """Find matching genes bewteen the 2 args 
-
-        Args:
-            df_reprTranscript : pandas Dataframe containing genes 
-            and their representative transcript, generated by
-            "dict_reprTrans_to_df()" 
-            df_expressionLevel_byGene : pandas Dataframe containing 
-            genes and their expression level generated by 
-            "transcript_by_gene_inDf()"
-
-        Returns:
-            Pandas dataframe having representative trasncripts 
-            and their expression level
-      
-        Raises:
-            None          
-    """
-    pass
-    df_merged = pd.merge(
-        df_reprTranscript, df_expressionLevel_byGene, how="outer", on="Gene"
-    )
-    df_clean = df_merged.dropna(axis=0)
-    df_clean = df_clean.loc[:, ["reprTrans", "Expression_level"]]
-    return df_clean
-
-
-### functions to run this part of the programm
-
-
-def match_reprTranscript_expressionLevel(
-    exprTrans: str, dict_reprTrans: dict, gtf_file: str,
-):
-    """Combine functions to replace transcripts from an expression level csv/tsv file 
-       with representative transcripts 
-
-        Args:
-            exprTrans (str): csv or tsv file containing transcripts
-            and their expression level 
-            dict_reprTrans (dict) : dict of genes and their 
-            representative transcipt
-            intemediate_file (str) : txt file containing genes, transcript 
-            and their expression level from the transkript_extractor function
-            output_path : path indicating were the tsv file should be written
-
-        Returns:
-            tsv file of representative trasncripts and their expression level
-      
-        Raises:
-            None          
-    """
-    df_gene_transcript = gtf_to_df(gtf_file)
-    df_exprTrans = tsv_or_csv_to_df(exprTrans)
-    df_reprTrans = dict_reprTrans_to_df(dict_reprTrans)
-    df_exprLevel_byGene = exprLevel_byGene(df_exprTrans, df_gene_transcript)
-    df_match = match_byGene(df_reprTrans, df_exprLevel_byGene)
-    df_match.rename(
-        columns={"reprTrans": "id", "Expression_level": "level"}, inplace=True
-    )
-    return df_match
-
-
-def transcript_sampling(total_transcript_number, df_repr, output_csv):
-    df = df_repr
-    levels = []
-    sums = df["level"].tolist()
-    total = sum(sums)
-    total_transcript_number = int(total_transcript_number)
-    normalized = total_transcript_number / total
-    for expression_level in df["level"]:
-        poisson_sampled = np.random.poisson(expression_level * normalized)
-        levels.append(poisson_sampled)
-
-    transcript_numbers = pd.DataFrame({"id": df["id"], "count": levels})
-    pd.DataFrame.to_csv(transcript_numbers, output_csv)
diff --git a/test/Test_representative_and_match/test_match.py b/test/Test_representative_and_match/test_match.py
deleted file mode 100644
index c8b156cb24435bb24d9764dd50c19ac1fdde6086..0000000000000000000000000000000000000000
--- a/test/Test_representative_and_match/test_match.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import pandas as pd
-import json
-import re
-import match_reprtranscript_expressionlevel as match
-import os
-import pytest
-import test_Functions as tFun
-import numpy as np
-import representative as repr
-from pandas.testing import assert_frame_equal
-
-def test_dict_reprTrans_to_df():
-    """
-    This function test if a dict of {gene: representativeTranscript}
-    is converted in a dataframe in the right format 
-    """
-    dict_repr_test = {"ENSMUSG00000079415":"ENSMUST00000112933", 
-"ENSMUSG00000024691" : "ENSMUST00000025595",
-"ENSMUSG00000063683": "ENSMUST00000119960"}
-    dict_mixed = {"a":2, "b":3}
-    str_random = "jflkajflkaelfha"
-    dict_int = {12:34, 13:66}
-    df = match.dict_reprTrans_to_df(dict_repr_test)
-    datatype={'Gene': np.dtype('O'), 'reprTrans': np.dtype('O')}
-
-    with pytest.raises(TypeError, match=r"Only dict are allowed"):
-        match.dict_reprTrans_to_df(str_random) 
-    with pytest.raises(TypeError, match=r"Key should be strings"):
-        match.dict_reprTrans_to_df(dict_int) 
-    with pytest.raises(TypeError, match=r"Values should be strings"):
-        match.dict_reprTrans_to_df(dict_mixed)
-    assert tFun.column_number(df)==2, "number of columns is not equal to 2"
-    assert tFun.column_dType(df)==datatype, "at least one column has the wrong datatype"
-    assert tFun.duplicated_rows(df).empty, "at least one row are duplicated "
-    assert tFun.NA_value(df) == 0, "at least one row contain NA values "
-
-
-def test_txt_to_dict():
-    path = tFun.find_path("test_dict_repr_trans.txt")
-    dico = match.txt_to_dict(path)
-    dict_test = {'ENSMUSG00000079415': 'ENSMUST00000112933', 
-"ENSMUSG00000024691" : "ENSMUST00000025595",
-"ENSMUSG00000063683": "ENSMUST00000119960"}
-    assert dico == dict_test
-
-def test_transcripts_by_gene_inDf():
-    """
-    This function test if a dataframe generated from 
-    the intermediate file is converted in another 
-    dataframe without the support level column.
-    """
-    path = tFun.find_path_intermediateFile()
-    df = repr.import_gtfSelection_to_df(path)
-    df_gene = match.transcripts_by_gene_inDf(df)
-    datatype={'Gene': np.dtype('O'), 'Transcript': np.dtype('O')}
-    assert tFun.column_number(df_gene)==2, "number of columns is not equal to 2"
-    assert tFun.column_dType(df_gene)==datatype, "at least one column has the wrong datatype"
-    assert tFun.duplicated_rows(df_gene).empty, "at least one row are duplicated "
-    assert tFun.NA_value(df_gene) == 0, "at least one row contain NA values "
-
-
-def test_tsv_or_csv_to_df():
-    """
-    This function test if the function tsv_or_csv_to_df() cans take 
-    csv and tsv file as input and return a pandas dataframe in the 
-    right format 
-    """
-    path_tsv = tFun.find_path(r"test_gene_exprL")
-    df_tsv = match.tsv_or_csv_to_df(path_tsv)
-    path_csv = tFun.find_path(r"test_gene_exprL_csv.csv")
-    df_csv = match.tsv_or_csv_to_df(path_csv)
-    datatype ={'Transcript': np.dtype('O'), 'Expression_level': np.dtype('float64')}
-    assert tFun.column_number(df_tsv)==2, "number of columns is not equal to 2"
-    assert tFun.column_dType(df_tsv)==datatype, "at least one column has the wrong datatype"
-    assert tFun.duplicated_rows(df_tsv).empty, "at least one row are duplicated "
-    assert tFun.NA_value(df_tsv) == 0, "at least one row contain NA values "
-    assert_frame_equal(df_tsv, df_csv), "csv and tsv import doesn't match"
-    
-
-def test_exprLevel_byGene():
-    """
-    This function test if the function exprLevel_byGene can find the gene of 
-    each transcipt given by the expression level csv/tsv file and sum their 
-    expression level 
-    """
-    path_tsv = tFun.find_path(r"test_gene_exprL")
-    df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
-
-    path_intermediate = tFun.find_path_intermediateFile()
-    df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
-    df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
-
-    df_exprLevel = match.exprLevel_byGene(df_tsv_exprL, df_gene_transcript)
-
-    datatype ={'Expression_level': np.dtype('float64')}
-    assert tFun.column_number(df_exprLevel)==1, "number of columns is not equal to 1"
-    assert tFun.column_dType(df_exprLevel)==datatype, "at least one column has the wrong datatype"
-    assert tFun.duplicated_rows(df_exprLevel).empty, "at least one row are duplicated "
-    assert tFun.NA_value(df_exprLevel) == 0, "at least one row contain NA values "
-    assert tFun.duplicated_index(df_exprLevel).empty, "at least one index element is duplicated"
-    
-def test_match_byGene():
-    """
-    This function test if the function "match_byGene()" can 
-    create a pandas dataframe matching representative transcript
-    and their expression level based on their gene in the 
-    correct pandas dataframe format. 
-    """
-
-
-    dict_repr_test = {'ENSMUSG00000079415': 'ENSMUST00000112933', 
-"ENSMUSG00000024691" : "ENSMUST00000025595",
-"ENSMUSG00000063683": "ENSMUST00000119960"}
-    df_dict_reprTrans = match.dict_reprTrans_to_df(dict_repr_test)
-
-
-    path_tsv = tFun.find_path(r"test_gene_exprL")
-    df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
-    path_intermediate = tFun.find_path_intermediateFile()
-    df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
-    df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
-    df_exprLevel = match.exprLevel_byGene(df_tsv_exprL, df_gene_transcript)
-
-    df_match = match.match_byGene(df_dict_reprTrans, df_exprLevel)
-    datatype = {'reprTrans': np.dtype('O'), 'Expression_level': np.dtype('float64')}
-
-    assert tFun.column_number(df_match)==2, "number of columns is not equal to 2"
-    assert tFun.column_dType(df_match)==datatype, "at least one column has the wrong datatype"
-    assert tFun.duplicated_rows(df_match).empty, "at least one row are duplicated "
-    assert tFun.NA_value(df_match) == 0, "at least one row contain NA values "
-    assert tFun.duplicated_index(df_match).empty, "at least one index element is duplicated"
-
-def test_output_tsv(): 
-    """
-    This function test if a tsv file is generated from a pandas
-    dataframe in the right format. 
-    """
-
-    dict_repr_test = {'ENSMUSG00000079415': 'ENSMUST00000112933', 
-"ENSMUSG00000024691" : "ENSMUST00000025595",
-"ENSMUSG00000063683": "ENSMUST00000119960"}
-    df_dict_reprTrans = match.dict_reprTrans_to_df(dict_repr_test)
-
-
-    path_tsv = tFun.find_path(r"test_gene_exprL")
-    df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
-    path_intermediate = tFun.find_path_intermediateFile()
-    df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
-    df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
-
-    df_exprLevel = match.exprLevel_byGene(df_tsv_exprL, df_gene_transcript)
-
-    df_match = match.match_byGene(df_dict_reprTrans, df_exprLevel)
-
-    match.output_tsv(df_match)
-
-    ref_path=tFun.find_path("test_ref_output.tsv")
-    output_path = tFun.find_output()
-
-    with open(ref_path, 'r') as t1, open(output_path, 'r') as t2:
-        fileRef = t1.readlines()
-        fileOutput = t2.readlines()
-
-
-    assert sorted(fileRef) == sorted(fileOutput), "the output does't match the expected tsv file"
-    
- 
-def test_match_reprTranscript_expressionLevel():
-    """
-    This function test that the right output is generated by the function
-    match_reprTranscript_expressionLevel()
-    """
-    input_path = tFun.find_path("test_gene_exprL")
-    intermediate_path = tFun.find_path_intermediateFile()
-    dict_repr_test = {'ENSMUSG00000079415': 'ENSMUST00000112933', 
-"ENSMUSG00000024691" : "ENSMUST00000025595",
-"ENSMUSG00000063683": "ENSMUST00000119960"}
-
-    match.match_reprTranscript_expressionLevel(input_path, dict_repr_test, intermediate_path)
-
-    ref_path=tFun.find_path("test_ref_output.tsv")
-    output_path = tFun.find_output()
-    
-
-    with open(ref_path, 'r') as t1,\
-         open(output_path, 'r') as t2,\
-         open(input_path, 'r') as t3 :
-        fileRef = t1.readlines()
-        fileOutput = t2.readlines()
-        fileInput = t3.readlines()
-
-    assert sorted(fileRef) == sorted(fileOutput), "the output does't match the expected tsv file"
-    assert sorted(fileRef) != sorted(fileInput), "the output does't match the expected tsv file"
-    
-         
-    
-
-test_dict_reprTrans_to_df()
-test_txt_to_dict()
-test_transcripts_by_gene_inDf()
-test_tsv_or_csv_to_df()
-test_exprLevel_byGene()
-test_match_byGene()
-test_output_tsv()
-test_match_reprTranscript_expressionLevel()
-
-print("test_match is done ! No error was found")
diff --git a/test/.gitkeep b/tests/__init__.py
similarity index 100%
rename from test/.gitkeep
rename to tests/__init__.py
diff --git a/test/Test_representative_and_match/.gitkeep b/tests/inputs/.gitkeep
similarity index 100%
rename from test/Test_representative_and_match/.gitkeep
rename to tests/inputs/.gitkeep
diff --git a/test/Test_representative_and_match/inputs/test_dict_repr_trans.txt b/tests/inputs/test_dict_repr_trans.txt
similarity index 100%
rename from test/Test_representative_and_match/inputs/test_dict_repr_trans.txt
rename to tests/inputs/test_dict_repr_trans.txt
diff --git a/test/Test_representative_and_match/inputs/test_gencode.vM31.annotation_intermediat_file.txt b/tests/inputs/test_gencode.vM31.annotation_intermediat_file.txt
similarity index 100%
rename from test/Test_representative_and_match/inputs/test_gencode.vM31.annotation_intermediat_file.txt
rename to tests/inputs/test_gencode.vM31.annotation_intermediat_file.txt
diff --git a/test/Test_representative_and_match/inputs/test_gene_exprL b/tests/inputs/test_gene_exprL
similarity index 100%
rename from test/Test_representative_and_match/inputs/test_gene_exprL
rename to tests/inputs/test_gene_exprL
diff --git a/test/Test_representative_and_match/inputs/test_gene_exprL_csv.csv b/tests/inputs/test_gene_exprL_csv.csv
similarity index 100%
rename from test/Test_representative_and_match/inputs/test_gene_exprL_csv.csv
rename to tests/inputs/test_gene_exprL_csv.csv
diff --git a/test/Test_representative_and_match/inputs/test_ref_output.tsv b/tests/inputs/test_ref_output.tsv
similarity index 100%
rename from test/Test_representative_and_match/inputs/test_ref_output.tsv
rename to tests/inputs/test_ref_output.tsv
diff --git a/test/Test_representative_and_match/test_Functions.py b/tests/test_Functions.py
similarity index 71%
rename from test/Test_representative_and_match/test_Functions.py
rename to tests/test_Functions.py
index 72a120d1d2e6967233abd0dc2bc14607a8ef40fe..fe51484c206908600360239917194fa455cf17c9 100644
--- a/test/Test_representative_and_match/test_Functions.py
+++ b/tests/test_Functions.py
@@ -2,32 +2,34 @@ import pandas as pd
 import numpy as np
 import os
 
-def find_path(filename:str)->str: 
+
+def find_path(filename: str) -> str:
     """Find the path to a file
 
         Args:
             name of a file
 
         Returns:
-            str path of a file 
-      
+            str path of a file
+
         Raises:
             None
     """
     absolute_path = os.path.dirname(__file__)
-    test_file = "inputs\\" + str(filename) 
+    test_file = "inputs/" + str(filename)
     full_path = os.path.join(absolute_path, test_file)
     return full_path
 
+
 def find_output():
-    """Find the path of the output file 
+    """Find the path of the output file
 
         Args:
             name of a file
 
         Returns:
-            str path of a file 
-      
+            str path of a file
+
         Raises:
             None
     """
@@ -37,7 +39,7 @@ def find_output():
     return full_path
 
 
-def find_path_intermediateFile()->str: 
+def find_path_intermediateFile() -> str:
     """Find the path to gencode.vM31.annotation_intermediat_file.txt
 
         Args:
@@ -45,77 +47,82 @@ def find_path_intermediateFile()->str:
 
         Returns:
             str path of gencode.vM31.annotation_intermediat_file.txt
-      
+
         Raises:
             None
-    """ 
+    """
     absolute_path = os.path.dirname(__file__)
-    test_file = r"inputs\test_gencode.vM31.annotation_intermediat_file.txt"
+    test_file = r"inputs/test_gencode.vM31.annotation_intermediat_file.txt"
     full_path = os.path.join(absolute_path, test_file)
     return full_path
 
-def column_number(df :pd.DataFrame)-> int:
 
-    """Return the number of column of a df 
+def column_number(df: pd.DataFrame) -> int:
+
+    """Return the number of column of a df
 
         Args:
             dataframe
 
         Returns:
             int
-      
+
         Raises:
             None
-    """ 
+    """
     length = len(df.columns)
     return length
 
-def column_dType(df : pd.DataFrame) -> dict[str,np.dtype]:
-    """Return the type of each column of a df in a dict 
+
+def column_dType(df: pd.DataFrame) -> dict[str, np.dtype]:
+    """Return the type of each column of a df in a dict
 
         Args:
             Pandas dataframe
 
         Returns:
             dict{column:np.dtype()}
-      
+
         Raises:
             None
-    """ 
-    dtype=df.dtypes.to_dict()
+    """
+    dtype = df.dtypes.to_dict()
     return dtype
 
+
 def duplicated_rows(df: pd.DataFrame) -> pd.DataFrame:
-    """Return the sum of duplicated rows in a df 
+    """Return the sum of duplicated rows in a df
 
         Args:
             Pandas dataframe
 
         Returns:
             int
-      
+
         Raises:
             None
-    """ 
+    """
     df_dupl = df[df.duplicated()]
     return df_dupl
 
+
 def duplicated_index(df: pd.DataFrame) -> pd.DataFrame:
-    """Return the sum of duplicated index in a df 
+    """Return the sum of duplicated index in a df
 
         Args:
             Pandas dataframe
 
         Returns:
             int
-      
+
         Raises:
             None
-    """ 
+    """
     df_dupl = df[df.index.duplicated()]
     return df_dupl
 
-def NA_value(df: pd.DataFrame) -> int: 
+
+def NA_value(df: pd.DataFrame) -> int:
     """Return the sum of NA values in a df
 
         Args:
@@ -123,10 +130,9 @@ def NA_value(df: pd.DataFrame) -> int:
 
         Returns:
             int
-      
+
         Raises:
             None
-    """ 
+    """
     nNA = df.isna().sum().sum()
     return nNA
-    
diff --git a/tests/test_match_reptrans_explvl.py b/tests/test_match_reptrans_explvl.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e1b52c7e62e2cd3ea7f2320b62f551def3828be
--- /dev/null
+++ b/tests/test_match_reptrans_explvl.py
@@ -0,0 +1,260 @@
+"""Tests for match representative transcript with expression level"""
+import pytest
+import pandas as pd
+import numpy as np
+from pandas.testing import assert_frame_equal
+import tests.test_Functions as tFun
+from transcript_sampler.match_reptrans_explvl import MatchReptransExplvl as match
+
+
+class TestMatchReptrans:
+    """Tests for match_reptrans_explvl.py"""
+    # def test_gtf_to_df(self):
+    # TO DO
+
+    def test_dict_repr_trans_to_df(self):
+        """
+        This function tests that a dict of {gene: representativeTranscript}
+        is converted into a dataframe in the right format
+        """
+        dict_repr_test = {
+            "ENSMUSG00000079415": "ENSMUST00000112933",
+            "ENSMUSG00000024691": "ENSMUST00000025595",
+            "ENSMUSG00000063683": "ENSMUST00000119960"}
+        dict_mixed = {"a": 2, "b": 3}
+        str_random = "jflkajflkaelfha"
+        dict_int = {12: 34, 13: 66}
+        data_frame = match.dict_repr_trans_to_df(dict_repr_test)
+        datatype = {'Gene': np.dtype('O'), 'reprTrans': np.dtype('O')}
+
+        with pytest.raises(TypeError, match=r"Only dictionaries are allowed"):
+            match.dict_repr_trans_to_df(str_random)
+        with pytest.raises(TypeError, match=r"Keys should be strings"):
+            match.dict_repr_trans_to_df(dict_int)
+        with pytest.raises(TypeError, match=r"Values should be strings"):
+            match.dict_repr_trans_to_df(dict_mixed)
+
+        assert tFun.column_number(data_frame) == 2, \
+            "number of columns not equal to 2"
+        assert tFun.column_dType(data_frame) == datatype, \
+            "at least one column has the wrong datatype"
+        assert tFun.duplicated_rows(data_frame).empty, \
+            "at least one row is duplicated"
+        assert tFun.NA_value(data_frame) == 0, \
+            "at least one row contain NA values"
+
+    def test_tsv_or_csv_to_df(self):
+        """
+        This function tests that tsv_or_csv_to_df() accepts both csv
+        and tsv files as input and returns a pandas dataframe in the
+        right format
+        """
+        path_tsv = tFun.find_path(r"test_gene_exprL")
+        df_tsv = match.tsv_or_csv_to_df(path_tsv)
+        path_csv = tFun.find_path(r"test_gene_exprL_csv.csv")
+        df_csv = match.tsv_or_csv_to_df(path_csv)
+        datatype = {'Transcript': np.dtype('O'),
+                    'Expression_level': np.dtype('float64')}
+
+        assert tFun.column_number(df_tsv) == 2, \
+            "number of columns is not equal to 2"
+        assert tFun.column_dType(df_tsv) == datatype, \
+            "at least one column has the wrong datatype"
+        assert tFun.duplicated_rows(df_tsv).empty, \
+            "at least one row are duplicated "
+        assert tFun.NA_value(df_tsv) == 0, \
+            "at least one row contain NA values"
+        # assert_frame_equal raises AssertionError itself on a mismatch
+        assert_frame_equal(df_tsv, df_csv)
+
+    def test_expr_level_by_gene(self):
+        """
+        This function tests that expr_level_by_gene() can find the gene
+        of each transcript given by the expression level csv/tsv file
+        and sum their expression levels
+        """
+        path_tsv = tFun.find_path(r"test_gene_exprL")
+        df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
+        df_gene_transcript = pd.DataFrame(
+            {'Gene': ['ENSMUSG00000024691', 'ENSMUSG00000024691',
+                      'ENSMUSG00000024691', 'ENSMUSG00000024691',
+                      'ENSMUSG00000079415', 'ENSMUSG00000063683',
+                      'ENSMUSG00000063683', 'ENSMUSG00000063683',
+                      'ENSMUSG00000063683', 'ENSMUSG00000063683'],
+             'Transcript': ['ENSMUST00000139270', 'ENSMUST00000151307',
+                            'ENSMUST00000144662', 'ENSMUST00000025595',
+                            'ENSMUST00000112933', 'ENSMUST000000449762',
+                            'ENSMUST00000155846', 'ENSMUST00000157069',
+                            'ENSMUST00000119960', 'ENSMUST00000123173']}
+        )
+
+        df_exprLevel = match.expr_level_by_gene(
+            df_tsv_exprL, df_gene_transcript
+            )
+        datatype = {'Gene': np.dtype('O'),
+                    'Expression_level': np.dtype('float64')}
+
+        assert tFun.column_number(df_exprLevel) == 2, \
+            "number of columns is not equal to 2"
+        assert tFun.column_dType(df_exprLevel) == datatype, \
+            "at least one column has the wrong datatype"
+        assert tFun.duplicated_rows(df_exprLevel).empty, \
+            "at least one row are duplicated "
+        assert tFun.NA_value(df_exprLevel) == 0, \
+            "at least one row contain NA values "
+        assert tFun.duplicated_index(df_exprLevel).empty, \
+            "at least one index element is duplicated"
+
+    def test_match_by_gene(self):
+        """
+        This function test if the function "match_by_gene()" can
+        create a pandas dataframe matching representative transcript
+        and their expression level based on their gene in the
+        correct pandas dataframe format.
+        """
+
+        dict_repr_test = {
+            'ENSMUSG00000079415': 'ENSMUST00000112933',
+            'ENSMUSG00000024691': 'ENSMUST00000025595',
+            'ENSMUSG00000063683': 'ENSMUST00000119960'}
+        df_dict_reprTrans = match.dict_repr_trans_to_df(dict_repr_test)
+
+        path_tsv = tFun.find_path(r"test_gene_exprL")
+        df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
+        df_gene_transcript = pd.DataFrame(
+            {'Gene': ['ENSMUSG00000024691', 'ENSMUSG00000024691',
+                      'ENSMUSG00000024691', 'ENSMUSG00000024691',
+                      'ENSMUSG00000079415', 'ENSMUSG00000063683',
+                      'ENSMUSG00000063683', 'ENSMUSG00000063683',
+                      'ENSMUSG00000063683', 'ENSMUSG00000063683'],
+             'Transcript': ['ENSMUST00000139270', 'ENSMUST00000151307',
+                            'ENSMUST00000144662', 'ENSMUST00000025595',
+                            'ENSMUST00000112933', 'ENSMUST000000449762',
+                            'ENSMUST00000155846', 'ENSMUST00000157069',
+                            'ENSMUST00000119960', 'ENSMUST00000123173']}
+        )
+        df_exprLevel = match.expr_level_by_gene(
+            df_tsv_exprL, df_gene_transcript)
+
+        df_match = match.match_by_gene(df_dict_reprTrans, df_exprLevel)
+        datatype = {
+            'reprTrans': np.dtype('O'),
+            'Expression_level': np.dtype('float64')}
+
+        assert tFun.column_number(df_match) == 2, \
+            "number of columns is not equal to 2"
+        assert tFun.column_dType(df_match) == datatype, \
+            "at least one column has the wrong datatype"
+        assert tFun.duplicated_rows(df_match).empty, \
+            "at least one row are duplicated "
+        assert tFun.NA_value(df_match) == 0, \
+            "at least one row contain NA values "
+        assert tFun.duplicated_index(df_match).empty, \
+            "at least one index element is duplicated"
+
+    def test_match_repr_transcript_expression_level(self):
+        """
+        This function tests that the right output is generated by the
+        function match_repr_transcript_expression_level()
+        """
+        input_path = tFun.find_path("test_gene_exprL")
+        intermediate_path = tFun.find_path_intermediateFile()
+        dict_repr_test = {
+            'ENSMUSG00000079415': 'ENSMUST00000112933',
+            "ENSMUSG00000024691": "ENSMUST00000025595",
+            "ENSMUSG00000063683": "ENSMUST00000119960"}
+
+        match.match_repr_transcript_expression_level(
+            exprTrans=input_path,
+            dict_reprTrans=dict_repr_test,
+            gtf_file=intermediate_path)
+
+        ref_path = tFun.find_path("test_ref_output.tsv")
+        output_path = tFun.find_output()
+
+        with open(ref_path, 'r', encoding="utf-8") as t1,\
+            open(output_path, 'r', encoding="utf-8") as t2,\
+            open(input_path, 'r', encoding="utf-8") as t3:
+            fileRef = t1.readlines()
+            fileOutput = t2.readlines()
+            fileInput = t3.readlines()
+
+        assert (
+            sorted(fileRef) == sorted(fileOutput)
+            ), "the output doesn't match the expected tsv file"
+        assert (
+            sorted(fileRef) != sorted(fileInput)
+            ), "the reference output must differ from the input file"
+
+    # def test_txt_to_dict(self):
+    #     """This function tests if txt is convertod to dict"""
+    #     path = tFun.find_path("test_dict_repr_trans.txt")
+    #     dico = match.txt_to_dict(path)
+    #     dict_test = {'ENSMUSG00000079415': 'ENSMUST00000112933',
+    #                 "ENSMUSG00000024691": "ENSMUST00000025595",
+    #                 "ENSMUSG00000063683": "ENSMUST00000119960"}
+    #     assert dico == dict_test
+
+    # def test_transcripts_by_gene_inDf():
+    #     """
+    #     This function test if a dataframe generated from
+    #     the intermediate file is converted in another
+    #     dataframe without the support level column.
+    #     """
+    #     path = tFun.find_path_intermediateFile()
+    #     df = repr.import_gtfSelection_to_df(path)
+    #     df_gene = match.transcripts_by_gene_inDf(df)
+    #     datatype = {'Gene': np.dtype('O'), 'Transcript': np.dtype('O')}
+    #     assert tFun.column_number(df_gene) == (
+    #         2, "number of columns is not equal to 2")
+    #     assert tFun.column_dType(df_gene) == (
+    #         datatype, "at least one column has the wrong datatype")
+    #     assert tFun.duplicated_rows(df_gene).empty, \
+    #         "at least one row are duplicated"
+    #     assert tFun.NA_value(df_gene) == 0, "at least one row contain NA values"
+
+    # def test_output_tsv():
+    #     """
+    #     This function test if a tsv file is generated from a pandas
+    #     dataframe in the right format.
+    #     """
+
+    #     dict_repr_test = {
+    #         'ENSMUSG00000079415': 'ENSMUST00000112933',
+    #         "ENSMUSG00000024691": "ENSMUST00000025595",
+    #         "ENSMUSG00000063683": "ENSMUST00000119960"}
+    #     df_dict_reprTrans = match.dict_repr_trans_to_df(dict_repr_test)
+
+    #     path_tsv = tFun.find_path(r"test_gene_exprL")
+    #     df_tsv_exprL = match.tsv_or_csv_to_df(path_tsv)
+    #     path_intermediate = tFun.find_path_intermediateFile()
+    #     df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
+    #     df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
+
+    #     df_exprLevel = match.expr_level_by_gene(df_tsv_exprL, df_gene_transcript)
+
+    #     df_match = match.match_by_gene(df_dict_reprTrans, df_exprLevel)
+
+    #     match.output_tsv(df_match)
+
+    #     ref_path = tFun.find_path("test_ref_output.tsv")
+    #     output_path = tFun.find_output()
+
+    #     with open(ref_path, 'r') as t1, open(output_path, 'r') as t2:
+    #         fileRef = t1.readlines()
+    #         fileOutput = t2.readlines()
+
+    #     assert (
+    #         sorted(fileRef) == sorted(fileOutput)
+    #         ), "the output does't match the expected tsv file"
+
+# test_dict_repr_trans_to_df()
+# test_txt_to_dict()
+# test_transcripts_by_gene_inDf()
+# test_tsv_or_csv_to_df()
+# test_expr_level_by_gene()
+# test_match_by_gene()
+# test_output_tsv()
+# test_match_repr_transcript_expression_level()
+
+# print("test_match is done ! No error was found")
diff --git a/test/Test_representative_and_match/test_representative.py b/tests/test_representative.py
similarity index 58%
rename from test/Test_representative_and_match/test_representative.py
rename to tests/test_representative.py
index 4d000977434368393cedeb6ac0d4b93f30609ab7..4ee677838beb99ddfd671f21f7b8452102965c49 100644
--- a/test/Test_representative_and_match/test_representative.py
+++ b/tests/test_representative.py
@@ -1,6 +1,4 @@
 import pytest
-import pandas as pd
-import datatest as dt
 import representative as repr
 import numpy as np 
 import test_Functions as tFun
@@ -14,71 +12,82 @@ def test_import_gtfSelection_to_df():
             None
 
         Returns:
-            Assert results 
-      
+            Assert results
+
         Raises:
             None
     """
     path = tFun.find_path_intermediateFile()
-    df = repr.import_gtfSelection_to_df(path) 
-    datatype={'Gene': np.dtype('O'), 'Transcript': np.dtype('O'), 'Support_level': np.dtype('float64')}
-    assert tFun.column_number(df)==3, "number of columns is not equal to 3"
-    assert tFun.column_dType(df)==datatype, "at lease one column has the wrong datatype"
+    df = repr.import_gtfSelection_to_df(path)
+    datatype = {'Gene': np.dtype('O'), 'Transcript': np.dtype('O'),
+                'Support_level': np.dtype('float64')}
+    assert tFun.column_number(df) == 3, \
+        "number of columns is not equal to 3"
+    assert tFun.column_dType(df) == datatype, \
+        "at lease one column has the wrong datatype"
     assert tFun.duplicated_rows(df).empty, "at lease one row are duplicated "
     assert tFun.NA_value(df) == 0, "at lease one row contain NA values "
     with pytest.raises(TypeError, match=r"Only str path is allowed"):
         repr.import_gtfSelection_to_df(123)
 
 
-def test_representative_transcript_inDict(): 
+def test_representative_transcript_inDict():
     """
     Test if df generated by "import_gtfSelection_to_df()" output
-    a dict in the right format 
+    a dict in the right format
         Args:
             Pandas dataframe with [Gene, Transcript, Support_level]
             as columns, validated with test_import_gtfSelection_to_df()
 
         Returns:
-            Assert results 
-      
+            Assert results
+
         Raises:
             None
      """
     path = tFun.find_path_intermediateFile()
-    df = repr.import_gtfSelection_to_df(path) 
+    df = repr.import_gtfSelection_to_df(path)
     dict_to_test = repr.representative_transcripts_inDict(df)
-    dict_expected = {'ENSMUSG00000024691': ['ENSMUST00000025595.5'], 
-    'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'], 
-    'ENSMUSG00000079415': ['ENSMUST00000112933.2']}
+    dict_expected = {
+        'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
+        'ENSMUSG00000063683': ['ENSMUST00000044976.12',
+                               'ENSMUST00000119960.2'],
+        'ENSMUSG00000079415': ['ENSMUST00000112933.2']}
     assert dict_to_test == dict_expected
     with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
         repr.representative_transcripts_inDict(123)
     with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
         repr.representative_transcripts_inDict("hello")
     with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
-        repr.representative_transcripts_inDict(["hello","world",123])
+        repr.representative_transcripts_inDict(["hello", "world", 123])
     with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
-        repr.representative_transcripts_inDict({"hello":"world", "bonjour":["le monde", 123]})
+        repr.representative_transcripts_inDict({"hello": "world",
+                                                "bonjour": ["le monde", 123]})
+
 
 def test_find_repr_by_SupportLevel():
     """
-    Test if the correct dict is generated from gencode.vM31.annotation_intermediat_file.txt
+    Test if the correct dict is generated from
+    gencode.vM31.annotation_intermediat_file.txt
         Args:
-            None 
+            None
 
         Returns:
-            Assert results 
-      
+            Assert results
+
         Raises:
             None
     """
     path = tFun.find_path_intermediateFile()
     dict_to_test = repr.find_repr_by_SupportLevel(path)
-    dict_expected = {'ENSMUSG00000024691': ['ENSMUST00000025595.5'], 
-    'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'], 
-    'ENSMUSG00000079415': ['ENSMUST00000112933.2']}
+    dict_expected = {
+        'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
+        'ENSMUSG00000063683': ['ENSMUST00000044976.12',
+                               'ENSMUST00000119960.2'],
+        'ENSMUSG00000079415': ['ENSMUST00000112933.2']}
     assert dict_to_test == dict_expected
 
+
 test_representative_transcript_inDict()
 test_find_repr_by_SupportLevel()
 test_import_gtfSelection_to_df()
diff --git a/test/Test_representative_and_match/inputs/.gitkeep b/transcript_sampler/__init__.py
similarity index 100%
rename from test/Test_representative_and_match/inputs/.gitkeep
rename to transcript_sampler/__init__.py
diff --git a/transcript_sampler/find_reptrans.py b/transcript_sampler/find_reptrans.py
new file mode 100644
index 0000000000000000000000000000000000000000..6025e29cac6e6501e1b98f627ca4023e6b91128d
--- /dev/null
+++ b/transcript_sampler/find_reptrans.py
@@ -0,0 +1,288 @@
+"""Find representative transcripts"""
+
+import logging
+
+LOG = logging.getLogger(__name__)
+
+
+class FindRepTrans:
+    """Find representative transcripts."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def attributes_converter(attributes: str) -> list:
+        """
+        Convert the ";"-separated attribute column of a GTF line into a
+        flat list that alternates identifiers and their values, so that
+        the value of a key sits at index(key) + 1, e.g. the element
+        after "transcript_id" is the transcript id.
+        Double quotes, semicolons and newline characters are removed
+        before the string is split on single spaces.
+        Input:
+            attributes = str() # the unstructured part of the entry
+        Output:
+            attributes = list() # tokens of the cleaned attribute string
+        """
+        attributes = (
+            attributes.replace('"', "")
+            .replace(";", "")
+            .replace("\n", "")  # real newline, not a literal backslash-n
+            .split(" ")
+        )
+        return attributes
+
+    @staticmethod
+    def find_in_attributes(attributes: list, look_for: str) -> str:
+        """
+        Return the value that belongs to a keyword of the attributes
+        list, e.g. key = gene_id, value = 'ENSMUSG00002074970'.
+        This works because a value directly follows its key in the list.
+        Inputs:
+            attributes = list()
+            look_for = str() # string of the name of the key to look for
+        Output:
+            attributes[index] or NA = str() # NA is returned if the key
+                                            was not found in the attributes
+        """
+        if look_for not in attributes:
+            LOG.warning('No %s in the entry, the return was set to NA',
+                        look_for)
+            return "NA"
+        # the value is stored directly after its key
+        value_index = attributes.index(look_for) + 1
+        return attributes[value_index]
+
+    @staticmethod
+    def reformat_reptrans(rep_trans_dict: dict) -> dict:
+        """
+        Reduce the dictionary of representative transcripts to a plain
+        gene-to-transcript mapping by keeping only the first element
+        (the transcript id) of every value.
+        Input:
+            rep_trans_dict = {gene_id : [
+                transcript_id, transcript_support_level, transcript_length]}
+        Output:
+            rep_transcripts = {gene_id : transcript_id}
+        """
+        rep_transcripts = {
+            gene_id: best[0] for gene_id, best in rep_trans_dict.items()
+        }
+        return rep_transcripts
+
+    def get_rep_trans(self, file_name: str) -> dict:
+        """
+        Select one representative transcript per gene from a GTF file.
+
+        The transcript with the lowest transcript_support_level wins; ties
+        are broken by the largest summed exon length (longest mRNA). A
+        missing or non-numeric support level is treated as 100.
+
+        The parser expects every gene entry to be followed by its
+        transcripts and every transcript by its own exons, with gene_id
+        as the first attribute of each line.
+
+        Args:
+            file_name (str): Path to the GTF annotation file.
+
+        Returns:
+            rep_transcripts (dict): Dictionary of gene_id to transcript_id
+            representing the selected representative transcripts.
+
+        Raises:
+            ValueError: If an unexpected entry is encountered in the GTF file.
+        """
+        # setting default variables
+        rep_transcripts = {}
+        cur_g_id = ""
+        cur_tID = ""
+        # [transcript_id, transcript_support_level, transcript_length]
+        cur_best_trans = ["", 100, 0]
+        # candidate with the same support level as cur_best_trans, or False
+        pot_best_trans = False
+        # True while the exons of a worse-supported transcript are read
+        ignor_trans = False
+
+        with open(file_name, "r", encoding="utf-8") as file:
+            for line in file:
+                entry = line.split("\t")
+
+                # removes expected but unneeded entries
+                if len(entry) == 1 or entry[2] in [
+                    "CDS", "stop_codon",
+                    "five_prime_utr", "three_prime_utr",
+                    "start_codon", "Selenocysteine"
+                        ]:
+                    continue
+
+                # this function turns the less organized part of the entry
+                # into a readable list
+                attributes = self.attributes_converter(entry[8])
+
+                # looking for and processing exons entries
+                if entry[2] == "exon":
+                    if cur_g_id != attributes[1]:
+                        LOG.error("Exon from an unexpected gene")
+                        raise ValueError("Exon from an unexpected gene")
+                    if cur_tID != self.find_in_attributes(
+                            attributes, "transcript_id"):
+                        LOG.error("Exon from an unexpected transcript")
+                        raise ValueError("Exon from an unexpected transcript")
+
+                    # exons of a ruled-out transcript do not count
+                    if ignor_trans:
+                        continue
+
+                    # GTF coordinates are 1-based and inclusive
+                    exon_length = int(entry[4]) - int(entry[3]) + 1
+
+                    # adding the length of the exon to the appropriate list and
+                    # checking for changes in best transcript
+                    if pot_best_trans:
+                        pot_best_trans[2] += exon_length
+                        if pot_best_trans[2] > cur_best_trans[2]:
+                            cur_best_trans = pot_best_trans
+                            pot_best_trans = False
+                    else:
+                        cur_best_trans[2] += exon_length
+
+                # looking for and processing transcript entries
+                elif entry[2] == "transcript":
+                    # verify that the gen is correct
+                    if cur_g_id != attributes[1]:
+                        LOG.error("Transcript from an unexpected gene")
+                        raise ValueError("Transcript from an unexpected gene")
+
+                    # finding the transcript id and the support level
+                    cur_tID = self.find_in_attributes(
+                        attributes, "transcript_id")
+                    t_supp_lvl = self.find_in_attributes(
+                        attributes, "transcript_support_level")
+
+                    # a missing ("NA") or non-numeric support level is
+                    # normalised to 100, otherwise it is turned into int
+                    if t_supp_lvl.isdigit():
+                        t_supp_lvl = int(t_supp_lvl)
+                    else:
+                        t_supp_lvl = 100
+
+                    # decides if the transcript has potential to become the
+                    # representative transcript
+                    if t_supp_lvl < cur_best_trans[1] or cur_best_trans[0] == "":
+                        cur_best_trans = [cur_tID, t_supp_lvl, 0]
+                        pot_best_trans = False
+                        ignor_trans = False
+                    elif t_supp_lvl == cur_best_trans[1]:
+                        pot_best_trans = [cur_tID, t_supp_lvl, 0]
+                        ignor_trans = False
+                    else:
+                        pot_best_trans = False
+                        ignor_trans = True
+
+                # looking for and processing gene entries
+                elif entry[2] == "gene":
+                    # updating rep_transcripts dict
+                    prev = rep_transcripts.get(cur_g_id)
+                    if (prev is None or prev[1] > cur_best_trans[1]
+                            or (prev[1] == cur_best_trans[1]
+                                and prev[2] < cur_best_trans[2])):
+                        rep_transcripts[cur_g_id] = cur_best_trans
+
+                    # updating cur_g_id and resetting cur_best_trans
+                    cur_g_id = attributes[1]
+                    cur_best_trans = ["", 100, 0]
+
+                # raises an error for unidentifiable entries
+                else:
+                    LOG.error("This entry could not be identified")
+                    raise ValueError("This entry could not be identified")
+
+            # adding the final gene to the dictionary
+            prev = rep_transcripts.get(cur_g_id)
+            if (prev is None or prev[1] > cur_best_trans[1]
+                    or (prev[1] == cur_best_trans[1]
+                        and prev[2] < cur_best_trans[2])):
+                rep_transcripts[cur_g_id] = cur_best_trans
+
+            del rep_transcripts[""]
+            rep_transcripts = self.reformat_reptrans(rep_transcripts)
+            return rep_transcripts
+
+    def gtf_file_writer(self, original_file: str,
+                        rep_transcript_dict: dict, output_file: str):
+        """
+        Write a GTF file that keeps every gene entry of original_file
+        plus the entries of each gene's representative transcript.
+        Lines starting with "#" are not copied.
+        Input:
+            original_file = str() # path of the annotation to filter
+            rep_transcript_dict = {gene_id : transcript_id}
+            output_file = str() # path of the GTF file to write
+        """
+        output = []
+        gene_id = None  # no gene entry seen yet
+        with open(original_file, "r", encoding="utf-8") as f:
+            for line in f:
+                if line.startswith("#"):
+                    continue
+                entry = line.split("\t")
+                attributes = self.attributes_converter(entry[8])
+                if entry[2] == "gene":
+                    gene_id = self.find_in_attributes(attributes, "gene_id")
+                    output.append(line)
+                    continue
+                transcript_id = self.find_in_attributes(
+                    attributes, "transcript_id")
+                if rep_transcript_dict.get(gene_id) == transcript_id:
+                    output.append(line)
+        with open(output_file, "w", encoding="utf-8") as last_file:
+            last_file.writelines(output)
+
+
+# def _test():
+#     """
+#     This funtion is meant to be run for test
+#     Output:
+#         file with the dictionary generated based on the test file
+#     """
+#     file_name = "test.gtf"
+#     rt = get_rep_trans(file_name)
+#     expected_result = {"ENSG00000160072": "ENST00000472194",
+#                        "ENSG00000234396": "ENST00000442483",
+#                        "ENSG00000225972": "ENST00000416931",
+#                        "ENSG00000224315": "ENST00000428803",
+#                        "ENSG00000198744": "ENST00000416718",
+#                        "ENSG00000279928": "ENST00000624431",
+#                        "ENSG00000228037": "ENST00000424215",
+#                        'ENSG00000142611': 'ENST00000378391'}
+#     if rt != expected_result:
+#         print("The test failed due to not yielding the same results")
+#         print("The results the program got\n", rt)
+#         print("The expected results\n", expected_result)
+#     else:
+#         print("The test was successful")
+
+
+# # Execution part #
+# if __name__ == "__main__":
+#     parser = argparse.ArgumentParser(
+#         description="find_representativ_transcripts",
+#         formatter_class=argparse.ArgumentDefaultsHelpFormatter
+#         )
+#     parser.add_argument("-file_name", required=True,
+#                         help="gtf file with genome annotation")
+#     parser.add_argument("-t", required=False, default=False,
+#                         help="to run the test input -t True")
+#     args = parser.parse_args()
+
+#     # standadize the file_name inlude .gtf#
+#     file_name = args.file_name
+#     i_gtf = file_name.find(".gtf")
+#     if i_gtf == -1:
+#         file_name += ".gtf"
+
+#     if args.t:
+#         _test()
+#     else:
+#         get_rep_trans(file_name)
diff --git a/transcript_sampler/match_reptrans_explvl.py b/transcript_sampler/match_reptrans_explvl.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6d7a9a95bd8311ef0b6f8ab3fcfe627c1828df9
--- /dev/null
+++ b/transcript_sampler/match_reptrans_explvl.py
@@ -0,0 +1,345 @@
+"""Match representative transcript with expression level"""
+# Made by Hugo Gillet #
+
+import logging
+import pandas as pd
+from gtfparse import read_gtf
+
+LOG = logging.getLogger(__name__)
+
+
+class MatchReptransExplvl:
+    """Match representative transcript with expression level"""
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def gtf_to_df(gtf_file: str) -> pd.DataFrame:
+        """
+        Read a .gtf file and return the gene_id/transcript_id pairs of its
+        rows whose feature is "transcript" as a pandas DataFrame.
+
+        Args:
+            gtf_file (str): Path to the .gtf file.
+
+        Returns:
+            df_gtf (pd.DataFrame): Pandas DataFrame containing columns
+            'Gene' and 'Transcript'.
+
+        Raises:
+            None
+        """
+        df_gtf = read_gtf(gtf_file).to_pandas()
+        df_gtf = df_gtf[df_gtf["feature"] == "transcript"]
+        df_gtf = df_gtf[["gene_id", "transcript_id"]]
+        df_gtf = df_gtf.rename(columns={
+            "gene_id": "Gene", "transcript_id": "Transcript"
+            })
+        return df_gtf
+
+    @staticmethod
+    def dict_repr_trans_to_df(dict_reprTrans: "dict[str, str]") -> pd.DataFrame:
+        """
+        Convert a dictionary of genes and their representative transcript into a DataFrame.
+
+        Args:
+            dict_reprTrans (dict): {'gene_id': 'transcript_id', ...}
+
+        Returns:
+            Pandas DataFrame with 'Gene' and 'reprTrans' as columns; version suffixes such as ".1" or ".10" are removed from the transcript IDs.
+
+        Raises:
+            TypeError: Only dictionaries are allowed.
+            TypeError: Keys should be strings.
+            TypeError: Values should be strings.
+        """
+        if not isinstance(dict_reprTrans, dict):
+            LOG.error("Only dictionaries are allowed")
+            raise TypeError("Only dictionaries are allowed")
+        if not all(isinstance(key, str) for key in dict_reprTrans.keys()):
+            LOG.error("Keys should be strings")
+            raise TypeError("Keys should be strings")
+        if not all(isinstance(value, str) for value in dict_reprTrans.values()):
+            LOG.error("Values should be strings")
+            raise TypeError("Values should be strings")
+
+        df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"])
+        df_reprTrans = df_reprTrans.reset_index()
+        df_reprTrans.columns = ["Gene", "reprTrans"]
+        df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(r"\.\d+", "", regex=True)  # \d+ so ".10" does not leave a "0"
+
+        return df_reprTrans
+
+    @staticmethod
+    def tsv_or_csv_to_df(input_txt: str) -> pd.DataFrame:
+        """
+        Convert a header-less TSV or CSV file into a pandas DataFrame.
+
+        Args:
+            input_txt (str): TSV or CSV file containing transcript expression levels.
+
+        Returns:
+            df_input (pd.DataFrame): Pandas DataFrame with 'Transcript' and 'Expression_level' as columns.
+
+        Raises:
+            None
+        """
+        df_input = pd.read_csv(
+            input_txt,
+            sep=r"[\t,]",
+            lineterminator="\n",
+            names=["Transcript", "Expression_level"],
+            engine="python",
+        )
+        return df_input
+
+    @staticmethod
+    def expr_level_by_gene(
+        df_exprTranscript: pd.DataFrame, df_output_gtf_selection: pd.DataFrame
+    ) -> pd.DataFrame:
+        """
+        Find the gene of each transcript given by the expression level CSV/TSV file
+        and sum the expression level of all transcripts from the same gene.
+
+        Args:
+            df_exprTranscript (pd.DataFrame): Pandas DataFrame containing transcripts and their expression levels,
+                                            generated by the "tsv_or_csv_to_df" function.
+            df_output_gtf_selection (pd.DataFrame): Pandas DataFrame containing genes and transcripts,
+                                                    generated by the "gtf_to_df" function.
+
+        Returns:
+            Pandas DataFrame having 'Gene' and the sum of its transcript expression levels ('Expression_level').
+
+        Raises:
+            None
+        """
+        df_merged = pd.merge(df_output_gtf_selection, df_exprTranscript, how="inner", on="Transcript")
+        df_sum = df_merged.groupby("Gene")["Expression_level"].sum().reset_index()
+        return df_sum
+
+    @staticmethod
+    def match_by_gene(
+        df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene: pd.DataFrame
+    ) -> pd.DataFrame:
+        """
+        Find matching genes between the two DataFrames.
+
+        Args:
+            df_reprTranscript (pd.DataFrame): Pandas DataFrame containing genes and their representative transcripts,
+                                            generated by the "dict_repr_trans_to_df()" function.
+            df_expressionLevel_byGene (pd.DataFrame): Pandas DataFrame containing genes and their expression levels,
+                                                    generated by the "expr_level_by_gene()" function.
+
+        Returns:
+            Pandas DataFrame having representative transcripts and their expression levels.
+
+        Raises:
+            None
+        """
+        df_merged = pd.merge(df_reprTranscript, df_expressionLevel_byGene, how="inner", on="Gene")
+        df_clean = df_merged.loc[:, ["reprTrans", "Expression_level"]]
+        return df_clean
+
+    def match_repr_transcript_expression_level(
+        self, exprTrans: str, dict_reprTrans: dict, gtf_file: str,
+    ):
+        """
+        Combine functions to replace transcripts from an expression level CSV/TSV file with representative transcripts.
+
+        Args:
+            exprTrans (str): CSV or TSV file containing transcripts and their expression level.
+            dict_reprTrans (dict): Dictionary of genes and their representative transcripts.
+            gtf_file (str): Path to the GTF file.
+
+        Returns:
+            Pandas DataFrame of representative transcripts ('id') and their gene-level expression ('level').
+
+        Raises:
+            TypeError: If dict_reprTrans is not a dictionary of strings to strings.
+        """
+        df_gene_transcript = self.gtf_to_df(gtf_file)
+        df_exprTrans = self.tsv_or_csv_to_df(exprTrans)
+        df_reprTrans = self.dict_repr_trans_to_df(dict_reprTrans)
+        df_expr_level_by_gene = self.expr_level_by_gene(df_exprTrans, df_gene_transcript)
+        df_match = self.match_by_gene(df_reprTrans, df_expr_level_by_gene)
+        df_match.rename(columns={"reprTrans": "id", "Expression_level": "level"}, inplace=True)
+        return df_match
+
+
+
+# def dict_repr_trans_to_df(dict_reprTrans: "dict[str, str]") -> pd.DataFrame:
+
+#     """Convert a dictionary of genes and their representative
+#     transcript into a dataframe
+
+#         Args:
+#             dict_reprTrans (dict): {'Gene':['transcriptA', 'transcriptB'], ...}
+
+#         Returns:
+#             Pandas dataframe having Gene and transcript as columns
+
+#         Raises:
+#             Only dict are allowed
+#             Key should be strings
+#             Value should be strings
+
+#     """
+#     pass
+#     if not type(dict_reprTrans) is dict:
+#         raise TypeError("Only dict are allowed")
+#     if type(list(dict_reprTrans.keys())[0]) is not str:
+#         raise TypeError("Key should be strings")
+#     if type(list(dict_reprTrans.values())[0]) is not str:
+#         raise TypeError("Values should be strings")
+
+#     df_reprTrans = pd.DataFrame.from_dict(
+#         dict_reprTrans, orient="index", columns=["reprTranscript"]
+#     )
+#     df_reprTrans = df_reprTrans.reset_index(level=0)
+#     df_reprTrans.columns = ["Gene", "reprTrans"]
+#     df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(
+#         r"\.[1-9]", "", regex=True
+#     )
+#     return df_reprTrans
+
+
+# def gene_and_transcript(gtf_file: str) -> pd.DataFrame:
+#     """
+#     This function take a .gtf file and convert it into a
+#     dataframe containing gene_id and their transcripts_id.
+#         Args:
+#             gtf_file(str) : path to the .gtf file
+
+#         Returns:
+#             df_gtf(pd.DataFrame): pandas df containing having has columns
+#             gene_id and their transcripts_id.
+#         Raises:
+#             None
+#     """
+#     df_gtf = read_gtf(gtf_file)
+#     df_gtf = df_gtf.loc[df_gtf["feature"] == "transcript"]
+#     df_gtf = df_gtf[["gene_id", "transcript_id"]]
+#     df_gtf = df_gtf.rename(columns={"gene_id": "Gene",
+#                                     "transcript_id": "Transcript"})
+#     return df_gtf
+
+
+# def tsv_or_csv_to_df(input_txt: str) -> pd.DataFrame:
+#     """Convert tsv or csv file into a pandas dataframe
+
+#         Args:
+#             input_txt (str): csv or tsv file containing transcript exp level
+
+#         Returns:
+#             df_gene (str): Pandas dataframe having transcript and exp level
+#             as columns
+
+#         Raises:
+#             None
+#     """
+#     pass
+#     df_input = pd.read_csv(
+#         input_txt,
+#         sep=r"[\t,]",
+#         lineterminator="\n",
+#         names=["Transcript", "Expression_level"],
+#         engine="python",
+#     )
+#     return df_input
+
+
+# def expr_level_by_gene(
+#     df_exprTrasncript: pd.DataFrame, df_output_gtf_selection: pd.DataFrame
+# ) -> pd.DataFrame:
+#     """find the gene of each transcript given by the expression level csv/tsv
+#     file, and sum the expression level of all transcripts from the same gene.
+
+#         Args:
+#             df_exprTranscript: pandas df containing transcript and
+#             their exp level generated by "tsv_or_csv_to_df" function
+#             df_output_gtf_selection : pandas df containing genes and
+#             transcripts, generated by "transcripts_by_gene_inDf" function
+
+#         Returns:
+#             Pandas dataframe having gene and sum of its transcript exp level
+
+#         Raises:
+#             None
+#     """
+#     pass
+#     df_merged = pd.merge(
+#         df_output_gtf_selection, df_exprTrasncript,
+#         how="inner", on="Transcript"
+#     )
+#     df_sum = df_merged.groupby("Gene").sum(
+#         "Expression_level"
+#     )
+#     return df_sum
+
+
+# def match_by_gene(
+#     df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene: pd.DataFrame
+# ) -> pd.DataFrame:
+#     """Find matching genes between the 2 args
+
+#         Args:
+#             df_reprTranscript : pandas Dataframe containing genes
+#             and their representative transcript, generated by
+#             "dict_repr_trans_to_df()"
+#             df_expressionLevel_byGene : pandas Dataframe containing
+#             genes and their expression level generated by
+#             "transcript_by_gene_inDf()"
+
+#         Returns:
+#             Pandas dataframe having representative transcripts
+#             and their expression level
+
+#         Raises:
+#             None
+#     """
+#     pass
+#     df_merged = pd.merge(
+#         df_reprTranscript, df_expressionLevel_byGene, how="outer", on="Gene"
+#     )
+#     df_clean = df_merged.dropna(axis=0)
+#     df_clean = df_clean.loc[:, ["reprTrans", "Expression_level"]]
+#     return df_clean
+
+
+# # functions to run this part of the program
+# def match_repr_transcript_expression_level(
+#     exprTrans: str, dict_reprTrans: dict, gtf_file: str,
+# ):
+#     """Combine functions to replace transcripts from an exp level csv/tsv file
+#        with representative transcripts
+
+#         Args:
+#             exprTrans (str): csv or tsv file containing transcripts
+#             and their expression level
+#             dict_reprTrans (dict) : dict of genes and their
+#             representative transcipt
+#             intemediate_file (str) : txt file containing genes, transcript
+#             and their expression level from the transkript_extractor function
+#             output_path : path indicating were the tsv file should be written
+
+#         Returns:
+#             tsv file of representative trasncripts and their expression level
+
+#         Raises:
+#             None
+#     """
+#     df_gene_transcript = gene_and_transcript(gtf_file)
+#     df_exprTrans = tsv_or_csv_to_df(exprTrans)
+#     df_reprTrans = dict_repr_trans_to_df(dict_reprTrans)
+#     df_expr_level_by_gene = expr_level_by_gene(
+#         df_exprTrans, df_gene_transcript
+#         )  # error here
+#     df_match = match_by_gene(df_reprTrans, df_expr_level_by_gene)
+#     df_match.rename(columns={'reprTrans': 'id', 'Expression_level': 'level'},
+#                     inplace=True)
+#     return df_match
+
+
+# # run the program
+# if __name__ == "__main__":
+#     match_repr_transcript_expression_level()
diff --git a/transcript_sampler/new_exe.py b/transcript_sampler/new_exe.py
new file mode 100644
index 0000000000000000000000000000000000000000..d96f6136bf181b6c98078bb94a222893ab6e1ef5
--- /dev/null
+++ b/transcript_sampler/new_exe.py
@@ -0,0 +1,78 @@
+"""This module executes the transcript_sampler"""
+import argparse
+import time
+import logging
+logging.basicConfig(  # runs before the project imports below (hence their C0413 suppressions)
+        format='[%(asctime)s: %(levelname)s] %(message)s (module "%(module)s")',
+        level=logging.INFO,
+    )
+from find_reptrans import FindRepTrans  # pylint: disable=E0401,C0413
+from match_reptrans_explvl import MatchReptransExplvl  # pylint: disable=E0401,C0413
+from poisson_sampling import SampleTranscript  # pylint: disable=E0401,C0413
+
+find_rep_trans = FindRepTrans()  # exe() calls its get_rep_trans() and gtf_file_writer()
+match_reptrs_explvl = MatchReptransExplvl()  # exe() calls its match_repr_transcript_expression_level()
+poisson_sample = SampleTranscript()  # exe() calls its transcript_sampling()
+
+LOG = logging.getLogger(__name__)
+
+
+def exe(input_gtf, input_csv, output_gtf, output_csv, transcript_nr):
+    """Run the sampler: pick representative transcripts, sample, write files.
+
+    Reads input_gtf and input_csv, writes output_csv and output_gtf;
+    transcript_nr is the total number of transcripts to sample."""
+    start = time.time()
+    LOG.info("Started transcript sampler...")
+    dict_repr_trans = find_rep_trans.get_rep_trans(input_gtf)
+    LOG.info(
+        "Finding match between representative transcripts "
+        "and expression level file"
+        )
+    df_repr = match_reptrs_explvl.match_repr_transcript_expression_level(
+        dict_reprTrans=dict_repr_trans, exprTrans=input_csv, gtf_file=input_gtf
+        )
+    LOG.info("Poisson sampling of transcripts")
+    poisson_sample.transcript_sampling(transcript_nr, df_repr, output_csv)
+    LOG.info("Output CSV file ready")
+    LOG.info("Writing output GTF file")
+    find_rep_trans.gtf_file_writer(input_gtf, dict_repr_trans, output_gtf)
+    LOG.info("Script executed in %s sec", time.time() - start)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Transcript sampler",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--input_gtf", required=True,
+        help="GTF file with genome annotation"
+        )
+    parser.add_argument(
+        "--input_csv", required=True,
+        help="CSV or TSV file with transcripts and their expression level"
+        )
+    parser.add_argument(
+        "--output_gtf", required=True,
+        help="Output path for the new GTF file of representative transcripts"
+        )
+    parser.add_argument(
+        "--output_csv", required=True,
+        help="Output path for the new CSV file of representative transcripts "
+             "and their sampled number"
+        )
+    parser.add_argument(
+        "--n_to_sample", required=True, type=int,  # reject non-integers up front
+        help="Total number of transcripts to sample"
+        )
+    args = parser.parse_args()
+    LOG.info("Arguments: %s", vars(args))
+
+    exe(
+        args.input_gtf,
+        args.input_csv,
+        args.output_gtf,
+        args.output_csv,
+        args.n_to_sample,
+    )
diff --git a/transcript_sampler/poisson_sampling.py b/transcript_sampler/poisson_sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c586aca38c5cc56664d7d8d0344a7cb5b3d6048
--- /dev/null
+++ b/transcript_sampler/poisson_sampling.py
@@ -0,0 +1,75 @@
+"""Sample transcripts by Poisson-sampling"""
+
+import pandas as pd
+import numpy as np
+
+
+class SampleTranscript:
+    '''
+    Poisson sampling of transcript counts.
+
+    Each row gets a count drawn from a Poisson distribution whose mean
+    is the row's expression level, rescaled so that all means together
+    add up to the requested total transcript number.
+
+    input:  total transcript number (int, or a string holding an int)
+            DataFrame with columns 'id' and 'level'
+            destination of the CSV output
+
+    output: CSV file with columns 'id' and 'count'
+    '''
+    @staticmethod
+    def transcript_sampling(total_transcript_number, df_repr, output_csv):
+        """Draw a Poisson count per row of df_repr and write them as CSV"""
+        level_sum = df_repr["level"].sum()
+        n_requested = int(total_transcript_number)
+        scale = n_requested / level_sum
+        sampled = np.random.poisson(df_repr["level"] * scale)
+        counts_table = pd.DataFrame(
+            {"id": df_repr["id"], "count": sampled}
+        )
+        counts_table.to_csv(output_csv, index=False)
+
+
+# python_version = "3.7.13"
+# module_list = [pd, np, argparse]
+# modul_name_list = ["pd", "np", "argparse"]
+
+# def transcript_sampling(total_transcript_number, df_repr, output_csv):
+#     # df = pd.read_csv(
+#     # csv_file, sep="\t", lineterminator="\n",  names=["id", "level"])
+#     # the function match_reprTranscript_expressionLevel() now outputs a df
+#     df = df_repr
+#     levels = []
+#     sums = df['level'].tolist()
+#     total = sum(sums)
+#     # I added this because writing a number in the terminal inputs a string
+#     total_transcript_number = int(total_transcript_number)
+#     normalized = total_transcript_number/total
+#     for expression_level in df['level']:
+#         poisson_sampled = np.random.poisson(expression_level*normalized)
+#         levels.append(poisson_sampled)
+
+#     transcript_numbers = pd.DataFrame({'id': df['id'], 'count': levels})
+#     pd.DataFrame.to_csv(transcript_numbers, output_csv)
+
+
+# if __name__ == '__main__':
+#     # te.version_control(module_list,modul_name_list,python_version)
+#     parser = argparse.ArgumentParser(
+#         description="Transcript Poisson sampler, csv output",
+#         formatter_class=argparse.ArgumentDefaultsHelpFormatter
+#     )
+
+#     parser.add_argument("--expression_level", required=True,
+#                         help="csv file with expression level")
+#     parser.add_argument("--output_csv", required=True,
+#                         help="output csv file")
+#     parser.add_argument("--input_csv", required=True,
+#                         help="input csv file")
+#     parser.add_argument("--transcript_number", required=True,
+#                         help="total number of transcripts to sample")
+#     args = parser.parse_args()
+
+#     transcript_sampling(args.transcript_number, args.input_csv,
+#                         args.output_csv, args.transcript_number)
diff --git a/scripts/transcript_sampler.ipynb b/transcript_sampler/transcript_sampler_org.ipynb
similarity index 100%
rename from scripts/transcript_sampler.ipynb
rename to transcript_sampler/transcript_sampler_org.ipynb
diff --git a/transcript_sampler/transcript_sampler_org.py b/transcript_sampler/transcript_sampler_org.py
new file mode 100644
index 0000000000000000000000000000000000000000..923cbf81367972b0c39fd7631243aac39a9322b3
--- /dev/null
+++ b/transcript_sampler/transcript_sampler_org.py
@@ -0,0 +1,400 @@
+import pandas as pd
+import numpy as np
+import logging
+from gtfparse import read_gtf
+
+LOG = logging.getLogger(__name__)
+
+def attributes_converter(attributes: str) -> list:
+    """
+    Convert the ;-separated attribute column of a GTF line into a flat
+    list in which each key is directly followed by its value, e.g. the
+    element after "transcript_id" is the transcript id of the entry.
+    Input:
+        attributes = str() # the unstructured part of the entry
+    Output:
+        attributes = list() # quotes, semicolons and newlines removed
+    """
+    attributes = (
+        attributes.replace('"', "")
+        .replace(";", "")
+        .replace("\\n", "")  # literal backslash-n, kept for compatibility
+        .replace("\n", "")  # real line ending, else the last value keeps "\n"
+        .split(" ")
+    )
+    return attributes
+
+
+def find_in_attributes(attributes: list, look_for: str) -> str:
+    """
+    Return the element that directly follows a keyword in the attribute
+    list, e.g. key = gene_id, value = 'ENSMUSG00002074970'; this relies
+    on every key being stored immediately before its value.
+    Inputs:
+        attributes = list()
+        look_for = str() # name of the key to look for
+    Output:
+        attributes[index] or NA = str() # "NA" is returned, and a warning
+                                        logged, if the key is not present
+    """
+    if look_for not in attributes:
+        LOG.warning(f'No {look_for} in the entry, the return was set to NA')
+        return "NA"
+    # the value sits right after its key
+    value_position = attributes.index(look_for) + 1
+    return attributes[value_position]
+
+
+def _re_format(rep_trans_dict: dict) -> dict:
+    """
+    Reduce the per-gene record of the representative transcript to just
+    its transcript id, giving one plain value per gene.
+    Input:
+        rep_trans_dict = {gene_id : [
+            transcript_id, transcript_support_level, transcript_length]}
+    Output:
+        rep_transcripts = {gene_id : transcript_id}
+    """
+    # index 0 of each record holds the transcript id
+    return {
+        gene_id: record[0]
+        for gene_id, record in rep_trans_dict.items()
+    }
+
+
+def get_rep_trans(file_name: str = "test.gtf") -> dict:
+    """
+    Select one representative transcript per gene from a GTF annotation file.
+
+    The transcript with the lowest transcript_support_level wins; ties are broken by the largest summed exon length.
+    Entries are read in file order (gene, then its transcripts, each followed by its exons) and the gene_id is taken from the first attribute.
+
+    Args:
+        file_name (str): Path to the GTF annotation file.
+
+    Returns:
+        rep_transcripts (dict): Dictionary of gene_id to transcript_id representing the selected representative transcripts.
+
+    Raises:
+        ValueError: If an unexpected entry is encountered in the GTF file.
+    """
+
+    # setting default variables
+    rep_transcripts = {}
+    cur_gID = ""
+    cur_tID = ""
+    cur_best_trans = ["", 100, 0]  # [transcript_id, transcript_support_level, transcript_length]
+    # flags are initialised up front so that an exon seen before any
+    # transcript cannot hit an unbound local variable
+    pot_best_trans = False
+    ignor_trans = False
+    skipped_features = (
+        "CDS", "stop_codon", "five_prime_utr",
+        "three_prime_utr", "start_codon", "Selenocysteine",
+    )
+
+    with open(file_name, "r") as f:
+        for line in f:
+            entry = line.split("\t")
+
+            # removes expected but unneeded entries (lines without tabs included)
+            if len(entry) == 1 or entry[2] in skipped_features:
+                continue
+
+            # this function turns the less organized part of the entry
+            # into a readable list
+            attributes = attributes_converter(entry[8])
+
+            # looking for and processing exons entries
+            if entry[2] == "exon":
+                if ignor_trans:
+                    continue
+                elif cur_gID != attributes[1]:
+                    LOG.error("Exon from an unexpected gene")
+                    raise ValueError("Exon from an unexpected gene")
+                elif find_in_attributes(attributes, "transcript_id") != cur_tID:
+                    LOG.error("Exon from an unexpected transcript")
+                    raise ValueError("Exon from an unexpected transcript")
+
+                # adding the length of the exon to the appropriate list and
+                # checking for changes in best transcript
+                # NOTE(review): GTF coordinates are inclusive, so end - start
+                # undercounts each exon by one base - confirm if intended
+                if pot_best_trans:
+                    pot_best_trans[2] += int(entry[4]) - int(entry[3])
+                    if pot_best_trans[2] > cur_best_trans[2]:
+                        cur_best_trans = pot_best_trans
+                        pot_best_trans = False
+                else:
+                    cur_best_trans[2] += int(entry[4]) - int(entry[3])
+
+            # looking for and processing transcript entries
+            elif entry[2] == "transcript":
+                # verify that the gene is correct
+                if cur_gID != attributes[1]:
+                    LOG.error("Transcript from an unexpected gene")
+                    raise ValueError("Transcript from an unexpected gene")
+
+                # finding the transcript id and the support level
+                cur_tID = find_in_attributes(attributes, "transcript_id")
+                t_supp_lvl = find_in_attributes(attributes, "transcript_support_level")
+
+                # a missing ("NA") or non-numeric support level is normed to 100
+                t_supp_lvl = int(t_supp_lvl) if t_supp_lvl.isdigit() else 100
+
+                # decides if the transcript has potential to become the
+                # representative transcript
+                if t_supp_lvl < cur_best_trans[1] or cur_best_trans[0] == "":
+                    cur_best_trans = [cur_tID, t_supp_lvl, 0]
+                    pot_best_trans = False
+                    ignor_trans = False
+                elif t_supp_lvl == cur_best_trans[1]:
+                    pot_best_trans = [cur_tID, t_supp_lvl, 0]
+                    # clear a flag left by a previously ignored transcript
+                    ignor_trans = False
+                else:
+                    ignor_trans = True
+
+            # looking for and processing gene entries
+            elif entry[2] == "gene":
+                # updating rep_transcripts dict
+                if cur_gID in rep_transcripts:
+                    if rep_transcripts[cur_gID][1] > cur_best_trans[1] or (rep_transcripts[cur_gID][1] == cur_best_trans[1] and rep_transcripts[cur_gID][2] < cur_best_trans[2]):
+                        rep_transcripts[cur_gID] = cur_best_trans
+                else:
+                    rep_transcripts[cur_gID] = cur_best_trans
+
+                # updating cur_gID and resetting cur_best_trans
+                cur_gID = attributes[1]
+                cur_best_trans = ["", 100, 0]
+
+            # raises an error for unidentifiable entries
+            else:
+                LOG.error("This entry could not be identified")
+                raise ValueError("This entry could not be identified")
+
+        # adding the final gene to the dictionary
+        if cur_gID in rep_transcripts:
+            if rep_transcripts[cur_gID][1] > cur_best_trans[1] or (rep_transcripts[cur_gID][1] == cur_best_trans[1] and rep_transcripts[cur_gID][2] < cur_best_trans[2]):
+                rep_transcripts[cur_gID] = cur_best_trans
+        else:
+            rep_transcripts[cur_gID] = cur_best_trans
+
+        del rep_transcripts[""]
+        rep_transcripts = _re_format(rep_transcripts)
+        return rep_transcripts
+
+
+def _test():
+    """
+    Run get_rep_trans on "test.gtf" and compare with the known answer.
+    Output:
+        prints the verdict; on a mismatch both dictionaries are printed
+    """
+    file_name = "test.gtf"
+    expected_result = {
+        "ENSG00000160072": "ENST00000472194",
+        "ENSG00000234396": "ENST00000442483",
+        "ENSG00000225972": "ENST00000416931",
+        "ENSG00000224315": "ENST00000428803",
+        "ENSG00000198744": "ENST00000416718",
+        "ENSG00000279928": "ENST00000624431",
+        "ENSG00000228037": "ENST00000424215",
+        "ENSG00000142611": "ENST00000378391",
+    }
+    rt = get_rep_trans(file_name)
+    if rt == expected_result:
+        print("The test was succsesfull")
+        return
+    print("The test fail due to not yieding the same results")
+    print("The results the program got\n", rt)
+    print("The expected results\n", expected_result)
+
+
+def gtf_file_writer(original_file: str, rep_transcript_dict: dict, output_file: str):
+    """
+    Write a GTF holding every gene line plus only those lines of each gene
+    whose transcript_id is the gene's entry in rep_transcript_dict (gene_id -> transcript_id).
+    """
+    output = []
+    gene_id = None  # no gene seen yet; lines before the first gene are dropped
+
+    with open(original_file, "r") as f:
+        for line in f:
+            entry = line.split("\t")
+            # skip comments and blank/short lines lacking the 9 GTF columns
+            if line.startswith("#") or len(entry) < 9:
+                continue
+            attributes = attributes_converter(entry[8])
+            if entry[2] == "gene":
+                gene_id = find_in_attributes(attributes, "gene_id")
+                output.append(line)
+            else:
+                transcript_id = find_in_attributes(attributes, "transcript_id")
+                if gene_id in rep_transcript_dict and rep_transcript_dict[gene_id] == transcript_id:
+                    output.append(line)
+
+    with open(output_file, "w") as last_file:
+        last_file.writelines(output)
+
+
+def gtf_to_df(gtf_file: str) -> pd.DataFrame:
+    """
+    Read a .gtf file and return the gene/transcript pairs of its
+    rows whose feature is "transcript" as a pandas DataFrame.
+
+    Args:
+        gtf_file (str): Path to the .gtf file.
+
+    Returns:
+        df_gtf (pd.DataFrame): Pandas DataFrame containing columns 'Gene' and 'Transcript'.
+
+    Raises:
+        None
+    """
+    annotation = read_gtf(gtf_file,).to_pandas()
+    transcript_rows = annotation[annotation["feature"] == "transcript"]
+    id_columns = transcript_rows[["gene_id", "transcript_id"]]
+    new_names = {"gene_id": "Gene", "transcript_id": "Transcript"}
+    return id_columns.rename(columns=new_names)
+
+
+def dict_reprTrans_to_df(dict_reprTrans: "dict[str, str]") -> pd.DataFrame:
+    """
+    Convert a dictionary of genes and their representative transcript into a DataFrame.
+
+    Args:
+        dict_reprTrans (dict): {'gene_id': 'transcript_id', ...}
+
+    Returns:
+        Pandas DataFrame with 'Gene' and 'reprTrans' as columns; version suffixes such as ".1" or ".10" are removed from the transcript IDs.
+
+    Raises:
+        TypeError: Only dictionaries are allowed.
+        TypeError: Keys should be strings.
+        TypeError: Values should be strings.
+    """
+    if not isinstance(dict_reprTrans, dict):
+        LOG.error("Only dictionaries are allowed")
+        raise TypeError("Only dictionaries are allowed")
+    if not all(isinstance(key, str) for key in dict_reprTrans.keys()):
+        LOG.error("Keys should be strings")
+        raise TypeError("Keys should be strings")
+    if not all(isinstance(value, str) for value in dict_reprTrans.values()):
+        LOG.error("Values should be strings")
+        raise TypeError("Values should be strings")
+
+    df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"])
+    df_reprTrans = df_reprTrans.reset_index()
+    df_reprTrans.columns = ["Gene", "reprTrans"]
+    df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(r"\.\d+", "", regex=True)  # \d+ so ".10" does not leave a "0"
+
+    return df_reprTrans
+
+
+def tsv_or_csv_to_df(input_txt: str) -> pd.DataFrame:
+    """
+    Load a header-less TSV or CSV file into a pandas DataFrame.
+
+    Args:
+        input_txt (str): TSV or CSV file containing transcript expression levels.
+
+    Returns:
+        df_input (pd.DataFrame): Pandas DataFrame with 'Transcript' and 'Expression_level' as columns.
+
+    Raises:
+        None
+    """
+    # a regex separator accepts tabs and commas alike
+    parser_options = {
+        "sep": r"[\t,]",
+        "lineterminator": "\n",
+        "names": ["Transcript", "Expression_level"],
+        "engine": "python",
+    }
+    return pd.read_csv(input_txt, **parser_options)
+
+
+def exprLevel_byGene(
+    df_exprTranscript: pd.DataFrame, df_output_gtf_selection: pd.DataFrame
+) -> pd.DataFrame:
+    """
+    Attach each transcript's expression level to its gene and add up
+    the levels of all transcripts that belong to the same gene.
+
+    Args:
+        df_exprTranscript (pd.DataFrame): Pandas DataFrame with the columns 'Transcript' and 'Expression_level',
+                                          as returned by the "tsv_or_csv_to_df" function.
+        df_output_gtf_selection (pd.DataFrame): Pandas DataFrame with the columns 'Gene' and 'Transcript',
+                                                as returned by the "gtf_to_df" function.
+
+    Returns:
+        Pandas DataFrame with 'Gene' and the summed 'Expression_level' of its transcripts.
+
+    Raises:
+        None
+    """
+    gene_expr = df_output_gtf_selection.merge(df_exprTranscript, how="inner", on="Transcript")
+    per_gene_total = gene_expr.groupby("Gene")["Expression_level"].sum()
+    return per_gene_total.reset_index()
+
+
+def match_byGene(
+    df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene: pd.DataFrame
+) -> pd.DataFrame:
+    """
+    Attach per-gene expression levels to representative transcripts.
+
+    The two tables are inner-joined on 'Gene', so only genes present in
+    both are kept; the 'Gene' column itself is dropped from the result.
+
+    Args:
+        df_reprTranscript (pd.DataFrame): Table with 'Gene' and
+            'reprTrans' columns.
+        df_expressionLevel_byGene (pd.DataFrame): Table with 'Gene' and
+            'Expression_level' columns.
+
+    Returns:
+        pd.DataFrame: Columns 'reprTrans' and 'Expression_level'.
+    """
+    kept_columns = ["reprTrans", "Expression_level"]
+    joined = df_reprTranscript.merge(df_expressionLevel_byGene, how="inner", on="Gene")
+    return joined.loc[:, kept_columns]
+
+
+# functions to run this part of the program
+
+
+def match_reprTranscript_expressionLevel(
+    exprTrans: str, dict_reprTrans: dict, gtf_file: str,
+):
+    """
+    Build the expression table of representative transcripts.
+
+    Per-transcript levels are summed per gene (pairs taken from the GTF
+    file) and assigned to each gene's representative transcript.
+
+    Args:
+        exprTrans (str): CSV or TSV file of transcripts and their levels.
+        dict_reprTrans (dict): Genes mapped to representative transcripts.
+        gtf_file (str): Path to the GTF file.
+
+    Returns:
+        Pandas DataFrame with columns 'id' (representative transcript)
+        and 'level' (summed expression level of its gene).
+    """
+    gene_transcripts = gtf_to_df(gtf_file)
+    transcript_levels = tsv_or_csv_to_df(exprTrans)
+    representatives = dict_reprTrans_to_df(dict_reprTrans)
+    gene_levels = exprLevel_byGene(transcript_levels, gene_transcripts)
+    matched = match_byGene(representatives, gene_levels)
+    return matched.rename(columns={"reprTrans": "id", "Expression_level": "level"})
+
+
+def transcript_sampling(total_transcript_number, df_repr, output_csv):
+    """Poisson-sample a count per transcript in df_repr and write 'id,count' CSV rows."""
+    level_sum = df_repr["level"].sum()
+    rates = df_repr["level"] * (int(total_transcript_number) / level_sum)  # expected counts sum to the total
+    counts = np.random.poisson(rates)
+    transcript_numbers = pd.DataFrame({"id": df_repr["id"], "count": counts})
+    transcript_numbers.to_csv(output_csv, index=False)