diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 2e906cc871537e0816ae7fd4437ade59afa2986c..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# ignore ALL .log files -*.log - -# ignore ALL files in ANY directory named temp -temp/ diff --git a/Inputs_files/mini_file.txt b/Inputs_files/mini_file.txt deleted file mode 100644 index ba6539a78d4752c68a2408395dd471a0f42f501e..0000000000000000000000000000000000000000 --- a/Inputs_files/mini_file.txt +++ /dev/null @@ -1,19 +0,0 @@ ->ATAD3B -1 ENST00000673477 NA 6 -2 ENST00000472194 1 43 -3 ENST00000378736 5 58 -4 ENST00000485748 2 63 -5 ENST00000474481 2 74 -6 ENST00000308647 1 80 -7 ENST00000442483 3 113 ->PRDM16 -1 ENST00000511072 5 138 -2 ENST00000607632 2 175 -3 ENST00000378391 1 178 -4 ENST00000514189 5 217 -5 ENST00000270722 1 254 -6 ENST00000512462 1 293 -7 ENST00000463591 5 310 -8 ENST00000509860 5 319 -9 ENST00000378389 5 348 -10 ENST00000606170 4 354 \ No newline at end of file diff --git a/LICENSE b/LICENSE deleted file mode 100644 index edb874900e3f120cb97930894f9a0c54cbb340b1..0000000000000000000000000000000000000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 zavolan_group / tools - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md deleted file mode 100644 index 64a092fbd9ad6482a7c3cac4b6b0570aa7743a20..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Transcript Sampler - -This workflow takes as input: - - genome annotation gtf file - - expression levels of each gene - - csv file with transcript IDs and expression levels - - The output is a trancript sample gtf file and csv file containing transcript IDs and counts. - - diff --git a/images/.gitkeep b/images/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/images/screenshot_git_tutorial_1_hGillet.png b/images/screenshot_git_tutorial_1_hGillet.png deleted file mode 100644 index 68151e8b0b837c03b7fcac317aa6a989333244f5..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_1_hGillet.png and /dev/null differ diff --git a/images/screenshot_git_tutorial_2_hGillet.png b/images/screenshot_git_tutorial_2_hGillet.png deleted file mode 100644 index ec1d38848ce1a364475038fb0a74b49e4f6cce07..0000000000000000000000000000000000000000 Binary files a/images/screenshot_git_tutorial_2_hGillet.png and /dev/null differ diff --git a/images/screenshot_markdown_tutorial_hGillet.png b/images/screenshot_markdown_tutorial_hGillet.png deleted file mode 100644 index a3ea90d1c9fa47190015f028d4b7fb09a0e0031b..0000000000000000000000000000000000000000 Binary files a/images/screenshot_markdown_tutorial_hGillet.png and 
/dev/null differ diff --git a/scripts/Excecution_file.py b/scripts/Excecution_file.py deleted file mode 100644 index 788525eb4438f485ed1e2a554be3f61ccc102170..0000000000000000000000000000000000000000 --- a/scripts/Excecution_file.py +++ /dev/null @@ -1,20 +0,0 @@ -### Imports ### -import os - -import transkript_extractor as te -import Exon_length_filter as elf -import representative_v4 as rtcl - -### Scipt ### -def exe(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = os.getcwd(),Input_free = True): - file_name,source_pathway_name_2,deposit_pathway_name_2 = te.extract_transkript(file_name,source_pathway_name,deposit_pathway_name,Input_free = Input_free) - inter_mediate_file_directory = os.path.join(deposit_pathway_name,file_name+"_intermediate_file.txt") - print("Transcripts are filterd based on transcipt score please wait...") - pre_filter_representative_transcripts_dict = rtcl.find_repr_by_SupportLevel(inter_mediate_file_directory) - print("Transcripts filtered\n") - elf.exon_length_filter(file_name,source_pathway_name,deposit_pathway_name,gen_dict= pre_filter_representative_transcripts_dict,Input_free = Input_free) - return(file_name,source_pathway_name,deposit_pathway_name) -### from consol #### -##D:\\Uni\\Sem 9\\Programing in the Life sciences\\Projekt\\Intermediat Files -if __name__ == "__main__": - exe() \ No newline at end of file diff --git a/scripts/Exon_length_filter.py b/scripts/Exon_length_filter.py deleted file mode 100644 index 162ff1f302b4ef144d9c34bde47b5b69c4ba72c0..0000000000000000000000000000000000000000 --- a/scripts/Exon_length_filter.py +++ /dev/null @@ -1,188 +0,0 @@ -#### Exon length filter ##### -"""Exon length filter -Version 1.1.0""" -### Called Packages ### -import re -import os - -import transkript_extractor as te -### Functions ### - -def exon_length_calculator(entry): - """This funtion finds the start and end cordinates of the exon and uses them to calculate its lenght""" - try: - find_exon_coordinates = 
re.compile("\t\d{1,15}\t") - #this difines the pattern of the coordinates - try_find_start_coordinates = find_exon_coordinates.search(entry) - #this line findes the start coordinares based on the pattern - start_coordinates = int(try_find_start_coordinates[0].replace("\t","")) - #this line removes the \t at the end and the start of the pattern and - #turn the string of the coordinates into intergers - final_index_start_coordinates = entry.find(try_find_start_coordinates[0])+len(try_find_start_coordinates[0])-1 - #this line determines the indes of the final digit of the start coordinates - sub_entry = entry[final_index_start_coordinates:] - #this lineused the index determin above a starting point for a new sub entry - try_find_end_coordinates = find_exon_coordinates.search(sub_entry) - end_coordinates = int(try_find_end_coordinates[0].replace("\t","")) - #these two lines find the end coordinates and turn tham int an int - exon_lenght = end_coordinates-start_coordinates - #this line claculates the transcript length - except: - print("\n\nIn the following enty only one or no valid coordinates could be found:\n",entry,"the value will be set to NA") - exon_lenght = "NA" - return(exon_lenght) - -def exon_fider(entry): - """This funtion determines if a given entry belongs to an exon - Expected inputs: - entry: str #any enty of a gtf file""" - exon_test = entry.find("\texon\t") - #This line look for the entry exon in the file - if exon_test == -1: - try_exon_test = False - else: - try_exon_test = True - #The block above evaluates the results of the search for the wort exon - return(try_exon_test) - -def __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID): - """This funtion encapsulates an opperation that has to be carried out at several point ind the exon_length_filter funktion and servers to make that funktion more modular""" - if current_exon_length > longest_transcript: - #This condition updates the most promesing 
for - #beeing the representative transcript - longest_transcript = current_exon_length - longest_transcript_ID = old_transcript_ID - current_exon_length = 0 - return(current_exon_length,longest_transcript,longest_transcript_ID) - -def _representative_transcript_csv (representative_transcript,file_name = "test",deposit_pathway_name =os.getcwd()): - with open(os.path.join(deposit_pathway_name,file_name+"_"+"representative_transcripts"+".csv"),"w") as rt: - for i in representative_transcript: - transcript = representative_transcript[i] - new_entry = str(i)+","+transcript+"\n" - rt.write(new_entry) - - - -def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]}): - """This funtion selects only the transcripts for a dictionar that have the longest total mRNA""" - bar,start_time = te.bar_builder(length_multiplyer = 3) - total_genes = len(gen_dict) - gens_done = 0 - - with open(os.path.join(source_pathway_name,file_name+".gtf"), 'r') as f: - - old_gen = str() - old_transcript_ID = str() - representative_transcript = dict() - representative_trasnscript_not_found = True - longest_transcript_ID = str() - current_exon_length = 0 - longest_transcript = 0 - percentage_done = 0 - - for entry in f: - - try: - corrent_gen = te.gene_ID_finder(entry) - except: - corrent_gen = old_gen - #The block above test if there is a gen name in the entry - if corrent_gen != old_gen: - representative_trasnscript_not_found = True - - #The block above determines if the Gen name is new and set the test - #representative_trasnscript_not_found back to true which is used to - #make the program faster if there is just one transcript for a given - #gen in the dict 
- if representative_trasnscript_not_found and corrent_gen != str(): - #print(corrent_gen) - #The conditon prvents serges if a representative transcript has - #all ready been chosen - if corrent_gen != old_gen: - current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID) - representative_transcript[old_gen] = longest_transcript_ID - try: - del gen_dict[old_gen] - old_gen = corrent_gen - gens_done += 1 - corrent_percentage_done = (gens_done/total_genes)*100 - if corrent_percentage_done > percentage_done+10: - bar,start_time = te.bar_builder(percentage=percentage_done+10,length_multiplyer = 3,start_time=start_time,bar =bar) - percentage_done = int(corrent_percentage_done) - - - except: - old_gen = corrent_gen - longest_transcript = 0 - #The block above adds the transcript of the last gen that - #had the longest exons into the representative transcripts dict - try: - #This try / except block test if the gen is in the input dictionary - transcript_IDs = gen_dict[corrent_gen] - if len(gen_dict[corrent_gen]) == 1: - #This conditions is a short cut for Genes that - #allready have a representative transcript - representative_transcript=gen_dict[corrent_gen[0]] - representative_trasnscript_not_found = False - continue - except: - continue - - try: - current_transcript_ID = te.transcript_ID_finder(entry) - except: - continue - #The block above searches for a trnascript ID in the current enty - - if current_transcript_ID in transcript_IDs: - #This condition test if the Transcript is one of the - #candidates for representative transcripts - if current_transcript_ID != old_transcript_ID: - #This condition if the enty still belongs to the - #previous transcript and is triggers if that is not the case - current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID) 
- try: - transcript_IDs.remove(old_transcript_ID) - old_transcript_ID = current_transcript_ID - except: - old_transcript_ID = current_transcript_ID - if exon_fider(entry): - exon_length = exon_length_calculator(entry) - current_exon_length += exon_length - else: - continue - current_exon_length,longest_transcript,longest_transcript_ID = __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID) - representative_transcript[old_gen] = longest_transcript_ID - del representative_transcript[str()] - te.bar_builder(100,length_multiplyer = 3,start_time=start_time,bar =bar) - return(representative_transcript) - -def exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]},Input_free = False): - """This function filters a dictionary of genes and there transcripts by the length of there exons an selects the longes transcript for each gene ans saves tham in a "," seperated csv file. 
- Expected inputs: - file_name: str ; default = test #the name of the gft file you want to look at - source_pathway_name: str ; default = current work directory #path of the gtf file - deposit_pathway_name: str ; default = current work directory #path for saving the csv file - gen_dict:dict{key == gene ID:[transcript IDs that belong to that gene]} - Input_free: tuple ; default = False # this input should be set to True for automation""" - - print("Representative trascipts are filterd based on exon length please wait...") - source_pathway_name,deposit_pathway_name = te.__do_pathways_exist__(source_pathway_name,deposit_pathway_name) - if Input_free: - pre_existing_file = False - else: - search_profile = file_name+"_"+"representative_transcripts"+".csv" - pre_existing_file = te.__searche_for_preexisting_files(search_profile,deposit_pathway_name) - if pre_existing_file == False: - representative_transcript = _exon_length_filter(file_name,source_pathway_name,deposit_pathway_name,gen_dict) - _representative_transcript_csv(representative_transcript,file_name,deposit_pathway_name) - print("\nRepresentative transcripts collected") - - -if __name__ == "__main__": - help(exon_length_filter) - exon_length_filter() - - -#This line allows the file to be executed on its own also from \ No newline at end of file diff --git a/scripts/match_reprTranscript_expressionLevel.py b/scripts/match_reprTranscript_expressionLevel.py deleted file mode 100644 index 2dfca50a3c3ee1458b7358fbd1e1d801751d727c..0000000000000000000000000000000000000000 --- a/scripts/match_reprTranscript_expressionLevel.py +++ /dev/null @@ -1,200 +0,0 @@ - -import pandas as pd -import json -import re -import rerpresentative_v4 as repr -import os - - -def dict_reprTrans_to_df(dict_reprTrans: dict): - - """Convert a dictionary of genes and their representative transcript into a dataframe - - Args: - dict_reprTrans (dict) : {'Gene':['transcriptA', 'transcriptB'], ...} - - Returns: - Pandas dataframe having Gene and 
transcript as columns - - Raises: - /!\ None, I wasn't able to make a TypeError with dict - : Only dict made of key string and value string is allowed - - """ - pass - - df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"]) - df_reprTrans = df_reprTrans.reset_index(level=0) - df_reprTrans.columns = ["Gene", 'reprTrans'] - df_reprTrans["reprTrans"] = df_reprTrans["reprTrans"].str.replace(r'\.[1-9]', '', regex=True) - return df_reprTrans - - -def txt_to_dict(dict_txt: str): - """Convert a txt file into a dictionary - - Args: - dict_txt (str) : pathe to a txt file of a dict - structured as {'Gene':['transcriptA', 'transcriptB'], ...} - - Returns: - dict (dict) : dictionary stuctured as {'Gene':['transcriptA', 'transcriptB'], ...} - - Raises: - None - """ - pass - - input : str = open(dict_txt, "r").read() - input : str = input.replace("\'", "\"") - dict = json.loads(input) - return dict - - - -def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame: - """Convert multiindex dataframe from function into a simple dataframe - - Args: - df_gtfSelection (str): Pandas multiindex dataframe having Gene, - transcript as indexs and support level as columns. 
- Come from the function import_gtfSelection_to_df() - - Returns: - df_gene (str): Pandas dataframe having Gene and - transcript as columns - - Raises: - None - """ - pass - df_gene = df_gtfSelection.set_index(["Gene"]) - df_gene = df_gene.drop(columns=["Support_level"]) - df_gene['Transcript']=df_gene['Transcript'].str.replace(r"\.[0-9]","", regex=True) - df_gene = df_gene.reset_index(level=0) - return df_gene - - -def tsv_or_csv_to_df(input_txt:str) : - """Convert tsv or csv file into a pandas dataframe - - Args: - input_txt (str): csv or tsv file containing transcript expression level - - Returns: - df_gene (str): Pandas dataframe having transcript and expression level - as columns - - Raises: - None - """ - pass - df_input =pd.read_csv(input_txt, sep=r"[\t,]", lineterminator='\n', - names=["Transcript", "Expression_level"], - engine = "python") - return df_input - - -def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.DataFrame : - """Find matching transcripts bewteen the 2 args - - Args: - df_exprTranscript (str): pandas Dataframe containing transcript and their expression level - df_output_gtf_selection (str) : pandas Dataframe containing genes and transcripts - - Returns: - Pandas dataframe having gene and sum of its transcript expression level - - Raises: - None - """ - pass - df_merged = pd.merge(df_output_gtf_selection, df_exprTrasncript , how="inner", on="Transcript") - df_sum = df_merged.groupby("Gene").sum("Expression_level") # sum transcripts comming from the same gene - return df_sum - -def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.DataFrame: - """Find matching genes bewteen the 2 args - - Args: - df_reprTranscript (str): pandas Dataframe containing genes - and their representative transcript - df_expressionLevel_byGene (str) : pandas Dataframe containing - genes and their expression level - - Returns: - Pandas dataframe having representative trasncripts - and their expression level - - 
Raises: - None - """ - pass - df_merged = pd.merge(df_reprTranscript, df_expressionLevel_byGene , how="outer", on="Gene") - df_clean = df_merged.dropna(axis=0) - df_clean = df_clean.loc[:, ["reprTrans","Expression_level"]] - return df_clean - -def output_tsv(dataframe:str)-> pd.DataFrame : - """Convert pandas dataframe into a tsv file - - Args: - dataframe (str): Pandas dataframe containing - representative transcripts and their expression level - - Returns: - Tsv file containing representative transcripts - and their expression level in the same directory - - Raises: - None - """ - pass - - csv_file = dataframe.to_csv(os.getcwd()+"\ReprTrans_ExpressionLevel.tsv", sep="\t", - index=False, header=False) - return csv_file - -### functions to run this part of the programm - -def match_reprTranscript_expressionLevel(exprTrans:str, dict_reprTrans:dict, intermediate_file:str): - """Combine functions to replace transcripts from an expression level csv/tsv file - with representative transcripts - - Args: - exprTrans (str): csv or tsv file containing transcripts - and their expression level - dict_reprTrans (dict) : dict of genes and their - representative transcipt - intemediate_file (str) : txt file containing genes, transcript - and their expression level from the transkript_extractor function - - Returns: - tsv file of representative trasncripts and their expression level - - Raises: - None - """ - df_intermediate = repr.import_gtfSelection_to_df(intermediate_file) - df_geneTrans = transcripts_by_gene_inDf(df_intermediate) - df_exprTrans = tsv_or_csv_to_df(exprTrans) - df_reprTrans = dict_reprTrans_to_df(dict_reprTrans) - df_exprLevel_byGene = exprLevel_byGene(df_exprTrans, df_geneTrans) - df_match = match_byGene(df_reprTrans, df_exprLevel_byGene) - output = output_tsv(df_match) - return output - - -# run the programm - -dict_txt = a #input a dict of {gene:reprTrans} in the form of a txt file -input_intermediate_file = b #input the intermediate file generated by 
transckript extractor -input_expr = c #input a csv or tsv file containing the expr level - -dict_reprTrans = txt_to_dict(dict_txt) -match_final = match_reprTranscript_expressionLevel(input_expr, dict_reprTrans, input_intermediate_file) -print("this is the function :\n\n {}".format(match_final)) - -if __name__ == "__main__": - match_reprTranscript_expressionLevel() - \ No newline at end of file diff --git a/scripts/poisson_sampling.py b/scripts/poisson_sampling.py deleted file mode 100644 index 60d043db32e0daed7e4cfdf685ed024942af5747..0000000000000000000000000000000000000000 --- a/scripts/poisson_sampling.py +++ /dev/null @@ -1,47 +0,0 @@ -import pandas as pd -import numpy as np -import argparse - - -''' -Sample transcript - -This part of the code does Poisson sampling proportionally to gene expression levels for each gene. - -input: total transcript number (int) - csv file with gene id and gene expression levels (columns named 'id' and 'level') - -output: csv file with gene id and count - gtf file with transcript samples -''' - - -def transcript_sampling(total_transcript_number, csv_file, output_csv): - df = pd.read_csv(csv_file, sep='\t', lineterminator='\n', names=["id", "level"]) - levels = [] - sums = df['level'].tolist() - total = sum(sums) - normalized = total_transcript_number/total - for expression_level in df['level']: - poisson_sampled = np.random.poisson(expression_level*normalized) - levels.append(poisson_sampled) - - transcript_numbers = pd.DataFrame({'id': df['id'],'count': levels}) - pd.DataFrame.to_csv(transcript_numbers, output_csv) - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="Transcript Poisson sampler, csv output", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument("--expression_level", required=True, help="csv file with expression level") - parser.add_argument("--output_csv", required=True, help="output csv file") - parser.add_argument("--input_csv", required=True, 
help="input csv file") - parser.add_argument("--transcript_number", required=True, help="total number of transcripts to sample") - args = parser.parse_args() - - - transcript_sampling(args.transcript_number, args.input_csv, args.output_csv, args.transcript_number) - - diff --git a/scripts/representative_v4.py b/scripts/representative_v4.py deleted file mode 100644 index c940686b2126121b33d12a858026338aff737706..0000000000000000000000000000000000000000 --- a/scripts/representative_v4.py +++ /dev/null @@ -1,96 +0,0 @@ - -import pandas as pd - -''' -This part of the code take as input a gtf modified file -and return a dictionary of transcripts with best -support level for each gene of the input - -''' - - - - -def import_gtfSelection_to_df(gtf_modified_file: str): - """Import intermediate file from gtf and create a df - - Args: - gtf_modified_file (str) : path to the intermediate file - - Returns: - Pandas dataframe having Gene, transcript - and support level as columns - - Raises: - TypeError : Only str path is allowed - - """ - pass - if not type(gtf_modified_file) is str: - raise TypeError("Only str path is allowed") - df_input = pd.read_csv(gtf_modified_file, sep = '\t', lineterminator = '\n', -names = ["Gene_mixed", "Transcript", "Support_level", "Na1", "Na2"] ) - df_input["Support_level"] = df_input["Support_level"].replace(" ", "") - df_input["Gene"] = df_input["Gene_mixed"].str.extract('([A-Z]\w{0,})', expand=True) - df_input["Transcript_number"] = df_input["Gene_mixed"].str.extract('(^\d)', expand=True) - df_clean = df_input.loc[:, ["Gene", "Transcript","Support_level"]] - df_clean["Gene"] = df_clean["Gene"].fillna(method = 'ffill') - df_clean = df_clean.dropna(axis = 0) - return df_clean - - - - -def representative_transcripts_inDict(df_gtfSelection: str) -> pd.DataFrame: - """Return a dict containing for each gene transcripts - with highest confidence level - - Args: - df_gtfSelection (str): Pandas dataframe having Gene, - transcript and support level as 
columns - - Returns: - Dict {'Gene':['transcriptA', 'transcriptB'], ...} - - Raises: - TypeError : Only pandas DataFrame is allowed - """ - pass - - if not type(df_gtfSelection) is pd.DataFrame: - raise TypeError("Only pandas DataFrame is allowed") - - df_multIndex = df_gtfSelection.set_index(["Gene", "Transcript"]) - #highest support level = 1 , worst = 5, NA = 100 - df_min = df_multIndex.groupby(level=["Gene"])["Support_level"].transform("min") - df_final = df_min.reset_index(level = "Transcript") - df_final = df_final.drop(columns = ["Support_level"]) - dict_representative_transcripts = df_final.groupby("Gene")["Transcript"].apply(list).to_dict() - return dict_representative_transcripts - - - -def find_repr_by_SupportLevel(intermediate_file:str): - """Combine functions import_gtfSelection_to_df() - and representative_transcripts_inDict() - - Args: - intermediate_file : path to the intermediate file - - Returns: - Dict {'Gene':['transcriptA', 'transcriptB'], ...} - - Raises: - None - - - """ - pass - df_gtf = import_gtfSelection_to_df(intermediate_file) - dict_reprTrans = representative_transcripts_inDict(df_gtf) - return dict_reprTrans - - - -if __name__ == "__main__": - find_repr_by_SupportLevel() diff --git a/scripts/transkript_extractor.py b/scripts/transkript_extractor.py deleted file mode 100644 index 6bcd13bd151ae173cfbacfd57ebb2a247320c669..0000000000000000000000000000000000000000 --- a/scripts/transkript_extractor.py +++ /dev/null @@ -1,311 +0,0 @@ -#### Transcript extractor ##### -"""Transcript extractor -Version 1.1.0""" -### Called Packages ### -import re -import os -import time - -### Functions ### - - - -def __parameter_editor(file_name,source_pathway_name,deposit_pathway_name): - """This function allows for chaging the parameters after running the program""" - while True: - print("The program will run with the following parameters:\nFile name:\t\t",file_name,"\nSource pathway:\t",source_pathway_name,"\nDeposit pathway:\t",deposit_pathway_name,"\n") 
- parameter_conformation = input("To continue with these parameters input [continue or c] to change them input [edit]\n>") - if parameter_conformation == "continue"or parameter_conformation =="c": - break - elif parameter_conformation == "edit": - #edit the parameters - while True: - change_question = input("select the parameter you want to change [nfile/spath/dpath] or input [b] to go back\n>") - if change_question == "nfile": - #This condition allows the user to chenge the file name - file_name = input("Please input the new file name\n>") - break - elif change_question == "spath": - #This condition allows the user to change the source path - source_pathway_name = input("Please input the new source path\n>") - - does_source_pathway_exist = os.path.exists(source_pathway_name) - if does_source_pathway_exist: - break - else: - print("The new source pathway:",source_pathway_name,"does not exist\nThe source pathway was returned to default:",os.getcwd()) - source_pathway_name = os.getcwd() - elif change_question == "dpath": - #This condition allows the user to change output file location - deposit_pathway_name = input("Please input the new output file path name\n>") - does_deposit_pathway_exist = os.path.exists(deposit_pathway_name) - if does_deposit_pathway_exist: - break - else: - print("The new deposit pathway:",deposit_pathway_name,"does not existe\nThe deposit pathway was returnt to default:",source_pathway_name) - deposit_pathway_name = source_pathway_name - #The block above test if the new deposit pathway is valid - elif change_question == "b": - # This condition allows the user to return to the main loop - break - else: - #This condition covers all non valid inputs into the secund loop - print("The input",change_question,"is not valid. 
Please use one of the specified commands") - - else: - #This condition covers all non valid input for the main loop - print("The input",parameter_conformation,"is not valide please use one of the specified comands\n") - return(file_name,source_pathway_name,deposit_pathway_name) - - - - - - - -def __searche_for_preexisting_files(file_name,deposit_pathway_name = os.getcwd()): - """This function searches for preexisting files of the same name as the results file of the current program. It allows the user to choose to move on with the pre-existing file """ - File_of_same_name_found = False - generat_new_file = False - directory_content = os.listdir(deposit_pathway_name) - for file in directory_content: - if file == file_name: - while True: - File_found_input = input (file_name+" has allready been generated\nDo you want to generate a new one [y/n] \n>") - if File_found_input == "n": - File_of_same_name_found = True - break - elif File_found_input == "y": - generat_new_file = True - break - else: - print("Invalid input\nPlease press [y] if you want to generate a new file or [n] if you want to use the preexisting file") - break - else: - continue - if File_of_same_name_found: - print("No new file will be generated, the program can continue") - elif generat_new_file: - print("A new file will be generated please wait...\n") - else: - print("No pre-existing file of the relevant type has been found.\nA new file will be generated please wait...\n") - return(File_of_same_name_found) - -def bar_builder(percentage = 0,length_multiplyer = 2,start_time = time.time(),bar = str()): - """This function creates a loading bar that can load in 10% increments starting a 0% and ending at 100% - Expected inputs: - percentage: int between 0 and 100 in steps of 10; default = 0 #defines the current loading increment - length_multiplyer: int > 0 ; default = 2 #determiens the amount of symbols per loading increment - start_time: any int ; default= time.time() #for determening loading time - bar: 
str ; default = str()#input of the current bar status does not need to be defined if for the 0% increment - """ - if percentage == 100: - bar = bar.replace("-","#") - print("\r"+bar+"\t"+"100%\t\t"+str(int(time.time()-start_time))) - elif percentage > 0: - bar = bar.replace("-","#",length_multiplyer) - print("\r"+bar+"\t"+str(percentage)+"%", end='',flush=True) - elif percentage == 0: - bar = "["+"-"*length_multiplyer*10+"]" - print(bar+"\t", end='',flush=True) - return(bar,start_time) - -def __test_file_name(file_name,source_pathway_name = os.getcwd()): - """This function validates that the source file exists at the source path. It turns the file name input in a standardized format that can be used in the next steps""" - - directory_content = os.listdir(source_pathway_name) - - index_of_the_dot = file_name.rfind(".") - valide_source_file = False - validate_source_file = True - if index_of_the_dot ==-1: - file_name += ".gtf" - else: - source_file_typ = file_name[index_of_the_dot:] - not_a_file_type = re.compile(".\d{1,13}") - try_not_a_file_type = not_a_file_type.search(source_file_typ) - if source_file_typ == ".gtf": - file_name = file_name - elif try_not_a_file_type: - file_name += ".gtf" - else: - print("This program can not handle",source_file_typ,"files. 
def __do_pathways_exist__(source_pathway_name, deposit_pathway_name):
    """Validate that the source and deposit paths exist on disk.

    Falls back to the current working directory for a missing source path,
    and to the (possibly corrected) source path for a missing deposit path.

    Expected inputs:
        source_pathway_name: str   # path where the .gtf file is looked for
        deposit_pathway_name: str  # path where output files will be written
    Returns:
        (source_pathway_name, deposit_pathway_name): tuple[str, str]
    """
    does_source_pathway_exist = os.path.exists(source_pathway_name)
    does_deposit_pathway_exist = os.path.exists(deposit_pathway_name)
    if not does_source_pathway_exist:
        print("The source pathway:", source_pathway_name,
              "has not been found\nThe source pathway was set to the default")
        source_pathway_name = os.getcwd()
    if not does_deposit_pathway_exist:
        print("The deposit pathway:", deposit_pathway_name,
              "has not been found\nThe deposit pathway was set to the default")
        # Intentionally uses the *corrected* source path as the fallback.
        deposit_pathway_name = source_pathway_name
    return (source_pathway_name, deposit_pathway_name)


def gene_ID_finder(entry):
    """Extract the gene ID from a gtf line known to contain one.

    Expected inputs:
        entry: str  # a gtf line containing a `gene_id "..."` attribute
    Returns:
        gene_ID: str  # the ID with surrounding quotes stripped

    NOTE: raises TypeError if no quoted ID follows "gene_id" — callers
    are expected to pre-check `"gene_id" in entry` before calling.
    """
    index_gene_id = entry.find("gene_id")
    find_gene_id_name = re.compile(r'"\S{1,25}"')
    sub_entry = entry[index_gene_id:]
    try_find_gene_id_name = find_gene_id_name.search(sub_entry)
    gene_ID = try_find_gene_id_name[0].replace('"', "")
    return (gene_ID)


def transcript_ID_finder(entry):
    """Extract the transcript ID from a gtf line.

    Expected inputs:
        entry: str  # a gtf line containing a `transcript_id "..."` attribute
    Returns:
        transcript_ID: str  # the ID, or "" if no quoted ID is found
    """
    index_transcript_id = entry.find("transcript_id")
    find_transcript_id_name = re.compile(r'"\S{1,25}"')
    sub_entry = entry[index_transcript_id:]
    try_find_transcript_id_name = find_transcript_id_name.search(sub_entry)
    try:
        transcript_ID = try_find_transcript_id_name[0].replace('"', "")
    except TypeError:
        # search() returned None (no match) -> subscripting raised TypeError.
        # Was a bare `except:`; narrowed so real bugs are no longer swallowed.
        transcript_ID = ""
    return (transcript_ID)


def transcript_support_level_finder(entry):
    """Extract the transcript support level from a gtf transcript line.

    Expected input:
        entry: str  # a gtf line belonging to a transcript
    Returns:
        transcript_support_level: str | int
            # the level as a string (e.g. "1"), or the sentinel 100 when the
            # attribute is absent or its value is "NA"
    """
    transcript_support_level_start_ID = entry.find("transcript_support_level")
    sub_entry = entry[transcript_support_level_start_ID:]
    try:
        score_finder = re.compile(r"\W\w{1,16}\W{2}")
        try_score_finder = score_finder.search(sub_entry)
        Pre_score_1 = try_score_finder[0]
        Pre_score_2 = Pre_score_1.replace('"', "")
        Pre_score_2 = Pre_score_2.replace("(", "")
        transcript_support_level = Pre_score_2.replace(";", "")
        if "NA" in transcript_support_level:
            # "NA" is mapped to the worst-possible sentinel value 100.
            transcript_support_level = 100
    except TypeError:
        # No match found -> subscripting None raised TypeError; treat the
        # missing attribute the same as "NA". Was a bare `except:`.
        transcript_support_level = 100
    return (transcript_support_level)


def _transcript_extractor(file_name, source_pathway_name, deposit_pathway_name):
    """Extract per-gene transcript data from a gtf file.

    Writes transcript number, transcript ID and transcript support level for
    every transcript to a new file named <file_name>_intermediate_file.txt,
    grouped under ">gene_id" header lines.

    Expected input:
        file_name: str            # name of the gtf file WITHOUT the .gtf suffix
        source_pathway_name: str  # path of the gtf file
        deposit_pathway_name: str # path for saving the intermediate file
    """
    # First pass only counts lines so the progress bar can show percentages.
    with open(os.path.join(source_pathway_name, file_name + ".gtf"), "r") as f:
        total_entrys = len(f.readlines())
    with open(os.path.join(source_pathway_name, file_name + ".gtf"), "r") as f:
        current_entry = 0
        percentage_done = 0
        # bar_builder is defined elsewhere in this module — TODO confirm its
        # exact return contract (assumed: (bar, start_time)).
        bar, start_time = bar_builder(length_multiplyer=3)
        Old_gen_ID = str()  # stand-in: the first few gtf entries are not genes
        with open(os.path.join(deposit_pathway_name,
                               file_name + "_" + "intermediate_file" + ".txt"),
                  "w") as IMF:
            transcript_number = 0
            for entry in f:
                current_entry += 1
                current_percentage_done = 100 * current_entry / total_entrys
                if current_percentage_done > percentage_done + 10:
                    bar, start_time = bar_builder(
                        percentage=percentage_done + 10,
                        length_multiplyer=3,
                        start_time=start_time,
                        bar=bar)
                    percentage_done = int(current_percentage_done)
                if "gene_id" in entry:
                    Gen_ID = gene_ID_finder(entry)
                else:
                    Gen_ID = Old_gen_ID
                if Gen_ID != Old_gen_ID:
                    # New gene: write a fasta-style header and restart the
                    # per-gene transcript counter.
                    Gen_entry = ">" + Gen_ID + "\n"
                    IMF.write(Gen_entry)
                    transcript_number = 0
                    Old_gen_ID = Gen_ID
                if "\ttranscript\t" in entry:
                    transcript_number += 1
                    Transcript_ID = transcript_ID_finder(entry)
                    transcript_support_level = transcript_support_level_finder(entry)
                    # Trailing empty field is kept deliberately — downstream
                    # consumers expect this exact column layout.
                    New_entry = (str(transcript_number) + "\t"
                                 + str(Transcript_ID) + "\t"
                                 + str(transcript_support_level) + "\t" + "\t\n")
                    IMF.write(New_entry)
    bar_builder(100, length_multiplyer=3, start_time=start_time, bar=bar)
    print("The transcripts have been collected")


def extract_transkript(file_name="test", source_pathway_name=os.getcwd(),
                       deposit_pathway_name=False, Input_free=False):
    """Run the whole transcript extraction process for a file, with checks.

    Expected input:
        file_name: str            # default "test"; gtf file name to look at
        source_pathway_name: str  # default: cwd at import time; gtf file path
        deposit_pathway_name: str # default: source_pathway_name; output path
        Input_free: bool          # skip the interactive parameter editing
    Outputs:
        (file_name, source_pathway_name, deposit_pathway_name): tuple of str
    """
    if deposit_pathway_name is False:
        # Fix: was `== False`, which also (wrongly) matched 0.
        deposit_pathway_name = source_pathway_name
    if Input_free:
        validated_file_name = __test_file_name(file_name, source_pathway_name)
        file_name = validated_file_name[1]
        _transcript_extractor(file_name, source_pathway_name, deposit_pathway_name)
    else:
        # __parameter_editor / __searche_for_preexisting_files /
        # __test_file_name are defined elsewhere in this module.
        file_name, source_pathway_name, deposit_pathway_name = __parameter_editor(
            file_name, source_pathway_name, deposit_pathway_name)
        source_pathway_name, deposit_pathway_name = __do_pathways_exist__(
            source_pathway_name, deposit_pathway_name)
        validated_file_name = __test_file_name(file_name, source_pathway_name)
        file_name = validated_file_name[1]
        if validated_file_name[0]:
            if __searche_for_preexisting_files(
                    file_name + "_intermediate_file.txt", deposit_pathway_name):
                print("The transcripts has been collected\n")
            else:
                _transcript_extractor(file_name, source_pathway_name,
                                      deposit_pathway_name)
    return (file_name, source_pathway_name, deposit_pathway_name)


#### Dev part ####

if __name__ == "__main__":
    # Allows the file to be executed on its own.
    extract_transkript()


# ---------------------------------------------------------------------------
# Original file boundary: the code below belonged to scripts/writegtf.py.
# ---------------------------------------------------------------------------

import pandas as pd
import numpy as np
import argparse
import re


def transcript_ID_finder(entry):
    """Extract the transcript ID from a gtf line ("" if none found).

    Duplicate of the extractor-module helper above (this copy came from
    scripts/writegtf.py); behavior is identical.
    """
    index_transcript_id = entry.find("transcript_id")
    find_transcript_id_name = re.compile(r'"\S{1,25}"')
    sub_entry = entry[index_transcript_id:]
    try_find_transcript_id_name = find_transcript_id_name.search(sub_entry)
    try:
        transcript_ID = try_find_transcript_id_name[0].replace('"', "")
    except TypeError:
        transcript_ID = ""
    return (transcript_ID)


def gtf_file_writer(original_file, csv_file, output_file):
    """Write a gtf file restricted to the transcripts listed in a csv file.

    Takes the original gtf file and a csv file with an 'id' column of
    relevant transcript IDs, and produces a gtf file containing only the
    transcript entries whose ID appears in the csv.

    Expected input:
        original_file: str  # path to the genome-annotation gtf file
        csv_file: str       # path to the csv file (must have an 'id' column)
        output_file: str    # path of the gtf file to write
    """
    output = []
    df = pd.read_csv(csv_file)
    if 'id' not in df.columns:
        # Fix: original compared a whole Series to False (`df['id'] == False`),
        # which raises ValueError, and accessed df['id'] before the check.
        print('Error. \'id\' column needed in input csv file.')
        return
    # set() gives O(1) membership tests instead of O(n) per gtf line.
    listoftranscripts = set(df['id'].tolist())
    with open(original_file, 'r') as f:
        for entry in f:
            if "\ttranscript\t" in entry:
                transcript_id = transcript_ID_finder(entry)
                if transcript_id in listoftranscripts:
                    output.append(entry)
    with open(output_file, 'w') as last_file:
        # Fix: original passed the list itself to write(), a TypeError.
        last_file.writelines(output)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="gtf output file writer",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--annotation", required=True,
                        help="gtf file with genome annotation")
    parser.add_argument("--output_gtf", required=True, help="output gtf file")
    parser.add_argument("--input_csv", required=True, help="input csv file")
    args = parser.parse_args()

    gtf_file_writer(args.annotation, args.input_csv, args.output_gtf)