From 2c662762a2040f5e05a1dcba8f9798aa38209ffe Mon Sep 17 00:00:00 2001
From: LauraU123 <laura.urbanska@stud.unibas.ch>
Date: Mon, 21 Nov 2022 15:12:56 +0100
Subject: [PATCH] updated script names and started updated execution file

---
 scripts/Excecution_file.py                    |  20 ----
 ...length_filter.py => exon_length_filter.py} |  18 ++--
 ...> match_reprtranscript_expressionlevel.py} |   0
 scripts/new_exe_file.ipynb                    | 101 ++++++++++++++++++
 ...representative_v4.py => representative.py} |   0
 ...t_extractor.py => transcript_extractor.py} |   6 +-
 6 files changed, 113 insertions(+), 32 deletions(-)
 delete mode 100644 scripts/Excecution_file.py
 rename scripts/{Exon_length_filter.py => exon_length_filter.py} (92%)
 rename scripts/{match_reprTranscript_expressionLevel.py => match_reprtranscript_expressionlevel.py} (100%)
 create mode 100644 scripts/new_exe_file.ipynb
 rename scripts/{representative_v4.py => representative.py} (100%)
 rename scripts/{transkript_extractor.py => transcript_extractor.py} (96%)

diff --git a/scripts/Excecution_file.py b/scripts/Excecution_file.py
deleted file mode 100644
index 788525e..0000000
--- a/scripts/Excecution_file.py
+++ /dev/null
@@ -1,20 +0,0 @@
-### Imports ###
-import os
-
-import transkript_extractor as te
-import Exon_length_filter as elf
-import representative_v4 as rtcl
-
-### Scipt ###
-def exe(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = os.getcwd(),Input_free = True):
-    file_name,source_pathway_name_2,deposit_pathway_name_2 = te.extract_transkript(file_name,source_pathway_name,deposit_pathway_name,Input_free = Input_free)
-    inter_mediate_file_directory = os.path.join(deposit_pathway_name,file_name+"_intermediate_file.txt")
-    print("Transcripts are filterd based on transcipt score please wait...")
-    pre_filter_representative_transcripts_dict = rtcl.find_repr_by_SupportLevel(inter_mediate_file_directory)
-    print("Transcripts filtered\n")
-    elf.exon_length_filter(file_name,source_pathway_name,deposit_pathway_name,gen_dict= pre_filter_representative_transcripts_dict,Input_free = Input_free)
-    return(file_name,source_pathway_name,deposit_pathway_name)
-### from consol ####
-##D:\\Uni\\Sem 9\\Programing in the Life sciences\\Projekt\\Intermediat Files
-if __name__ == "__main__":
-    exe()
\ No newline at end of file
diff --git a/scripts/Exon_length_filter.py b/scripts/exon_length_filter.py
similarity index 92%
rename from scripts/Exon_length_filter.py
rename to scripts/exon_length_filter.py
index 162ff1f..db8b12c 100644
--- a/scripts/Exon_length_filter.py
+++ b/scripts/exon_length_filter.py
@@ -5,11 +5,11 @@ Version 1.1.0"""
 import re
 import os
 
-import transkript_extractor as te
+import transcript_extractor as te
 ### Functions ###
 
 def exon_length_calculator(entry): 
-    """This funtion finds the start and end cordinates of the exon and uses them to calculate its lenght"""
+    """This function finds the start and end coordinates of the exon and uses them to calculate its length"""
     try:
         find_exon_coordinates = re.compile("\t\d{1,15}\t")
         #this difines the pattern of the coordinates 
@@ -25,12 +25,12 @@ def exon_length_calculator(entry):
         try_find_end_coordinates = find_exon_coordinates.search(sub_entry)
         end_coordinates = int(try_find_end_coordinates[0].replace("\t",""))
         #these two lines find the end coordinates and turn tham int an int 
-        exon_lenght = end_coordinates-start_coordinates
+        exon_length = end_coordinates-start_coordinates
         #this line claculates the transcript length 
     except:
         print("\n\nIn the following enty only one or no valid coordinates could be found:\n",entry,"the value will be set to NA")
-        exon_lenght = "NA"
-    return(exon_lenght)
+        exon_length = "NA"
+    return(exon_length)
 
 def exon_fider(entry):
     """This funtion determines if a given entry belongs to an exon
@@ -46,7 +46,7 @@ def exon_fider(entry):
     return(try_exon_test)
 
 def __longest_transcript_finder(current_exon_length,longest_transcript,longest_transcript_ID,old_transcript_ID):
-    """This funtion encapsulates an opperation that has to be carried out at several point ind the exon_length_filter funktion and servers to make that funktion more modular"""
+    """This function encapsulates an operation that has to be carried out at several points in the exon_length_filter function and serves to make that function more modular"""
     if current_exon_length > longest_transcript: 
         #This condition updates the most promesing for
         #beeing the representative transcript
@@ -65,7 +65,7 @@ def _representative_transcript_csv (representative_transcript,file_name = "test"
 
         
 def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name =os.getcwd(),gen_dict = {"ENSG00000160072":["ENST00000673477","ENST00000472194","ENST00000378736","ENST00000308647","ENST00000442483"],"ENSG00000225972":["ENST00000416931"],"ENSG00000279928":["ENST00000624431","ENST00000424215"],"ENSG00000142611":["ENST00000378391","ENST00000607632","ENST00000511072"]}):
-    """This funtion selects only the transcripts for a dictionar that have the longest total mRNA"""  
+    """This function selects only the transcripts from a dictionary that have the longest total mRNA"""  
     bar,start_time = te.bar_builder(length_multiplyer = 3)
     total_genes = len(gen_dict)
     gens_done = 0
@@ -133,7 +133,7 @@ def _exon_length_filter(file_name = "test",source_pathway_name = os.getcwd(),dep
                     current_transcript_ID = te.transcript_ID_finder(entry)         
                 except: 
                     continue
-                #The block above searches for a trnascript ID in the  current enty
+                #The block above searches for a transcript ID in the current entry
 
                 if current_transcript_ID in transcript_IDs:
                     #This condition test if the Transcript is one of the 
@@ -185,4 +185,4 @@ if __name__ == "__main__":
     exon_length_filter()
     
     
-#This line allows the file to be executed on its own also from 
\ No newline at end of file
+#This line allows the file to be executed on its own, e.g. directly from the console
diff --git a/scripts/match_reprTranscript_expressionLevel.py b/scripts/match_reprtranscript_expressionlevel.py
similarity index 100%
rename from scripts/match_reprTranscript_expressionLevel.py
rename to scripts/match_reprtranscript_expressionlevel.py
diff --git a/scripts/new_exe_file.ipynb b/scripts/new_exe_file.ipynb
new file mode 100644
index 0000000..c79b091
--- /dev/null
+++ b/scripts/new_exe_file.ipynb
@@ -0,0 +1,101 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "usage: ipykernel_launcher.py [-h] --annotation ANNOTATION --expression_level\n",
+      "                             EXPRESSION_LEVEL --output_csv OUTPUT_CSV\n",
+      "                             --output_gtf OUTPUT_GTF --transcript_number\n",
+      "                             TRANSCRIPT_NUMBER\n",
+      "ipykernel_launcher.py: error: the following arguments are required: --annotation, --expression_level, --output_csv, --output_gtf, --transcript_number\n"
+     ]
+    },
+    {
+     "ename": "SystemExit",
+     "evalue": "2",
+     "output_type": "error",
+     "traceback": [
+      "An exception has occurred, use %tb to see the full traceback.\n",
+      "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 2\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "import argparse\n",
+    "import transcript_extractor as te\n",
+    "import exon_length_filter as elf\n",
+    "import representative as rtcl\n",
+    "import representative as rp\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    parser = argparse.ArgumentParser(\n",
+    "        description=\"transcript sampler\",\n",
+    "        formatter_class=argparse.ArgumentDefaultsHelpFormatter\n",
+    "    )\n",
+    "    parser.add_argument(\"--annotation\", required=True, help=\"gtf file with genome annotation\")\n",
+    "    #parser.add_argument(\"--expression_level\", required=True, help=\"csv file with expression level\")\n",
+    "    parser.add_argument(\"--output_csv\", required=True, help=\"output csv file\")\n",
+    "    parser.add_argument(\"--output_gtf\", required=True, help=\"output gtf file\")\n",
+    "    parser.add_argument(\"--transcript_number\", required=True, help=\"total number of transcripts to sample\")\n",
+    "    args = parser.parse_args()\n",
+    "\n",
+    "def exe(input_file, csv, gtf, transcript_nr):\n",
+    "    file_name,source_pathway_name_2,deposit_pathway_name_2 = te.extract_transcript(input_file, deposit_pathway_name = True, Input_free = Input_free)\n",
+    "    inter_mediate_file_directory = input_file +\"_intermediate_file.txt\"\n",
+    "\n",
+    "    print(\"Transcripts are filtered based on transcript score. Please wait...\")\n",
+    "\n",
+    "    pre_filter_representative_transcripts_dict = rtcl.find_repr_by_SupportLevel(inter_mediate_file_directory)\n",
+    "\n",
+    "    print(\"Transcripts filtered\\n\")\n",
+    "    elf.exon_length_filter(file_name,gen_dict= pre_filter_representative_transcripts_dict, Input_free = True)\n",
+    "\n",
+    "\n",
+    "    #return(file_name,source_pathway_name,deposit_pathway_name)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.12 ('nextstrain')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "41a54f34eee8c9e478b3404dd74579d3248e5c82a4969468d7042e338229b1fe"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/scripts/representative_v4.py b/scripts/representative.py
similarity index 100%
rename from scripts/representative_v4.py
rename to scripts/representative.py
diff --git a/scripts/transkript_extractor.py b/scripts/transcript_extractor.py
similarity index 96%
rename from scripts/transkript_extractor.py
rename to scripts/transcript_extractor.py
index 6bcd13b..3538339 100644
--- a/scripts/transkript_extractor.py
+++ b/scripts/transcript_extractor.py
@@ -11,7 +11,7 @@ import time
 
 
 def __parameter_editor(file_name,source_pathway_name,deposit_pathway_name):
-    """This function allows for chaging the parameters after running the program"""
+    """This function allows for changing the parameters after running the program"""
     while True:
         print("The program will run with the following parameters:\nFile name:\t\t",file_name,"\nSource pathway:\t",source_pathway_name,"\nDeposit pathway:\t",deposit_pathway_name,"\n")
         parameter_conformation = input("To continue with these parameters input [continue or c] to change them input [edit]\n>")
@@ -273,7 +273,7 @@ def _transcript_extractor (file_name,source_pathway_name,deposit_pathway_name):
         print("The transcripts have been collected") 
         
         
-def extract_transkript (file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = False,Input_free = False): 
+def extract_transcript(file_name = "test",source_pathway_name = os.getcwd(),deposit_pathway_name = False,Input_free = False): 
    """This it the overall exetutable funtion that will execute the transcript extraction process for a given file with all checks. 
     Expected input:
         file_name: str ; default = test #the name of the gft file you want to look at
@@ -305,7 +305,7 @@ def extract_transkript (file_name = "test",source_pathway_name = os.getcwd(),dep
 #### Dev part ####
 
 if __name__ == "__main__":
-    extract_transkript()
+    extract_transcript()
 #This line allows the file to be executed on its own also from 
 
 
-- 
GitLab