From fe99cc846c09bacd50bb72f72ebb6ae327c1c255 Mon Sep 17 00:00:00 2001
From: Niels Schlusser <niels.schlusser@unibas.ch>
Date: Tue, 30 Jul 2024 13:40:42 +0200
Subject: [PATCH] changed names to translateLSTM

---
 .Rhistory                                           |   0
 README.md                                           |  10 +++++-----
 .../end2end_prediction.py                           |   0
 .../predict_clinvar.py                              |   0
 .../tl_TranslateLSTM_HEK293.h5                      | Bin
 .../tl_TranslateLSTM_end_HEK293.py                  |   8 ++++----
 .../TranslateLSTM_opt100.h5                         | Bin
 .../TranslateLSTM_opt100.py                         |   6 +++---
 .../TranslateLSTM_end_HEK293.h5                     | Bin
 .../TranslateLSTM_end_HEK293.py                     |   6 +++---
 10 files changed, 15 insertions(+), 15 deletions(-)
 create mode 100644 .Rhistory
 rename {tl_TranslateLLM_endogenous => tl_TranslateLSTM_endogenous}/end2end_prediction.py (100%)
 rename {tl_TranslateLLM_endogenous => tl_TranslateLSTM_endogenous}/predict_clinvar.py (100%)
 rename tl_TranslateLLM_endogenous/tl_TranslateLLM_HEK293.h5 => tl_TranslateLSTM_endogenous/tl_TranslateLSTM_HEK293.h5 (100%)
 rename tl_TranslateLLM_endogenous/tl_TranslateLLM_end_HEK293.py => tl_TranslateLSTM_endogenous/tl_TranslateLSTM_end_HEK293.py (96%)
 rename translateLLM_MPRA/TranslateLLM_opt100.h5 => translateLSTM_MPRA/TranslateLSTM_opt100.h5 (100%)
 rename translateLLM_MPRA/TranslateLLM_opt100.py => translateLSTM_MPRA/TranslateLSTM_opt100.py (96%)
 rename translateLLM_endogenous/TranslateLLM_end_HEK293.h5 => translateLSTM_endogenous/TranslateLSTM_end_HEK293.h5 (100%)
 rename translateLLM_endogenous/TranslateLLM_end_HEK293.py => translateLSTM_endogenous/TranslateLSTM_end_HEK293.py (97%)

diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index 82c136e..e082aff 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,11 @@
 written by Niels Schlusser, 15.11.2023
 
-This repository contains different python scripts to predict translation initiation efficiency from transcript sequences using TranslateLLM, an artificial neural network architecture as presented in "Predicting the translation output from the mRNA sequence - an assessment of the accuracy and parameter-efficiency of deep learning models".
+This repository contains different Python scripts to predict translation initiation efficiency from transcript sequences using TranslateLSTM, an artificial neural network architecture as presented in "Predicting the translation output from the mRNA sequence - an assessment of the accuracy and parameter-efficiency of deep learning models".
 
 There are deep learning scripts for essentially three different usecases:
-(1) training a model on synthetic MPRA data in the directory translateLLM_MPRA/
-(2) training a model on endogenous TE data in the directory translateLLM_endogenous/
-(3) do transfer learning from (1) to (2) in the directory tl_TranslateLLM_endogenous/
+(1) training a model on synthetic MPRA data in the directory translateLSTM_MPRA/
+(2) training a model on endogenous TE data in the directory translateLSTM_endogenous/
+(3) performing transfer learning from (1) to (2) in the directory tl_TranslateLSTM_endogenous/
 
 Example training data (MPRA from Sample et.al. (2019), endogenous data based on Alexaki et.al. (2020), and clinvar variations based on Landrum et. al. (2020)) are provided in the directory HEK293_training_data/.
 Scripts to turn the output of RNAseq and ribosome profiling data into an endogenous data set, appending non-sequential features to a given data set, and constructing a data set based on a vcf file can be found in the directory training_data_preprocessing/.
@@ -19,4 +19,4 @@ The transfer-learning directory also contains a script for making end-to-end pre
 For the end-to-end prediction, just the input sequences (UTR and CDS), and, if necessary, the number of exons per transcript need to be provided in a tsv file, all other non-sequential features are computed by the script.
 
 
-This code is published under the MIT license.
\ No newline at end of file
+This code is published under the MIT license.
diff --git a/tl_TranslateLLM_endogenous/end2end_prediction.py b/tl_TranslateLSTM_endogenous/end2end_prediction.py
similarity index 100%
rename from tl_TranslateLLM_endogenous/end2end_prediction.py
rename to tl_TranslateLSTM_endogenous/end2end_prediction.py
diff --git a/tl_TranslateLLM_endogenous/predict_clinvar.py b/tl_TranslateLSTM_endogenous/predict_clinvar.py
similarity index 100%
rename from tl_TranslateLLM_endogenous/predict_clinvar.py
rename to tl_TranslateLSTM_endogenous/predict_clinvar.py
diff --git a/tl_TranslateLLM_endogenous/tl_TranslateLLM_HEK293.h5 b/tl_TranslateLSTM_endogenous/tl_TranslateLSTM_HEK293.h5
similarity index 100%
rename from tl_TranslateLLM_endogenous/tl_TranslateLLM_HEK293.h5
rename to tl_TranslateLSTM_endogenous/tl_TranslateLSTM_HEK293.h5
diff --git a/tl_TranslateLLM_endogenous/tl_TranslateLLM_end_HEK293.py b/tl_TranslateLSTM_endogenous/tl_TranslateLSTM_end_HEK293.py
similarity index 96%
rename from tl_TranslateLLM_endogenous/tl_TranslateLLM_end_HEK293.py
rename to tl_TranslateLSTM_endogenous/tl_TranslateLSTM_end_HEK293.py
index 95cd02d..80c6ff8 100644
--- a/tl_TranslateLLM_endogenous/tl_TranslateLLM_end_HEK293.py
+++ b/tl_TranslateLSTM_endogenous/tl_TranslateLSTM_end_HEK293.py
@@ -130,8 +130,8 @@ output_col='TE'
 
 data_path = '../HEK293_training_data/init_effs_HEK293_endogenous.tsv'
 scaler_dir = '../HEK293_training_data/scalers/'
-pt_model_path = '../translateLLM_MPRA/TranslateLLM_opt100.h5'
-tl_model_path = 'tl_TranslateLLM_HEK293.h5'
+pt_model_path = '../translateLSTM_MPRA/TranslateLSTM_opt100.h5'
+tl_model_path = 'tl_TranslateLSTM_HEK293.h5'
 
 
 #nucleotide dictionary
@@ -218,8 +218,8 @@ if len(sys.argv) < 2 or sys.argv[1] == 'predict':
 	xmin, xmax, ymin, ymax = plt.axis()
 	plt.text(xmin+1.0, ymax-0.3, '$R_{Pearson}$=%.3f, $R_{Spearman}$=%.3f' % (rho_p,rho_s),fontsize = 12,color='black')
 
-	plt.savefig('scatterplot_tl_TranslateLLM_HEK293.pdf')
+	plt.savefig('scatterplot_tl_TranslateLSTM_HEK293.pdf')
 	plt.close()
 
 	raw_test['predicted_'+output_col] = pred
-	raw_test.to_csv("predictions_test_tl_TranslateLLM_HEK293_"+output_col+".tsv",sep="\t",index=False)
+	raw_test.to_csv("predictions_test_tl_TranslateLSTM_HEK293_"+output_col+".tsv",sep="\t",index=False)
diff --git a/translateLLM_MPRA/TranslateLLM_opt100.h5 b/translateLSTM_MPRA/TranslateLSTM_opt100.h5
similarity index 100%
rename from translateLLM_MPRA/TranslateLLM_opt100.h5
rename to translateLSTM_MPRA/TranslateLSTM_opt100.h5
diff --git a/translateLLM_MPRA/TranslateLLM_opt100.py b/translateLSTM_MPRA/TranslateLSTM_opt100.py
similarity index 96%
rename from translateLLM_MPRA/TranslateLLM_opt100.py
rename to translateLSTM_MPRA/TranslateLSTM_opt100.py
index 572b104..99d2882 100644
--- a/translateLLM_MPRA/TranslateLLM_opt100.py
+++ b/translateLSTM_MPRA/TranslateLSTM_opt100.py
@@ -112,7 +112,7 @@ output_col='rl'
 
 data_path = '../HEK293_training_data/opt100_nonseq_feat.tsv'
 scaler_dir = '../HEK293_training_data/scalers/'
-integrated_model_path = 'TranslateLLM_opt100.h5'
+integrated_model_path = 'TranslateLSTM_opt100.h5'
 
 
 #nucleotide dictionary
@@ -175,8 +175,8 @@ if len(sys.argv) < 2 or sys.argv[1] == 'predict':
 	xmin, xmax, ymin, ymax = plt.axis()
 	plt.text(xmin+1.0, ymax-0.3, '$R_{Pearson}$=%.3f, $R_{Spearman}$=%.3f' % (rho_p,rho_s),fontsize = 12,color='black')
 
-	plt.savefig('scatterplot_TranslateLLM_opt100.pdf')
+	plt.savefig('scatterplot_TranslateLSTM_opt100.pdf')
 	plt.close()
 
 	raw_test['predicted_'+output_col] = pred
-	raw_test.to_csv("predictions_test_TranslateLLM_opt100_"+output_col+".tsv",sep="\t",index=False)
+	raw_test.to_csv("predictions_test_TranslateLSTM_opt100_"+output_col+".tsv",sep="\t",index=False)
diff --git a/translateLLM_endogenous/TranslateLLM_end_HEK293.h5 b/translateLSTM_endogenous/TranslateLSTM_end_HEK293.h5
similarity index 100%
rename from translateLLM_endogenous/TranslateLLM_end_HEK293.h5
rename to translateLSTM_endogenous/TranslateLSTM_end_HEK293.h5
diff --git a/translateLLM_endogenous/TranslateLLM_end_HEK293.py b/translateLSTM_endogenous/TranslateLSTM_end_HEK293.py
similarity index 97%
rename from translateLLM_endogenous/TranslateLLM_end_HEK293.py
rename to translateLSTM_endogenous/TranslateLSTM_end_HEK293.py
index c2d8a43..ad4d144 100644
--- a/translateLLM_endogenous/TranslateLLM_end_HEK293.py
+++ b/translateLSTM_endogenous/TranslateLSTM_end_HEK293.py
@@ -128,7 +128,7 @@ output_col='TE'
 
 data_path = '../HEK293_training_data/init_effs_HEK293_endogenous.tsv'
 scaler_dir = '../HEK293_training_data/scalers/'
-integrated_model_path = 'TranslateLLM_end_HEK293.h5'
+integrated_model_path = 'TranslateLSTM_end_HEK293.h5'
 
 
 #nucleotide dictionary
@@ -197,8 +197,8 @@ if len(sys.argv) < 2 or sys.argv[1] == 'predict':
 	xmin, xmax, ymin, ymax = plt.axis()
 	plt.text(xmin+1.0, ymax-0.3, '$R_{Pearson}$=%.3f, $R_{Spearman}$=%.3f' % (rho_p,rho_s),fontsize = 12,color='black')
 
-	plt.savefig('scatterplot_TranslateLLM_end_HEK293.pdf')
+	plt.savefig('scatterplot_TranslateLSTM_end_HEK293.pdf')
 	plt.close()
 
 	raw_test['predicted_'+output_col] = pred
-	raw_test.to_csv("predictions_test_TranslateLLM_end_HEK293_"+output_col+".tsv",sep="\t",index=False)
+	raw_test.to_csv("predictions_test_TranslateLSTM_end_HEK293_"+output_col+".tsv",sep="\t",index=False)
-- 
GitLab