From 95e7ed133bb1ccb5c74ebaac225b42e2d70215d1 Mon Sep 17 00:00:00 2001
From: Lorenzo Pantolini <lorenzo.pantolini@unibas.ch>
Date: Thu, 8 Aug 2024 14:32:12 +0200
Subject: [PATCH] update README

---
 README.md      | 6 +++---
 eba_example.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index fdc1ec5..203dfdb 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Embedding-based alignment (EBA)
 This repository contains the implementation of the EBA method as described in: ["Embedding-based alignment: combining protein language models with dynamic programming alignment to detect structural similarities in the twilight-zone"](https://doi.org/10.1093/bioinformatics/btad786).
 
-Notice that the embedding extraction is independent from the EBA method, and any pLM can be used. However, to facilitate the application we provide a module (plm_extractor.py) that allows the extraction of the per-residue embedding representations for the following pLMs: ProstT5, ProtT5 and ESM-b1.
+Note that embedding extraction is independent of the EBA method, and any pLM can be used. However, to make the method easier to apply, we provide a module (plm_extractor.py) that extracts per-residue embedding representations for the following pLMs: ProtT5, ESM-1b and ProstT5.
  
 Note: In case of high dimensionality embeddings (such as ESM2), we suggest to run the EBA with the parameter l=0.1 or l=0.01 to avoid precision errors.
 
@@ -30,7 +30,7 @@ protT5_ext = plm.load_extractor('ProtT5', 'residue', device=device)
 seq1 = 'MLIAFEGIDGSGKTTQAKKLYEYLKQKGYFVSLYREPGGTKVGEVLREILLTEELDERTELLLFEASRSKLIEEKIIPDLKRDKVVILDRFVLSTIAYQGYGKGLDVEFIKNLNEFATRGVKPDITLLLDIPVDIALRRLKEKNRFENKEFLEKVRKGFLELAKEEENVVVIDASGEEEEVFKEILRALSGVLRV'
 seq2 = 'RRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK'
 
-### extract per-residue embeddings
+### extract per-residue embeddings (if you are using ProstT5, prepend "<AA2fold> " to each sequence)
 emb1 = protT5_ext.extract(seq1)
 emb2 = protT5_ext.extract(seq2)
 print(emb1.shape)
@@ -39,7 +39,7 @@ print(emb1.shape)
 similarity_matrix = sm.compute_similarity_matrix(emb1, emb2)
 eba_results = methods.compute_eba(similarity_matrix)
 ### to return the alignment itself use:
-#eba_results = eba.EBA(similarity_matrix, extensive_output=True)
+#eba_results = methods.compute_eba(similarity_matrix, extensive_output=True)
 
 ### show results
 print('EBA raw: ', eba_results['EBA_raw'])
diff --git a/eba_example.py b/eba_example.py
index 83d35f5..27d0b0a 100644
--- a/eba_example.py
+++ b/eba_example.py
@@ -7,11 +7,11 @@ from eba import plm_extractor as plm
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 protT5_ext = plm.load_extractor('ProtT5', 'residue', device=device)
 
-### sequences example
+### example sequences
 seq1 = 'MLIAFEGIDGSGKTTQAKKLYEYLKQKGYFVSLYREPGGTKVGEVLREILLTEELDERTELLLFEASRSKLIEEKIIPDLKRDKVVILDRFVLSTIAYQGYGKGLDVEFIKNLNEFATRGVKPDITLLLDIPVDIALRRLKEKNRFENKEFLEKVRKGFLELAKEEENVVVIDASGEEEEVFKEILRALSGVLRV'
 seq2 = 'RRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK'
 
-### extract per-residue embeddings
+### extract per-residue embeddings (if you are using ProstT5, prepend "<AA2fold> " to each sequence)
 emb1 = protT5_ext.extract(seq1)
 emb2 = protT5_ext.extract(seq2)
 print(emb1.shape)
-- 
GitLab