update README

95e7ed13 · Lorenzo Pantolini · 1628ccb3 · 95e7ed13 · 95e7ed13
Commit 95e7ed13 authored 9 months ago by Lorenzo Pantolini
--- a/README.md
+++ b/README.md
 # Embedding-based alignment (EBA)
 This repository contains the implementation of the EBA method as described in: ["Embedding-based alignment: combining protein language models with dynamic programming alignment to detect structural similarities in the twilight-zone"](https://doi.org/10.1093/bioinformatics/btad786).
-Notice that the embedding extraction is independent from the EBA method, and any pLM can be used. However, to facilitate the application we provide a module (plm_extractor.py) that allows the extraction of the per-residue embedding representations for the following pLMs: ProstT5, ProtT5 and ESM-b1.
+Notice that the embedding extraction is independent of the EBA method, and any pLM can be used. However, to facilitate the application we provide a module (plm_extractor.py) that allows the extraction of the per-residue embedding representations for the following pLMs: ProtT5, ESM-1b and ProstT5.
 Note: For high-dimensional embeddings (such as those from ESM2), we suggest running EBA with the parameter l=0.1 or l=0.01 to avoid precision errors.
@@ -30,7 +30,7 @@ protT5_ext = plm.load_extractor('ProtT5', 'residue', device=device)
 seq1 = 'MLIAFEGIDGSGKTTQAKKLYEYLKQKGYFVSLYREPGGTKVGEVLREILLTEELDERTELLLFEASRSKLIEEKIIPDLKRDKVVILDRFVLSTIAYQGYGKGLDVEFIKNLNEFATRGVKPDITLLLDIPVDIALRRLKEKNRFENKEFLEKVRKGFLELAKEEENVVVIDASGEEEEVFKEILRALSGVLRV'
 seq2 = 'RRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK'
-### extract per-residue embeddings
+### extract per-residue embeddings (if you are using ProstT5, prepend "<AA2fold> " to each sequence)
 emb1 = protT5_ext.extract(seq1)
 emb2 = protT5_ext.extract(seq2)
 print(emb1.shape)
@@ -39,7 +39,7 @@ print(emb1.shape)
 similarity_matrix = sm.compute_similarity_matrix(emb1, emb2)
 eba_results = methods.compute_eba(similarity_matrix)
 ### to return the alignment itself use:
-#eba_results = eba.EBA(similarity_matrix, extensive_output=True)
+#eba_results = methods.compute_eba(similarity_matrix, extensive_output=True)
 ### show results
 print('EBA raw: ', eba_results['EBA_raw'])

--- a/eba_example.py
+++ b/eba_example.py
@@ -7,11 +7,11 @@ from eba import plm_extractor as plm
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 protT5_ext = plm.load_extractor('ProtT5', 'residue', device=device)
-### sequences example
+### sequences example 
 seq1 = 'MLIAFEGIDGSGKTTQAKKLYEYLKQKGYFVSLYREPGGTKVGEVLREILLTEELDERTELLLFEASRSKLIEEKIIPDLKRDKVVILDRFVLSTIAYQGYGKGLDVEFIKNLNEFATRGVKPDITLLLDIPVDIALRRLKEKNRFENKEFLEKVRKGFLELAKEEENVVVIDASGEEEEVFKEILRALSGVLRV'
 seq2 = 'RRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK'
-### extract per-residue embeddings
+### extract per-residue embeddings (if you are using ProstT5, prepend "<AA2fold> " to each sequence)
 emb1 = protT5_ext.extract(seq1)
 emb2 = protT5_ext.extract(seq2)
 print(emb1.shape)