diff --git a/projects/novelfams/translate2modelcif.py b/projects/novelfams/translate2modelcif.py index 15650d4b84a92f0d570714e597275e3d50ae24ee..e508aba56dc2a6ba76228bb85034cb62bad82817 100644 --- a/projects/novelfams/translate2modelcif.py +++ b/projects/novelfams/translate2modelcif.py @@ -342,20 +342,20 @@ def _get_entities(pdb_file, fam_name, trg_seq): # NOTE: can have gaps to accommodate "X" in ref_seq exp_seq = sqe_gaps.replace("-", "X") - len_diff = len(trg_seq.string) - len(exp_seq) + trg_seq = trg_seq.string + len_diff = len(trg_seq) - len(exp_seq) if len_diff > 0: exp_seq += "X" * len_diff - if exp_seq != trg_seq.string: - print( + if exp_seq != trg_seq: + # ToDo: turn into an exception once we got the correct sequence list. + _warn_msg( f"Sequence in {os.path.splitext(os.path.basename(pdb_file))[0]} " - + "does not match target.", - exp_seq, + + "does not match target. Falling back to residue-based sequence." ) - # ToDo: re-enable check - # raise RuntimeError(f"Sequence in {pdb_file} does not match target.") + trg_seq = exp_seq cif_ent = { - "seqres": trg_seq.string, + "seqres": trg_seq, "pdb_sequence": sqe_gaps, "pdb_chain_id": [_get_ch_name(chn, False)], "fam_name": fam_name, @@ -509,8 +509,8 @@ def _get_sequence_dbs_alphafold(seq_dbs): "UniRef90", "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/" + "uniref90.fasta.gz", - version=None, - release_date=None, + version="2021_4", + release_date=datetime.datetime(2021, 11, 17), ), "BFD": modelcif.ReferenceDatabase( "BFD", @@ -970,6 +970,10 @@ def _read_sequences(path): continue sqnz += line + if len(sqnz) > 0: + sqnz = sqnz.rstrip("*") + sqnz_lst.AddSequence(seq.CreateSequence(name, sqnz)) + return sqnz_lst @@ -1008,11 +1012,6 @@ def _main(): ) except (_InvalidCoordinateError, _NoEntitiesError): continue - except Exception as exc: - # ToDo: remove catching ALL exceptions - _warn_msg(f"Uncaught exception for '{f_name}':") - print(str(exc)) - continue # report progress after a bit of time if timer() - tmstmp > 60: