structural_coverage_in_StructureDB.py 2.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from promod3 import loop
import random

def GetCoveredFraction(db, length, N, rmsd_thresh, coil_thresh):

    # get all possible fragments
    frag_infos = list()
    for coord_idx in range(db.GetNumCoords()):
        coord_info = db.GetCoordInfo(coord_idx)
        for frag_start_idx in range(coord_info.size - length):
            frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, length)
            frag_infos.append((frag_info, coord_info.id))
  
    # randomize
    random.shuffle(frag_infos)

    N_processed = 0
    N_covered = 0 
    dummy_seq = 'A' * length 

    for frag_idx in range(len(frag_infos)):
        if N_processed >= N:
            break # its enough!

        frag_info = frag_infos[frag_idx][0]
        pdb_id = frag_infos[frag_idx][1]
 
        # check whether coil threshold is fulfilled
        ss = db.GetDSSPStates(frag_info)
        n_coil = ss.count('C')
        if float(n_coil) / length < coil_thresh:
            continue

        # search for similar fragment in other entry
        frag = db.GetBackboneList(frag_info, dummy_seq)
        is_covered = False
        for coord_idx in range(db.GetNumCoords()):
            if is_covered:
                break
            coord_info = db.GetCoordInfo(coord_idx)
            if pdb_id == coord_info.id:
                # same entry with respect to pdb id
                continue
            for frag_start_idx in range(coord_info.size - length):
                current_frag_info = loop.FragmentInfo(coord_idx, frag_start_idx, 
                                                      length)
                current_frag = db.GetBackboneList(current_frag_info, dummy_seq)
                rmsd = frag.CARMSD(current_frag, superposed_rmsd=True)
                if rmsd < rmsd_thresh:
                    is_covered = True
                    break
        # add up results
        N_processed += 1
        if is_covered:
            N_covered += 1
    return float(N_covered) / N_processed

db = loop.LoadStructureDB()

print('No Thresh:')
for i in range(3,16):
    print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.0))

print('50 percent coils:')
for i in range(3,16):
    print('Fraction Covered', i, GetCoveredFraction(db, i, 1000, 1.0, 0.5))