cameo_benchmark_loop_lengths.py 2.39 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
import matplotlib.pyplot as plt

# Loops longer than this are only counted ("above"), not binned.
MAX_LOOP_LENGTH = 25
# Largest loop length left of the dashed line in the plot, which marks the
# cutoff for database approach / Monte Carlo fallback.
DB_LENGTH_CUTOFF = 12


def loop_length(gap_string):
    """Return the length of the loop sequence enclosed in parentheses.

    The sequence is located through its parentheses rather than by
    splitting the token on '-': an additional hyphen in the flanking parts
    (e.g. a negative residue number) would otherwise shift the field that
    gets measured and silently yield a wrong length.

    Args:
        gap_string: token of the form '<before>-(<sequence>)-<after>'.

    Returns:
        Number of characters between '(' and ')'; 0 for '()'.

    Raises:
        ValueError: if no parenthesised part is present.
    """
    start = gap_string.find('(')
    end = gap_string.find(')', start + 1)
    if start < 0 or end < 0:
        raise ValueError('no loop sequence found in %r' % gap_string)
    return end - start - 1


def parse_modelling_log(lines):
    """Extract loop lengths and the relaxed-deletion count from log lines.

    For every line starting with 'Resolved', the second whitespace
    separated token is taken as the initial loop and the fifth as the
    resolved loop. Lines starting with 'Closed' that contain 'relaxing'
    are counted.

    Args:
        lines: iterable of log lines.

    Returns:
        Tuple (initial_lengths, resolved_lengths, n_deletions_relaxed).

    Raises:
        ValueError: if a 'Resolved' line has fewer than five tokens or a
            loop token lacks a parenthesised sequence.
    """
    initial_lengths = []
    resolved_lengths = []
    n_relaxed = 0
    for line in lines:
        if line.startswith('Resolved'):
            fields = line.split()
            if len(fields) < 5:
                raise ValueError('unexpected "Resolved" line: %r' % line)
            initial_lengths.append(loop_length(fields[1]))
            resolved_lengths.append(loop_length(fields[4]))
        elif line.startswith('Closed') and 'relaxing' in line:
            n_relaxed += 1
    return initial_lengths, resolved_lengths, n_relaxed


def build_histogram(lengths, max_length=MAX_LOOP_LENGTH):
    """Count how often each loop length in [0, max_length] occurs.

    Args:
        lengths: iterable of non-negative integer loop lengths.
        max_length: largest length that gets its own bin.

    Returns:
        Tuple (histogram, n_above): histogram[i] is the number of loops of
        length i, n_above the number of loops longer than max_length.
    """
    histogram = [0] * (max_length + 1)
    n_above = 0
    for length in lengths:
        if length > max_length:
            n_above += 1
        else:
            histogram[length] += 1
    return histogram, n_above


def resolved_fraction(histogram, n_above, cutoff=DB_LENGTH_CUTOFF):
    """Return the fraction of all loops with length <= cutoff.

    Returns NaN when there are no loops at all, instead of failing with a
    ZeroDivisionError on an empty log.
    """
    total = sum(histogram) + n_above
    if total == 0:
        return float('nan')
    return float(sum(histogram[:cutoff + 1])) / total


def plot_loop_lengths(initial_histogram, resolved_histogram,
                      out_path='cameo_benchmark_loop_lengths.png',
                      cutoff=DB_LENGTH_CUTOFF):
    """Save a grouped bar plot of initial vs. resolved loop lengths.

    Args:
        initial_histogram: counts per initial loop length.
        resolved_histogram: counts per resolved loop length; must have the
            same number of bins as initial_histogram.
        out_path: file the figure is written to.
        cutoff: loop length after which the dashed vertical line is drawn.
    """
    max_length = len(initial_histogram) - 1

    # shift both series by +-0.2 so the 0.4 wide bars sit side by side
    x_initial = np.linspace(0, max_length, max_length + 1) - 0.2
    x_resolved = x_initial + 0.4

    cred = (128.0/255, 0.0, 0.0)
    cblue = (102.0/255, 153.0/255, 204.0/255)

    # do the barplots representing the length histograms
    plt.bar(x_initial, initial_histogram, width=0.4, color=cred,
            align='center', linewidth=1.5, label='initial length',
            edgecolor='k')
    plt.bar(x_resolved, resolved_histogram, width=0.4, color=cblue,
            align='center', linewidth=1.5, label='resolved length',
            edgecolor='k')

    # do the vertical line representing the cutoff for database approach /
    # Monte Carlo fallback
    plt.axvline(cutoff + 0.5, linewidth=2.0, color='k', linestyle='dashed')

    plt.legend(frameon=False, fontsize='x-large')
    plt.xlim((-0.4, max_length + 0.4))
    # ticks every 5 plus one at the cutoff itself
    ticks = sorted(set(range(0, max_length + 1, 5)) | {cutoff})
    plt.xticks(ticks, [str(tick) for tick in ticks])
    plt.tick_params(axis='both', which='both', length=0)
    plt.xlabel('Loop Length', fontsize='x-large')
    plt.ylabel('N', fontsize='x-large')

    plt.savefig(out_path)


if __name__ == '__main__':
    with open('promod3_modelling_out.txt', 'r') as fh:
        modelling_out = fh.readlines()

    (initial_loop_lengths, resolved_loop_lengths,
     n_deletions_relaxed) = parse_modelling_log(modelling_out)

    initial_histogram, initial_n_above = build_histogram(initial_loop_lengths)
    resolved_histogram, resolved_n_above = build_histogram(
        resolved_loop_lengths)

    n_resolved_12 = sum(resolved_histogram[:DB_LENGTH_CUTOFF + 1])
    n_resolved_total = sum(resolved_histogram) + resolved_n_above

    print('total:', n_resolved_total)
    print('resolved fraction <= %d:' % DB_LENGTH_CUTOFF,
          resolved_fraction(resolved_histogram, resolved_n_above))
    print('initial above %d:' % MAX_LOOP_LENGTH, initial_n_above)
    print('resolved above %d:' % MAX_LOOP_LENGTH, resolved_n_above)
    print('relaxed deletions that didnt enter loop modelling',
          n_deletions_relaxed)

    plot_loop_lengths(initial_histogram, resolved_histogram)