diff --git a/doc/tests/data/port_str_db.dat b/doc/tests/data/port_str_db.dat
index 5c183190ce902c88a35cc99fb5e4fd69a71ef62f..a861977c6a974dd2a15929b9f5c71415d57901d3 100644
Binary files a/doc/tests/data/port_str_db.dat and b/doc/tests/data/port_str_db.dat differ
diff --git a/doc/tests/scripts/loop_frag_db.py b/doc/tests/scripts/loop_frag_db.py
index c1489985dbb9d51bbea02cf9f41ea9075a1f0556..0efd8226859dbfc115f5973d3d456b6e1b88bfed 100644
--- a/doc/tests/scripts/loop_frag_db.py
+++ b/doc/tests/scripts/loop_frag_db.py
@@ -31,4 +31,4 @@ fragment_infos = frag_db.SearchDB(n_stem, c_stem, 5)
 for i,f_i in enumerate(fragment_infos):
     bb_list = structure_db.GetBackboneList(n_stem, c_stem,
                                            f_i, frag_seq)
-    io.SavePDB(bb_list.ToEntity(), str(i) + ".pdb")
\ No newline at end of file
+    io.SavePDB(bb_list.ToEntity(), str(i) + ".pdb")
diff --git a/doc/tests/scripts/loop_structure_db.py b/doc/tests/scripts/loop_structure_db.py
index 1696e8dd71480993f6556723300707315356796e..31ff521c1ab2fe4e01db0f4df3466e24e65a0108 100644
--- a/doc/tests/scripts/loop_structure_db.py
+++ b/doc/tests/scripts/loop_structure_db.py
@@ -14,8 +14,8 @@ structure_db_two = loop.StructureDB(
 
 # Lets fill in some structures. It gets assumed, that all required
 # data lies in the following directories.
-structure_dir = "../data"
-prof_dir = "../data"
+structure_dir = "data"
+prof_dir = "data"
 
 # The first 4 letters are suposed to be the pdb id, and the last one
 # the corresponding chain name. The naming of the files in the
@@ -84,7 +84,9 @@ for i in range(structure_db_one.GetNumCoords()):
     structure_db_one.SetStructureProfile(i, prof)
 
 # That's it! Let's save both databases down.
-# structure_db_two will use much less memory, 
-# as it contains less data
-structure_db_one.Save("my_db_one.dat")
-structure_db_two.Save("my_db_two.dat")
+# structure_db_two will use much less memory, as it contains less data.
+# Sidenote: We're saving the portable version. If you intend to use
+# your database only on one system, the Save / Load functions should
+# be preferred, as loading will be much faster.
+structure_db_one.SavePortable("my_db_one.dat")
+structure_db_two.SavePortable("my_db_two.dat")
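(Sidenote on the two (de)serialization flavours used above, as a minimal
sketch: SavePortable / LoadPortable write platform-independent files, while
Save / Load are faster to read back but only safe on the system that wrote
the file. The file name "my_db_one.dat" is taken from the script above;
"my_db_raw.dat" is a hypothetical name for illustration.)

    from promod3 import loop

    # portable round trip, as in the updated script
    db = loop.StructureDB.LoadPortable("my_db_one.dat")

    # raw round trip: faster loading, but system specific
    db.Save("my_db_raw.dat")
    db = loop.StructureDB.Load("my_db_raw.dat")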
diff --git a/doc/tests/test_doctests.py b/doc/tests/test_doctests.py
index 533148559befc5dc1d2fd23e01fcea2c4a2d962d..e3e6e58b9a1a105871233f3316218b9284b503c4 100644
--- a/doc/tests/test_doctests.py
+++ b/doc/tests/test_doctests.py
@@ -226,9 +226,11 @@ class DocTests(unittest.TestCase):
         # run it
         self.checkPMRun('loop_structure_db.py', [], 0)
         # check that result exists and is readable
-        loop.StructureDB.Load('my_db.dat')
+        loop.StructureDB.LoadPortable('my_db_one.dat')
+        loop.StructureDB.LoadPortable('my_db_two.dat')        
         # clean up
-        os.remove('my_db.dat')
+        os.remove('my_db_one.dat')
+        os.remove('my_db_two.dat')
 
     def testLoopFragDB(self):
         # run it
@@ -248,7 +250,7 @@ class DocTests(unittest.TestCase):
         self.assertEqual(len(out_lines), 101)
         # NOTE: this last output depends on the structure-db!
         self.assertEqual(out_lines[-1].strip(),
-                         'Fraction of fragments below 3A: 0.48')
+                         'Fraction of fragments below 3A: 0.47')
         # check that result exists and is readable
         loop.FraggerMap.LoadBB('frag_map.dat')
         # clean up
diff --git a/extras/data_generation/frag_db/build_frag_db.py b/extras/data_generation/frag_db/build_frag_db.py
index 19602b2f274ac4bd04699dac11c4c89053ceeacf..5faa5ad22d6b209846544ddbc1900aa43e5c7b36 100644
--- a/extras/data_generation/frag_db/build_frag_db.py
+++ b/extras/data_generation/frag_db/build_frag_db.py
@@ -16,5 +16,5 @@ for i in range(3,15):
   fragdb.AddFragments(i,max_pairwise_rmsd,structure_db)
 
 fragdb.PrintStatistics()
-fragdb.Save('frag_db.dat')
+fragdb.SavePortable('frag_db.dat')
 
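(A minimal consumer-side sketch for the file written above, mirroring
doc/tests/scripts/loop_frag_db.py and assuming FragDB offers LoadPortable
analogous to StructureDB; n_stem and c_stem are assumed to be stem residues
taken from some previously loaded structure.)

    from promod3 import loop

    frag_db = loop.FragDB.LoadPortable("frag_db.dat")
    frag_db.PrintStatistics()
    # fragment infos pointing into the attached StructureDB
    fragment_infos = frag_db.SearchDB(n_stem, c_stem, 5)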
diff --git a/extras/data_generation/structure_db/README b/extras/data_generation/structure_db/README
index ed5115ddf021c6095061af6b05ae971248816da0..f86fa19e8c37a82083e61ecc780679ff606a4dbb 100644
--- a/extras/data_generation/structure_db/README
+++ b/extras/data_generation/structure_db/README
@@ -9,6 +9,8 @@ Steps to obtain a structural database:
    from PISCES: http://dunbrack.fccc.edu/PISCES.php
    The get_data_from_smtl.py script directly reads in such a file and produces
    the input for the second step in StructureDB generation. 
+   BUT: THE CHAIN NAMING WILL BE THE ONE USED IN THE SMTL; IT NO LONGER
+   CORRESPONDS TO THE PDB NAMING!
 
 2. Build an initial database: use build_structure_db.py with input generated
    in step one.
@@ -20,9 +22,12 @@ BUT BE AWARE, THE ACCORDING FREQUENCIES IN THE DATABASE WILL BE SET TO 0.0!
    The script calculates the structure profiles for a subset of the
    sequences in the desired StructureDB and is intended for parallelization
    
-4. Parallelize step 3: use submit_structure_profile_calculations.py
-   This script uses the SGE submission on BC2 and you might have to 
-   adapt for your system
+4. Parallelize step 3: use prepare_slurm_submission_structure_profiles.py
+   This script generates a submission script and a batch command file
+   that you can submit with: sbatch structure_profile_generation_array.sh
+   It is intended to be used with the SLURM submission system.
+   IMPORTANT: there are many paths to adapt at the top of
+              prepare_slurm_submission_structure_profiles.py
 
 5. Assign the structure profiles generated in step 3 and 4 to the 
    initial database generated in step 2: 
@@ -31,10 +36,9 @@ BUT BE AWARE, THE ACCORDING FREQUENCIES IN THE DATABASE WILL BE SET TO 0.0!
 
 To qualitatively reproduce the default StructuralDB in ProMod3, you first 
 perform step 1 and 2 with a non redundant set of protein structures as 
-defined by PISCES with around 25000 chains (e.g. seq id threshold: 90, 
-resolution threshold: 2.2). 
+defined by PISCES with around 20000 chains (e.g. seq id threshold: 60, 
+resolution threshold: 2.5). 
 Repeat step 1 and 2 with a smaller PISCES list (5000-6000 entries, 
-e.g. seq id threshold: 25 , resolution threshold: 1.8). 
+e.g. seq id threshold: 20 , resolution threshold: 2.0). 
 The first database serves as default StructureDB and the second db as 
 the source db for the structural profiles generated in steps 3-5.
-
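(To make steps 3-5 concrete, this is the per-chain core of what
create_structure_profiles.py and assign_structure_profiles.py do; the two
database file names are placeholders for the ones you generated in steps
1 and 2.)

    from promod3 import loop

    structure_db = loop.StructureDB.LoadPortable("portable_structure_db_60.dat")
    source_db = loop.StructureDB.LoadPortable("portable_structure_db_20.dat")

    coord_idx = 0  # one chain of the db from step 2
    coord_info = structure_db.GetCoordInfo(coord_idx)
    fragment_info = loop.FragmentInfo(coord_idx, 0, coord_info.size)
    sequence = structure_db.GetSequence(fragment_info)
    bb_list = structure_db.GetBackboneList(fragment_info, sequence)
    residue_depths = structure_db.GetResidueDepths(fragment_info)

    # profile information is extracted from the smaller source db (step 3) ...
    profile = source_db.GenerateStructureProfile(bb_list, residue_depths)
    # ... and assigned back to the default db (step 5)
    structure_db.SetStructureProfile(coord_idx, profile)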
diff --git a/extras/data_generation/structure_db/assign_structure_profiles.py b/extras/data_generation/structure_db/assign_structure_profiles.py
index 64029be3aedf16222f622dd102fd8c664b5295c5..2c14c44087fbd2a84be740c1f626565cfae81dd4 100644
--- a/extras/data_generation/structure_db/assign_structure_profiles.py
+++ b/extras/data_generation/structure_db/assign_structure_profiles.py
@@ -1,49 +1,50 @@
 import os
 from promod3 import loop
-
+from ost import seq
 
 # Path to StructureDB to which you want to assign the profile information.
 # (relative to current working directory!)
-db_path = "structure_db.dat"
+db_path = "portable_structure_db_60.dat"
 
 # Directory that contains the profile databases (and only profile databases)
 # as generated by the create_structure_profiles.py script.
 # (relative to current working directory)
-profile_dir = "profile_dbs"
+profile_dir = "structure_profile_dbs"
 
 # path to store the StructureDB with assigned_profiles
 # (relative to current working directory!)
-db_out_path = "structure_db_with_structure_profiles.dat"
-
+db_out_path = "portable_structure_db_60_with_structure_profiles.dat"
 
-structure_db = loop.StructureDB.Load(db_path)
+structure_db = loop.StructureDB.LoadPortable(db_path)
 
 # lets gather all profiles in one ProfileDB
 profile_db_files = os.listdir(profile_dir)
-profile_db = ost.seq.ProfileDB()
+profile_db = seq.ProfileDB()
 
 for f in profile_db_files:
   db_path = os.path.join(profile_dir, f)
-  loaded_db = ost.seq.ProfileDB.Load(db_path)
+  loaded_db = seq.ProfileDB.Load(db_path)
   entry_names = loaded_db.GetNames()
   for name in entry_names:
     profile_db.AddProfile(name, loaded_db.GetProfile(name))
-    print name
 
 # Assign the profiles
 for coord_idx in range(structure_db.GetNumCoords()):
   coord_info = structure_db.GetCoordInfo(coord_idx)
-  entry_id = coord_info.pdb_id
+
+  entry_id = coord_info.id
+  entry_chain_name = coord_info.chain_name
+  full_id = '_'.join([entry_id, entry_chain_name])
 
   structure_profile = None
   try:
-    structure_profile = profile_db.GetProfile(entry_id)
+    structure_profile = profile_db.GetProfile(full_id)
   except:
-    print "Could not find structure profile for ", entry_id, " skip..."
+    print "Could not find structure profile for ", full_id, " skip..."
     continue
     
   structure_db.SetStructureProfile(coord_idx, structure_profile)
 
 # DONE! 
-structure_db.Save(db_out_path)
+structure_db.SavePortable(db_out_path)
 
diff --git a/extras/data_generation/structure_db/build_structure_db.py b/extras/data_generation/structure_db/build_structure_db.py
index 409f67b34a8f775ad00db032a1f963282936e447..ac23ae07a5ef94e49cf963ba0dee97be917767af 100644
--- a/extras/data_generation/structure_db/build_structure_db.py
+++ b/extras/data_generation/structure_db/build_structure_db.py
@@ -2,115 +2,14 @@ from promod3 import loop
 import os
 import traceback
 from ost import io
-from ost.bindings import msms
-from ost.mol.alg import AssignSecStruct
-from ost.mol.alg import Accessibility
-
-###############
-# SCROLL DOWN #
-###############
-
-bb_atoms = ["N","CA","C","O","CB"]
-
-parent_residues = dict()
-parent_residues["MSE"] = "MET"
-parent_residues["CSX"] = "CYS"
-parent_residues["FP9"] = "PRO"
-parent_residues["HYP"] = "PRO"
-parent_residues["SEP"] = "SER"
-parent_residues["SAH"] = "CYS"
-parent_residues["MLY"] = "LYS"
-parent_residues["NLE"] = "LEU"
-parent_residues["ABA"] = "ALA"
-parent_residues["SLZ"] = "LYS"
-parent_residues["KCX"] = "LYS"
-parent_residues["OCS"] = "CYS"
-parent_residues["HIC"] = "HIS"
-parent_residues["TPO"] = "THR"
-parent_residues["CME"] = "CYS"
-parent_residues["ALY"] = "LYS"
-parent_residues["SAM"] = "MET"
-parent_residues["MEN"] = "ASN"
-parent_residues["LLP"] = "LYS"
-parent_residues["CSD"] = "ALA"
-parent_residues["PCA"] = "PRO"
-parent_residues["SCY"] = "CYS"
-parent_residues["CSO"] = "CYS"
-parent_residues["PHD"] = "ASP"
-parent_residues["SLZ"] = "LYS"
-parent_residues["TYS"] = "TYR"
-parent_residues["CAS"] = "CYS"
-parent_residues["TRQ"] = "TRP"
-parent_residues["DPR"] = "PRO"
-parent_residues["CSW"] = "CYS"
-parent_residues["CAF"] = "CYS"
-parent_residues["NIY"] = "TYR"
-parent_residues["LYZ"] = "LYS"
-parent_residues["SME"] = "MET"
-parent_residues["CSS"] = "CYS"
-parent_residues["MLZ"] = "LYS"
-parent_residues["DAH"] = "PHE"
-parent_residues["MSO"] = "MET"
-parent_residues["PHI"] = "PHE"
-parent_residues["TYI"] = "TYR"
-parent_residues["GLX"] = "GLU" #could also be GLN
-parent_residues["OMT"] = "MET"
-parent_residues["CSU"] = "CYS"
-parent_residues["HZP"] = "PRO"
-parent_residues["SEC"] = "CYS"
-parent_residues["DPN"] = "PHE"
-parent_residues["NEP"] = "HIS"
-parent_residues["HIP"] = "HIS"
-parent_residues["DGL"] = "GLU"
-
-one_letter_codes = dict()
-one_letter_codes["ALA"] = 'A'
-one_letter_codes["ARG"] = 'R'
-one_letter_codes["HIS"] = 'H'
-one_letter_codes["LYS"] = 'K'
-one_letter_codes["ASP"] = 'D'
-one_letter_codes["GLU"] = 'E'
-one_letter_codes["SER"] = 'S'
-one_letter_codes["THR"] = 'T'
-one_letter_codes["ASN"] = 'N'
-one_letter_codes["GLN"] = 'Q'
-one_letter_codes["CYS"] = 'C'
-one_letter_codes["GLY"] = 'G'
-one_letter_codes["PRO"] = 'P'
-one_letter_codes["VAL"] = 'V'
-one_letter_codes["ILE"] = 'I'
-one_letter_codes["LEU"] = 'L'
-one_letter_codes["MET"] = 'M'
-one_letter_codes["PHE"] = 'F'
-one_letter_codes["TYR"] = 'Y'
-one_letter_codes["TRP"] = 'W'
-
-def Clean(prot):
-  ed = prot.handle.EditXCS()
-
-  for r_view in prot.residues:
-    if r_view.GetName() in parent_residues:
-      r = r_view.handle
-      ed.RenameResidue(r,parent_residues[r.GetName()])
-      r.SetOneLetterCode(one_letter_codes[r.GetName()])
- 
-      atoms_to_delete = list()
-
-      for a in r.atoms:
-        if a.GetName() not in bb_atoms:
-          atoms_to_delete.append(a)
-        else:
-          a.is_hetatom=False
-
-
-      for a in atoms_to_delete:
-        ed.DeleteAtom(a)
-
-
+from ost import seq
+from ost import mol
+from ost import conop
 
 # This script generates a StructureDB using a list of nonredundant pdb
-# files. All pdb files get cleaned using some basic heuristics.
-# please note, that the values for the structure profile in the produced
+# files. All pdb files get cleaned using Molck from OpenStructure with
+# the settings specified below.
+# Please note that the values for the structure profile in the produced
 # db are only allocated and simply set to zero.
 # You therefore need to create an initial db, from which you can create
 # these structure profiles and set them in a second step.
@@ -125,7 +24,7 @@ def Clean(prot):
 # file generated by get_data_from_smtl.py (or by yourself)
 # every line requires to be in the format: pdb_id chain_name
 # e.g. 5UZG A
-data = open("data_extracted_from_smtl.txt",'r').readlines()
+data = open("cullpdb_pc60_res2.5_R1.0_d180524_chains25634_data_extracted_from_smtl.txt", 'r').readlines()
 
 # directory containing the structures. naming: pdb_id.pdb
 # (pdb_id in upper case!) e.g. 5UZG.pdb
@@ -135,12 +34,22 @@ structure_dir = "structures"
 # (pdb_id and chain_id in upper case!) e.g. 5UZGA.hhm
 profile_dir = "hmms" 
 
+# molck settings to process the input structures
+molck_settings = mol.alg.MolckSettings()
+molck_settings.rm_unk_atoms = True
+molck_settings.rm_non_std = True
+molck_settings.rm_hyd_atoms = True
+molck_settings.rm_oxt_atoms = True
+molck_settings.rm_zero_occ_atoms = True
+molck_settings.map_nonstd_res = True
+molck_settings.assign_elem = True
 
-path_to_msms_binary = "/scicore/soft/apps/msms/2.6.1-linux-x86_64/msms.x86_64Linux2.2.6.1"
-
+# Assumes that a proper compound lib has been specified when
+# compiling OpenStructure
+compound_lib = conop.GetDefaultLib()
 
 # Do it!!
-structure_db = loop.StructureDB()
+structure_db = loop.StructureDB(loop.StructureDBDataType.All)
 
 for i,line in enumerate(data):
 
@@ -149,6 +58,7 @@ for i,line in enumerate(data):
   chain_id = split_line[1]
 
   try:
+
     print 'processing: ',prot_id,' on line: ',i
 
     prot_path = os.path.join(structure_dir, prot_id+'.pdb')
@@ -161,25 +71,31 @@ for i,line in enumerate(data):
       print "Could not find hhm file... skip..."
       continue
 
-    prot = io.LoadPDB(prot_path).Select('peptide=true')
+    # load and clean full structure
+    prot = io.LoadPDB(prot_path)
+    mol.alg.Molck(prot, compound_lib, molck_settings)
+    
+    # hack that will go away when the latest QS-lDDT branch is merged in
+    processor = conop.RuleBasedProcessor(compound_lib)
+    processor.Process(prot)
+
+    # we're only interested in the peptide chains...
+    prot = prot.Select("peptide=true")
+
+    # get profile and seqres
     hmm = io.LoadSequenceProfile(hhm_path)
+    raw_seqres = hmm.sequence
+    seqres = seq.CreateSequence("seqres", raw_seqres)
 
-    Clean(prot)
-    AssignSecStruct(prot)
-    Accessibility(prot, algorithm=mol.alg.AccessibilityAlgorithm.DSSP)
-    surf = msms.CalculateSurface(prot, msms_exe=path_to_msms_binary)[0]
-    prot_single_chain = prot.Select("cname="+chain_id)
-    idx = structure_db.AddCoordinates(prot_id, chain_id, 
-                                      prot_single_chain.chains[0], surf, hmm,
-                                      solvent_accessibility_string="asaAbs") 
-   
-      
+    # add it
+    structure_db.AddCoordinates(prot_id, chain_id, 
+                                prot, seqres, hmm) 
+     
   except:
     traceback.print_exc()
 
 
 structure_db.PrintStatistics()
-structure_db.Save('structure_db.dat')
-
+structure_db.SavePortable('portable_structure_db_60.dat')
 
 
diff --git a/extras/data_generation/structure_db/create_structure_profiles.py b/extras/data_generation/structure_db/create_structure_profiles.py
index ec227549785f97790208abaa3764af16eb06ea6c..a0e335925a4ed4fa5ced2e07a44bd0b379da4bb8 100644
--- a/extras/data_generation/structure_db/create_structure_profiles.py
+++ b/extras/data_generation/structure_db/create_structure_profiles.py
@@ -3,6 +3,7 @@ import sys
 from promod3 import loop
 from ost import seq
 import time
+import traceback
 
 if len(sys.argv) != 6:
 
@@ -47,14 +48,30 @@ profile_db = seq.ProfileDB()
 for i in range(start,end):
   print "processing chain with idx", i
 
-  # generate fragment info object
-  coord_info = structure_db.GetCoordInfo(i)
-  fragment_info = loop.FragmentInfo(i,0,coord_info.size)
-  
-  # generate the structure profile given the source StructureDB
-  profile = structure_db.GenerateStructureProfile(structure_db_source, fragment_info)
-  profile_db.AddProfile(coord_info.pdb_id, profile)
+  try:
+    # generate fragment info object
+    coord_info = structure_db.GetCoordInfo(i)
+    fragment_info = loop.FragmentInfo(i,0,coord_info.size)
+    sequence = structure_db.GetSequence(fragment_info)
+    bb_list = structure_db.GetBackboneList(fragment_info, sequence)
+    residue_depths = structure_db.GetResidueDepths(fragment_info)  
 
+    print "id: ", coord_info.id
+    print "chain_name: ", coord_info.chain_name
+    print "sequence length: ", len(sequence)
+
+    # generate the structure profile given the source StructureDB
+    profile_start = time.time()
+    profile = structure_db_source.GenerateStructureProfile(bb_list, residue_depths)
+    profile_end = time.time()
+
+    print "it took: ", profile_end - profile_start
+
+    profile_db.AddProfile('_'.join([coord_info.id, coord_info.chain_name]), profile)
+
+  except:
+    traceback.print_exc()
+    print "failed to create structure profile... skip..."
 
 profile_db.Save(profile_db_out_path)
 
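(Single-job usage, matching the command line assembled by
prepare_slurm_submission_structure_profiles.py further below:

    pm create_structure_profiles.py <structure_db> <source_structure_db> \
       <start_idx> <end_idx> <profile_db_out>

The script processes chain indices [start_idx, end_idx) of <structure_db>.)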
diff --git a/extras/data_generation/structure_db/get_data_from_smtl.py b/extras/data_generation/structure_db/get_data_from_smtl.py
index a881e5206502bd07fe11234d0162d7e4dc274fa7..7965046a728bd55260ab275073d7449ce7147db7 100644
--- a/extras/data_generation/structure_db/get_data_from_smtl.py
+++ b/extras/data_generation/structure_db/get_data_from_smtl.py
@@ -15,7 +15,7 @@ from sm import smtl
 # 2. Fetch the data manually from the PDB and obtain the profiles with 
 #    the hh-suite available at: https://github.com/soedinglab/hh-suite.
 
-pisces_path = "cullpdb_pc20_res1.8_R0.25_d170820_chains4892"
+pisces_path = "cullpdb_pc20_res2.0_R0.25_d180524_chains6670"
 smtl_path = "/scicore/data/databases/SMTL/"
 
 profile_out_dir = "hmms"
@@ -29,13 +29,11 @@ pisces_data = open(pisces_path).readlines()
 # in here we store pdb id and chain name relative to the SMTL
 fetched_structures = list()
 
-
 if not os.path.exists(profile_out_dir):
   os.makedirs(profile_out_dir)
 if not os.path.exists(structure_out_dir):
   os.makedirs(structure_out_dir)
 
-
 for i,line in enumerate(pisces_data[1:]):
 
   prot_id = line[:4]
@@ -76,8 +74,8 @@ for i,line in enumerate(pisces_data[1:]):
           structure = ch.structure
           profile_path = tpl_lib.ProfileForTemplate(unique_name)
 
-          profile_out_path = os.path.join(profile_out_dir, prot_id+ch.name+".hhm")
-          structure_out_path = os.path.join(structure_out_dir, prot_id+".pdb")
+          profile_out_path = os.path.join(profile_out_dir, prot_id + ch.name + ".hhm")
+          structure_out_path = os.path.join(structure_out_dir, prot_id + ".pdb")
 
           # copy over the profile
           os.system(' '.join(["cp", profile_path, profile_out_path]))
@@ -98,7 +96,7 @@ for i,line in enumerate(pisces_data[1:]):
   
 
 
-outfile = open("data_extracted_from_smtl.txt",'w')
+outfile = open(pisces_path + "_data_extracted_from_smtl.txt",'w')
 outfile.write('\n'.join([(x[0] + ' ' + x[1]) for x in fetched_structures]))
 outfile.close()
 
diff --git a/extras/data_generation/structure_db/prepare_slurm_submission_structure_profiles.py b/extras/data_generation/structure_db/prepare_slurm_submission_structure_profiles.py
new file mode 100644
index 0000000000000000000000000000000000000000..d982a468161a2ccfbcceb994961f4f7e706f543d
--- /dev/null
+++ b/extras/data_generation/structure_db/prepare_slurm_submission_structure_profiles.py
@@ -0,0 +1,57 @@
+from promod3 import loop
+import os
+
+profiles_per_job = 60
+structure_db_path = "/scicore/home/schwede/studga00/data/promod3_databases/final_shit/structure_db_60.dat"
+source_structure_db_path = "/scicore/home/schwede/studga00/data/promod3_databases/final_shit/structure_db_20.dat"
+profile_db_out_dir = "/scicore/home/schwede/studga00/data/promod3_databases/final_shit/structure_profile_dbs"
+awesome_script = "/scicore/home/schwede/studga00/data/promod3_databases/final_shit/create_structure_profiles.py"
+
+
+structure_db = loop.StructureDB.Load(structure_db_path)
+n_entries = structure_db.GetNumCoords()
+
+from_idx = 0
+to_idx = profiles_per_job
+
+n_jobs = int(n_entries / profiles_per_job) + 1
+
+commands = list()
+
+for job_idx in range(n_jobs):
+
+  current_cmd = list()
+  current_cmd.append("pm")
+  current_cmd.append(awesome_script)
+  current_cmd.append(structure_db_path)
+  current_cmd.append(source_structure_db_path)
+  current_cmd.append(str(from_idx))
+  current_cmd.append(str(to_idx))
+  current_cmd.append(os.path.join(profile_db_out_dir, str(job_idx)+".dat"))
+
+  commands.append(' '.join(current_cmd))  
+
+  from_idx = min(n_entries, from_idx + profiles_per_job)
+  to_idx = min(n_entries, to_idx + profiles_per_job)
+
+outfile = open("structure_profile_generation.cmd", 'w')
+outfile.write('\n'.join(commands))
+outfile.close()
+
+stuff_to_write = list()
+stuff_to_write.append("#!/bin/bash")
+stuff_to_write.append("#SBATCH --job-name=structure_scoring_array")
+stuff_to_write.append("#SBATCH --time=06:00:00")
+stuff_to_write.append("#SBATCH --output=%s"%(os.path.join("stdout", "%A_%a.out")))
+stuff_to_write.append("#SBATCH --mem=8G")
+stuff_to_write.append("#SBATCH --array=1-%i"%(len(commands)))
+stuff_to_write.append("source /scicore/home/schwede/studga00/data/promod3_databases/final_shit/setup_bc2_centos7")
+stuff_to_write.append("SEEDFILE=%s"%(os.path.join(os.getcwd(), "structure_profile_generation.cmd")))
+stuff_to_write.append("SEED=$(sed -n ${SLURM_ARRAY_TASK_ID}p $SEEDFILE)")
+stuff_to_write.append("eval $SEED")
+
+outfile = open("structure_profile_generation_array.sh", 'w')
+outfile.write('\n'.join(stuff_to_write))
+outfile.close()
+
+
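(Usage note: adapt the hard-coded paths at the top of the script, run it once
to generate structure_profile_generation.cmd and
structure_profile_generation_array.sh, then submit the array with
"sbatch structure_profile_generation_array.sh". The stdout/ directory
referenced by the #SBATCH --output line is assumed to exist beforehand.)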
diff --git a/extras/data_generation/structure_db/submit_structure_profile_calculations.py b/extras/data_generation/structure_db/submit_structure_profile_calculations.py
deleted file mode 100644
index 87f26a5f99cd2df34a38958f6a5f8acaf54a7fd2..0000000000000000000000000000000000000000
--- a/extras/data_generation/structure_db/submit_structure_profile_calculations.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import os, math
-from promod3 import loop
-
-# path to StructureDB for which you want to calculate the profiles:
-# (relative to current working directory!)
-structure_db_path = "structure_db.dat"
-
-# path to StructureDB from which you extract the structural information
-# if the StructureDB above is very large, you might want to give it a smaller
-# database here to limit CPU time. A database with around 5000 chains should 
-# be sufficient
-# (relative to current working directory!)
-structure_db_source_path = "structure_db.dat"
-
-# the profiles that get generated end up here:
-# (relative to current working directory!)
-out_dir = "profile_dbs"
-
-# number of jobs
-num_jobs = 100
-
-# all stdout, stderr and submission_script files will go here
-# (relative to current working directory!)
-submission_workspace = "submission_workspace"
-
-
-# path to the pm bash script
-# Once we have the latest and greatest ProMod3 module, this should not 
-# be necessary anymore and we can simply call pm
-pm_path = "/scicore/home/schwede/studga00/prog/promod3_dev/build/stage/bin/pm"
-
-
-# This is stuff relevant for the submission script
-max_runtime = "13:00:00"
-membycore = "3G"
-my_email = "gabriel.studer@unibas.ch"
-
-# we only need ost and promod3 as soon as they're released...
-required_modules = ["Boost/1.53.0-goolf-1.4.10-Python-2.7.5",
-                    "Python/2.7.5-goolf-1.4.10",
-                    "OpenMM/7.1.1-goolf-1.4.10-Python-2.7.5"]
-
-# they won't be necessary anymore as soon as there are ost
-# and promod3 modules
-ost_python_path = "/scicore/home/schwede/studga00/prog/ost_dev/build/stage/lib64/python2.7/site-packages"
-promod_python_path = "/scicore/home/schwede/studga00/prog/promod3_dev/build/stage/lib64/python2.7/site-packages"
-
-
-#################################################################
-# NO EDITING BELOW THIS POINT (EXCEPT ON DIFFERENT SYSTEMS ;) ) #
-#################################################################
-
-if not os.path.exists(submission_workspace):
-  os.makedirs(submission_workspace)
-
-if not os.path.exists(out_dir):
-  os.makedirs(out_dir)
-
-structure_db = loop.StructureDB.Load(structure_db_path)
-num_coords = structure_db.GetNumCoords()
-chunk_size = math.ceil(float(num_coords ) / num_jobs)
-current_start = 0
-current_end = chunk_size
-
-for job_idx in range(num_jobs):
-
-  start = current_start
-  end = current_end
-  if end > num_coords:
-    end = num_coords
-
-  # we estimate the chunk size with a ceil => we might get away with even
-  # less jobs...
-  if start >= end:
-    break
-
-  start = int(start)
-  end = int(end)
-
-  current_start += chunk_size
-  current_end += chunk_size
-
-  cmd = [pm_path, \
-         os.path.join(os.getcwd(),"create_structure_profiles.py"), \
-         os.path.join(os.getcwd(),structure_db_path), \
-         os.path.join(os.getcwd(),structure_db_source_path), \
-         str(start), str(end), \
-         os.path.join(os.getcwd(), out_dir, str(job_idx)+".dat")]
-
-  stdout_path = os.path.join(os.getcwd(), submission_workspace, str(job_idx) + ".stdout")
-  stderr_path = os.path.join(os.getcwd(), submission_workspace, str(job_idx) + ".stderr")
-
-  content = list()
-  content.append("#!/bin/bash")
-  content.append("#$ -o " + stdout_path)
-  content.append("#$ -e " + stderr_path)
-  content.append("#$ -l runtime=" + max_runtime)
-  content.append("#$ -l membycore=" + membycore)
-  content.append("#$ -m as -M " + my_email)   
-
-  for module in required_modules:
-    content.append("ml " + module)
-
-  content.append("export PYTHONPATH=" + ost_python_path + ":PYTHONPATH")
-  content.append("export PYTHONPATH=" + promod_python_path + ":PYTHONPATH")
-  content.append(' '.join(cmd))
-
-  s_script_path = os.path.join(submission_workspace, "sub_"+str(job_idx)+".sh")
-  outfile = open(s_script_path,'w')
-  outfile.write('\n'.join(content))
-  outfile.close()
-
-  # FIRE!!!
-  os.system("qsub "+ s_script_path)
-
diff --git a/loop/data/CMakeLists.txt b/loop/data/CMakeLists.txt
index 8c79b4397cdcbfefc0f2eb9f46591a71c9f38221..118be4c53f957e9d33a6de1b21bf3761393890b5 100644
--- a/loop/data/CMakeLists.txt
+++ b/loop/data/CMakeLists.txt
@@ -8,11 +8,11 @@ convert_module_data(MODULE loop FILE torsion_sampler_extended.dat
 convert_module_data(MODULE loop FILE torsion_sampler_helical.dat
                     SCRIPT convert_binaries.py ARGS TorsionSampler)
 
-#convert_module_data(MODULE loop FILE frag_db.dat
-#                    SCRIPT convert_binaries.py ARGS FragDB)
+convert_module_data(MODULE loop FILE frag_db.dat
+                    SCRIPT convert_binaries.py ARGS FragDB)
 
-#convert_module_data(MODULE loop FILE structure_db.dat
-#                    SCRIPT convert_binaries.py ARGS StructureDB)
+convert_module_data(MODULE loop FILE structure_db.dat
+                    SCRIPT convert_binaries.py ARGS StructureDB)
 
 convert_module_data(MODULE loop FILE ff_lookup_charmm.dat
                     SCRIPT convert_binaries.py ARGS ForcefieldLookup)
diff --git a/loop/data/portable_frag_db.dat b/loop/data/portable_frag_db.dat
index 802c3651567e94c49ffa483cab3827049869c141..f612b3cd9f7ae1d878363545027ea4158565b71e 100644
Binary files a/loop/data/portable_frag_db.dat and b/loop/data/portable_frag_db.dat differ
diff --git a/loop/data/portable_structure_db.dat b/loop/data/portable_structure_db.dat
index 73ab352c929d9c0c693b57f4a8da568c833d8cb5..0498195c094fbc0dae623c42903b5c91c5990a2c 100644
Binary files a/loop/data/portable_structure_db.dat and b/loop/data/portable_structure_db.dat differ
diff --git a/loop/src/structure_db.hh b/loop/src/structure_db.hh
index d34ea6edb053915ba0eab01d77674bc5a29404fc..e058724cb48dca0df5f3040570a892d117b72079 100644
--- a/loop/src/structure_db.hh
+++ b/loop/src/structure_db.hh
@@ -129,7 +129,7 @@ public:
   void SavePortable(const String& filename);
 
   bool HasData(DBDataType data_type) const {
-    return (data_type & stored_data_) > 0;
+    return (data_type & stored_data_) == data_type;
   }
 
   bool operator==(const StructureDB& rhs) const;
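(Why the HasData change matters, as a Python sketch with made-up bit values:
for a composite flag the old test answered "is at least one requested bit
stored?", while the intended question is "are all of them stored?".)

    STORED  = 0b0101  # bits actually present in stored_data_
    REQUEST = 0b0111  # composite data_type, one bit not stored

    old_has_data = (REQUEST & STORED) > 0         # True  -> wrong
    new_has_data = (REQUEST & STORED) == REQUEST  # False -> correct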
diff --git a/modelling/tests/data/port_frag_db.dat b/modelling/tests/data/port_frag_db.dat
index 1cc848360e16ec2840bc6c540fa6c7eda9e4fb95..167582ab4c931037fc88519a53f8a6d841a67e1c 100644
Binary files a/modelling/tests/data/port_frag_db.dat and b/modelling/tests/data/port_frag_db.dat differ
diff --git a/modelling/tests/data/port_str_db.dat b/modelling/tests/data/port_str_db.dat
index 5c183190ce902c88a35cc99fb5e4fd69a71ef62f..a861977c6a974dd2a15929b9f5c71415d57901d3 100644
Binary files a/modelling/tests/data/port_str_db.dat and b/modelling/tests/data/port_str_db.dat differ
diff --git a/modelling/tests/test_loop_candidates.cc b/modelling/tests/test_loop_candidates.cc
index fe902a579d7de55cad3a2a06026aa325b765847b..70c92d55fb35c002c0d3077aaaa1c334b9df29b5 100644
--- a/modelling/tests/test_loop_candidates.cc
+++ b/modelling/tests/test_loop_candidates.cc
@@ -303,7 +303,7 @@ BOOST_AUTO_TEST_CASE(test_loop_candidates_from_db) {
   ost::mol::EntityHandle ent = LoadTestStructure("data/1CRN.pdb");
   ost::seq::ProfileHandlePtr prof;
   prof = ost::io::LoadSequenceProfile("data/1CRNA.hhm");
-  const uint loop_length = 5;
+  const uint loop_length = 7;
   const uint start_idx = 10;
 
   // get dbs (small DBs created as in example codes using crambin)
@@ -311,15 +311,15 @@ BOOST_AUTO_TEST_CASE(test_loop_candidates_from_db) {
   structure_db = loop::StructureDB::LoadPortable("data/port_str_db.dat");
   // get known fragments to put in here
   std::vector<loop::FragmentInfo> fragments;
-  fragments.push_back(loop::FragmentInfo(1, 108, 5));
-  fragments.push_back(loop::FragmentInfo(1, 24, 5));
-  fragments.push_back(loop::FragmentInfo(1, 93, 5));
-  fragments.push_back(loop::FragmentInfo(0, 10, 5));
-  fragments.push_back(loop::FragmentInfo(0, 9, 5));
-  fragments.push_back(loop::FragmentInfo(1, 90, 5));
-  fragments.push_back(loop::FragmentInfo(0, 7, 5));
-  fragments.push_back(loop::FragmentInfo(1, 28, 5));
-  fragments.push_back(loop::FragmentInfo(1, 4, 5));
+  fragments.push_back(loop::FragmentInfo(1, 108, loop_length));
+  fragments.push_back(loop::FragmentInfo(1, 24, loop_length));
+  fragments.push_back(loop::FragmentInfo(1, 93, loop_length));
+  fragments.push_back(loop::FragmentInfo(0, 10, loop_length));
+  fragments.push_back(loop::FragmentInfo(0, 9, loop_length));
+  fragments.push_back(loop::FragmentInfo(1, 90, loop_length));
+  fragments.push_back(loop::FragmentInfo(0, 7, loop_length));
+  fragments.push_back(loop::FragmentInfo(1, 28, loop_length));
+  fragments.push_back(loop::FragmentInfo(1, 4, loop_length));
 
   // manually fill loop candidates
   String loop_seq = "";
@@ -343,9 +343,9 @@ BOOST_AUTO_TEST_CASE(test_loop_candidates_from_db) {
                         structure_db, *prof, start_idx);
   stem_rmsds = loop_candidates->CalculateStemRMSDs(n_stem, c_stem);
   // check scores
-  CheckScore(seq_prof_scores, N_cand, -2.12749, 2.07746, -0.484273);
-  CheckScore(str_prof_scores, N_cand, 0.149246, 0.426513, 0.282906);
-  CheckScore(stem_rmsds, N_cand, 0.00893292, 1.66355, 0.691202);
+  CheckScore(seq_prof_scores, N_cand, -2.73226428, 2.37290597, -1.04137301);
+  CheckScore(str_prof_scores, N_cand, 0.0652512088, 0.749628067, 0.391301066);
+  CheckScore(stem_rmsds, N_cand, 0.0314558744, 2.9163394, 1.22806919);
 
   // check ApplyXXX
   // -> expected ApplyCCD: 0, 2, .., 8 converging
@@ -354,41 +354,51 @@ BOOST_AUTO_TEST_CASE(test_loop_candidates_from_db) {
   orig_indices = lc.ApplyCCD(n_stem, c_stem);
   CheckOrigIndices(lc, orig_indices, fragments);
   // ShowIndices(orig_indices);
-  BOOST_CHECK_EQUAL(orig_indices.size(), uint(8));
+  BOOST_CHECK_EQUAL(orig_indices.size(), uint(9));
   BOOST_CHECK_EQUAL(orig_indices[0], uint(0));
-  BOOST_CHECK_EQUAL(orig_indices[1], uint(2));
-  BOOST_CHECK_EQUAL(orig_indices[7], uint(8));
+  BOOST_CHECK_EQUAL(orig_indices[1], uint(1));
+  BOOST_CHECK_EQUAL(orig_indices[8], uint(8));
   CheckProfScores(lc, orig_indices, structure_db, prof, start_idx,
                   seq_prof_scores, str_prof_scores);
   // -> expected ApplyKIC: 0 converging with 2 LC
   lc = *loop_candidates; // COPY
-  orig_indices = lc.ApplyKIC(n_stem, c_stem, 1, 2, 3);
+  orig_indices = lc.ApplyKIC(n_stem, c_stem, 1, 3, 5);
   CheckOrigIndices(lc, orig_indices, fragments);
   // ShowIndices(orig_indices);
-  BOOST_CHECK_EQUAL(orig_indices.size(), uint(2));
-  BOOST_CHECK_EQUAL(orig_indices[0], uint(3));
-  BOOST_CHECK_EQUAL(orig_indices[1], uint(3));
+  BOOST_CHECK_EQUAL(orig_indices.size(), uint(50));
+
+
+  BOOST_CHECK_EQUAL(orig_indices[0], uint(0));
+  BOOST_CHECK_EQUAL(orig_indices[1], uint(0));
+  BOOST_CHECK_EQUAL(orig_indices[2], uint(0));
+  BOOST_CHECK_EQUAL(orig_indices[3], uint(0));
+  BOOST_CHECK_EQUAL(orig_indices[4], uint(1));
+  BOOST_CHECK_EQUAL(orig_indices[5], uint(1));
+  BOOST_CHECK_EQUAL(orig_indices[6], uint(1));
+  BOOST_CHECK_EQUAL(orig_indices[49], uint(8));
+
   CheckProfScores(lc, orig_indices, structure_db, prof, start_idx,
                   seq_prof_scores, str_prof_scores);
 
   // cluster them
   std::vector< std::vector<uint> > clusters;
-  const Real max_dist = 0.23;
+  const Real max_dist = 0.35;
   loop_candidates->GetClusters(max_dist, clusters);
   // ShowClusters(clusters);
 
-  BOOST_CHECK_EQUAL(clusters.size(), uint(4));
+  BOOST_CHECK_EQUAL(clusters.size(), uint(5));
 
   std::vector<uint> cluster_sizes;
-  for(int i = 0; i < 4; ++i){
+  for(uint i = 0; i < clusters.size(); ++i){
     cluster_sizes.push_back(clusters[i].size());
   }
   std::sort(cluster_sizes.begin(), cluster_sizes.end());
 
   BOOST_CHECK_EQUAL(cluster_sizes[0], uint(1));
   BOOST_CHECK_EQUAL(cluster_sizes[1], uint(1));
-  BOOST_CHECK_EQUAL(cluster_sizes[2], uint(3));
-  BOOST_CHECK_EQUAL(cluster_sizes[3], uint(4));
+  BOOST_CHECK_EQUAL(cluster_sizes[2], uint(1));
+  BOOST_CHECK_EQUAL(cluster_sizes[3], uint(3));
+  BOOST_CHECK_EQUAL(cluster_sizes[4], uint(3));
 
   CheckClusters(clusters, N_cand);
   // get filtered variant to check neglect_size_one below