diff --git a/PAQR/scripts/deNovo-used-sites-and-usage-inference.py b/PAQR/scripts/deNovo-used-sites-and-usage-inference.py
deleted file mode 100644
index e51ce9e4d0726be0874733050612d836984f6c49..0000000000000000000000000000000000000000
--- a/PAQR/scripts/deNovo-used-sites-and-usage-inference.py
+++ /dev/null
@@ -1,2001 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Oct 05 13:30:36 2016
-
-Define used poly(A) sites and calculate their relative usage (parallelized)
-
-@author: schmiral
-"""
-__date__ = "Wed Oct 05 13:30:36 2016"
-__author__ = "Ralf Schmidt"
-__email__ = "ralf.schmidt@unibas.ch"
-__license__ = "GPL"
-
-
-# imports
-import sys
-import os
-import time
-import HTSeq
-from argparse import ArgumentParser, RawTextHelpFormatter
-import numpy as np
-import cPickle
-import datetime
-import multiprocessing
-import bisect
-
-parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
-parser.add_argument("-v",
-                    "--verbose",
-                    dest="verbose",
-                    action="store_true",
-                    default=False,
-                    help="Be loud!")
-                    
-parser.add_argument("--clusters",
-                    dest="polyA",
-                    help=("file name providing the poly(A) " +
-                        "sites in extended BED file format") )
-
-parser.add_argument("--coverages",
-                    dest="coverages",
-                    nargs="*",
-                    help=("name of the pickle files that " +
-                        "store the HTSeq objects with coverages") )
-
-parser.add_argument("--conditions",
-                    dest="conditions",
-                    nargs="*",
-                    help=("values that match the coverages " +
-                        "parameters and indicate the condition each " +
-                        "coverage object belongs to") )
-                    
-parser.add_argument("--read_length", 
-                    dest="read_length",
-                    type=int,
-                    default=100,
-                    help=("integer value for the expected read length " +
-                        "(used for window to fit a regression line to the " +
-                        "upstream region of poly(A) sites) [default: 100]") )
-
-parser.add_argument("--min_coverage_region",
-                    dest="min_coverage_region",
-                    type=int, 
-                    default=100,
-                    help=("integer value that defines the minimum region " +
-                        "that needs to be available to calculate a mean " +
-                        "coverage and the mean squared error [default: 100]") )
-
-parser.add_argument("--min_mean_coverage",
-                    dest="min_mean_coverage",
-                    type=float, 
-                    default=20.0,
-                    help=("float value that defines the minimum mean " +
-                        "coverage required for an exon to be considered " +
-                        "[default: 20.0]") )
-
-parser.add_argument("--min_cluster_distance",
-                    dest="min_cluster_distance",
-                    type=int, 
-                    default=200,
-                    help=("integer value that defines the distance till " +
-                        "which clusters are merged and analyzed separately " +
-                        "[default: 200]") )
-                    
-parser.add_argument("--mse_ratio_threshold",
-                    dest="mse_ratio_threshold",
-                    type=float,
-                    default=0.7,
-                    help=("the ratio of the mse after including a new break " +
-                        "point devided by the mse before must be below this " +
-                        "threshold [default: 0.7]") )
-                    
-parser.add_argument("--best_break_point_upstream_extension",
-                    dest="best_break_point_upstream_extension",
-                    type=int,
-                    default=200,
-                    help=("nucleotide extension added to the cluster's " +
-                        "upstream end during the check for global break " +
-                        "points [default: 200]") )
-                    
-parser.add_argument("--ds_reg_for_no_coverage",
-                    dest="ds_reg_for_no_coverage",
-                    type=int, 
-                    default=200,
-                    help=("integer value that defines the region that is " +
-                        "downstream of the exon and necessary to search for " +
-                        "zero coverage downstream of the distal site  [default: 200]") )
-
-parser.add_argument("--max_downstream_coverage",
-                    dest="max_downstream_coverage",
-                    default=5,
-                    type=float,
-                    help=("<FLOAT> maximum percentage of the coverage at the exon " +
-                          "start is allowed for the mean coverage in the downstream " +
-                          "region of a valid distal site [default: 5]") )
-
-parser.add_argument("--processors",
-                    dest="proc",
-                    type=int,
-                    default=1,
-                    help="<INT> Number of processors used [default: 1]")
-
-parser.add_argument("-d",
-                    "--debug",
-                    dest="debug",
-                    action="store_true",
-                    default=False,
-                    help="Print additional diagnostic comments for single exons")
-
-parser.add_argument("--ex_extension_files",
-                    dest="ex_extension_files",
-                    nargs="*",
-                    help=("name of the files that " +
-                        "store the upstream extensions per exon for the corresponding sample") )
-
-parser.add_argument("--expressions_out",
-                    dest="expressions_out",
-                    help="name of file to write the raw expression values per site per sample to")
-
-#parser.add_argument("--min_r_squared", 
-#                    dest="r_sq",
-#                    type=float,
-#                    default = 0.2,
-#                    help="r-squared threshold for the regression line fit to the coverage [default: 0.2]")
-
-# define shorthand functions for writing to stdout and stderr to save some typing
-syserr = sys.stderr.write
-sysout = sys.stdout.write
-
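-# turn numpy floating point warnings (divide, overflow, invalid, ...) into exceptions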
-np.seterr(all='raise')
-
-def time_now():  # return the current time as a formatted string
-    curr_time = datetime.datetime.now()
-    return curr_time.strftime("%c")            
-    
-def find_distal_site(site,
-                     ex_start,
-                     strand, 
-                     region, 
-                     merged_exons,
-                     max_downstream_coverage,
-                     read_length, 
-                     ds_reg_extend,
-                     debug):
-    
-    """Check if the passed poly(A)site is applicable as distal site
-    
-    Use the combined coverage of all samples here"""
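-
-    # Acceptance criterion in numbers (illustrative values, not from this code):
-    # with max_downstream_coverage = 5, an exon start mean coverage of 50, an
-    # upstream mean of 40 and a downstream mean of 1, the site qualifies,
-    # because 40 > 1 and 1 / 50 * 100 = 2 <= 5.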
-
-    #----------------------------------------
-
-    # define the exon start reference region
-    # get the mean coverage of this region
-    # (ex_start saves the strand specific exon start)
-    if strand == "+":
-        ex_start_idx = region.index(ex_start)
-
-        if ex_start + read_length > site[1].start:
-            # consistency check:
-            # is exon start more upstream than site start
-            if site[1].start < ex_start:
-                syserr("[INFO] Distal site search stopped whith the current site %s that overlaps the exon start\n"
-                       % site[0] )
-                return(None, -1)
-
-            # exon is very short: use the start of the currently investigated
-            # poly(A) site as boundary
-            ex_start_2_idx = region.index(site[1].start)
-        else:
-            ex_start_2_idx = region.index(ex_start + read_length)
-    else:
-        if ex_start - read_length < site[1].end:
-            # consistency check:
-            # is exon start more upstream than site start (meaning the strand specific starts)
-            if site[1].end >= ex_start:
-                syserr("[INFO] Distal site search stopped whith the current site %s that overlaps the exon start\n"
-                       % site[0] )
-                return(None, -1)
-            
-            # exon is very short: use the start of the currently investigated
-            # poly(A) site as boundary
-            ex_start_idx = region.index( site[1].end)
-        else:
-            ex_start_idx = region.index(ex_start - read_length)
-            
-        ex_start_2_idx = region.index(ex_start - 1) + 1
-
-    if ex_start_2_idx - ex_start_idx <= 0:
-        syserr("[INFO] Distal site search stopped whith the current site %s that overlaps the exon start\n"
-               % site[0])
-        return(None, -1)
-        
-    ex_start_cov = np.mean(merged_exons[ ex_start_idx:ex_start_2_idx ])
-
-    if ex_start_cov == 0:
-        if debug:
-            syserr( ("[INFO] No coverage at exon start of site %s\n" +
-                     "[INFO] Found during search for distal site. Exon is skipped\n")
-                    % site[0] )
-        return (None, -1)
-
-    #----------------------------------------
-
-    # define the upstream reference region
-    # get the mean coverage of this region
-
-    if strand == "+":
-        if site[1].start - (2 * read_length) < ex_start:
-            # exon is very short
-            # use the exon start as ref-region start
-            ex_us_idx = region.index(ex_start)
-        else:
-            ex_us_idx = region.index( site[1].start - (2 * read_length) )
-        ex_us_2_idx = region.index( site[1].start )
-    else:
-        ex_us_idx = region.index( site[1].end )
-        if site[1].end + (2 * read_length) >= ex_start:
-            # exon is very short
-            # use the exon start as ref-region start
-            ex_us_2_idx = region.index(ex_start - 1) + 1
-        else:
-            ex_us_2_idx = region.index( site[1].end + (2 * read_length) )
-    ex_us_cov = np.mean(merged_exons[ ex_us_idx:ex_us_2_idx ])
-
-    #----------------------------------------
-
-    # define the downstream reference region
-    # get the mean coverage of this region
-
-    if strand == "+":
-        ex_ds_idx = region.index(site[1].end )
-        # ex_ds_2_idx = region.index( site[1].end + (2 * read_length) )
-        ex_ds_2_idx = region.index( site[1].end + (ds_reg_extend) - 1 ) + 1
-        ex_ds_cov = np.mean( merged_exons[ ex_ds_idx:ex_ds_2_idx] )
-    else:
-        # ex_ds_idx = region.index( site[1].start - (2 * read_length) )
-        ex_ds_idx = region.index( site[1].start - (ds_reg_extend) )
-        ex_ds_2_idx = region.index( site[1].start )
-        ex_ds_cov = np.mean(merged_exons[ ex_ds_idx:ex_ds_2_idx ])
-
-    #----------------------------------------
-
-    # a valid distal site has a higher us_mean_cvg than ds_mean_cvg
-    # and the percentage of the ds_mean_cvg with respect to
-    # the start_mean_cvg is below the threshold
-    # (threshold defines the maximum percentage the ds_region is allowed to have)
-
-    if( ex_us_cov > ex_ds_cov and
-        ex_ds_cov / float(ex_start_cov) * 100 <= max_downstream_coverage):
-        # distal site found
-        return(1, 1)
-    else:
-        # do not use this site as distal site
-        return (None, 0)
-
-def assess_minimum_cvg(exon_structure, bp_coverages_dict, region, min_mean_cvg):
-    '''Iterate over the dict with coverages and assess
-    for every single sample whether it has sufficient mean coverage
-    at the current exon
-    '''
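-
-    # shape of the returned dict (illustrative sketch): one key per condition,
-    # one 0/1 flag per coverage object, e.g. {"ctrl": [1, 1], "kd": [1, 0]},
-    # where 0 marks a sample below min_mean_cvg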
-
-    return_dict = {}
-    
-    start = exon_structure[2]
-    end = exon_structure[3]
-
-    # infer the indices for the current exon boundaries
-    start_idx = region.index( start )
-    try:
-        end_idx = region.index( end )
-    except ValueError:
-        # for negative strand sites:
-        # exon end may not be accessible as position
-        end_idx = region.index( end - 1) + 1
-
-    for cond in bp_coverages_dict:
-        return_dict[ cond ] = []
-        for cvg in bp_coverages_dict[cond]:
-            if np.mean( cvg[start_idx:end_idx] ) < min_mean_cvg:
-                return_dict[ cond ].append(0)
-            else:
-                return_dict[ cond ].append(1)
-
-    return return_dict
-
-    
-def check_mse_improvement(bp_coverages_dict,
-                          valid_mean_cvg_dict,
-                          region, 
-                          site, 
-                          strand, 
-                          segment_start,
-                          segment_end,
-                          min_coverage_region, 
-                          best_break_point_upstream_extension, 
-                          mse_ratio_threshold, 
-                          upstream_extension, 
-                          prev_mse,
-                          extended_prev_mse=None):
-    """check if current poly(A) site improves the mean squared error"""
-    
-    us_mse_dict = {}
-    ds_mse_dict = {}
-    mse_ratio_dict = {}
-
-    segment_starts = {}
-    segment_ends = {}
-
-    # for both strands: iterate over region from 3' to 5'
-    fit_region_to_iterate = []
-    fit_region_start = site[1].start
-    fit_region_end = site[1].end
-
-    if strand == "+" :
-        fit_region_start -= best_break_point_upstream_extension
-        fit_region_to_iterate = range( fit_region_start, fit_region_end )[::-1]
-    else:
-        fit_region_end += best_break_point_upstream_extension
-        fit_region_to_iterate = range( fit_region_start, fit_region_end )
-
-    for cond in bp_coverages_dict:
-        us_mse_dict[cond] = []
-        ds_mse_dict[cond] = []
-        mse_ratio_dict[cond] = []
-        segment_starts[cond] = []
-        segment_ends[cond] = []
-        
-        cvg_idx = -1
-        for cvg in bp_coverages_dict[cond]:
-            cvg_idx += 1
-            curr_best_ratio = 10
-
-            # store whether the exon was already extended upstream
-            # (artificial exon upstream extension only possible once per site)
-            already_extended = False
-
-            # if the current sample has no valid mean cvg:
-            # append 10 as curr_best_ratio and continue
-            if valid_mean_cvg_dict[cond][cvg_idx] == 0:
-                mse_ratio_dict[cond].append(curr_best_ratio)
-                # append a placeholder value also as segment start/end (strand dependent)
-                if strand == "+":
-                    segment_starts[cond].append(0)
-                else:
-                    segment_ends[cond].append(0)
-                continue
-
-            # set up the up- and downstream regions
-            # (depending on the strand, some values are set only later in the position loop)
-            if strand == "+":
-                up_start = segment_start
-                down_end = segment_end
-                # store the upstream starts in case these are extended in the next for loop
-                segment_starts[cond].append(up_start)
-            else:
-                down_start = segment_start
-                up_end = segment_end
-                # store the upstream end (i.e., the strand-specific start) in case it is extended in the next for loop
-                segment_ends[cond].append(up_end)
-                
-            prev_reg_mse = prev_mse[cond][cvg_idx]
-            if prev_reg_mse == 0:
-                # skip this coverage because for this region it is already 
-                # completely flat
-                syserr( ("[INFO] Skip cond %s cvg %i. Found previous reg " + 
-                       "to have zero mse for site %s!\n")
-                       % (cond, cvg_idx, str(site[0])) )
-                # no reasonable position was found: still the starting value of 10
-                # is appended as curr_best_ratio
-                mse_ratio_dict[cond].append(curr_best_ratio)
-                continue
-
-            for pos in fit_region_to_iterate:
-                
-                # leave out the current pos
-                # (pos belongs to neither the upstream nor the downstream region)
-                if strand == "+":
-                    up_end = pos
-                    down_start = pos + 1
-                else:
-                    up_start = pos + 1
-                    down_end = pos
-
-                if up_end - up_start < min_coverage_region:
-                    # upstream region is too small
-                    # if currently the proximal site is considered
-                    # the additional upstream extension of coverage is exploited
-                    # current site is the most proximal == extended_prev_mse != None
-                    if extended_prev_mse is not None and not already_extended:
-                        # extended upstream region is used, hence, 
-                        # the extended previous mse should be used as well
-                        prev_reg_mse = extended_prev_mse[cond][cvg_idx]
-
-                        if strand == "+":
-                            up_start -= upstream_extension[cond][cvg_idx]
-                            segment_starts[cond][cvg_idx] = up_start
-                        else:
-                            up_end += upstream_extension[cond][cvg_idx]
-                            segment_ends[cond][cvg_idx]= up_end
-                        already_extended = True
-
-                        if up_end - up_start < min_coverage_region:
-                            # syserr(("[INFO] No upstream region for %s at pos " +
-                            #         "%i and potential more upstream positions " +
-                            #         "even though upstream extension of the exon. " +
-                            #         "Skip this and more upstream positions.\n") 
-                            #         % (site[0], pos))
-                            break
-                    else:
-                        # syserr(("[INFO] No upstream region for internal pA %s at pos %i. " +
-                        #         "Skip this and more upstream positions.\n")
-                        #         % (site[0], pos))
-                        break
-                if down_end - down_start <= min_coverage_region:
-                    syserr("[INFO] No downstream region for %s at pos %i\n" 
-                            % (site[0], pos))
-                    break
-
-                ### Finished definition of indexes
-
-                ### set up upstream region
-                try:
-                    up_start_idx = region.index(up_start)
-                except ValueError:
-                    syserr("[ERROR] upstream start pos not in list\n")
-                    syserr("[ERROR] Upstream start: %i, upstream stop: %i, diff: %i, min_diff: %i\n" 
-                           %( up_start, up_end, up_end - up_start, min_coverage_region) )
-                    raise ValueError("[ERROR] Interrupt due to missing position\n")
-
-                try:
-                    up_end_idx = region.index(up_end)
-                except ValueError:
-                    # for negative strand sites:
-                    # exon end may not be accessible as position
-                    up_end_idx = region.index(up_end - 1) + 1
-
-                cvg_upstream = cvg[up_start_idx:up_end_idx]
-                mean_upstream = np.mean( cvg_upstream )
-
-                ### set up downstream region
-                down_start_idx = region.index(down_start)
-                down_end_idx = region.index(down_end)
-
-                cvg_downstream = cvg[down_start_idx:down_end_idx]
-                mean_downstream = np.mean( cvg_downstream )
-
-                # continue if the downstream mean is higher than the upstream mean
-                if mean_downstream > mean_upstream:
-                    continue
-
-                # get a mean squared error of up- and downstream region
-                cov_diff = cvg_downstream - mean_downstream
-                cov_diff = np.append( cov_diff, cvg_upstream - mean_upstream )
-                curr_mse = np.mean( cov_diff**2)
-                
-                mse_ratio = float(curr_mse) / prev_reg_mse
-                if mse_ratio < curr_best_ratio:
-                    curr_best_ratio = mse_ratio
-
-            # if no reasonable position was found: still the starting value of 10
-            # is appended as curr_best_ratio
-            mse_ratio_dict[cond].append(curr_best_ratio)
-
-    # # test
-    # syserr("[tmp] inferred mse_ratios for current site %s: %s\n" % (site[0], str(mse_ratio_dict)))
-                
-    # check if site has valid mse improvement in any of the conditions
-    # for this the median mse ratio must meet the threshold criterion
-    valid_conds = []
-    for cond in mse_ratio_dict:
-        # only consider samples with valid mean_cvg
-        mse_ratio_subset = np.array([mse_ratio_dict[cond][i] for i in range(len( mse_ratio_dict[cond] ) ) if valid_mean_cvg_dict[cond][i] == 1])
-        if len(mse_ratio_subset) == 0:
-            continue
-        
-        # a condition is considered "valid" under one of three criteria:
-        # two samples or fewer: all must be below the threshold
-        # three to five samples: the median must be below the threshold
-        # six or more samples: at least three samples must be below the threshold
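-        # e.g. with threshold 0.7 and four valid samples with ratios
-        # [0.5, 0.65, 0.9, 0.95] (made-up numbers): the median after appending
-        # a temporary 0 is 0.65 < 0.7, so the condition counts as valid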
-        if len(mse_ratio_subset) <= 2:
-            if sum( mse_ratio_subset < mse_ratio_threshold ) == len(mse_ratio_subset):
-                valid_conds.append(cond)
-        elif len(mse_ratio_subset) <= 5:
-            if len(mse_ratio_subset) % 2 == 1 and np.median( mse_ratio_subset ) < mse_ratio_threshold:
-                valid_conds.append(cond)
-            elif len(mse_ratio_subset) % 2 == 0 and np.median( np.append(mse_ratio_subset,0) ) < mse_ratio_threshold:
-                # the number of samples is even, therefore a zero is appended temporarily
-                # this ensures that the median corresponds to half of the samples
-                valid_conds.append(cond)
-        else:
-            if sum( mse_ratio_subset < mse_ratio_threshold) >= 3:
-                valid_conds.append(cond)
-            
-    if len(valid_conds) > 0:
-        
-        # potential site found
-        # complete the info in the mse dicts
-        # define the new up_mse and ds_mse values based on 
-        # the poly(A) site coordinates
-        for cond in bp_coverages_dict:
-            cvg_idx = -1
-            for cvg in bp_coverages_dict[cond]:
-
-                cvg_idx += 1
-
-                if valid_mean_cvg_dict[cond][cvg_idx] == 0:
-                    us_mse_dict[cond].append(100)
-                    ds_mse_dict[cond].append(100)
-                    continue
-
-                # calculate the missing up- and downstream mses for the coverages
-                # exclude the region of the poly(A) site cluster when calculating
-                # the mses
-                if strand == "+":
-                    up_start_idx = region.index(segment_starts[cond][cvg_idx])
-                    up_end_idx = region.index(site[1].start)
-                    down_start_idx = region.index(site[1].end)
-                    down_end_idx = region.index(segment_end)
-                else:
-                    up_start_idx = region.index(site[1].end)
-                    try:
-                        up_end_idx = region.index(segment_ends[cond][cvg_idx])
-                    except ValueError:
-                        # for negative strand sites:
-                        # exon end may not be accessible as position
-                        up_end_idx = region.index(segment_ends[cond][cvg_idx] - 1) + 1
-                    down_start_idx = region.index(segment_start)
-                    down_end_idx = region.index(site[1].start)
-
-                cvg_upstream = cvg[up_start_idx:up_end_idx]
-                mean_upstream = np.mean(cvg_upstream)
-                us_mse = np.mean((cvg_upstream - mean_upstream)**2)
-
-                cvg_downstream = cvg[down_start_idx:down_end_idx]
-                mean_downstream = np.mean( cvg_downstream )
-                ds_mse = np.mean((cvg_downstream - mean_downstream)**2)
-
-                us_mse_dict[cond].append(us_mse)
-                ds_mse_dict[cond].append(ds_mse)
-
-    return (mse_ratio_dict, us_mse_dict, ds_mse_dict, valid_conds)
-
-def check_global_best_breaking_point(bp_cvg,
-                                     used_sites_positions,
-                                     start_idx,
-                                     end_idx,
-                                     min_coverage_region):
-
-    """Find best breaking point irrespective of poly(A) sites.
-    Function to process single sites."""
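-
-    # Sketch with made-up numbers: for a coverage step of 100 positions at 10
-    # followed by 100 positions at 2, the global mse is 16, while splitting at
-    # the step leaves two flat parts with a pooled mse of 0, so the step
-    # position is returned as break point.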
-
-    cvg_global = bp_cvg[start_idx:end_idx]
-    global_mse = np.mean( (cvg_global - np.mean(cvg_global))**2 )
-    smallest_mse = global_mse
-    best_pos_idx = None
-    for pos_idx in range(start_idx + min_coverage_region, end_idx - min_coverage_region):
-        # get mse for the current two parts
-        cvg_right = bp_cvg[pos_idx + 1: end_idx]
-        cvg_left = bp_cvg[start_idx:pos_idx]
-        cov_diff = cvg_left - np.mean(cvg_left)
-        cov_diff = np.append( cov_diff, (cvg_right - np.mean(cvg_right) ) )
-        curr_mse = np.mean( cov_diff**2 )
-        if curr_mse < smallest_mse:
-            smallest_mse = curr_mse
-            best_pos_idx = pos_idx
-
-    if best_pos_idx is not None:
-                
-        if best_pos_idx not in used_sites_positions:
-            # break point is outside of regions that are poly(A)-site supported
-            return(0, best_pos_idx)
-
-        else:
-            # valid break point found
-            return( best_pos_idx, best_pos_idx )
-
-    # no break point was found; return "None"
-    return( best_pos_idx, best_pos_idx )
-    
-def check_global_best_breaking_points(bp_cvg,
-                                      cond,
-                                      sample_idx,
-                                      region,
-                                      used_sites_positions,
-                                      still_undetected,
-                                      start_idx,
-                                      end_idx,
-                                      min_coverage_region):
-    """Find best breaking points irrespective of poly(A) sites.
-    Wrapper for the entire exon."""
-    
-    boundary_indices = [start_idx, end_idx]
-    while any(x is not None for x in still_undetected):
-        check_curr_segment = False
-        for segment_idx in range(1,len(boundary_indices)):
-            for idx in range(boundary_indices[segment_idx -1], boundary_indices[segment_idx]):
-                if idx in used_sites_positions and still_undetected[used_sites_positions[idx]] is not None:
-                    check_curr_segment = True
-                    break
-            if check_curr_segment:
-                break
-
-        
-        # conds_to_check = set()
-        # for segment_idx in range(1,len(boundary_indices)):
-        #     # get the conditions for which poly(A) sites that act as breaking points were inferred
-        #     conds_to_check = set()
-        #     samples_to_consider = {}
-        #     for idx in range(boundary_indices[segment_idx -1], boundary_indices[segment_idx]):
-        #         if idx in used_sites_positions and still_undetected[used_sites_positions[idx]] is not None:
-        #             conds_to_check.update(still_undetected[used_sites_positions[idx]][3])
-        #             # iterate over the supporting_cvgs dict of the current site
-        #             for cond in still_undetected[used_sites_positions[idx]][4]:
-        #                 if cond not in samples_to_consider:
-        #                     samples_to_consider[cond] = still_undetected[used_sites_positions[idx]][4][cond]
-        #                 else:
-        #                     # update the currently stored supporting_cvgs for cond
-        #                     # include so far missing samples
-        #                     for i in range(len( samples_to_consider[cond] )):
-        #                         if still_undetected[used_sites_positions[idx]][4][cond][i]:
-        #                             samples_to_consider[cond][i] = True
-
-        #     if len(conds_to_check) > 0:
-        #         break
-        # if len(conds_to_check):
-
-            
-        # use the current borders to find the best breaking point
-        (pos_idx_indicator, pos_idx) = check_global_best_breaking_point(bp_cvg,
-                                                                        used_sites_positions,
-                                                                        boundary_indices[segment_idx -1],
-                                                                        boundary_indices[segment_idx],
-                                                                        min_coverage_region)
-        if pos_idx_indicator == 0:
-            # syserr("[ERROR] Break point outside of poly(A) site regions\n")
-            return (boundary_indices, pos_idx)
-        elif pos_idx_indicator is None:
-            # syserr("[ERROR] No valid break point found")
-            return (boundary_indices, pos_idx)
-
-        else:
-            # test
-            #if still_undetected[used_sites_positions[pos_idx]] is not None:
-            #    rep_site = still_undetected[used_sites_positions[pos_idx]][0]
-            #    pos_idx = region.index( int(rep_site.split(":")[2]) )
-            #syserr("[INFO] Inferred break point: %i\n" % pos_idx)
-            bisect.insort(boundary_indices, pos_idx)
-            still_undetected[used_sites_positions[pos_idx]] = None
-
-        # else:
-        #     syserr(("[ERROR] No sites found anymore even though still " +
-        #             "undetected sites are available. undetected_sites: %s\n") 
-        #             % str(still_undetected))
-        #     syserr("[ERROR] Current boundary indices: %s\n" % str(boundary_indices))
-        #     return "Error"
-            
-    return "Finished"
-    
-def find_sites_with_usage(sites,
-                          segment_start,
-                          segment_end,
-                          segment_mse,
-                          original_extended_segment_mse,
-                          bp_coverages_dict,
-                          valid_mean_cvg_dict,
-                          region,
-                          strand,
-                          min_coverage_region,
-                          best_break_point_upstream_extension, 
-                          mse_ratio_threshold, 
-                          add_us_extension):
-    """iterate over the poly(A) sites
-    and find the best poly(A) site
-    """
-
-    # keep the original value of extended_segment_mse untouched
-    # to know whether it needs to be updated below in the code
-    extended_segment_mse = original_extended_segment_mse
-
-    top_site = None
-    top_site_idx = None
-    top_site_prot_support = 0
-    top_mse_ratio = 1
-    top_us_mse_dict = {}
-    top_ds_mse_dict = {}
-    top_site_valid_conditions = []
-    top_site_supporting_cvgs = {}
-    top_site_is_from_cluster = None
-    
-    for site_idx in range(len(sites)):
-        site = sites[site_idx]
-
-        if isinstance(site[0], list):
-            # set the extended_segment_mse to None if the most proximal
-            # site has enough distance to the exon start
-            # (extended_segment_mse might be None anyways already 
-            #  if the current segment is a subsegment of the exon that
-            #  is not most proximally located)
-            if( (strand == "+" and (site[0][1].start
-                                    - segment_start
-                                    - best_break_point_upstream_extension) >= min_coverage_region) or
-                (strand == "-" and (segment_end
-                                    - site[0][1].end
-                                    - best_break_point_upstream_extension) >= min_coverage_region)):
-                extended_segment_mse = None
-
-            # process a set of closely spaced clusters
-            best_candidate_ratio = 1
-            best_candidate_prot_support = 0
-            best_candidate_up_mse_dict = {}
-            best_candidate_ds_mse_dict = {}
-            best_candidate_site = None
-            best_candidate_valid_conditions = []
-            best_candidate_supporting_cvgs = {}
-            # iterate over set of closely spaced sites (prox -> dist):
-            subsite_idx = -1
-            for subsite in site:
-                subsite_idx += 1
-                (mse_ratios, 
-                 us_mse_dict,
-                 ds_mse_dict,
-                 valid_conds_list) = check_mse_improvement(bp_coverages_dict,
-                                                           valid_mean_cvg_dict,
-                                                           region, 
-                                                           subsite, 
-                                                           strand, 
-                                                           segment_start,
-                                                           segment_end,
-                                                           min_coverage_region,  
-                                                           best_break_point_upstream_extension, 
-                                                           mse_ratio_threshold, 
-                                                           add_us_extension,
-                                                           segment_mse,
-                                                           extended_segment_mse)
-                 
-                # Use the MINIMAL median-ratio of this site (only from samples below the cutoff)
-                # check all conditions separately
-                # when ratio is the same: take the site with higher protocol support
-                # when two sites share the same top ratio AND the protocol support:
-                # take the more proximal site
-                curr_best_median_ratio = 2
-                curr_supporting_cvgs = {}
-                for cond in valid_conds_list:
-                    valid_ratios = [ i for i in mse_ratios[cond] if i < mse_ratio_threshold ]
-                    curr_supporting_cvgs[cond] = [ i < mse_ratio_threshold for i in mse_ratios[cond]]
-                    if np.median( valid_ratios ) < curr_best_median_ratio:
-                        curr_best_median_ratio = np.median( valid_ratios )
-                if curr_best_median_ratio < best_candidate_ratio:
-                    best_candidate_ratio = curr_best_median_ratio
-                    best_candidate_site = subsite
-                    best_candidate_prot_support = subsite[2]
-                    best_candidate_up_mse_dict = us_mse_dict
-                    best_candidate_ds_mse_dict = ds_mse_dict
-                    best_candidate_valid_conditions = valid_conds_list
-                    best_candidate_supporting_cvgs = curr_supporting_cvgs
-                elif( curr_best_median_ratio == best_candidate_ratio and
-                      subsite[2] > best_candidate_prot_support):
-                    best_candidate_ratio = curr_best_median_ratio
-                    best_candidate_site = subsite
-                    best_candidate_prot_support = subsite[2]
-                    best_candidate_up_mse_dict = us_mse_dict
-                    best_candidate_ds_mse_dict = ds_mse_dict
-                    best_candidate_valid_conditions = valid_conds_list
-                    best_candidate_supporting_cvgs = curr_supporting_cvgs
-            if best_candidate_ratio < 1:
-                # for this set of sites, a potentially used site was found
-                if best_candidate_ratio < top_mse_ratio:
-                    top_site = best_candidate_site
-                    top_site_idx = site_idx
-                    top_mse_ratio = best_candidate_ratio
-                    top_site_prot_support = best_candidate_prot_support
-                    top_us_mse_dict = best_candidate_up_mse_dict
-                    top_ds_mse_dict = best_candidate_ds_mse_dict
-                    top_site_valid_conditions = best_candidate_valid_conditions
-                    top_site_supporting_cvgs = best_candidate_supporting_cvgs
-                    top_site_is_from_cluster = subsite_idx
-                # elif( best_candidate_ratio == top_mse_ratio and
-                #      top_site_prot_support < best_candidate_prot_support):
-                #     # do nothing and preferentially consider the more
-                #     # proximal site
-                #     continue
-                #     # top_site = best_candidate_site
-                #     # top_site_idx = site_idx
-                #     # top_site_prot_support = best_candidate_prot_support
-                #     # top_us_mse_dict = best_candidate_up_mse_dict
-                #     # top_ds_mse_dict = best_candidate_ds_mse_dict
-                #     # top_site_valid_conditions = best_candidate_valid_conditions
-                    
-        else:
-            # site is solitary
-
-            # only use the extended_segment_mse if necessary
-            if( (strand == "+" and (sites[site_idx][1].start
-                                    - segment_start
-                                    - best_break_point_upstream_extension) >= min_coverage_region) or
-                (strand == "-" and (segment_end
-                                    - site[1].end
-                                    - best_break_point_upstream_extension) >= min_coverage_region)):
-                extended_segment_mse = None
-
-            site = sites[site_idx]
-            (mse_ratios, 
-             us_mse_dict,
-             ds_mse_dict,
-             valid_conds_list) = check_mse_improvement(bp_coverages_dict,
-                                                       valid_mean_cvg_dict,
-                                                       region,
-                                                       site,
-                                                       strand,
-                                                       segment_start,
-                                                       segment_end,
-                                                       min_coverage_region,
-                                                       best_break_point_upstream_extension,
-                                                       mse_ratio_threshold,
-                                                       add_us_extension,
-                                                       segment_mse,
-                                                       extended_segment_mse)
-            
-            curr_best_median_ratio = 2
-            curr_supporting_cvgs = {}
-            for cond in valid_conds_list:
-                valid_ratios = [ i for i in mse_ratios[cond] if i < mse_ratio_threshold ]
-                curr_supporting_cvgs[cond] = [ i < mse_ratio_threshold for i in mse_ratios[cond]]
-                if np.median( valid_ratios ) < curr_best_median_ratio:
-                    curr_best_median_ratio = np.median( valid_ratios )
-            if curr_best_median_ratio < top_mse_ratio:
-                top_site = site
-                top_site_idx = site_idx
-                top_mse_ratio = curr_best_median_ratio
-                top_site_prot_support = site[2]
-                top_us_mse_dict = us_mse_dict
-                top_ds_mse_dict = ds_mse_dict
-                top_site_valid_conditions = valid_conds_list
-                top_site_supporting_cvgs = curr_supporting_cvgs
-                top_site_is_from_cluster = None
-            # elif( curr_best_median_ratio == top_mse_ratio and
-            #      top_site_prot_support < best_candidate_prot_support):
-            #     # do nothing and preferentially consider the more
-            #     # proximal site
-            #     continue
-            #     # top_site = site
-            #     # top_site_idx = site_idx
-            #     # top_site_prot_support = site[2]
-            #     # top_us_mse_dict = us_mse_dict
-            #     # top_ds_mse_dict = ds_mse_dict
-            #     # top_site_valid_conditions = valid_conds_list
-                
-    if top_site is not None:
-        # site with usage found
-
-        # update information for this site:
-        # valid conditions
-        top_site.append(top_site_valid_conditions)
-        # samples that indicate usage of this site
-        top_site.append(top_site_supporting_cvgs)
-        
-        upstream_sites = sites[:top_site_idx]
-        if len(upstream_sites) > 0:
-            # determine the characteristics 
-            # of the region upstream of the top site
-            if strand == "+":
-                up_seg_start = segment_start
-                # check if top site belonged to a set of closely spaced clusters
-                if top_site_is_from_cluster is None:
-                    up_seg_end = sites[top_site_idx][1].start
-                else:
-                    up_seg_end = sites[top_site_idx][top_site_is_from_cluster][1].start
-            else:
-                if top_site_is_from_cluster is None:
-                    up_seg_start = sites[top_site_idx][1].end
-                else:
-                    up_seg_start = sites[top_site_idx][top_site_is_from_cluster][1].end
-                up_seg_end = segment_end
-
-            if original_extended_segment_mse is not None:
-                # update the extended_segment_mse
-                extended_segment_mse = original_extended_segment_mse
-                for cond in bp_coverages_dict:
-                    extended_segment_mse[cond] = []
-                    for cvg_idx in range(len(bp_coverages_dict[cond])):
-                        cvg = bp_coverages_dict[cond][cvg_idx]
-                        if strand == "+":
-                            tmp_start_idx =  region.index(segment_start - add_us_extension[cond][cvg_idx])
-                            if top_site_is_from_cluster is None:
-                                tmp_end_idx = region.index(sites[top_site_idx][1].start)
-                            else:
-                                tmp_end_idx = region.index(sites[top_site_idx][top_site_is_from_cluster][1].start)
-                        else:
-                            if top_site_is_from_cluster is None:
-                                tmp_start_idx = region.index(sites[top_site_idx][1].end)
-                            else:
-                                tmp_start_idx = region.index(sites[top_site_idx][top_site_is_from_cluster][1].end)
-                            try:
-                                tmp_end_idx = region.index(segment_end + add_us_extension[cond][cvg_idx])
-                            except ValueError:
-                                # the end of the extended exon might not be accessible as position anymore
-                                tmp_end_idx = region.index(segment_end + add_us_extension[cond][cvg_idx] - 1) + 1
-
-                        extended_y = cvg[tmp_start_idx:tmp_end_idx]
-                        ext_seg_mean = np.mean( extended_y )
-                        ext_seg_diff = extended_y - ext_seg_mean
-                        ext_seg_mse = np.mean( ext_seg_diff**2)
-                        extended_segment_mse[cond].append( ext_seg_mse)
-                    
-            else:
-                # no extended_segment_mse necessary
-                extended_segment_mse = None       
-                
-        if top_site_idx < len(sites) - 2:
-            # downstream region also still contains potential sites
-            downstream_sites = sites[top_site_idx + 1:]
-            if strand == "+":
-                if top_site_is_from_cluster is None:
-                    ds_seg_start = sites[top_site_idx][1].end
-                else:
-                    ds_seg_start = sites[top_site_idx][top_site_is_from_cluster][1].end
-                ds_seg_end = segment_end
-            else:
-                ds_seg_start = segment_start
-                if top_site_is_from_cluster is None:
-                    ds_seg_end = sites[top_site_idx][1].start
-                else:
-                    ds_seg_end = sites[top_site_idx][top_site_is_from_cluster][1].start
-
-            if len(upstream_sites) > 0:
-                # up- and downstream remain sites that need to be checked
-                return( find_sites_with_usage(upstream_sites,
-                                              up_seg_start,
-                                              up_seg_end,
-                                              top_us_mse_dict,
-                                              extended_segment_mse,
-                                              bp_coverages_dict,
-                                              valid_mean_cvg_dict,
-                                              region,
-                                              strand,
-                                              min_coverage_region,
-                                              best_break_point_upstream_extension, 
-                                              mse_ratio_threshold, 
-                                              add_us_extension) + 
-                        [top_site] + 
-                        find_sites_with_usage(downstream_sites,
-                                              ds_seg_start,
-                                              ds_seg_end,
-                                              top_ds_mse_dict,
-                                              None, # do not provide an extended upstream mse
-                                              bp_coverages_dict,
-                                              valid_mean_cvg_dict,
-                                              region,
-                                              strand,
-                                              min_coverage_region,
-                                              best_break_point_upstream_extension, 
-                                              mse_ratio_threshold, 
-                                              add_us_extension))
-            else:
-                # only downstream sites remain to be checked
-                return( [top_site] + 
-                        find_sites_with_usage(downstream_sites,
-                                              ds_seg_start,
-                                              ds_seg_end,
-                                              top_ds_mse_dict,
-                                              None,
-                                              bp_coverages_dict,
-                                              valid_mean_cvg_dict,
-                                              region,
-                                              strand,
-                                              min_coverage_region,
-                                              best_break_point_upstream_extension, 
-                                              mse_ratio_threshold, 
-                                              add_us_extension))
-            
-        elif len(upstream_sites) > 0:
-            # only upstream sites remain to be checked
-            return( find_sites_with_usage(upstream_sites,
-                                          up_seg_start,
-                                          up_seg_end,
-                                          top_us_mse_dict,
-                                          extended_segment_mse,
-                                          bp_coverages_dict,
-                                          valid_mean_cvg_dict,
-                                          region,
-                                          strand,
-                                          min_coverage_region,
-                                          best_break_point_upstream_extension, 
-                                          mse_ratio_threshold, 
-                                          add_us_extension) + 
-                        [top_site] )
-                    
-        else:
-            # return the current site as only possible site with usage
-            return [top_site]
-            
-    else:
-        # no further site with usage was found
-        return []
-    
-def get_pA_set(exon_id, 
-               exon_structure, 
-               region, 
-               bp_coverages_dict, 
-               merged_exons, 
-               polyA_sites,
-               exon_ext_dict,
-               options):
-    """Define a set of used poly(A) sites"""
-
-    strand = exon_structure[1]
-    # save the strand specific exon start (used during distal site determination)
-    ex_start = exon_structure[2] if strand == "+" else exon_structure[3]
-    # store the poly(A) sites that are inferred to be used in the current exon
-    used_sites = []
-    # get the exon upstream extensions
-    add_us_ext = exon_ext_dict
-    
-    # # test
-    # syserr("[INFO] ##### Processing of exon %s #####\n" % exon_id)
-    
-    ######
-    # first part
-    ######
-    
-    ### define the distal site
-
-    distal_site = None
-    best_fit_pos = None
-    # iterate over the sites from distal to proximal
-    for site_idx in range(len(polyA_sites))[::-1]:
-            if isinstance(polyA_sites[site_idx][0], list):
-                # current "site" is a set of closely spaced sites
-                # that are processed together
-                site_found = False
-                # iterate over the set of sites, from distal to proximal
-                for subsite_idx in range(len(polyA_sites[site_idx]))[::-1]:
-                    site = polyA_sites[site_idx][subsite_idx]
-                    (best_fit_pos, best_fit) = find_distal_site(site,
-                                                                ex_start,
-                                                                strand,
-                                                                region,
-                                                                merged_exons, 
-                                                                options.max_downstream_coverage,
-                                                                options.read_length,
-                                                                options.ds_reg_for_no_coverage,
-                                                                options.debug)
-                    if best_fit == -1:
-                        # skip exon because it has no exon start coverage
-                        # or the currently processed site already overlaps
-                        # with the exon start
-                        site_found = True
-                        break
-                        
-                    if best_fit_pos is not None:
-                        # the current site meets the coverage criteria
-                        # and qualifies as the distal site
-                        distal_site = site
-                        site_found = True
-                        break
-                if site_found:
-                    # break also outer loop that runs over all poly(A)sites
-                    break
-                    
-            else:
-                # the current site is a solitary site
-                site = polyA_sites[site_idx]
-                (best_fit_pos, best_fit) = find_distal_site(site,
-                                                            ex_start,
-                                                            strand,
-                                                            region,
-                                                            merged_exons,
-                                                            options.max_downstream_coverage,
-                                                            options.read_length,
-                                                            options.ds_reg_for_no_coverage,
-                                                            options.debug)
-                if best_fit == -1:
-                    # skip exon because it has no exon start coverage
-                    # or the currently processed site already overlaps
-                    # with the exon start
-                    break
-                    
-                if best_fit_pos is not None:
-                    distal_site = site
-                    # the current site meets the coverage criteria
-                    # and qualifies as the distal site
-                    break
-
-    if distal_site is None:
-        if options.debug:
-            syserr("[INFO] No distal site found for %s. Exon skipped\n" 
-                   % exon_id)
-        return ([], {})
-        
-    # limit the set of sites to the ones upstream of the distal site
-    # (if the distal sites comes from a set of closely spaced sites
-    # the entire set is marked as distal and excluded from further analysis)
-    polyA_sites = polyA_sites[0:site_idx]
-
-    if len(polyA_sites) == 0:
-        if options.debug:
-            syserr("[INFO] No APA possible anymore after distal site definition for exon %s. Exon skipped\n" 
-                   % exon_id)
-        return ([], {})
-        
-    # adjust the exon end according to the distal site end
-    if strand == "+":
-        exon_structure[3] = distal_site[1].end
-    else:
-        exon_structure[2] = distal_site[1].start
-
-    ######
-    # second part
-    ######
-
-    # check if the mean coverage per sample is above the given cutoff
-    # return a dict with the conditions as keys and a list as value
-    # that holds 0 or 1 at idx-positions that correspond to the coverages
-    valid_mean_cvg_dict = assess_minimum_cvg(exon_structure, bp_coverages_dict, region, options.min_mean_coverage)
-
-    if sum([i for lst in valid_mean_cvg_dict.itervalues() for i in lst]) == 0:
-        if options.debug:
-            syserr("[INFO] No sample with sufficient coverage for %s. Exon skipped\n" 
-                   % exon_id)
-        return ([],valid_mean_cvg_dict)
-
-    ######
-    # third part
-    ######
-
-    # not necessary anymore
-
-    ######
-    # fourth part
-    ######
-            
-    # find sites with usage
-    
-    # define the mse of the total region for all samples
-    mse_total_region = {}
-    mse_total_ext_region = {}
-    for cond in bp_coverages_dict:
-        mse_total_region[cond] = []
-        mse_total_ext_region[cond] = []
-        cvg_idx = -1
-        for cvg in bp_coverages_dict[cond]:
-            cvg_idx += 1
-
-            # do not consider samples with insufficient mean coverage
-            if valid_mean_cvg_dict[cond][cvg_idx] == 0:
-                mse_total_region[cond].append(0)
-                mse_total_ext_region[cond].append(0)
-                continue
-            
-            if strand == "+":
-                start_idx = region.index(exon_structure[2])
-                extended_start_idx =  region.index(exon_structure[2] - add_us_ext[cond][cvg_idx])
-
-                end_idx = region.index(exon_structure[3])
-            else:
-                start_idx = region.index(exon_structure[2])
-                try:
-                    end_idx = region.index(exon_structure[3])
-                except ValueError:
-                    end_idx = region.index(exon_structure[3] - 1) + 1
-                try:
-                    extended_end_idx = region.index(exon_structure[3] + add_us_ext[cond][cvg_idx])
-                except ValueError:
-                    # the end of the extended exon might not be accessible as position anymore
-                    extended_end_idx = region.index(exon_structure[3] + add_us_ext[cond][cvg_idx] - 1) + 1
-
-            if strand == "+":
-                y = cvg[start_idx:end_idx]
-                extended_y = cvg[extended_start_idx:end_idx]
-            else:
-                # for the mse calculation, the orientation of the coverage is irrelevant
-                y = cvg[start_idx:end_idx]
-                extended_y = cvg[start_idx:extended_end_idx]
-            reg_mean = np.mean( y )
-            reg_diff = y - reg_mean
-            reg_mse = np.mean( reg_diff**2)
-            mse_total_region[cond].append(reg_mse)
-            
-            # also calculate the mse 
-            # with the exon upstream extension (might be used later)
-            ext_reg_mean = np.mean( extended_y )
-            ext_reg_diff = extended_y - ext_reg_mean
-            ext_reg_mse = np.mean( ext_reg_diff**2)
-            mse_total_ext_region[cond].append(ext_reg_mse)
-
-    used_sites = find_sites_with_usage(polyA_sites,
-                                       exon_structure[2],
-                                       exon_structure[3],
-                                       mse_total_region,
-                                       mse_total_ext_region,
-                                       bp_coverages_dict,
-                                       valid_mean_cvg_dict,
-                                       region,
-                                       strand,
-                                       options.min_coverage_region,
-                                       options.best_break_point_upstream_extension, 
-                                       options.mse_ratio_threshold, 
-                                       add_us_ext)
-
-    # append distal site
-    used_sites.append(distal_site)
-
-    ######
-    # fifth part
-    ######
-
-    # go over the exon once more
-    # check if any better global break point exists
-
-    unsupported_break_point_cases = 0
-    if len(used_sites) > 1:
-        # create a list containing all genomic positions covered by the 
-        # used poly(A) sites
-        still_undetected = used_sites[:-1]
-
-        # prepare individual dicts for every sample:
-
-        # genomic pos' that are covered by sites in each sample
-        used_sites_positions = {}
-        # exon boundaries
-        curr_ex_start = exon_structure[2]
-        curr_ex_start_dict = {}
-        for cond in bp_coverages_dict:
-            curr_ex_start_dict[cond]=[]
-            for i in range(len(bp_coverages_dict[cond])):
-                # initialize the exon start with the annotated start pos
-                # append the extended upstream region later
-                curr_ex_start_dict[cond].append(curr_ex_start)
-        curr_ex_end = exon_structure[3]
-        curr_ex_end_dict = {}
-        # same for exon end that is appended by the upstream region
-        # later in the case of minus strand exons
-        for cond in bp_coverages_dict:
-            curr_ex_end_dict[cond]=[]
-            for i in range(len(bp_coverages_dict[cond])):
-                curr_ex_end_dict[cond].append(curr_ex_end)
-        
-        # treat the most proximal site separately;
-        # here, for different samples the site might cover different genomic regions because of
-        # variable upstream_extend_regions
-        if strand == "+":
-            start_pos = used_sites[0][1].start - options.best_break_point_upstream_extension
-            end_pos = used_sites[0][1].end
-            end_pos_idx = region.index(end_pos)
-        else:
-            start_pos = used_sites[0][1].start
-            start_pos_idx = region.index(start_pos)
-            end_pos = used_sites[0][1].end + options.best_break_point_upstream_extension
-        for cond in bp_coverages_dict:
-            # also, prepare the dict to store the covered genomic positions
-            used_sites_positions[cond] = []
-            
-            for cvg_idx in range(len(bp_coverages_dict[cond])):
-                used_sites_positions[cond].append({})
-
-                if cond not in still_undetected[0][3]:
-                    # this site is not supported by any sample of this condition
-                    # hence, no dict preparation necessary
-                    continue
-
-                # only treat the current sample if it supports any of the
-                # inferred poly(A) sites
-                if not still_undetected[0][4][cond][cvg_idx]:
-                    continue
-                
-                curr_add_us_ext = add_us_ext[cond][cvg_idx]
-                if strand == "+":
-                    if start_pos < curr_ex_start - curr_add_us_ext:
-                        # start pos (site start minus best-break-point extension) is even
-                        # further upstream than the extended exon; hence, use the extended
-                        # exon as boundary
-                        start_pos_idx = region.index( curr_ex_start - curr_add_us_ext )
-                        curr_ex_start_dict[cond][cvg_idx] -= curr_add_us_ext
-                    elif start_pos < curr_ex_start:
-                        # start pos is more upstream than the unextended exon start;
-                        # hence, use this start_pos but adjust exon start pos
-                        start_pos_idx = region.index( start_pos )
-                        curr_ex_start_dict[cond][cvg_idx] -= curr_add_us_ext
-                    else:
-                        # start_pos is within normal exon annotation
-                        start_pos_idx = region.index(start_pos)
-                else:
-                    # same for negative strand exons; see comments above for explanation
-                    if end_pos >= curr_ex_end + curr_add_us_ext:
-                        end_pos_idx = region.index( curr_ex_end + curr_add_us_ext -1 ) + 1
-                        curr_ex_end_dict[cond][cvg_idx] += curr_add_us_ext
-                    elif end_pos > curr_ex_end:
-                        end_pos_idx = region.index( end_pos )
-                        curr_ex_end_dict[cond][cvg_idx] += curr_add_us_ext
-                    else:
-                        end_pos_idx = region.index(end_pos)
-                for i in range(start_pos_idx, end_pos_idx):
-                    used_sites_positions[cond][cvg_idx][i] = 0
-        
-        # iterate over all other sites except the most proximal one
-        # for these sites, the covered genomic positions are all the same for all samples
-        for site_idx in range(1,len(still_undetected)):
-
-            if strand == "+":
-                start_pos = (used_sites[site_idx][1].start
-                             - options.best_break_point_upstream_extension)
-
-                # first: make the region as long as necessary but not longer than possible
-                if start_pos < used_sites[site_idx - 1][1].end:
-                    start_pos_idx = region.index( used_sites[site_idx - 1][1].end )
-                else:
-                    start_pos_idx = region.index(start_pos)
-
-                end_pos = used_sites[site_idx][1].end
-                end_pos_idx = region.index(end_pos)
-            else:
-                start_pos = used_sites[site_idx][1].start
-                start_pos_idx = region.index(start_pos)
-                
-                end_pos = (used_sites[site_idx][1].end
-                           + options.best_break_point_upstream_extension)
-
-                # first: make the region as long as necessary but not longer than possible
-                if end_pos > used_sites[site_idx - 1][1].start:
-                    end_pos_idx = region.index( used_sites[site_idx - 1][1].start)
-                else:
-                    end_pos_idx = region.index(end_pos)
-
-            # mark the genomic positions for all relevant samples
-            for cond in bp_coverages_dict:
-                if cond not in still_undetected[site_idx][3]:
-                    # this site is not supported by any sample of this condition
-                    # hence, no dict preparation necessary
-                    continue
-                for cvg_idx in range(len(bp_coverages_dict[cond])):
-                    # only treat the current sample if it supports any of the
-                    # inferred poly(A) sites
-                    if not still_undetected[site_idx][4][cond][cvg_idx]:
-                        continue
-                    for i in range(start_pos_idx, end_pos_idx):
-                        used_sites_positions[cond][cvg_idx][i] = site_idx
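-
-        # editorial note: at this point used_sites_positions[cond][cvg_idx]
-        # maps every region index covered by a supported site to that
-        # site's index (e.g. {1200: 0, 1201: 0, ..., 1450: 1, ...});
-        # positions between sites remain absent, which the global break
-        # point check below relies on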
-
-
-        # if the proximal site is close to the exon start, the extended
-        # exon is used during inference of the best break point (this was
-        # ensured above when the exon start positions were set individually
-        # for every sample)
-
-        # iterate over all samples and infer the global best break point(s)
-        # separately for each sample when this sample supports any
-        # inferred poly(A) site
-        for cond in bp_coverages_dict:
-            for cvg_idx in range(len(bp_coverages_dict[cond])):
-                sample_specific_still_undetected = []
-                # collect the sites that are supported by the current sample
-                for site_idx in range(len(still_undetected)):
-                    if cond not in still_undetected[site_idx][3]:
-                        # this site is not supported by any sample of this condition
-                        sample_specific_still_undetected.append( None )
-                        continue
-                    if still_undetected[site_idx][4][cond][cvg_idx]:
-                        sample_specific_still_undetected.append( still_undetected[site_idx] )
-                    else:
-                        sample_specific_still_undetected.append( None )
-
-                if len([i for i in sample_specific_still_undetected if i is not None]) == 0:
-                    # this sample does not support any poly(A) site;
-                    # skip global break point inference for it
-                    continue
-
-
-                start_idx = region.index(curr_ex_start_dict[cond][cvg_idx])
-                end_idx = region.index(curr_ex_end_dict[cond][cvg_idx] - 1) + 1
-
-                global_mse_result = check_global_best_breaking_points(bp_coverages_dict[cond][cvg_idx],
-                                                                      cond,
-                                                                      cvg_idx,
-                                                                      region,
-                                                                      used_sites_positions[cond][cvg_idx],
-                                                                      sample_specific_still_undetected,
-                                                                      start_idx,
-                                                                      end_idx,
-                                                                      options.min_coverage_region)
-
-                if isinstance(global_mse_result, tuple):
-                    if global_mse_result[1] is None:
-                        syserr("[ERROR] No (further) break point found for exon: %s\n" 
-                               % exon_id)
-                        return ([], valid_mean_cvg_dict)
-                    else:
-                        # unsupported break point for current coverage detected
-                        valid_mean_cvg_dict[cond][cvg_idx] = 0
-                        unsupported_break_point_cases += 1
-                        # retract support for any site by this sample
-                        for tmp_site_idx in range(len(used_sites) - 1):
-                            if cond in used_sites[tmp_site_idx][3]:
-                                used_sites[tmp_site_idx][4][cond][cvg_idx] = False
-                        continue
-
-    else:
-        # only the distal site was inferred
-        if options.debug:
-            syserr("[INFO] No additional sites except for the distal one inferred for %s. Exon skipped\n" 
-                   % exon_id)
-        return ([], valid_mean_cvg_dict)
-
-    if unsupported_break_point_cases > 0:
-        syserr("[INFO] For exon %s, %i samples are not further processed due to unsupported global break points in their coverage\n" % (exon_id, unsupported_break_point_cases))
-    return (used_sites, valid_mean_cvg_dict)
-    
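-# Editorial sketch, not part of the original pipeline: the per-region fit
-# statistic computed in get_pA_set above is the mean squared deviation of
-# the coverage from its own mean. The helper below is hypothetical and is
-# not called anywhere in this script.
-def _segment_mse(coverage_slice):
-    """Return the mean squared error of a coverage slice around its mean."""
-    y = np.asarray(coverage_slice, dtype=float)
-    return float(np.mean((y - np.mean(y)) ** 2))
-# e.g. _segment_mse([10, 10, 10, 10]) -> 0.0 (flat segment)
-#      _segment_mse([20, 0, 20, 0]) -> 100.0 (variable segment)
-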
-def get_relative_usages(used_sites,
-                        exon_structure,
-                        exon_id,
-                        region,
-                        bp_coverages_dict,
-                        valid_mean_cvg_dict,
-                        read_length,
-                        min_coverage_region,
-                        add_us_extension):
-    """For the obtained sites, infer the relative 
-    usage based on the mean expression of regions"""
-    
-    # define the indices that mark the segment boundaries
-    # store them from proximal to distal
-    strand = exon_structure[1]
-    region_means = {}
-    ds_corrected_means = {}
-    relative_usages = {}
-    expressions = {}
-
-    for cond in bp_coverages_dict:
-        region_means[cond] = []
-        relative_usages[cond] = []
-        expressions[cond] = []
-        ds_corrected_means[cond] = []
-        cvg_counter = -1
-        for cvg in bp_coverages_dict[cond]:
-            cvg_counter += 1
-            
-            if strand == "+":
-                start_idx = region.index( exon_structure[2] )
-                if used_sites[0][1].start - read_length - min_coverage_region < exon_structure[2]:
-                    start_idx = region.index( exon_structure[2] - add_us_extension[cond][cvg_counter] )
-                try:
-                    end_idx = region.index( used_sites[0][1].start - read_length )
-                except ValueError:
-                    end_idx = region.index( exon_structure[2] - add_us_extension[cond][cvg_counter] + 1)
-                if end_idx - start_idx < min_coverage_region:
-                    syserr(("[INFO] Distance from proximal site to exon start of exon %s cond %s sample #%i " +
-                            "is very small despite upstream extension (%i nt). For relative usage inference, " +
-                            "the read length is not excluded from the upstream region of the proximal site\n")
-                           % (exon_id, cond, cvg_counter, add_us_extension[cond][cvg_counter]))
-                    end_idx = region.index( used_sites[0][1].start )
-            else:
-                if used_sites[0][1].end + read_length + min_coverage_region > exon_structure[3]:
-                    end_idx = region.index( exon_structure[3] + add_us_extension[cond][cvg_counter] - 1) + 1
-                else:
-                    end_idx = region.index( exon_structure[3] - 1) + 1
-                try:
-                    start_idx = region.index( used_sites[0][1].end + read_length )
-                except ValueError:
-                    # fall back to an empty slice; the minimum-size check below
-                    # then resets the boundary to the site end itself
-                    start_idx = end_idx = region.index( exon_structure[3] + add_us_extension[cond][cvg_counter] - 2) + 1
-                if end_idx - start_idx < min_coverage_region:
-                    syserr(("[INFO] Distance from proximal site to exon start of exon %s cond %s sample #%i " +
-                            "is very small despite upstream extension (%i nt). For relative usage inference, " +
-                            "the read length is not excluded from the upstream region of the proximal site\n")
-                           % (exon_id, cond, cvg_counter, add_us_extension[cond][cvg_counter]))
-                    start_idx = region.index( used_sites[0][1].end )
-
-            region_means[cond].append([])
-            relative_usages[cond].append([])
-            expressions[cond].append([])
-            ds_corrected_means[cond].append([])
-            if valid_mean_cvg_dict[cond][cvg_counter] == 0:
-                region_means[cond][cvg_counter].append(0)
-            else:
-                y = cvg[start_idx:end_idx]
-                region_means[cond][cvg_counter].append(np.mean(y))
-            
-    # iterate over all poly(A) sites (from prox to dist) and get the mean 
-    for site_idx in range(1, len(used_sites)):
-        if strand == "+":
-            start_idx = region.index( used_sites[site_idx-1][1].end )
-            end_idx = region.index( used_sites[site_idx][1].start - read_length )
-        else:
-            start_idx = region.index( used_sites[site_idx][1].end  + read_length )
-            end_idx = region.index( used_sites[site_idx-1][1].start )
-        
-        # consistency check: is region big enough
-        if end_idx - start_idx < min_coverage_region:
-            syserr(("[ERROR] Upstream region of site %s is too small " +
-                    "for reliable mean estimation\n") % used_sites[site_idx][0])
-            return {}, {}
-    
-        for cond in bp_coverages_dict:
-            counter = -1
-            for cvg in bp_coverages_dict[cond]:
-                counter += 1
-                if valid_mean_cvg_dict[cond][counter] == 0:
-                    region_means[cond][counter].append(0)
-                else:
-                    y = cvg[start_idx:end_idx]
-                    region_means[cond][counter].append(np.mean(y))
-                
-    
-    # now, all mean values are obtained
-    # next: calculate downstream corrected mean values
-    # from distal to proximal site
-    for cond in region_means:
-        for sample_arr_idx in range(len(region_means[cond])):
-            if valid_mean_cvg_dict[cond][sample_arr_idx] == 0:
-                continue
-            ds_corrected_means[cond][sample_arr_idx].append( region_means[cond][sample_arr_idx][-1] )
-            
-    for site_idx in reversed(range(len(used_sites) -1)):
-        for cond in region_means:
-            for sample_arr_idx in range(len(region_means[cond])):
-                if valid_mean_cvg_dict[cond][sample_arr_idx] == 0:
-                    continue
-                downstream_corrected_mean = region_means[cond][sample_arr_idx][site_idx] - sum(ds_corrected_means[cond][sample_arr_idx])
-                if downstream_corrected_mean < 0:
-                    downstream_corrected_mean = 0
-                ds_corrected_means[cond][sample_arr_idx].insert(0, downstream_corrected_mean)
-
-    # iterate once more over the mean values and calculate actual relative usage values
-    for site_idx in range(len(used_sites)):
-        for cond in region_means:
-            for sample_arr_idx in range(len(region_means[cond])):
-                if valid_mean_cvg_dict[cond][sample_arr_idx] == 0:
-                    relative_usages[cond][sample_arr_idx].append(-1.0)
-                    expressions[cond][sample_arr_idx].append(-1.0)
-                else:
-                    rel_use = float("{0:.2f}".format( float(ds_corrected_means[cond][sample_arr_idx][site_idx]) / sum(ds_corrected_means[cond][sample_arr_idx]) * 100 ) )
-                    expr = float("{0:.2f}".format( float(ds_corrected_means[cond][sample_arr_idx][site_idx]) ))
-                    relative_usages[cond][sample_arr_idx].append(rel_use)
-                    expressions[cond][sample_arr_idx].append(expr)
-                
-    return relative_usages, expressions
-
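-# Editorial sketch, not part of the original pipeline: a worked example of
-# the downstream correction performed in get_relative_usages. The helper
-# below is hypothetical and unused elsewhere.
-def _downstream_corrected(segment_means):
-    """Subtract the summed downstream signal from each segment mean,
-    walking from the distal to the proximal segment."""
-    corrected = [float(segment_means[-1])]
-    for mean in reversed(segment_means[:-1]):
-        corrected.insert(0, max(float(mean) - sum(corrected), 0.0))
-    return corrected
-# e.g. _downstream_corrected([10, 6, 2]) -> [4.0, 4.0, 2.0], i.e. relative
-# usages of 40%, 40% and 20% after normalizing by the total of 10
-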
-def process_exon_wrapper_function( input_tuple ):
-    """Wrapper function to conduct the 
-    actual processing of each exon and its sites"""
-
-    exon, exon_structure, region, bp_coverages_dict, merged_exons, polyA_sites, exon_ext_dict, options = list(input_tuple)
-
-    (used_sites, valid_mean_cvg_dict) = get_pA_set(exon, 
-                                                   exon_structure, 
-                                                   region, 
-                                                   bp_coverages_dict, 
-                                                   merged_exons, 
-                                                   polyA_sites,
-                                                   exon_ext_dict,
-                                                   options)
-
-    if sum([i for j in valid_mean_cvg_dict for i in valid_mean_cvg_dict[j]]) == 0:
-        # no sample had proper global best break points
-        syserr("[INFO] For no sample global best break points and inferred sites matched for exon %s. Skipped exon!\n"
-               % str(exon))
-        return (exon, [],[],[])
-
-    # iterate over all sites except the distal one
-    # it might occur that a site lost support by all its supporting coverages
-    # due to global break point inconsistencies. -> Then, remove this site
-    to_del_idx = []
-    for site_idx in range(len(used_sites) - 1):
-        supporting_samples = sum([i for b in used_sites[site_idx][3] for i in used_sites[site_idx][4][b] ] )
-        if 0 == supporting_samples:
-            to_del_idx.append(site_idx)
-
-    for site_idx in sorted(to_del_idx)[::-1]:
-        del used_sites[ site_idx ]
-
-    if len(to_del_idx) > 0:
-        syserr("[INFO] Number of sites was reduced for exon %s. They had no support by any sample anymore due to global break point inconsistencies\n"
-               % str(exon))
-        
-
-    if len(used_sites) > 1:
-        relative_usage_dict, expression_dict = get_relative_usages(used_sites,
-                                                                   exon_structure,
-                                                                   exon,
-                                                                   region,
-                                                                   bp_coverages_dict,
-                                                                   valid_mean_cvg_dict,
-                                                                   options.read_length,
-                                                                   options.min_coverage_region,
-                                                                   exon_ext_dict)
-
-
-        if len(relative_usage_dict) > 0:
-            rel_use_array = []
-            expression_array = []
-            for site_idx in range(len(used_sites)):
-                conditions_counter = {}
-                rel_use_array.append([])
-                expression_array.append([])
-                for file_idx in range(len(options.coverages)):
-                    condition = options.conditions[file_idx]
-                    if condition not in conditions_counter:
-                        conditions_counter[condition] = -1
-                    conditions_counter[condition] += 1
-                    sample = conditions_counter[condition]
-                    rel_use_array[-1].append( relative_usage_dict[condition][ sample ][site_idx])
-                    expression_array[-1].append( expression_dict[condition][ sample ][site_idx])
-
-            return (exon, used_sites, rel_use_array, expression_array)
-
-        else:
-            # no relative usages obtained
-            syserr("[ERROR] Failed to infer relative usages for %s\n" 
-                   % str(exon) )
-            return (exon, [],[], [])
-    else:
-        # only a single site was inferred
-        syserr("[INFO] Only a single site was inferred for %s. Skipped exon!\n"
-               % str(exon))
-        return (exon, [],[], [])
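-
-# Editorial note: the tuple-unpacking wrapper above exists because
-# multiprocessing.Pool.map passes exactly one argument per work item and
-# Python 2 offers no Pool.starmap; bundling the per-exon state into a tuple
-# (see data_entries in main below) is the usual workaround.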
-
-def main(options):
-    """Main logic of the script"""
-
-    # initialize the pool for multiprocessing
-    pool = multiprocessing.Pool( options.proc )
-
-    expression_output_file_name = options.expressions_out
-    
-    ###
-    # load poly(A) sites
-    ###
-
-    # store the terminal exons
-    terminal_exons_dict = {}
-
-    # store the exon upstream extension size per sample
-    term_ex_us_ex_dict = {}
-    
-    with open(options.polyA) as f:
-        for line in f:
-            F = line.strip().split("\t")
-            chrom = F[0]
-            strand = F[5]
-            if F[8] not in terminal_exons_dict:
-                # save stats for this exon and an empty list for the poly(A) sites
-                # adjust the start coordinate of the exon to match
-                # HTSeq and bed conventions
-                exon_stats = F[8].split(":")
-                exon_start = int(exon_stats[3]) - 1
-                exon_end = int(exon_stats[4])
-                terminal_exons_dict[ F[8] ] = [chrom, strand, exon_start, exon_end, [] ]
-            # for each poly(A) site store: id, HTSeq.GenomicInterval of its location, protocol support
-            terminal_exons_dict[ F[8] ][4].append( [ F[3], HTSeq.GenomicInterval( chrom, int(F[1]), int(F[2]), strand ), F[4] ] )
-            term_ex_us_ex_dict[ F[8] ] = {}
-            
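-    # editorial note: each line of --clusters is an extended BED entry
-    # whose 9th column (F[8]) is a colon-separated exon identifier; its
-    # first field is the transcript id and its 4th and 5th fields are the
-    # 1-based exon start and the exon end, hence the -1 shift above to
-    # match the 0-based HTSeq/BED convention
-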
-    # go through list of all terminal exons and separate two lists:
-    # 1. exons with clusters that assemble in close proximity
-    # 2. exons with distinct clusters
-    # clusters are expected to be sorted proximal -> distal
-    for exon in terminal_exons_dict:
-        strand = terminal_exons_dict[exon][1]
-        list_of_close_clusters = []
-        close_cluster_idxs = set()
-        polyA_sites = terminal_exons_dict[ exon ][4]
-        for cl_idx in range(1, len(polyA_sites)):
-            prox_site = polyA_sites[cl_idx -1]
-            dist_site = polyA_sites[cl_idx]
-            if ( (strand == "+" and (dist_site[1].start - prox_site[1].end) < options.min_cluster_distance) or
-            (strand == "-" and (prox_site[1].start - dist_site[1].end) < options.min_cluster_distance) ):
-                # note these two sites as interferring
-                if (cl_idx - 1) in close_cluster_idxs:
-                    # simply update the set of close clusters
-                    close_cluster_idxs.update([cl_idx])
-                else:
-                    # remove all clusters that are in the set so far
-                    # store them separately as one unit
-                    # update the set of closely spaced sites
-                    if len(close_cluster_idxs) > 0:
-                        if len(close_cluster_idxs) < 2:
-                            syserr("[ERROR] set of closely spaced sites has less than 2 entries (%s) for exon %s\n" % (str(close_cluster_idxs), exon))
-                            sys.exit(2) 
-                        list_of_close_clusters.append(sorted(close_cluster_idxs))
-                        close_cluster_idxs = set()
-                    close_cluster_idxs.update([(cl_idx-1), cl_idx])
-        # empty set if necessary
-        if len(close_cluster_idxs) > 0:
-            list_of_close_clusters.append(sorted(close_cluster_idxs))
-                        
-        if len(list_of_close_clusters) > 0:
-            new_pA_list = []
-            last_processed_pA = 0
-            for close_clusters in list_of_close_clusters:
-                for idx in range(last_processed_pA,close_clusters[0]):
-                    new_pA_list.append(polyA_sites[idx])
-                tmp_list = []
-                for close_cl in close_clusters:
-                    tmp_list.append(polyA_sites[close_cl])
-                new_pA_list.append(tmp_list)
-                last_processed_pA = close_clusters[-1] + 1
-            for idx in range(last_processed_pA, len(polyA_sites)):
-                new_pA_list.append(polyA_sites[idx])
-            terminal_exons_dict[exon][4] = new_pA_list
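-
-            # editorial note: for sites [A, B, C, D] where B and C lie
-            # within min_cluster_distance of each other, the list becomes
-            # [A, [B, C], D]; such nested lists are detected later in main
-            # via isinstance(..., list) when the exon boundaries are
-            # adjusted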
-
-    ###
-    # load the coverages
-    # and conditions
-    ###
-
-    coverage_pkl_dict = {}
-    for pkl_idx in range(len(options.coverages)):
-        extension_file = None
-        pkl_file = options.coverages[pkl_idx]
-        
-        sample_basename = pkl_file.replace(".pkl", "")
-        sample_basename = os.path.basename(sample_basename)
-        for tmp_file in options.ex_extension_files:
-            if sample_basename in tmp_file:
-                extension_file = tmp_file
-                
-        if options.conditions[pkl_idx] not in coverage_pkl_dict:
-            coverage_pkl_dict[options.conditions[pkl_idx]] = []
-            # prepare the dict for the exon upstream extensions
-            for ex in term_ex_us_ex_dict:
-                term_ex_us_ex_dict[ex][options.conditions[pkl_idx]] = []
-
-        if not pkl_file.endswith("pkl"):
-            syserr("[ERROR] No proper pkl file as input: %s\n" % pkl_file)
-            sys.exit(2)
-
-        with open(pkl_file, 'rb') as pkl_in:
-            cvg = cPickle.load(pkl_in)
-            coverage_pkl_dict[options.conditions[pkl_idx] ].append(cvg)
-
-        number_of_files = len( coverage_pkl_dict[options.conditions[pkl_idx] ] )
-        if extension_file is not None and os.path.isfile(extension_file):
-            with open(extension_file, "r") as ext_file:
-                for line in ext_file:
-                    F = line.rstrip().split("\t")
-
-                    # append the extension for all exons found in the file
-                    term_ex_us_ex_dict[F[0]][ options.conditions[pkl_idx] ].append( int(F[1]) )
-        # if an exon was not in the extension file
-        # store 0 as extension
-        for ex in term_ex_us_ex_dict:
-            if len( term_ex_us_ex_dict[ex][options.conditions[pkl_idx] ] ) == number_of_files - 1:
-                term_ex_us_ex_dict[ex][ options.conditions[pkl_idx] ].append(0)
-            elif len( term_ex_us_ex_dict[ex][options.conditions[pkl_idx] ] ) != number_of_files:
-                syserr(("[ERROR] Number of entries for exon extensions of ex %s does " +
-                        "not match current number of processed pickle files for cond %s\n")
-                       % (ex, options.conditions[pkl_idx]))
-                sys.exit(2)
-
-    # Finished reading input
-    syserr("[INFO] %s Finished reading input\n" % time_now())
-    
-    ###
-    # start iterating over all exons:
-    ###
-
-    # stats
-    nr_skipped_exons = 0
-    nr_no_negative_slope_fitted = 0
-    nr_too_little_cov = 0
-    
-    data_entries = []
-
-    for exon in terminal_exons_dict:
-        
-        exon_structure = terminal_exons_dict[ exon ][0:4]
-        chrom = exon_structure[0]
-        strand = exon_structure[1]
-        start = exon_structure[2]
-        end = exon_structure[3]
-
-        polyA_sites = terminal_exons_dict[ exon ][4]
-
-        max_ex_upstream_ext = max( [i for k in term_ex_us_ex_dict[exon] for i in term_ex_us_ex_dict[exon][k] ] )
-        
-        # extend the exon 3' boundary if the most distal site
-        # ends farther downstream than the annotated exon end
-        if strand == "+":
-            if isinstance(polyA_sites[-1][0], list) and polyA_sites[-1][-1][1].end > end:
-                end = polyA_sites[-1][-1][1].end
-            if not isinstance(polyA_sites[-1][0], list) and polyA_sites[-1][1].end > end:
-                end = polyA_sites[-1][1].end
-                
-        else:
-            if isinstance(polyA_sites[-1][0], list) and polyA_sites[-1][-1][1].start < start:
-                start = polyA_sites[-1][-1][1].start
-            if not isinstance(polyA_sites[-1][0], list) and polyA_sites[-1][1].start < start:
-                start = polyA_sites[-1][1].start
-            
-        ### merge the coverages of this exon (used for distal site definition)
-        if strand == "+":
-            region = range( (start - max_ex_upstream_ext ), (end + options.ds_reg_for_no_coverage) )
-        else:
-            region = range( (start - options.ds_reg_for_no_coverage), (end + max_ex_upstream_ext) )
-        bp_coverages_dict = {}
-        merged_exons = np.zeros(end - start + options.ds_reg_for_no_coverage + max_ex_upstream_ext)
-        for cond in coverage_pkl_dict:
-            bp_coverages_dict[cond] = []
-            for cvg in coverage_pkl_dict[cond]:
-            
-                # extend the 3' end to be able to check the downstream region as well
-                # extend the 5' start to allow handling of proximal site close to the exon start
-                if strand == "+":
-                    curr_cov_arr = np.array( list( cvg[
-                        HTSeq.GenomicInterval( chrom, 
-                                               (start - max_ex_upstream_ext), 
-                                               (end + options.ds_reg_for_no_coverage), 
-                                               strand)
-                    ]
-                    )
-                    )
-                    
-                else:
-                    # do NOT reverse coverage array because
-                    # it needs to match the region list
-                    curr_cov_arr = np.array( list( cvg[
-                        HTSeq.GenomicInterval( chrom, 
-                                               (start - options.ds_reg_for_no_coverage), 
-                                               (end + max_ex_upstream_ext), 
-                                               strand)
-                    ]
-                    )
-                    )
-                bp_coverages_dict[cond].append(curr_cov_arr)
-                merged_exons = merged_exons + curr_cov_arr
-
-        data_entries.append((exon, exon_structure, region, bp_coverages_dict, merged_exons, polyA_sites, term_ex_us_ex_dict[exon], options))
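-
-        # editorial note: indexing an HTSeq GenomicArray with a
-        # GenomicInterval yields one value per genomic position, so each
-        # curr_cov_arr above aligns index-for-index with `region`;
-        # merged_exons sums these arrays over all samples and is used only
-        # for the distal site definition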
-
-    result_tuples = pool.map( process_exon_wrapper_function, data_entries)
-
-    with open(expression_output_file_name, "w") as ofile:
-
-        for res_tu in result_tuples:
-            exon = res_tu[0]
-            used_sites = res_tu[1]
-            rel_use_array = res_tu[2]
-            expression_array = res_tu[3]
-            if len(used_sites) == 0 and len(rel_use_array) == 0:
-                continue
-            elif len(used_sites) == 0 or len(rel_use_array) == 0:
-                syserr(("[ERROR] length of list with inferred sites: %i\n" +
-                        "length of list with relative usages: %i\n")
-                       % (len(used_sites), len(rel_use_array)))
-                syserr("[ERROR] Only the case of both lists have zero length " +
-                   "is expected\n")
-                sys.exit(2)
-            else: 
-                for idx in range(len(rel_use_array)):
-                    rel_use_array[idx] = "\t".join(str(x) for x in rel_use_array[idx])
-                    expression_array[idx] = "\t".join(str(x) for x in expression_array[idx])
-                total_number_of_sites = len(used_sites)
-                transcript = exon.split(":")[0]
-                cnt = 0
-                for site_idx in range(len(used_sites)):
-                    site = used_sites[site_idx]
-                    cnt += 1
-                    sysout("%s\t%i\t%i\t%s\t%s\t%s\t%i\t%i\t%s\t%s\t%s\n" % (site[1].chrom,
-                                                                         site[1].start,
-                                                                         site[1].end,
-                                                                         site[0],
-                                                                         site[2],
-                                                                         site[1].strand,
-                                                                         cnt,
-                                                                         total_number_of_sites,
-                                                                         exon,
-                                                                         transcript,
-                                                                         rel_use_array[site_idx]))
-
-                    ofile.write("%s\t%i\t%i\t%s\t%s\t%s\t%i\t%i\t%s\t%s\t%s\n" % (site[1].chrom,
-                                                                                  site[1].start,
-                                                                                  site[1].end,
-                                                                                  site[0],
-                                                                                  site[2],
-                                                                                  site[1].strand,
-                                                                                  cnt,
-                                                                                  total_number_of_sites,
-                                                                                  exon,
-                                                                                  transcript,
-                                                                                  expression_array[site_idx]))
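-
-    # editorial note on the layout written above: one row per site with
-    # columns chrom, start, end, site id, protocol support, strand, site
-    # rank (proximal to distal), total number of sites, exon id,
-    # transcript id, followed by one tab-separated value per sample
-    # (relative usage on stdout, expression in the output file)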
-
-
-if __name__ == '__main__':
-    try:
-        try:
-            options = parser.parse_args()
-        except Exception, e:
-            parser.print_help()
-            sys.exit()
-        # record the start time unconditionally so that the
-        # KeyboardInterrupt handler below can always report it
-        start_time = time.time()
-        if options.verbose:
-            start_date = time.strftime("%d-%m-%Y at %H:%M:%S")
-            syserr("############## Started script on %s ##############\n" %
-                   start_date)
-
-        main(options)
-        if options.verbose:
-            syserr("### Successfully finished in %i seconds, on %s ###\n" %
-                   (time.time() - start_time,
-                    time.strftime("%d-%m-%Y at %H:%M:%S")))
-    except KeyboardInterrupt:
-        syserr("Interrupted by user after %i seconds!\n" %
-               (time.time() - start_time))
-        sys.exit(-1)