Skip to content
Snippets Groups Projects
Select Git revision
  • 726f8e36cf5c869b1a73c583840dc5bb85afd12d
  • master default protected
  • develop protected
  • cmake_boost_refactor
  • ubuntu_ci
  • mmtf
  • non-orthogonal-maps
  • no_boost_filesystem
  • data_viewer
  • 2.11.1
  • 2.11.0
  • 2.10.0
  • 2.9.3
  • 2.9.2
  • 2.9.1
  • 2.9.0
  • 2.8.0
  • 2.7.0
  • 2.6.1
  • 2.6.0
  • 2.6.0-rc4
  • 2.6.0-rc3
  • 2.6.0-rc2
  • 2.6.0-rc
  • 2.5.0
  • 2.5.0-rc2
  • 2.5.0-rc
  • 2.4.0
  • 2.4.0-rc2
29 results

stutil.py

Blame
  • user avatar
    Marco Biasini authored
    34cb9751
    History
    stutil.py 1.76 KiB
    import math
    
    def Mean(xs):
      """
      Return the arithmetic mean of the values in xs as a float.

      Raises RuntimeError if xs is empty.
      """
      count=len(xs)
      if count==0:
        raise RuntimeError("Can't calculate mean of empty sequence")
      total=sum(xs)
      # Convert before dividing so integer input never triggers integer division.
      return float(total)/count
    
    def Median(xs):
      """
      Calculate median of dataset

      The input is sorted into a new list; xs itself is not modified. For an
      odd number of values the middle element is returned unchanged. For an even
      number of values the mean of the two middle elements is returned as a
      float.

      Raises RuntimeError if xs is empty.
      """
      n=len(xs)
      if n==0:
        raise RuntimeError("Can't calculate median of empty sequence")
      sorted_xs=sorted(xs)
      # Floor division keeps the index an int under Python 3 as well; plain "/"
      # would produce a float there and fail when used as a list index.
      mid=(n-1)//2
      if n % 2==0:
        return (sorted_xs[mid]+sorted_xs[mid+1])/2.0
      return sorted_xs[mid]
    
    def StdDev(xs):
      """
      Return the standard deviation of the values in xs, normalised by the
      number of values n (not n-1):

        sigma = sqrt( sum[(xi-<x>)^2] / n )

      where <x> is the mean of xs as computed by Mean().
      """
      center=Mean(xs)
      squared_devs=[(value-center)**2 for value in xs]
      return math.sqrt(sum(squared_devs)/len(xs))
    
    def Min(xs):
      """
      Return the smallest value in xs, as determined by the builtin min().
      """
      smallest=min(xs)
      return smallest
    
    def Max(xs):
      """
      Return the largest value in xs, as determined by the builtin max().
      """
      largest=max(xs)
      return largest
    
    def Correl(xs, ys):
      """
      Return the correlation coefficient between the paired values of xs and ys

                   sum[(xi-<x>)*(yi-<y>)]
        r = -------------------------------------------
            sqrt(sum[(xi-<x>)^2])*sqrt(sum[(yi-<y>)^2])

      where <x> and <y> are the means of xs and ys as computed by Mean().

      Raises RuntimeError if xs and ys differ in length or if they have
      length 1.
      """
      if len(xs)!=len(ys):
        raise RuntimeError("Can't calculate correl. Sequence lengths do not match.")
      if len(xs)==1:
        raise RuntimeError("Can't calculate correl of sequences with length 1.")
      mean_x=Mean(xs)
      mean_y=Mean(ys)
      # Accumulate the cross product and both squared-deviation sums in one pass.
      covariance_sum=0.0
      sq_dev_x=0.0
      sq_dev_y=0.0
      for x, y in zip(xs, ys):
        dx=x-mean_x
        dy=y-mean_y
        covariance_sum+=dx*dy
        sq_dev_x+=dx**2
        sq_dev_y+=dy**2
      return covariance_sum/(math.sqrt(sq_dev_x)*math.sqrt(sq_dev_y))
    
    def Histogram(xs, bounds, num_bins):
      """
      Count how many values of xs fall into each of num_bins equally wide bins
      between bounds[0] and bounds[1].

      bounds is indexed as (lower, upper). Bin i collects the values x with
      floor((x-lower)*num_bins/(upper-lower)) == i, so the lower bound itself is
      counted in the first bin, while values for which this index is negative
      or >= num_bins (including x == upper) are ignored.

      Returns a list of num_bins integer counts.
      """
      lower=bounds[0]
      bins=[0]*num_bins
      # Number of bins per unit of x; 1.0* forces float division for int bounds.
      d=1.0*num_bins/(bounds[1]-lower)
      for x in xs:
        # Use floor rather than int(): int() truncates toward zero, which would
        # wrongly put values up to one bin width below the lower bound into
        # bin 0. The int() around floor keeps the index integral on Python 2,
        # where math.floor returns a float.
        index=int(math.floor((x-lower)*d))
        if 0<=index<num_bins:
          bins[index]+=1
      return bins