Skip to content
Snippets Groups Projects
Select Git revision
  • a39263b2dfd25f8c8427cd6d0f426158a834fcd4
  • master default protected
  • develop protected
  • cmake_boost_refactor
  • ubuntu_ci
  • mmtf
  • non-orthogonal-maps
  • no_boost_filesystem
  • data_viewer
  • 2.11.1
  • 2.11.0
  • 2.10.0
  • 2.9.3
  • 2.9.2
  • 2.9.1
  • 2.9.0
  • 2.8.0
  • 2.7.0
  • 2.6.1
  • 2.6.0
  • 2.6.0-rc4
  • 2.6.0-rc3
  • 2.6.0-rc2
  • 2.6.0-rc
  • 2.5.0
  • 2.5.0-rc2
  • 2.5.0-rc
  • 2.4.0
  • 2.4.0-rc2
29 results

export_scene_win.cc

Blame
  • mean-variance-function.py 2.50 KiB
    import glob, io
    import matplotlib.pyplot as plt
    import csv
    import os
    
    
    
    
    def mean_variance(filepath):
        """Given the counts observed for a given gene across all cells, calculate the mean and variance.
    
        Input: 
            directory with files of gene expression counts in individual cells
        
        Output:
            1. Path to Csv-formatted table with GeneID, Mean, Variance of the counts
            2. Scatterplot of mean vs variance for all genes
        """
        # Open each file in the input directory, raises error if no file is fund
        files = [file for file in glob.glob(filepath)]
    
        if len(files) == 0:
            raise ValueError('No files in directory:', filepath)
    
        # Creates all required dictionaries to cinstruct the mean, variance
        gene_counts = {}
        occurence = {}
        individual_values = {}
        mean = {}
        variance = {}
    
        # Added together all gene counts in gene_counts, and occurence in occurence
        for file_name in files:
            with io.open(file_name, 'r') as fh:
                for line in fh:
                    geneid, copies = str(line.split()[0]), int(line.split()[1])
                    if geneid not in gene_counts:
                        gene_counts[geneid] = copies
                        occurence[geneid] = 1
                        individual_values[geneid] = [copies]
                    else:
                        gene_counts[geneid] += copies
                        occurence[geneid] += 1
                        individual_values[geneid] += [copies]
        
        # Calculate mean of each gene
        for i in gene_counts:
            mean[i] = gene_counts[i]/occurence[i] 
    
        # Calculate the variance
        for i in individual_values:
            for j in range(0, len(individual_values[i])):
                variance[i] = (individual_values[i][j]-mean[i])**2/occurence[i] 
    
        # Plot mean against variance
        plt.scatter(mean.values(), variance.values())
        for value in list(mean.keys()):
            plt.text(mean[value], variance[value], value)
        plt.xlabel('mean')
        plt.ylabel('variance')
        plt.title('Mean gene expression vs. variance')
        plt.show()
        
        # Constructs csv file and saves it in the users directory
        with open(os.path.expanduser("~")+'/results_mean_var_function.csv', 'w') as csv_file:
            filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
            filewriter.writerow(['geneid', 'mean', 'variance'])
            for id in gene_counts.keys():
                filewriter.writerow([id, mean[id], variance[id]])
        
        return os.path.expanduser("~")+'/results_mean_var_function.csv'