chore: cleaned up function and added correct path to usr directory

283577f8 · Reto Tschannen · Reto Tschannen · 37aaef90 · 283577f8
Commit 283577f8 authored 3 years ago by Reto Tschannen Committed by Reto Tschannen 3 years ago
--- a/src/mean-variance-function.py
+++ b/src/mean-variance-function.py
 import glob, io
 import matplotlib.pyplot as plt
 import csv
+import os
@@ -12,62 +13,62 @@ def mean_variance(filepath):
        directory with files of gene expression counts in individual cells
    Output:
-        1. Csv-formatted table with GeneID, Mean, Variance of the counts
+        1. Path to Csv-formatted table with GeneID, Mean, Variance of the counts
        2. Scatterplot of mean vs variance for all genes
    """
    # Open each file in the input directory, raises error if no file is fund
    files = [file for file in glob.glob(filepath)]
    if len(files) == 0:
        raise ValueError('No files in directory:', filepath)
+    # Create the dictionaries needed to construct the mean and variance
    nog = {}
-    count = {}
+    occurence = {}
-    test = {}
+    individual_values = {}
    mean = {}
    variance = {}
+    # Sum each gene's counts in nog and count its occurrences in occurence
    for file_name in files:
        with io.open(file_name, 'r') as fh:
            for line in fh:
                geneid, copies = str(line.split()[0]), int(line.split()[1])
                if geneid not in nog:
                    nog[geneid] = copies
-                    count[geneid] = 1
+                    occurence[geneid] = 1
-                    test[geneid] = [copies]
+                    individual_values[geneid] = [copies]
                else:
                    nog[geneid] += copies
-                    count[geneid] += 1
+                    occurence[geneid] += 1
-                    test[geneid] += [copies]
+                    individual_values[geneid] += [copies]
    # Calculate mean of each gene
    for i in nog:
-        mean[i] = nog[i]/count[i] 
+        mean[i] = nog[i]/occurence[i] 
    # Calculate the variance
-    for i in test:
+    for i in individual_values:
-        for j in range(0, len(test[i])):
+        for j in range(0, len(individual_values[i])):
-            variance[i] = (test[i][j]-mean[i])**2/count[i] 
+            variance[i] = (individual_values[i][j]-mean[i])**2/occurence[i] 
    # Plot mean against variance
    plt.scatter(mean.values(), variance.values())
    for value in list(mean.keys()):
        plt.text(mean[value], variance[value], value)
-    #plt.annotate(list(mean.keys()), mean.values(), variance.values())
    plt.xlabel('mean')
    plt.ylabel('variance')
    plt.title('Mean gene expression vs. variance')
    plt.show()
-    with open('/home/reto/results_mean_var_function.csv', 'w') as csv_file:
+    # Write the CSV file to the user's home directory
+    with open(os.path.expanduser("~")+'/results_mean_var_function.csv', 'w') as csv_file:
        filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
        filewriter.writerow(['geneid', 'mean', 'variance'])
        for id in nog.keys():
            filewriter.writerow([id, mean[id], variance[id]])
-    return files, nog, count, mean, 'var', variance, test, list(mean.keys())
+    return os.path.expanduser("~")+'/results_mean_var_function.csv'
 print(mean_variance('/home/reto/2021_project_folder/2021_test/*'))