Skip to content
Snippets Groups Projects
Commit 283577f8 authored by Reto Tschannen's avatar Reto Tschannen Committed by Reto Tschannen
Browse files

chore: cleaned up function and added correct path to usr directory

parent 37aaef90
No related branches found
No related tags found
1 merge request!13feat: add function to calculate mean and variance
import glob, io import glob, io
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import csv import csv
import os
...@@ -12,62 +13,62 @@ def mean_variance(filepath): ...@@ -12,62 +13,62 @@ def mean_variance(filepath):
directory with files of gene expression counts in individual cells directory with files of gene expression counts in individual cells
Output: Output:
1. Csv-formatted table with GeneID, Mean, Variance of the counts 1. Path to Csv-formatted table with GeneID, Mean, Variance of the counts
2. Scatterplot of mean vs variance for all genes 2. Scatterplot of mean vs variance for all genes
""" """
# Open each file in the input directory, raises error if no file is fund # Open each file in the input directory, raises error if no file is fund
files = [file for file in glob.glob(filepath)] files = [file for file in glob.glob(filepath)]
if len(files) == 0: if len(files) == 0:
raise ValueError('No files in directory:', filepath) raise ValueError('No files in directory:', filepath)
# Creates all required dictionaries to cinstruct the mean, variance
nog = {} nog = {}
count = {} occurence = {}
test = {} individual_values = {}
mean = {} mean = {}
variance = {} variance = {}
# Added together all gene counts in nog, and occurence in occurence
for file_name in files: for file_name in files:
with io.open(file_name, 'r') as fh: with io.open(file_name, 'r') as fh:
for line in fh: for line in fh:
geneid, copies = str(line.split()[0]), int(line.split()[1]) geneid, copies = str(line.split()[0]), int(line.split()[1])
if geneid not in nog: if geneid not in nog:
nog[geneid] = copies nog[geneid] = copies
count[geneid] = 1 occurence[geneid] = 1
test[geneid] = [copies] individual_values[geneid] = [copies]
else: else:
nog[geneid] += copies nog[geneid] += copies
count[geneid] += 1 occurence[geneid] += 1
test[geneid] += [copies] individual_values[geneid] += [copies]
# Calculate mean of each gene # Calculate mean of each gene
for i in nog: for i in nog:
mean[i] = nog[i]/count[i] mean[i] = nog[i]/occurence[i]
# Calculate the variance # Calculate the variance
for i in test: for i in individual_values:
for j in range(0, len(test[i])): for j in range(0, len(individual_values[i])):
variance[i] = (test[i][j]-mean[i])**2/count[i] variance[i] = (individual_values[i][j]-mean[i])**2/occurence[i]
# Plot mean against variance # Plot mean against variance
plt.scatter(mean.values(), variance.values()) plt.scatter(mean.values(), variance.values())
for value in list(mean.keys()): for value in list(mean.keys()):
plt.text(mean[value], variance[value], value) plt.text(mean[value], variance[value], value)
#plt.annotate(list(mean.keys()), mean.values(), variance.values())
plt.xlabel('mean') plt.xlabel('mean')
plt.ylabel('variance') plt.ylabel('variance')
plt.title('Mean gene expression vs. variance') plt.title('Mean gene expression vs. variance')
plt.show() plt.show()
with open('/home/reto/results_mean_var_function.csv', 'w') as csv_file: # Constructs csv file and saves it in the users directory
with open(os.path.expanduser("~")+'/results_mean_var_function.csv', 'w') as csv_file:
filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL) filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
filewriter.writerow(['geneid', 'mean', 'variance']) filewriter.writerow(['geneid', 'mean', 'variance'])
for id in nog.keys(): for id in nog.keys():
filewriter.writerow([id, mean[id], variance[id]]) filewriter.writerow([id, mean[id], variance[id]])
return files, nog, count, mean, 'var', variance, test, list(mean.keys()) return os.path.expanduser("~")+'/results_mean_var_function.csv'
print(mean_variance('/home/reto/2021_project_folder/2021_test/*')) print(mean_variance('/home/reto/2021_project_folder/2021_test/*'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment