Skip to content
Snippets Groups Projects

feat: add function to calculate mean and variance

Closed Reto Tschannen requested to merge issue12 into main
1 file
+ 75
0
Compare changes
  • Side-by-side
  • Inline
+ 75
0
import glob, io
import matplotlib.pyplot as plt
import csv
def mean_variance(filepath):
"""Given the counts observed for a given gene across all cells, calculate the mean and variance.
Input:
directory with files of gene expression counts in individual cells
Output:
1. Csv-formatted table with GeneID, Mean, Variance of the counts
2. Scatterplot of mean vs variance for all genes
"""
# Open each file in the input directory, raises error if no file is fund
files = [file for file in glob.glob(filepath)]
if len(files) == 0:
raise ValueError('No files in directory:', filepath)
nog = {}
count = {}
test = {}
mean = {}
variance = {}
for file_name in files:
with io.open(file_name, 'r') as fh:
for line in fh:
geneid, copies = str(line.split()[0]), int(line.split()[1])
if geneid not in nog:
nog[geneid] = copies
count[geneid] = 1
test[geneid] = [copies]
else:
nog[geneid] += copies
count[geneid] += 1
test[geneid] += [copies]
# Calculate mean of each gene
for i in nog:
mean[i] = nog[i]/count[i]
# Calculate the variance
for i in test:
for j in range(0, len(test[i])):
variance[i] = (test[i][j]-mean[i])**2/count[i]
# Plot mean against variance
plt.scatter(mean.values(), variance.values())
for value in list(mean.keys()):
plt.text(mean[value], variance[value], value)
#plt.annotate(list(mean.keys()), mean.values(), variance.values())
plt.xlabel('mean')
plt.ylabel('variance')
plt.title('Mean gene expression vs. variance')
plt.show()
with open('/home/reto/results_mean_var_function.csv', 'w') as csv_file:
filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
filewriter.writerow(['geneid', 'mean', 'variance'])
for id in nog.keys():
filewriter.writerow([id, mean[id], variance[id]])
return files, nog, count, mean, 'var', variance, test, list(mean.keys())
print(mean_variance('/home/reto/2021_project_folder/2021_test/*'))
Loading