Skip to content
Snippets Groups Projects
Commit c4132770 authored by Reto Tschannen's avatar Reto Tschannen
Browse files

chore: completed CI corrections

parent cd2c8690
No related branches found
No related tags found
1 merge request: !13 feat: add function to calculate mean and variance
This commit is part of merge request !13. Comments created here will be created in the context of that merge request.
import glob
import io
import matplotlib.pyplot as plt
import csv
import os
def _read_gene_counts(files):
    """Return a dict mapping each gene ID to the list of counts read for it.

    Every non-blank line of every file must hold a gene ID followed by an
    integer count, separated by whitespace; any further fields are ignored.
    """
    counts_by_gene = {}
    for file_name in files:
        with open(file_name, 'r') as fh:
            for line in fh:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no data.
                    continue
                counts_by_gene.setdefault(fields[0], []).append(
                    int(fields[1]))
    return counts_by_gene


def _mean_and_variance(values):
    """Return the mean and population variance of a non-empty list."""
    n = len(values)
    mean = sum(values) / n
    # Sum the squared deviations over ALL observations before dividing;
    # keeping only the last term would not be a variance.
    variance = sum((value - mean) ** 2 for value in values) / n
    return mean, variance


def mean_variance(filepath):
    """Calculate mean and variance of the counts of each gene across cells.

    Input:
        filepath: either a directory or a glob pattern (e.g. 'counts/*.txt')
            selecting files of gene expression counts in individual cells.
            Each line of a file holds a gene ID and an integer count
            separated by whitespace.
    Output:
        1. Path to the CSV-formatted table with geneid, mean and (population)
           variance of the count, written to the user's home directory
        2. Scatterplot of mean vs variance for all genes (shown on screen)
    Raises:
        ValueError: If no files are found, or a count is not an integer
        IndexError: If a non-blank line has fewer than two fields
    """
    # A bare directory would be matched by glob as itself and then fail to
    # open, so expand it to "everything inside that directory".
    if os.path.isdir(filepath):
        pattern = os.path.join(filepath, '*')
    else:
        pattern = filepath
    # Sorted so the row order of the output does not depend on the
    # arbitrary order in which glob lists the files.
    files = sorted(name for name in glob.glob(pattern)
                   if os.path.isfile(name))
    if not files:
        raise ValueError('No files in directory: {}'.format(filepath))

    counts_by_gene = _read_gene_counts(files)

    # Calculates mean and variance of each gene
    mean = {}
    variance = {}
    for geneid, values in counts_by_gene.items():
        mean[geneid], variance[geneid] = _mean_and_variance(values)

    # Plots mean against variance; lists keep both axes in the same gene
    # order and avoid handing dict views to matplotlib.
    genes = list(counts_by_gene)
    plt.scatter([mean[geneid] for geneid in genes],
                [variance[geneid] for geneid in genes])
    for geneid in genes:
        plt.text(mean[geneid], variance[geneid], geneid)
    plt.xlabel('mean')
    plt.ylabel('variance')
    plt.title('Mean gene expression vs. variance')
    plt.show()

    # Constructs csv file and saves it in the user's home directory
    path = os.path.join(os.path.expanduser("~"),
                        'results_mean_var_function.csv')
    # newline='' lets the csv module control line endings, which prevents
    # blank rows between records on Windows.
    with open(path, 'w', newline='') as csv_file:
        filewriter = csv.writer(csv_file, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(['geneid', 'mean', 'variance'])
        for geneid in genes:
            filewriter.writerow([geneid, mean[geneid], variance[geneid]])
    return path
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment