Skip to content
Snippets Groups Projects
Commit 58561bde authored by Reto Tschannen's avatar Reto Tschannen
Browse files

chore: cleaned up function and added correct path to usr directory

parent 663d5b8f
No related branches found
No related tags found
1 merge request!13feat: add function to calculate mean and variance
Pipeline #13633 failed
This commit is part of merge request !13. Comments created here will be created in the context of that merge request.
import glob, io import glob, io
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import csv import csv
import os
...@@ -12,62 +13,62 @@ def mean_variance(filepath): ...@@ -12,62 +13,62 @@ def mean_variance(filepath):
directory with files of gene expression counts in individual cells directory with files of gene expression counts in individual cells
Output: Output:
1. Csv-formatted table with GeneID, Mean, Variance of the counts 1. Path to Csv-formatted table with GeneID, Mean, Variance of the counts
2. Scatterplot of mean vs variance for all genes 2. Scatterplot of mean vs variance for all genes
""" """
# Open each file in the input directory, raises error if no file is fund # Open each file in the input directory, raises error if no file is fund
files = [file for file in glob.glob(filepath)] files = [file for file in glob.glob(filepath)]
if len(files) == 0: if len(files) == 0:
raise ValueError('No files in directory:', filepath) raise ValueError('No files in directory:', filepath)
# Creates all required dictionaries to cinstruct the mean, variance
nog = {} nog = {}
count = {} occurence = {}
test = {} individual_values = {}
mean = {} mean = {}
variance = {} variance = {}
# Added together all gene counts in nog, and occurence in occurence
for file_name in files: for file_name in files:
with io.open(file_name, 'r') as fh: with io.open(file_name, 'r') as fh:
for line in fh: for line in fh:
geneid, copies = str(line.split()[0]), int(line.split()[1]) geneid, copies = str(line.split()[0]), int(line.split()[1])
if geneid not in nog: if geneid not in nog:
nog[geneid] = copies nog[geneid] = copies
count[geneid] = 1 occurence[geneid] = 1
test[geneid] = [copies] individual_values[geneid] = [copies]
else: else:
nog[geneid] += copies nog[geneid] += copies
count[geneid] += 1 occurence[geneid] += 1
test[geneid] += [copies] individual_values[geneid] += [copies]
# Calculate mean of each gene # Calculate mean of each gene
for i in nog: for i in nog:
mean[i] = nog[i]/count[i] mean[i] = nog[i]/occurence[i]
# Calculate the variance # Calculate the variance
for i in test: for i in individual_values:
for j in range(0, len(test[i])): for j in range(0, len(individual_values[i])):
variance[i] = (test[i][j]-mean[i])**2/count[i] variance[i] = (individual_values[i][j]-mean[i])**2/occurence[i]
# Plot mean against variance # Plot mean against variance
plt.scatter(mean.values(), variance.values()) plt.scatter(mean.values(), variance.values())
for value in list(mean.keys()): for value in list(mean.keys()):
plt.text(mean[value], variance[value], value) plt.text(mean[value], variance[value], value)
#plt.annotate(list(mean.keys()), mean.values(), variance.values())
plt.xlabel('mean') plt.xlabel('mean')
plt.ylabel('variance') plt.ylabel('variance')
plt.title('Mean gene expression vs. variance') plt.title('Mean gene expression vs. variance')
plt.show() plt.show()
with open('/home/reto/results_mean_var_function.csv', 'w') as csv_file: # Constructs csv file and saves it in the users directory
with open(os.path.expanduser("~")+'/results_mean_var_function.csv', 'w') as csv_file:
filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL) filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
filewriter.writerow(['geneid', 'mean', 'variance']) filewriter.writerow(['geneid', 'mean', 'variance'])
for id in nog.keys(): for id in nog.keys():
filewriter.writerow([id, mean[id], variance[id]]) filewriter.writerow([id, mean[id], variance[id]])
return files, nog, count, mean, 'var', variance, test, list(mean.keys()) return os.path.expanduser("~")+'/results_mean_var_function.csv'
print(mean_variance('/home/reto/2021_project_folder/2021_test/*')) print(mean_variance('/home/reto/2021_project_folder/2021_test/*'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment