Skip to content
Snippets Groups Projects

feat: add function to calculate mean and variance

Closed Reto Tschannen requested to merge issue12 into main
1 file
+ 8
10
Compare changes
  • Side-by-side
  • Inline
@@ -23,29 +23,29 @@ def mean_variance(filepath):
raise ValueError('No files in directory:', filepath)
# Create all dictionaries required to construct the mean and variance
nog = {}
gene_counts = {}
occurence = {}
individual_values = {}
mean = {}
variance = {}
# Sum the gene counts per gene in nog and count each gene's occurrences in occurence
# Sum the gene counts per gene in gene_counts and count each gene's occurrences in occurence
for file_name in files:
with io.open(file_name, 'r') as fh:
for line in fh:
geneid, copies = str(line.split()[0]), int(line.split()[1])
if geneid not in nog:
nog[geneid] = copies
if geneid not in gene_counts:
gene_counts[geneid] = copies
occurence[geneid] = 1
individual_values[geneid] = [copies]
else:
nog[geneid] += copies
gene_counts[geneid] += copies
occurence[geneid] += 1
individual_values[geneid] += [copies]
# Calculate mean of each gene
for i in nog:
mean[i] = nog[i]/occurence[i]
for i in gene_counts:
mean[i] = gene_counts[i]/occurence[i]
# Calculate the variance
for i in individual_values:
@@ -65,12 +65,10 @@ def mean_variance(filepath):
with open(os.path.expanduser("~")+'/results_mean_var_function.csv', 'w') as csv_file:
filewriter = csv.writer(csv_file, delimiter = ',', quotechar = '|', quoting = csv.QUOTE_MINIMAL)
filewriter.writerow(['geneid', 'mean', 'variance'])
for id in nog.keys():
for id in gene_counts.keys():
filewriter.writerow([id, mean[id], variance[id]])
return os.path.expanduser("~")+'/results_mean_var_function.csv'
# Run mean_variance on a hard-coded local path and print its return value,
# which is the path of the results CSV written under the user's home directory.
# NOTE(review): the argument is machine-specific and presumably a glob pattern
# (trailing '/*') - the file-listing code is not visible here; confirm.
print(mean_variance('/home/reto/2021_project_folder/2021_test/*'))
Loading