From 132757a573da495050d144b83aae67fd3afbce7a Mon Sep 17 00:00:00 2001
From: LauraU123 <laura.urbanska@stud.unibas.ch>
Date: Fri, 11 Nov 2022 09:39:32 +0100
Subject: [PATCH] updated poisson sampling

---
 scripts/poisson_sampling.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/poisson_sampling.py b/scripts/poisson_sampling.py
index 92537e8..60d043d 100644
--- a/scripts/poisson_sampling.py
+++ b/scripts/poisson_sampling.py
@@ -16,13 +16,14 @@ output: csv file with gene id and count
 '''
 
 
-def transcript_sampling(total_transcript_number, tsv_file, output_csv):
-    df = pd.read_csv(tsv_file, sep='\t', lineterminator='\n', names=["id", "level"]))
+def transcript_sampling(total_transcript_number, csv_file, output_csv):
+    df = pd.read_csv(csv_file, sep='\t', lineterminator='\n', names=["id", "level"])
     levels = []
-
+    sums = df['level'].tolist()
+    total = sum(sums)
+    normalized = total_transcript_number/total
     for expression_level in df['level']:
-
-        poisson_sampled = np.random.poisson(total_transcript_number*expression_level)
+        poisson_sampled = np.random.poisson(expression_level*normalized)
         levels.append(poisson_sampled)
 
     transcript_numbers = pd.DataFrame({'id': df['id'],'count': levels})
-- 
GitLab