From 132757a573da495050d144b83aae67fd3afbce7a Mon Sep 17 00:00:00 2001 From: LauraU123 <laura.urbanska@stud.unibas.ch> Date: Fri, 11 Nov 2022 09:39:32 +0100 Subject: [PATCH] updated poisson sampling --- scripts/poisson_sampling.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/poisson_sampling.py b/scripts/poisson_sampling.py index 92537e8..60d043d 100644 --- a/scripts/poisson_sampling.py +++ b/scripts/poisson_sampling.py @@ -16,13 +16,14 @@ output: csv file with gene id and count ''' -def transcript_sampling(total_transcript_number, tsv_file, output_csv): - df = pd.read_csv(tsv_file, sep='\t', lineterminator='\n', names=["id", "level"])) +def transcript_sampling(total_transcript_number, csv_file, output_csv): + df = pd.read_csv(csv_file, sep='\t', lineterminator='\n', names=["id", "level"]) levels = [] - + sums = df['level'].tolist() + total = sum(sums) + normalized = total_transcript_number/total for expression_level in df['level']: - - poisson_sampled = np.random.poisson(total_transcript_number*expression_level) + poisson_sampled = np.random.poisson(expression_level*normalized) levels.append(poisson_sampled) transcript_numbers = pd.DataFrame({'id': df['id'],'count': levels}) -- GitLab