From bd9215fa4a1342ba8b29df051c541410becc4848 Mon Sep 17 00:00:00 2001 From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch> Date: Thu, 19 Mar 2020 01:24:32 +0100 Subject: [PATCH] Fixed polya-trimming rule In labkey_to_snakemake.py fixed the parameters so that there is 3p as well 5p polya feature for every mate, which can be matched to the -a -g -A and -G options of cutadapt depending on which is the sense or antisense mate the appropriate variable is populated and the rest of variables are filled with 'XXXXXXXXXXXX' which leads to no trimming by cutadapt. The poly-A trimming rules are fixed to contain all -a -g -A -G options. --- scripts/labkey_to_snakemake.py | 23 +++++++++++++++-------- workflow/rules/paired_end.snakefile.smk | 10 ++++++++-- workflow/rules/single_end.snakefile.smk | 5 ++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index defefe3..41fbb3e 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -228,9 +228,13 @@ def main(): snakemake_table.loc[index, 'soft_clip'] = options.soft_clip snakemake_table.loc[index, 'pass_mode'] = options.pass_mode snakemake_table.loc[index, 'libtype'] = options.libtype + if options.trim_polya is True: - snakemake_table.loc[index, 'fq1_polya'] = trim_polya( + fq1_polya_3p, fq1_polya_5p = trim_polya( row[input_dict.loc['mate1_direction', 'labkey']]) + snakemake_table.loc[index, 'fq1_polya_3p'] = fq1_polya_3p + snakemake_table.loc[index, 'fq1_polya_5p'] = fq1_polya_5p + snakemake_table.loc[index, 'kallisto_directionality'] = \ get_kallisto_directionality( row[input_dict.loc['mate1_direction', 'labkey']]) @@ -247,8 +251,10 @@ def main(): input_dict.loc['fq2_5p', 'labkey']] if options.trim_polya is True: - snakemake_table.loc[index, 'fq2_polya'] = trim_polya( + fq2_polya_3p, fq2_polya_5p = trim_polya( row[input_dict.loc['mate2_direction', 'labkey']]) + snakemake_table.loc[index, 'fq2_polya_3p'] = fq2_polya_3p + snakemake_table.loc[index, 'fq2_polya_5p'] = fq2_polya_5p snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table = snakemake_table.astype( @@ -322,14 +328,15 @@ def get_kallisto_directionality(directionality): def trim_polya(sense): if sense == 'SENSE': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'AAAAAAAAAAAAAAAAA' + polya_5p = 'XXXXXXXXXXXXXXXXX' elif sense == 'ANTISENSE': - polya = 'TTTTTTTTTTTTTTTTT' - elif sense == 'RANDOM': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'TTTTTTTTTTTTTTTTT' else: - polya = 'XXXXXXXXXXXXXXXXX' - return polya + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'XXXXXXXXXXXXXXXXX' + return polya_3p, polya_5p if __name__ == '__main__': diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index f7326c3..72685e3 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -143,9 +143,13 @@ rule pe_remove_polya_cutadapt: params: polya_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya'], + samples_table.loc[wildcards.sample, 'fq1_polya_3p'], + polya_5_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_polya_5p'], polya_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya'] + samples_table.loc[wildcards.sample, 'fq2_polya_3p'], + polya_5_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_polya_5p'] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -173,7 +177,9 @@ rule pe_remove_polya_cutadapt: -e 0.1 \ -O 1 \ -a {params.polya_3_mate1} \ + -g {params.polya_5_mate1} \ -A {params.polya_3_mate2} \ + -G {params.polya_5_mate2} \ -o {output.reads1} \ -p {output.reads2} \ {input.reads1} \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index 415634d..3e09fc0 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -108,7 +108,9 @@ rule remove_polya_cutadapt: params: polya_3 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya"] + samples_table.loc[wildcards.sample, "fq1_polya_3p"], + polya_5 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1_polya_5p"] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -135,6 +137,7 @@ rule remove_polya_cutadapt: -O 1 \ -m 10 \ -a {params.polya_3} \ + -g {params.polya_5} \ -o {output.reads} \ {input.reads};) \ 1> {log.stdout} 2> {log.stderr}" -- GitLab