From bd9215fa4a1342ba8b29df051c541410becc4848 Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Thu, 19 Mar 2020 01:24:32 +0100
Subject: [PATCH] Fixed polya-trimming rule

In labkey_to_snakemake.py fixed the parameters so that there is 3p as well 5p polya
feature for every mate, which can be matched to the -a -g -A and -G options of cutadapt
depending on which is the sense or antisense mate the appropriate variable is populated
and the rest of variables are filled with 'XXXXXXXXXXXX' which leads to no trimming by
cutadapt. The poly-A trimming rules are fixed to contain all -a -g -A -G options.
---
 scripts/labkey_to_snakemake.py          | 23 +++++++++++++++--------
 workflow/rules/paired_end.snakefile.smk | 10 ++++++++--
 workflow/rules/single_end.snakefile.smk |  5 ++++-
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py
index defefe3..41fbb3e 100755
--- a/scripts/labkey_to_snakemake.py
+++ b/scripts/labkey_to_snakemake.py
@@ -228,9 +228,13 @@ def main():
         snakemake_table.loc[index, 'soft_clip'] = options.soft_clip
         snakemake_table.loc[index, 'pass_mode'] = options.pass_mode
         snakemake_table.loc[index, 'libtype'] = options.libtype
+
         if options.trim_polya is True:
-            snakemake_table.loc[index, 'fq1_polya'] = trim_polya(
+            fq1_polya_3p, fq1_polya_5p = trim_polya(
                 row[input_dict.loc['mate1_direction', 'labkey']])
+            snakemake_table.loc[index, 'fq1_polya_3p'] = fq1_polya_3p
+            snakemake_table.loc[index, 'fq1_polya_5p'] = fq1_polya_5p
+
         snakemake_table.loc[index, 'kallisto_directionality'] = \
             get_kallisto_directionality(
                 row[input_dict.loc['mate1_direction', 'labkey']])
@@ -247,8 +251,10 @@ def main():
                 input_dict.loc['fq2_5p', 'labkey']]
 
             if options.trim_polya is True:
-                snakemake_table.loc[index, 'fq2_polya'] = trim_polya(
+                fq2_polya_3p, fq2_polya_5p = trim_polya(
                     row[input_dict.loc['mate2_direction', 'labkey']])
+                snakemake_table.loc[index, 'fq2_polya_3p'] = fq2_polya_3p
+                snakemake_table.loc[index, 'fq2_polya_5p'] = fq2_polya_5p
 
     snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True)
     snakemake_table = snakemake_table.astype(
@@ -322,14 +328,15 @@ def get_kallisto_directionality(directionality):
 
 def trim_polya(sense):
     if sense == 'SENSE':
-        polya = 'AAAAAAAAAAAAAAAAA'
+        polya_3p = 'AAAAAAAAAAAAAAAAA'
+        polya_5p = 'XXXXXXXXXXXXXXXXX'
     elif sense == 'ANTISENSE':
-        polya = 'TTTTTTTTTTTTTTTTT'
-    elif sense == 'RANDOM':
-        polya = 'AAAAAAAAAAAAAAAAA'
+        polya_3p = 'XXXXXXXXXXXXXXXXX'
+        polya_5p = 'TTTTTTTTTTTTTTTTT'
     else:
-        polya = 'XXXXXXXXXXXXXXXXX'
-    return polya
+        polya_3p = 'XXXXXXXXXXXXXXXXX'
+        polya_5p = 'XXXXXXXXXXXXXXXXX'
+    return polya_3p, polya_5p
 
 
 if __name__ == '__main__':
diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk
index f7326c3..72685e3 100644
--- a/workflow/rules/paired_end.snakefile.smk
+++ b/workflow/rules/paired_end.snakefile.smk
@@ -143,9 +143,13 @@ rule pe_remove_polya_cutadapt:
 
     params:
         polya_3_mate1 = lambda wildcards:
-            samples_table.loc[wildcards.sample, 'fq1_polya'],
+            samples_table.loc[wildcards.sample, 'fq1_polya_3p'],
+        polya_5_mate1 = lambda wildcards:
+            samples_table.loc[wildcards.sample, 'fq1_polya_5p'],
         polya_3_mate2 = lambda wildcards:
-            samples_table.loc[wildcards.sample, 'fq2_polya']
+            samples_table.loc[wildcards.sample, 'fq2_polya_3p'],
+        polya_5_mate2 = lambda wildcards:
+            samples_table.loc[wildcards.sample, 'fq2_polya_5p']
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -173,7 +177,9 @@ rule pe_remove_polya_cutadapt:
         -e 0.1 \
         -O 1 \
         -a {params.polya_3_mate1} \
+        -g {params.polya_5_mate1} \
         -A {params.polya_3_mate2} \
+        -G {params.polya_5_mate2} \
         -o {output.reads1} \
         -p {output.reads2} \
         {input.reads1} \
diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk
index 415634d..3e09fc0 100644
--- a/workflow/rules/single_end.snakefile.smk
+++ b/workflow/rules/single_end.snakefile.smk
@@ -108,7 +108,9 @@ rule remove_polya_cutadapt:
 
     params:
         polya_3 = lambda wildcards:
-            samples_table.loc[wildcards.sample, "fq1_polya"]
+            samples_table.loc[wildcards.sample, "fq1_polya_3p"],
+        polya_5 = lambda wildcards:
+            samples_table.loc[wildcards.sample, "fq1_polya_5p"]
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -135,6 +137,7 @@ rule remove_polya_cutadapt:
         -O 1 \
         -m 10  \
         -a {params.polya_3} \
+        -g {params.polya_5} \
         -o {output.reads} \
         {input.reads};) \
         1> {log.stdout} 2> {log.stderr}"
-- 
GitLab