From 392b04d2b5538bab2ec04c4e3ff8c267f6a122cc Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Fri, 20 Mar 2020 13:35:03 +0100
Subject: [PATCH] Fix Poly(A)-trimming rule

In labkey_to_snakemake.py, fix the parameters so that every mate has both a 3p and a
5p poly(A) field, which map to the -a, -g, -A and -G options of cutadapt. Depending on
whether a mate is the sense or the antisense mate, the appropriate field is populated;
the remaining fields are filled with 'XXXXXXXXXXXX', which leads to no trimming by
cutadapt. The poly(A) trimming rules are updated to pass all of the -a, -g, -A and -G
options.
---
 pipeline_documentation.md                     | 33 +++++++------------
 scripts/labkey_to_snakemake.py                | 23 ++++++++-----
 tests/input_files/samples.tsv                 |  6 ++--
 .../expected_output.md5                       | 20 +++++------
 tests/test_integration_workflow/test.local.sh |  2 +-
 .../expected_output.md5                       |  2 +-
 .../expected_output.md5                       |  2 +-
 .../test.sh                                   |  1 +
 workflow/rules/paired_end.snakefile.smk       | 10 ++++--
 workflow/rules/single_end.snakefile.smk       |  5 ++-
 10 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/pipeline_documentation.md b/pipeline_documentation.md
index 41bc095..3774eb0 100644
--- a/pipeline_documentation.md
+++ b/pipeline_documentation.md
@@ -39,7 +39,7 @@ This document describes the individual rules of the pipeline for information pur
 ## Detailed description of steps
 The pipeline consists of three snakefiles: A main Snakefile and an individual Snakefile for each sequencing mode (single-end and paired-end), as parameters to individual tools differ between the sequencing modes. The main Snakefile contains some general rules for the creation of indices, rules that are applicable to both sequencing modes, and rules that deal with summary steps and combining results across samples of the run.     
 Individual rules of the pipeline are described briefly, and links to the respective software manuals are given. If parameters can be influenced by the user (via the samples table) they are also described.
-Description of paired- and single-end rules are combined, only differences are highlighted.
+Descriptions of paired- and single-end rules are combined; only differences are highlighted.
 
 
 ### General
@@ -74,9 +74,10 @@ soft_clip | "Local": standard local alignment with soft-clipping allowed. "EndTo
 pass_mode | "None": 1-pass mapping; "Basic": basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly; for star mapping (type=STRING)
 libtype | "A": automatically infer. For more info see [salmon manual](https://salmon.readthedocs.io/en/latest/salmon.html) (type=STRING)
 kallisto_directionality | "--fr-stranded":Strand specific reads, first read forward. "--rf-stranded": Strand specific reads, first read reverse; for kallisto (type=STRING)
-fq1_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING)
-fq2_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING)
-
+fq1_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING)
+fq1_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING)
+fq2_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING)
+fq2_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING)
 
 #### create log directories
 Currently not implemented as Snakemake rule, but general statement.
@@ -260,33 +261,26 @@ Creates an interactive report after the pipeline is finished. [MultiQC](https://
 **Output:** fastq files with adapters removed, reads shorter than 10nt will be discarded.    
 
 
-**Arguments not influencable by user:**        
+**Non-customisable arguments:**        
 -e 0.1  maximum error-rate of 10%    
 -j 8    use 8 threads    
 -m 10   Discard processed reads that are shorter than 10    
--n 3    search for all the given adapter sequences repeatedly, either until no adapter match was found or until 3 rounds have been performed.    
+-n 2    search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed.    
 
 *paired end:*    
---pair-filter=both      filtering criteria must apply to both reads in order for a read pair to be discarded
-
-*single end:*    
--O 1    minimal overlap of 1
+--pair-filter=any      filtering criteria must apply to any of the two reads in order for a read pair to be discarded
 
 #### (pe_)remove_polya_cutadapt
-Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/)t is used to remove poly(A) tails. 
+Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/) is used to remove poly(A) tails. 
 
 **Input:** fastq reads    
 **Parameters:** Adapters to be removed, specified by user in the columns 'fq1_polya', 'fq2_polya', respectively.    
 **Output:** fastq files with poly(A) tails removed, reads shorter than 10nt will be discarded. 
 
-**Arguments like in remove_adapters_cutadapt and additionally:**    
---match-read-wildcards This option is used to allow matching wildcard characters also within reads, because if no tail should be trimmed "XXXXXX" is specified in the samples table, which doesn't match any nucleotides, and thus nothing will be done here.    
--n 2    search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed.    
--q 6    trim low-quality 3'ends with a cutoff of 6 nucleotides    
-
-
+**Arguments similar to remove_adapters_cutadapt and additionally:**    
+-n 1    search for all the given adapter sequences repeatedly, either until no adapter match was found or until 1 round has been performed.    
 *paired end:*
---pair-filter=both      filtering criteria must apply to both reads in order for a read pair to be discarded
+--pair-filter=any      filtering criteria must apply to any of the two reads in order for a read pair to be discarded
 
 *single end:*    
 -O 1    minimal overlap of 1
@@ -318,8 +312,6 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www
 *Same for single- and paired-end.*
 
 
-
-
 #### (pe_)quantification_salmon
 [Salmon](https://salmon.readthedocs.io/en/latest/salmon.html) is a tool for wicked-fast transcript quantification from RNA-seq data.
 
@@ -358,4 +350,3 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www
 * -l: fragment length, user specified as `mean`
 * -s: fragment length SD, user specified as `sd` 
 
-
diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py
index defefe3..41fbb3e 100755
--- a/scripts/labkey_to_snakemake.py
+++ b/scripts/labkey_to_snakemake.py
@@ -228,9 +228,13 @@ def main():
         snakemake_table.loc[index, 'soft_clip'] = options.soft_clip
         snakemake_table.loc[index, 'pass_mode'] = options.pass_mode
         snakemake_table.loc[index, 'libtype'] = options.libtype
+
         if options.trim_polya is True:
-            snakemake_table.loc[index, 'fq1_polya'] = trim_polya(
+            fq1_polya_3p, fq1_polya_5p = trim_polya(
                 row[input_dict.loc['mate1_direction', 'labkey']])
+            snakemake_table.loc[index, 'fq1_polya_3p'] = fq1_polya_3p
+            snakemake_table.loc[index, 'fq1_polya_5p'] = fq1_polya_5p
+
         snakemake_table.loc[index, 'kallisto_directionality'] = \
             get_kallisto_directionality(
                 row[input_dict.loc['mate1_direction', 'labkey']])
@@ -247,8 +251,10 @@ def main():
                 input_dict.loc['fq2_5p', 'labkey']]
 
             if options.trim_polya is True:
-                snakemake_table.loc[index, 'fq2_polya'] = trim_polya(
+                fq2_polya_3p, fq2_polya_5p = trim_polya(
                     row[input_dict.loc['mate2_direction', 'labkey']])
+                snakemake_table.loc[index, 'fq2_polya_3p'] = fq2_polya_3p
+                snakemake_table.loc[index, 'fq2_polya_5p'] = fq2_polya_5p
 
     snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True)
     snakemake_table = snakemake_table.astype(
@@ -322,14 +328,15 @@ def get_kallisto_directionality(directionality):
 
 def trim_polya(sense):
     if sense == 'SENSE':
-        polya = 'AAAAAAAAAAAAAAAAA'
+        polya_3p = 'AAAAAAAAAAAAAAAAA'
+        polya_5p = 'XXXXXXXXXXXXXXXXX'
     elif sense == 'ANTISENSE':
-        polya = 'TTTTTTTTTTTTTTTTT'
-    elif sense == 'RANDOM':
-        polya = 'AAAAAAAAAAAAAAAAA'
+        polya_3p = 'XXXXXXXXXXXXXXXXX'
+        polya_5p = 'TTTTTTTTTTTTTTTTT'
     else:
-        polya = 'XXXXXXXXXXXXXXXXX'
-    return polya
+        polya_3p = 'XXXXXXXXXXXXXXXXX'
+        polya_5p = 'XXXXXXXXXXXXXXXXX'
+    return polya_3p, polya_5p
 
 
 if __name__ == '__main__':
diff --git a/tests/input_files/samples.tsv b/tests/input_files/samples.tsv
index cc0ec82..d7ff5bb 100644
--- a/tests/input_files/samples.tsv
+++ b/tests/input_files/samples.tsv
@@ -1,3 +1,3 @@
-sample	seqmode	fq1	index_size	kmer	fq2	fq1_3p	fq1_5p	fq2_3p	fq2_5p	organism	gtf	gtf_filtered	genome	tr_fasta_filtered	sd	mean	multimappers	soft_clip	pass_mode	libtype	kallisto_directionality	fq1_polya	fq2_polya
-synthetic_10_reads_paired_synthetic_10_reads_paired	paired_end	../input_files/project1/synthetic.mate_1.fastq.gz	75	31	../input_files/project1/synthetic.mate_2.fastq.gz	AGATCGGAAGAGCACA	XXXXXXXXXXXXX	AGATCGGAAGAGCGT	XXXXXXXXXXXXX	homo_sapiens	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/genome.fa	../input_files/homo_sapiens/transcriptome.fa	100	250	10	EndToEnd	None	A	--fr	AAAAAAAAAAAAAAAAA	TTTTTTTTTTTTTTTTT
-synthetic_10_reads_mate_1_synthetic_10_reads_mate_1	single_end	../input_files/project2/synthetic.mate_1.fastq.gz	75	31	XXXXXXXXXXXXX	AGATCGGAAGAGCACA	XXXXXXXXXXXXX	XXXXXXXXXXXXX	XXXXXXXXXXXXX	homo_sapiens	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/genome.fa	../input_files/homo_sapiens/transcriptome.fa	100	250	10	EndToEnd	None	A	--fr	AAAAAAAAAAAAAAAAA	XXXXXXXXXXXXX
+sample	seqmode	fq1	index_size	kmer	fq1_3p	fq1_5p	organism	gtf	gtf_filtered	genome	tr_fasta_filtered	sd	mean	multimappers	soft_clip	pass_mode	libtype	fq1_polya_3p	fq1_polya_5p	kallisto_directionality	fq2	fq2_3p	fq2_5p	fq2_polya_3p	fq2_polya_5p
+synthetic_10_reads_paired_synthetic_10_reads_paired	paired_end	../input_files/project1/synthetic.mate_1.fastq.gz	75	31	AGATCGGAAGAGCACA	XXXXXXXXXXXXX	homo_sapiens	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/genome.fa	../input_files/homo_sapiens/transcriptome.fa	100	250	10	EndToEnd	None	A	AAAAAAAAAAAAAAAAA	XXXXXXXXXXXXXXXXX	--fr	../input_files/project1/synthetic.mate_2.fastq.gz	AGATCGGAAGAGCGT	XXXXXXXXXXXXX	XXXXXXXXXXXXXXXXX	TTTTTTTTTTTTTTTTT
+synthetic_10_reads_mate_1_synthetic_10_reads_mate_1	single_end	../input_files/project2/synthetic.mate_1.fastq.gz	75	31	AGATCGGAAGAGCACA	XXXXXXXXXXXXX	homo_sapiens	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/annotation.gtf	../input_files/homo_sapiens/genome.fa	../input_files/homo_sapiens/transcriptome.fa	100	250	10	EndToEnd	None	A	AAAAAAAAAAAAAAAAA	XXXXXXXXXXXXXXXXX	--fr	XXXXXXXXXXXXX	XXXXXXXXXXXXX	XXXXXXXXXXXXX	XXXXXXXXXXXXX	XXXXXXXXXXXXX
diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5
index 4f9b8f2..31cbf64 100644
--- a/tests/test_integration_workflow/expected_output.md5
+++ b/tests/test_integration_workflow/expected_output.md5
@@ -19,7 +19,7 @@ ea36f062eedc7f54ceffea2b635a25a8  results/star_indexes/homo_sapiens/75/STAR_inde
 500dd49da40b16799aba62aa5cf239ba  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate1.fastq
 e90e31db1ce51d930645eb74ff70d21b  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate2.fastq
 500dd49da40b16799aba62aa5cf239ba  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate1.fastq
-e90e31db1ce51d930645eb74ff70d21b  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq
+1c0796d7e0bdab0e99780b2e11d80c19  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq
 d41d8cd98f00b204e9800998ecf8427e  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired_SJ.out.tab
 f551ff091e920357ec0a76807cb51dba  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt
 c0df759ceab72ea4b1a560f991fe6497  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc.fo
@@ -45,8 +45,8 @@ b28aac49f537b8cba364b6422458ad28  results/samples/synthetic_10_reads_paired_synt
 69b70e3f561b749bf10b186dd2480a8a  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_sequence_quality.png
 b28aac49f537b8cba364b6422458ad28  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_tile_quality.png
 5b950b5dfe3c7407e9aac153db330a38  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/sequence_length_distribution.png
-5e07e870d516a91647808bd84068d829  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv
-6180a904511292b0f173794ae98af991  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam
+2e77276535976efccb244627231624bf  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv
+d013650f813b815a790c9e6a51c7559b  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam
 d41d8cd98f00b204e9800998ecf8427e  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.kallisto.pseudo.sam
 c77480e0235761f2d7f80dbceb2e2806  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/lib_format_counts.json
 989d6ee63b728fced9ec0249735ab83d  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/aux_info/ambig_info.tsv
@@ -78,10 +78,10 @@ e72f5d798c99272f8c0166dc77247db1  results/samples/synthetic_10_reads_mate_1_synt
 92bcd0592d22a6a58d0360fc76103e56  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias
 92bcd0592d22a6a58d0360fc76103e56  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias_3p
 d41d8cd98f00b204e9800998ecf8427e  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/unmapped_names.txt
-0139e75ddbfe6eb081c2c2d9b9108ab4  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg
-c266d31e0a2ad84975cb9de335891e64  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg
-0139e75ddbfe6eb081c2c2d9b9108ab4  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg
-c266d31e0a2ad84975cb9de335891e64  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg
+16652c037090f3eed1123618a2e75107  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg
+90ae442ebf35015eab2dd4e804c2bafb  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg
+16652c037090f3eed1123618a2e75107  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg
+90ae442ebf35015eab2dd4e804c2bafb  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg
 ea91b4f85622561158bff2f7c9c312b3  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg
 bcccf679a8c083d01527514c9f5680a0  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg
 ea91b4f85622561158bff2f7c9c312b3  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg
@@ -89,9 +89,9 @@ bcccf679a8c083d01527514c9f5680a0  results/samples/synthetic_10_reads_mate_1_synt
 3ce47cb1d62482c5d62337751d7e8552  results/transcriptome/homo_sapiens/transcriptome.fa
 6b44c507f0a1c9f7369db0bb1deef0fd  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
 2caebc23faf78fdbbbdbb118d28bd6b5  results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
-c1254a0bae19ac3ffc39f73099ffcf2b  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
-c266d31e0a2ad84975cb9de335891e64  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
-0139e75ddbfe6eb081c2c2d9b9108ab4  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
+53fd53f884352d0493b2ca99cef5d76d  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
+90ae442ebf35015eab2dd4e804c2bafb  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
+16652c037090f3eed1123618a2e75107  results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
 c1254a0bae19ac3ffc39f73099ffcf2b  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
 bcccf679a8c083d01527514c9f5680a0  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg
 ea91b4f85622561158bff2f7c9c312b3  results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg
\ No newline at end of file
diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh
index ac6e45e..6b641f2 100755
--- a/tests/test_integration_workflow/test.local.sh
+++ b/tests/test_integration_workflow/test.local.sh
@@ -7,7 +7,7 @@ cleanup () {
     rm -rf .java/
     rm -rf .snakemake/
     rm -rf logs/
-    rm -rf results/
+    # rm -rf results/
     cd $user_dir
     echo "Exit status: $rc"
 }
diff --git a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
index 569a4a8..b2a6861 100644
--- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
+++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
@@ -1,2 +1,2 @@
 b163e7b06bd9e0a71f2fd1fc4935fea9  config.yaml
-cb58e046242c2702038e6e21dbd0bdb4  samples.tsv
+071f0e942321df5e38c8b2d458f7be06  samples.tsv
diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
index 569a4a8..b2a6861 100644
--- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
+++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
@@ -1,2 +1,2 @@
 b163e7b06bd9e0a71f2fd1fc4935fea9  config.yaml
-cb58e046242c2702038e6e21dbd0bdb4  samples.tsv
+071f0e942321df5e38c8b2d458f7be06  samples.tsv
diff --git a/tests/test_scripts_labkey_to_snakemake_table/test.sh b/tests/test_scripts_labkey_to_snakemake_table/test.sh
index 37014ed..dd2707e 100755
--- a/tests/test_scripts_labkey_to_snakemake_table/test.sh
+++ b/tests/test_scripts_labkey_to_snakemake_table/test.sh
@@ -6,6 +6,7 @@ cleanup () {
     rm -rf .snakemake/
     rm -rf config.yaml
     rm -rf samples.tsv
+    rm -rf logs
     cd $user_dir
     echo "Exit status: $rc"
 }
diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk
index 07ae6a3..a642c65 100644
--- a/workflow/rules/paired_end.snakefile.smk
+++ b/workflow/rules/paired_end.snakefile.smk
@@ -143,9 +143,13 @@ rule pe_remove_polya_cutadapt:
 
     params:
         polya_3_mate1 = lambda wildcards:
-            samples_table.loc[wildcards.sample, 'fq1_polya'],
+            samples_table.loc[wildcards.sample, 'fq1_polya_3p'],
+        polya_5_mate1 = lambda wildcards:
+            samples_table.loc[wildcards.sample, 'fq1_polya_5p'],
         polya_3_mate2 = lambda wildcards:
-            samples_table.loc[wildcards.sample, 'fq2_polya']
+            samples_table.loc[wildcards.sample, 'fq2_polya_3p'],
+        polya_5_mate2 = lambda wildcards:
+            samples_table.loc[wildcards.sample, 'fq2_polya_5p']
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -173,7 +177,9 @@ rule pe_remove_polya_cutadapt:
         -e 0.1 \
         -O 1 \
         -a {params.polya_3_mate1} \
+        -g {params.polya_5_mate1} \
         -A {params.polya_3_mate2} \
+        -G {params.polya_5_mate2} \
         -o {output.reads1} \
         -p {output.reads2} \
         {input.reads1} \
diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk
index b22e587..d0b8445 100644
--- a/workflow/rules/single_end.snakefile.smk
+++ b/workflow/rules/single_end.snakefile.smk
@@ -108,7 +108,9 @@ rule remove_polya_cutadapt:
 
     params:
         polya_3 = lambda wildcards:
-            samples_table.loc[wildcards.sample, "fq1_polya"]
+            samples_table.loc[wildcards.sample, "fq1_polya_3p"],
+        polya_5 = lambda wildcards:
+            samples_table.loc[wildcards.sample, "fq1_polya_5p"]
 
     singularity:
         "docker://zavolab/cutadapt:1.16-slim"
@@ -135,6 +137,7 @@ rule remove_polya_cutadapt:
         -O 1 \
         -m 10  \
         -a {params.polya_3} \
+        -g {params.polya_5} \
         -o {output.reads} \
         {input.reads};) \
         1> {log.stdout} 2> {log.stderr}"
-- 
GitLab