From 392b04d2b5538bab2ec04c4e3ff8c267f6a122cc Mon Sep 17 00:00:00 2001 From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch> Date: Fri, 20 Mar 2020 13:35:03 +0100 Subject: [PATCH] Fix Poly(A)-trimming rule In labkey_to_snakemake.py fixed the parameters so that there is 3p as well 5p polya feature for every mate, which can be matched to the -a -g -A and -G options of cutadapt depending on which is the sense or antisense mate the appropriate variable is populated and the rest of variables are filled with 'XXXXXXXXXXXX' which leads to no trimming by cutadapt. The poly-A trimming rules are fixed to contain all -a -g -A -G options. --- pipeline_documentation.md | 33 +++++++------------ scripts/labkey_to_snakemake.py | 23 ++++++++----- tests/input_files/samples.tsv | 6 ++-- .../expected_output.md5 | 20 +++++------ tests/test_integration_workflow/test.local.sh | 2 +- .../expected_output.md5 | 2 +- .../expected_output.md5 | 2 +- .../test.sh | 1 + workflow/rules/paired_end.snakefile.smk | 10 ++++-- workflow/rules/single_end.snakefile.smk | 5 ++- 10 files changed, 56 insertions(+), 48 deletions(-) diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 41bc095..3774eb0 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -39,7 +39,7 @@ This document describes the individual rules of the pipeline for information pur ## Detailed description of steps The pipeline consists of three snakefiles: A main Snakefile and an individual Snakefile for each sequencing mode (single-end and paired-end), as parameters to individual tools differ between the sequencing modes. The main Snakefile contains some general rules for the creation of indices, rules that are applicable to both sequencing modes, and rules that deal with summary steps and combining results across samples of the run. Individual rules of the pipeline are described briefly, and links to the respective software manuals are given. 
If parameters can be influenced by the user (via the samples table) they are also described. -Description of paired- and single-end rules are combined, only differences are highlighted. +Descriptions of paired- and single-end rules are combined; only differences are highlighted. ### General @@ -74,9 +74,10 @@ soft_clip | "Local": standard local alignment with soft-clipping allowed. "EndTo pass_mode | "None": 1-pass mapping; "Basic": basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly; for star mapping (type=STRING) libtype | "A": automatically infer. For more info see [salmon manual](https://salmon.readthedocs.io/en/latest/salmon.html) (type=STRING) kallisto_directionality | "--fr-stranded":Strand specific reads, first read forward. "--rf-stranded": Strand specific reads, first read reverse; for kallisto (type=STRING) -fq1_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING) -fq2_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING) - +fq1_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING) +fq1_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING) +fq2_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING) +fq2_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING) #### create log directories Currently not implemented as Snakemake rule, but general statement. @@ -260,33 +261,26 @@ Creates an interactive report after the pipeline is finished. [MultiQC](https:// **Output:** fastq files with adapters removed, reads shorter than 10nt will be discarded. 
-**Arguments not influencable by user:** +**Non-customisable arguments:** -e 0.1 maximum error-rate of 10% -j 8 use 8 threads -m 10 Discard processed reads that are shorter than 10 --n 3 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 3 rounds have been performed. +-n 2 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. *paired end:* ---pair-filter=both filtering criteria must apply to both reads in order for a read pair to be discarded - -*single end:* --O 1 minimal overlap of 1 +--pair-filter=any filtering criteria must apply to any of the two reads in order for a read pair to be discarded #### (pe_)remove_polya_cutadapt -Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/)t is used to remove poly(A) tails. +Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/) is used to remove poly(A) tails. **Input:** fastq reads **Parameters:** Adapters to be removed, specified by user in the columns 'fq1_polya', 'fq2_polya', respectively. **Output:** fastq files with poly(A) tails removed, reads shorter than 10nt will be discarded. -**Arguments like in remove_adapters_cutadapt and additionally:** ---match-read-wildcards This option is used to allow matching wildcard characters also within reads, because if no tail should be trimmed "XXXXXX" is specified in the samples table, which doesn't match any nucleotides, and thus nothing will be done here. --n 2 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. --q 6 trim low-quality 3'ends with a cutoff of 6 nucleotides - - +**Arguments similar to remove_adapters_cutadapt and additionally:** +-n 1 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 1 round has been performed. 
*paired end:* --pair-filter=any filtering criteria must apply to any of the two reads in order for a read pair to be discarded *single end:* -O 1 minimal overlap of 1 @@ -318,8 +312,6 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www *Same for single- and paired-end.* - - #### (pe_)quantification_salmon [Salmon](https://salmon.readthedocs.io/en/latest/salmon.html) is a tool for wicked-fast transcript quantification from RNA-seq data. @@ -358,4 +350,3 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www * -l: fragment length, user specified as `mean` * -s: fragment length SD, user specified as `sd` - diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index defefe3..41fbb3e 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -228,9 +228,13 @@ def main(): snakemake_table.loc[index, 'soft_clip'] = options.soft_clip snakemake_table.loc[index, 'pass_mode'] = options.pass_mode snakemake_table.loc[index, 'libtype'] = options.libtype + if options.trim_polya is True: - snakemake_table.loc[index, 'fq1_polya'] = trim_polya( + fq1_polya_3p, fq1_polya_5p = trim_polya( row[input_dict.loc['mate1_direction', 'labkey']]) + snakemake_table.loc[index, 'fq1_polya_3p'] = fq1_polya_3p + snakemake_table.loc[index, 'fq1_polya_5p'] = fq1_polya_5p + snakemake_table.loc[index, 'kallisto_directionality'] = \ get_kallisto_directionality( row[input_dict.loc['mate1_direction', 'labkey']]) @@ -247,8 +251,10 @@ def main(): input_dict.loc['fq2_5p', 'labkey']] if options.trim_polya is True: - snakemake_table.loc[index, 'fq2_polya'] = trim_polya( + fq2_polya_3p, fq2_polya_5p = trim_polya( row[input_dict.loc['mate2_direction', 'labkey']]) + snakemake_table.loc[index, 'fq2_polya_3p'] = fq2_polya_3p + snakemake_table.loc[index, 'fq2_polya_5p'] = fq2_polya_5p 
snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table = snakemake_table.astype( @@ -322,14 +328,15 @@ def get_kallisto_directionality(directionality): def trim_polya(sense): if sense == 'SENSE': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'AAAAAAAAAAAAAAAAA' + polya_5p = 'XXXXXXXXXXXXXXXXX' elif sense == 'ANTISENSE': - polya = 'TTTTTTTTTTTTTTTTT' - elif sense == 'RANDOM': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'TTTTTTTTTTTTTTTTT' else: - polya = 'XXXXXXXXXXXXXXXXX' - return polya + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'XXXXXXXXXXXXXXXXX' + return polya_3p, polya_5p if __name__ == '__main__': diff --git a/tests/input_files/samples.tsv b/tests/input_files/samples.tsv index cc0ec82..d7ff5bb 100644 --- a/tests/input_files/samples.tsv +++ b/tests/input_files/samples.tsv @@ -1,3 +1,3 @@ -sample seqmode fq1 index_size kmer fq2 fq1_3p fq1_5p fq2_3p fq2_5p organism gtf gtf_filtered genome tr_fasta_filtered sd mean multimappers soft_clip pass_mode libtype kallisto_directionality fq1_polya fq2_polya -synthetic_10_reads_paired_synthetic_10_reads_paired paired_end ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 single_end ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA XXXXXXXXXXXXX +sample seqmode 
fq1 index_size kmer fq1_3p fq1_5p organism gtf gtf_filtered genome tr_fasta_filtered sd mean multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p kallisto_directionality fq2 fq2_3p fq2_5p fq2_polya_3p fq2_polya_5p +synthetic_10_reads_paired_synthetic_10_reads_paired paired_end ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCGT XXXXXXXXXXXXX XXXXXXXXXXXXXXXXX TTTTTTTTTTTTTTTTT +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 single_end ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5 index 4f9b8f2..31cbf64 100644 --- a/tests/test_integration_workflow/expected_output.md5 +++ b/tests/test_integration_workflow/expected_output.md5 @@ -19,7 +19,7 @@ ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_inde 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate2.fastq 500dd49da40b16799aba62aa5cf239ba 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate1.fastq -e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq +1c0796d7e0bdab0e99780b2e11d80c19 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired_SJ.out.tab f551ff091e920357ec0a76807cb51dba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt c0df759ceab72ea4b1a560f991fe6497 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc.fo @@ -45,8 +45,8 @@ b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/sequence_length_distribution.png -5e07e870d516a91647808bd84068d829 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv -6180a904511292b0f173794ae98af991 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam +2e77276535976efccb244627231624bf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv 
+d013650f813b815a790c9e6a51c7559b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.kallisto.pseudo.sam c77480e0235761f2d7f80dbceb2e2806 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/lib_format_counts.json 989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/aux_info/ambig_info.tsv @@ -78,10 +78,10 @@ e72f5d798c99272f8c0166dc77247db1 results/samples/synthetic_10_reads_mate_1_synt 92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias 92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias_3p d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/unmapped_names.txt -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg -c266d31e0a2ad84975cb9de335891e64 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg 
-c266d31e0a2ad84975cb9de335891e64 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg @@ -89,9 +89,9 @@ bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synt 3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa 6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index 2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index 
-c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv -c266d31e0a2ad84975cb9de335891e64 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg +53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg \ No newline at end of file diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index ac6e45e..6b641f2 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -7,7 +7,7 @@ cleanup 
() { rm -rf .java/ rm -rf .snakemake/ rm -rf logs/ - rm -rf results/ + # rm -rf results/ cd $user_dir echo "Exit status: $rc" } diff --git a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 index 569a4a8..b2a6861 100644 --- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 @@ -1,2 +1,2 @@ b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -cb58e046242c2702038e6e21dbd0bdb4 samples.tsv +071f0e942321df5e38c8b2d458f7be06 samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 index 569a4a8..b2a6861 100644 --- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 @@ -1,2 +1,2 @@ b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -cb58e046242c2702038e6e21dbd0bdb4 samples.tsv +071f0e942321df5e38c8b2d458f7be06 samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake_table/test.sh b/tests/test_scripts_labkey_to_snakemake_table/test.sh index 37014ed..dd2707e 100755 --- a/tests/test_scripts_labkey_to_snakemake_table/test.sh +++ b/tests/test_scripts_labkey_to_snakemake_table/test.sh @@ -6,6 +6,7 @@ cleanup () { rm -rf .snakemake/ rm -rf config.yaml rm -rf samples.tsv + rm -rf logs cd $user_dir echo "Exit status: $rc" } diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 07ae6a3..a642c65 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -143,9 +143,13 @@ rule pe_remove_polya_cutadapt: params: polya_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya'], + samples_table.loc[wildcards.sample, 'fq1_polya_3p'], + polya_5_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_polya_5p'], polya_3_mate2 = lambda 
wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya'] + samples_table.loc[wildcards.sample, 'fq2_polya_3p'], + polya_5_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_polya_5p'] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -173,7 +177,9 @@ rule pe_remove_polya_cutadapt: -e 0.1 \ -O 1 \ -a {params.polya_3_mate1} \ + -g {params.polya_5_mate1} \ -A {params.polya_3_mate2} \ + -G {params.polya_5_mate2} \ -o {output.reads1} \ -p {output.reads2} \ {input.reads1} \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index b22e587..d0b8445 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -108,7 +108,9 @@ rule remove_polya_cutadapt: params: polya_3 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya"] + samples_table.loc[wildcards.sample, "fq1_polya_3p"], + polya_5 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1_polya_5p"] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -135,6 +137,7 @@ rule remove_polya_cutadapt: -O 1 \ -m 10 \ -a {params.polya_3} \ + -g {params.polya_5} \ -o {output.reads} \ {input.reads};) \ 1> {log.stdout} 2> {log.stderr}" -- GitLab