diff --git a/pipeline_documentation.md b/pipeline_documentation.md index 41bc09574a0795e94593744c4be876cfc55f52f8..3774eb0fbc930234fea3eb9c38ac3eb065d545e5 100644 --- a/pipeline_documentation.md +++ b/pipeline_documentation.md @@ -39,7 +39,7 @@ This document describes the individual rules of the pipeline for information pur ## Detailed description of steps The pipeline consists of three snakefiles: A main Snakefile and an individual Snakefile for each sequencing mode (single-end and paired-end), as parameters to individual tools differ between the sequencing modes. The main Snakefile contains some general rules for the creation of indices, rules that are applicable to both sequencing modes, and rules that deal with summary steps and combining results across samples of the run. Individual rules of the pipeline are described briefly, and links to the respective software manuals are given. If parameters can be influenced by the user (via the samples table) they are also described. -Description of paired- and single-end rules are combined, only differences are highlighted. +Descriptions of paired- and single-end rules are combined; only differences are highlighted. ### General @@ -74,9 +74,10 @@ soft_clip | "Local": standard local alignment with soft-clipping allowed. "EndTo pass_mode | "None": 1-pass mapping; "Basic": basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly; for star mapping (type=STRING) libtype | "A": automatically infer. For more info see [salmon manual](https://salmon.readthedocs.io/en/latest/salmon.html) (type=STRING) kallisto_directionality | "--fr-stranded":Strand specific reads, first read forward. 
"--rf-stranded": Strand specific reads, first read reverse; for kallisto (type=STRING) -fq1_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING) -fq2_polya | stretch of As or Ts, depending on read orientation; for cutadapt (type=STRING) - +fq1_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING) +fq1_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING) +fq2_polya_3p | stretch of As or Ts, depending on read orientation, trimmed from the 3' end of the read; for cutadapt (type=STRING) +fq2_polya_5p | stretch of As or Ts, depending on read orientation, trimmed from the 5' end of the read; for cutadapt (type=STRING) #### create log directories Currently not implemented as Snakemake rule, but general statement. @@ -260,33 +261,26 @@ Creates an interactive report after the pipeline is finished. [MultiQC](https:// **Output:** fastq files with adapters removed, reads shorter than 10nt will be discarded. -**Arguments not influencable by user:** +**Non-customisable arguments:** -e 0.1 maximum error-rate of 10% -j 8 use 8 threads -m 10 Discard processed reads that are shorter than 10 --n 3 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 3 rounds have been performed. +-n 2 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. *paired end:* ---pair-filter=both filtering criteria must apply to both reads in order for a read pair to be discarded - -*single end:* --O 1 minimal overlap of 1 +--pair-filter=any filtering criteria must apply to any of the two reads in order for a read pair to be discarded #### (pe_)remove_polya_cutadapt -Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/)t is used to remove poly(A) tails. 
+Here, [Cutadapt](https://cutadapt.readthedocs.io/en/stable/) is used to remove poly(A) tails. **Input:** fastq reads **Parameters:** Adapters to be removed, specified by user in the columns 'fq1_polya', 'fq2_polya', respectively. **Output:** fastq files with poly(A) tails removed, reads shorter than 10nt will be discarded. -**Arguments like in remove_adapters_cutadapt and additionally:** ---match-read-wildcards This option is used to allow matching wildcard characters also within reads, because if no tail should be trimmed "XXXXXX" is specified in the samples table, which doesn't match any nucleotides, and thus nothing will be done here. --n 2 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 2 rounds have been performed. --q 6 trim low-quality 3'ends with a cutoff of 6 nucleotides - - +**Arguments similar to remove_adapters_cutadapt and additionally:** +-n 1 search for all the given adapter sequences repeatedly, either until no adapter match was found or until 1 round has been performed. *paired end:* ---pair-filter=both filtering criteria must apply to both reads in order for a read pair to be discarded +--pair-filter=any filtering criteria must apply to any of the two reads in order for a read pair to be discarded *single end:* -O 1 minimal overlap of 1 @@ -318,8 +312,6 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www *Same for single- and paired-end.* - - #### (pe_)quantification_salmon [Salmon](https://salmon.readthedocs.io/en/latest/salmon.html) is a tool for wicked-fast transcript quantification from RNA-seq data. 
@@ -358,4 +350,3 @@ Spliced Transcripts Alignment to a Reference; Read the [Publication](https://www * -l: fragment length, user specified as `mean` * -s: fragment length SD, user specified as `sd` - diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index defefe3d57d86fd7dc2c96f0bb2002b1031127b7..41fbb3e1455a47fb7377805ef602314d0836c46a 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -228,9 +228,13 @@ def main(): snakemake_table.loc[index, 'soft_clip'] = options.soft_clip snakemake_table.loc[index, 'pass_mode'] = options.pass_mode snakemake_table.loc[index, 'libtype'] = options.libtype + if options.trim_polya is True: - snakemake_table.loc[index, 'fq1_polya'] = trim_polya( + fq1_polya_3p, fq1_polya_5p = trim_polya( row[input_dict.loc['mate1_direction', 'labkey']]) + snakemake_table.loc[index, 'fq1_polya_3p'] = fq1_polya_3p + snakemake_table.loc[index, 'fq1_polya_5p'] = fq1_polya_5p + snakemake_table.loc[index, 'kallisto_directionality'] = \ get_kallisto_directionality( row[input_dict.loc['mate1_direction', 'labkey']]) @@ -247,8 +251,10 @@ def main(): input_dict.loc['fq2_5p', 'labkey']] if options.trim_polya is True: - snakemake_table.loc[index, 'fq2_polya'] = trim_polya( + fq2_polya_3p, fq2_polya_5p = trim_polya( row[input_dict.loc['mate2_direction', 'labkey']]) + snakemake_table.loc[index, 'fq2_polya_3p'] = fq2_polya_3p + snakemake_table.loc[index, 'fq2_polya_5p'] = fq2_polya_5p snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table = snakemake_table.astype( @@ -322,14 +328,15 @@ def get_kallisto_directionality(directionality): def trim_polya(sense): if sense == 'SENSE': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'AAAAAAAAAAAAAAAAA' + polya_5p = 'XXXXXXXXXXXXXXXXX' elif sense == 'ANTISENSE': - polya = 'TTTTTTTTTTTTTTTTT' - elif sense == 'RANDOM': - polya = 'AAAAAAAAAAAAAAAAA' + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'TTTTTTTTTTTTTTTTT' else: - polya = 'XXXXXXXXXXXXXXXXX' - return 
polya + polya_3p = 'XXXXXXXXXXXXXXXXX' + polya_5p = 'XXXXXXXXXXXXXXXXX' + return polya_3p, polya_5p if __name__ == '__main__': diff --git a/tests/input_files/samples.tsv b/tests/input_files/samples.tsv index cc0ec82c5caa7d5b7fa0618e80260bc6f2676b92..d7ff5bba2366cb611105b667a45815b0d9dd3902 100644 --- a/tests/input_files/samples.tsv +++ b/tests/input_files/samples.tsv @@ -1,3 +1,3 @@ -sample seqmode fq1 index_size kmer fq2 fq1_3p fq1_5p fq2_3p fq2_5p organism gtf gtf_filtered genome tr_fasta_filtered sd mean multimappers soft_clip pass_mode libtype kallisto_directionality fq1_polya fq2_polya -synthetic_10_reads_paired_synthetic_10_reads_paired paired_end ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 single_end ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA XXXXXXXXXXXXX +sample seqmode fq1 index_size kmer fq1_3p fq1_5p organism gtf gtf_filtered genome tr_fasta_filtered sd mean multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p kallisto_directionality fq2 fq2_3p fq2_5p fq2_polya_3p fq2_polya_5p +synthetic_10_reads_paired_synthetic_10_reads_paired paired_end ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf 
../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCGT XXXXXXXXXXXXX XXXXXXXXXXXXXXXXX TTTTTTTTTTTTTTTTT +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 single_end ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa ../input_files/homo_sapiens/transcriptome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5 index 4f9b8f2ed2a3d3d17fd10324f647086e1336fae4..31cbf6460fa03ea3e0cdc1edd6d132791d44a8ba 100644 --- a/tests/test_integration_workflow/expected_output.md5 +++ b/tests/test_integration_workflow/expected_output.md5 @@ -19,7 +19,7 @@ ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_inde 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate2.fastq 500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate1.fastq -e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq +1c0796d7e0bdab0e99780b2e11d80c19 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired_SJ.out.tab f551ff091e920357ec0a76807cb51dba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt c0df759ceab72ea4b1a560f991fe6497 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc.fo @@ -45,8 +45,8 @@ b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synt 69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/sequence_length_distribution.png -5e07e870d516a91647808bd84068d829 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv -6180a904511292b0f173794ae98af991 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam +2e77276535976efccb244627231624bf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv +d013650f813b815a790c9e6a51c7559b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.kallisto.pseudo.sam c77480e0235761f2d7f80dbceb2e2806 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/lib_format_counts.json 989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/synthetic_10_reads_paired_synthetic_10_reads_paired/aux_info/ambig_info.tsv @@ -78,10 +78,10 @@ e72f5d798c99272f8c0166dc77247db1 results/samples/synthetic_10_reads_mate_1_synt 92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias 92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/observed_bias_3p d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/aux_info/unmapped_names.txt -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg -c266d31e0a2ad84975cb9de335891e64 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg -c266d31e0a2ad84975cb9de335891e64 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg +16652c037090f3eed1123618a2e75107 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg @@ -89,9 +89,9 @@ bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synt 3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa 6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index 2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index -c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv -c266d31e0a2ad84975cb9de335891e64 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg -0139e75ddbfe6eb081c2c2d9b9108ab4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg +53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg \ No newline at end of file diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index ac6e45ec44898fb8204019cc26e3d85faaaecb62..6b641f2923a28beb5937a1668b54b38ae627716b 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -7,7 +7,7 @@ cleanup () { rm -rf .java/ rm -rf .snakemake/ rm -rf logs/ - rm -rf results/ + # rm -rf results/ cd $user_dir echo "Exit status: $rc" } diff --git 
a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 index 569a4a8d598d09e9a978283e03c2992d16fe42e0..b2a68615ae1fadc89294480c79e20d5db14b8efb 100644 --- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 @@ -1,2 +1,2 @@ b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -cb58e046242c2702038e6e21dbd0bdb4 samples.tsv +071f0e942321df5e38c8b2d458f7be06 samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 index 569a4a8d598d09e9a978283e03c2992d16fe42e0..b2a68615ae1fadc89294480c79e20d5db14b8efb 100644 --- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 @@ -1,2 +1,2 @@ b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -cb58e046242c2702038e6e21dbd0bdb4 samples.tsv +071f0e942321df5e38c8b2d458f7be06 samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake_table/test.sh b/tests/test_scripts_labkey_to_snakemake_table/test.sh index 37014eda5e9fb41b564f6e4b78743c09511e5f3c..dd2707e95c8f98156c994440c63b4f15239e53b2 100755 --- a/tests/test_scripts_labkey_to_snakemake_table/test.sh +++ b/tests/test_scripts_labkey_to_snakemake_table/test.sh @@ -6,6 +6,7 @@ cleanup () { rm -rf .snakemake/ rm -rf config.yaml rm -rf samples.tsv + rm -rf logs cd $user_dir echo "Exit status: $rc" } diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 07ae6a342478e351e980051c8917a8003e09e05e..a642c65ba093b4798c03fae59a51c091266728a6 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -143,9 +143,13 @@ rule pe_remove_polya_cutadapt: params: polya_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya'], + samples_table.loc[wildcards.sample, 
'fq1_polya_3p'], + polya_5_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_polya_5p'], polya_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya'] + samples_table.loc[wildcards.sample, 'fq2_polya_3p'], + polya_5_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_polya_5p'] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -173,7 +177,9 @@ rule pe_remove_polya_cutadapt: -e 0.1 \ -O 1 \ -a {params.polya_3_mate1} \ + -g {params.polya_5_mate1} \ -A {params.polya_3_mate2} \ + -G {params.polya_5_mate2} \ -o {output.reads1} \ -p {output.reads2} \ {input.reads1} \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index b22e58771a95427045aaf5e5091b7811ab9d195c..d0b844544718f3df3d02e2343ae7518257741bd3 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -108,7 +108,9 @@ rule remove_polya_cutadapt: params: polya_3 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya"] + samples_table.loc[wildcards.sample, "fq1_polya_3p"], + polya_5 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1_polya_5p"] singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -135,6 +137,7 @@ rule remove_polya_cutadapt: -O 1 \ -m 10 \ -a {params.polya_3} \ + -g {params.polya_5} \ -o {output.reads} \ {input.reads};) \ 1> {log.stdout} 2> {log.stderr}"