diff --git a/rules/merge_samples.smk b/rules/merge_samples.smk
deleted file mode 100644
index d138546f3040a9e625eafc26d86cec7caf82b669..0000000000000000000000000000000000000000
--- a/rules/merge_samples.smk
+++ /dev/null
@@ -1,93 +0,0 @@
-'''
-Author: Monica R. Ticlla
-Afiliation(s): SIB, SwissTPH, UNIBAS
-Description: after pre-processing of paired fastq files form paired-end shotgun
-DNA sequencing of metagenomic samples, fastq files corresponding to the same
-sample are merged into a single pair of fastq files.
-
-'''
-localrules:
-    multiqc_merged_list_files,
-    multiqc_merged
-##----------------------------------------------------------------------------##
-## Local variables
-##----------------------------------------------------------------------------##
-singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
-
-##----------------------------------------------------------------------------##
-## Rules with target files
-##----------------------------------------------------------------------------##
-
-###**THESE TARGET FILES ARE THE FINAL CLEAN**###
-# concatenate cleaned,deduplicated and trimmed fastqs from the same samples
-rule concatenate_fastqs:
-    input:
-        #
-        sample_fwds = lambda wildcards: ['{}/{}_dfinaltrim/{}-{}_{}-R1.clean.nodup.fastp.fastq.gz'.format(
-            PRE_PROC_DIR, DATASETS[ix], value, RUNS[ix], LANES[ix])
-            for ix,value in enumerate(SAMPLES) if (value == wildcards.sample) and (DATASETS[ix] == wildcards.dataset)],
-        #
-        sample_revs = lambda wildcards: ['{}/{}_dfinaltrim/{}-{}_{}-R2.clean.nodup.fastp.fastq.gz'.format(
-            PRE_PROC_DIR, DATASETS[ix], value, RUNS[ix], LANES[ix])
-            for ix,value in enumerate(SAMPLES) if (value==wildcards.sample) and (DATASETS[ix]==wildcards.dataset)]
-    output:
-        sample_fwd = MERGE_DIR + '/{dataset}_merged/{sample}-R1.fastq.gz',
-        sample_rev = MERGE_DIR + '/{dataset}_merged/{sample}-R2.fastq.gz'
-    wildcard_constraints:
-        sample = '\w+'
-    group: 'preprocess'
-    shell:
-        '''
-        cat {input.sample_fwds} > {output.sample_fwd}
-        cat {input.sample_revs} > {output.sample_rev}
-        '''
-# Final quality check with Fastp, but no further QC processing
-rule fastp_concatenated:
-    input:
-        sample_fwd = MERGE_DIR + '/{dataset}_merged/{sample}-R1.fastq.gz',
-        sample_rev = MERGE_DIR + '/{dataset}_merged/{sample}-R2.fastq.gz'
-    output:
-        report1 = MERGE_DIR + '/{dataset}_merged/{sample}.fastp.html',
-        report2 = MERGE_DIR + '/{dataset}_merged/{sample}.fastp.json'
-    wildcard_constraints:
-        sample = '\w+'
-    threads: cpus_avail
-    group: 'preprocess'
-    singularity: singularity_img
-    shell:
-        '''
-        fastp \
-        -A \
-        --in1 {input.sample_fwd} --in2 {input.sample_rev} \
-        --html {output.report1} --json {output.report2} \
-        --thread {threads}
-        '''
-
-rule multiqc_merged_list_files:
-    input:
-        sample_fastp_report = lambda wildcards: ['{}/{}_merged/{}.fastp.json'.format(
-            MERGE_DIR, wildcards.dataset, value) for ix,value in enumerate(SAMPLES)
-            if DATASETS[ix]==wildcards.dataset]
-    output:
-        multiqc_input_list = MERGE_REPORT + '/{dataset}_multiqc_inputs.txt'
-    run:
-        import os
-        try:
-            os.makedirs(os.path.dirname(output.multiqc_input_list))
-        except OSError:
-            pass
-
-        with open(output.multiqc_input_list, mode='w', encoding='utf-8') as out:
-            for item in set(input.sample_fastp_report):
-                out.write("%s\n" % item)
-rule multiqc_merged:
-    input:
-        MERGE_REPORT + '/{dataset}_multiqc_inputs.txt'
-    output:
-        multiqc_report=report(MERGE_REPORT + '/{dataset}_multiqc.html',
-                              category='merge_samples')
-    singularity: singularity_img
-    shell:
-        '''
-        multiqc -f --file-list {input} --filename {output.multiqc_report}
-        '''