Skip to content
Snippets Groups Projects
Commit ff17689d authored by Ticlla Ccenhua Monica Roxana's avatar Ticlla Ccenhua Monica Roxana
Browse files

remove unnecessary file

parent 63873d58
Branches
No related tags found
No related merge requests found
'''
Author: Monica R. Ticlla
Affiliation(s): SIB, SwissTPH, UNIBAS
Description: after pre-processing of paired fastq files from paired-end shotgun
DNA sequencing of metagenomic samples, fastq files corresponding to the same
sample are merged into a single pair of fastq files.
'''
# Rules named in `localrules` are treated by Snakemake as local rules
# (run on the host instead of being submitted as cluster jobs).
localrules: multiqc_merged_list_files, multiqc_merged

##----------------------------------------------------------------------------##
## Local variables
##----------------------------------------------------------------------------##
# Container image handed to the `singularity:` directive of the rules in this file.
singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
##----------------------------------------------------------------------------##
## Rules with target files
##----------------------------------------------------------------------------##
###**THESE TARGET FILES ARE THE FINAL CLEAN**###
# Concatenate the cleaned, deduplicated and trimmed fastqs that belong to the
# same sample (within one dataset) into a single R1/R2 pair.
#
# SAMPLES, DATASETS, RUNS and LANES are used as parallel lists: every index whose
# sample and dataset match the wildcards contributes one forward and one reverse
# file. Both input lists are built by the same enumeration, so the R1 and R2
# files are concatenated in the same order.
rule concatenate_fastqs:
    input:
        # Forward (R1) reads of every run/lane of this sample
        sample_fwds = lambda wildcards: [
            '{}/{}_dfinaltrim/{}-{}_{}-R1.clean.nodup.fastp.fastq.gz'.format(
                PRE_PROC_DIR, DATASETS[ix], value, RUNS[ix], LANES[ix])
            for ix, value in enumerate(SAMPLES)
            if (value == wildcards.sample) and (DATASETS[ix] == wildcards.dataset)],
        # Reverse (R2) reads of every run/lane of this sample
        sample_revs = lambda wildcards: [
            '{}/{}_dfinaltrim/{}-{}_{}-R2.clean.nodup.fastp.fastq.gz'.format(
                PRE_PROC_DIR, DATASETS[ix], value, RUNS[ix], LANES[ix])
            for ix, value in enumerate(SAMPLES)
            if (value == wildcards.sample) and (DATASETS[ix] == wildcards.dataset)]
    output:
        sample_fwd = MERGE_DIR + '/{dataset}_merged/{sample}-R1.fastq.gz',
        sample_rev = MERGE_DIR + '/{dataset}_merged/{sample}-R2.fastq.gz'
    wildcard_constraints:
        # Raw string: '\w' in a plain literal is an invalid escape sequence and
        # triggers a warning on recent Python versions.
        sample = r'\w+'
    group: 'preprocess'
    shell:
        '''
        cat {input.sample_fwds} > {output.sample_fwd}
        cat {input.sample_revs} > {output.sample_rev}
        '''
# Final quality check with fastp on the merged R1/R2 pair of a sample.
# Only the HTML and JSON reports are declared as outputs; no --out1/--out2 is
# passed to fastp, so no further processed fastq files are produced here.
rule fastp_concatenated:
    input:
        sample_fwd = MERGE_DIR + '/{dataset}_merged/{sample}-R1.fastq.gz',
        sample_rev = MERGE_DIR + '/{dataset}_merged/{sample}-R2.fastq.gz'
    output:
        report1 = MERGE_DIR + '/{dataset}_merged/{sample}.fastp.html',
        report2 = MERGE_DIR + '/{dataset}_merged/{sample}.fastp.json'
    wildcard_constraints:
        # Raw string: '\w' in a plain literal is an invalid escape sequence and
        # triggers a warning on recent Python versions.
        sample = r'\w+'
    threads: cpus_avail
    group: 'preprocess'
    singularity: singularity_img
    # -A is fastp's switch to disable adapter trimming.
    shell:
        '''
        fastp \
        -A \
        --in1 {input.sample_fwd} --in2 {input.sample_rev} \
        --html {output.report1} --json {output.report2} \
        --thread {threads}
        '''
# Write, for one dataset, a text file listing the fastp JSON report of every
# sample in that dataset (one path per line). The file is used as the
# `--file-list` argument of MultiQC.
rule multiqc_merged_list_files:
    input:
        # SAMPLES may name the same sample several times (one entry per
        # run/lane), so this list can contain duplicates.
        sample_fastp_report = lambda wildcards: [
            '{}/{}_merged/{}.fastp.json'.format(MERGE_DIR, wildcards.dataset, value)
            for ix, value in enumerate(SAMPLES)
            if DATASETS[ix] == wildcards.dataset]
    output:
        multiqc_input_list = MERGE_REPORT + '/{dataset}_multiqc_inputs.txt'
    run:
        import os
        # exist_ok only tolerates an already existing directory; any other
        # OSError (e.g. missing permissions) is reported instead of being
        # silently swallowed.
        os.makedirs(os.path.dirname(output.multiqc_input_list), exist_ok=True)
        # Drop duplicate paths and sort them so the written list is identical
        # from run to run (plain set iteration order is not stable).
        report_paths = sorted(set(input.sample_fastp_report))
        with open(output.multiqc_input_list, mode='w', encoding='utf-8') as out:
            out.writelines('{}\n'.format(path) for path in report_paths)
# Build one MultiQC HTML report per dataset from the paths listed in the
# dataset's `_multiqc_inputs.txt` file. The report is registered in the
# Snakemake report under the 'merge_samples' category.
rule multiqc_merged:
    input:
        MERGE_REPORT + '/{dataset}_multiqc_inputs.txt'
    output:
        multiqc_report = report(
            MERGE_REPORT + '/{dataset}_multiqc.html',
            category='merge_samples'
        )
    singularity: singularity_img
    shell:
        '''
        multiqc -f --file-list {input} --filename {output.multiqc_report}
        '''
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment