diff --git a/rules/preprocess.smk b/rules/preprocess.smk index 7d0f1a5abd388a4e4f700ebcbbf8b8f686f71249..c4e6e6d95791634c0378b1aa5b6fa40b749a80a7 100644 --- a/rules/preprocess.smk +++ b/rules/preprocess.smk @@ -216,32 +216,37 @@ rule dedupe: rev_clean = OUT_DIR+'/{dataset}/preQC/bfiltered/{fastq_file}-R2.clean.fastq.gz', output: fwd_clean_dedup = temp(OUT_DIR+'/{dataset}/preQC/cdedupe/{fastq_file}-R1.clean.nodup.fastq.gz'), - rev_clean_dedup = temp(OUT_DIR+'/{dataset}/preQC/cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz'), - fastq_duplicates = OUT_DIR+'/{dataset}/preQC/cdedupe/{fastq_file}.clean.duplicates.fastq.gz' + rev_clean_dedup = temp(OUT_DIR+'/{dataset}/preQC/cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz') log: OUT_DIR + '/{dataset}/preQC/logs/cdedupe/{fastq_file}.log' threads:cpus_avail params: - dd_mem_gb = (config['preprocess']['filter_human']['bbmap_mem']/3)*2, - rf_mem_gb = config['preprocess']['filter_human']['bbmap_mem']/4 + dd_mem_gb = config['preprocess']['filter_human']['bbmap_mem'] singularity: singularity_img group: 'preprocess' message: "Running dedupe with {threads} cores." shell: ''' - (dedupe.sh \ + (clumpify.sh \ in1={input.fwd_clean} in2={input.rev_clean} \ - out=stdout.fq \ - outd={output.fastq_duplicates} \ - ac=f minidentity=99 \ - -Xmx{params.dd_mem_gb}g| \ - reformat.sh \ - int=t in=stdin.fq \ out1={output.fwd_clean_dedup} \ out2={output.rev_clean_dedup} \ - threads={threads} \ - -Xmx{params.rf_mem_gb}g) &>{log} + dedupe=t \ + t={threads} \ + -Xmx{params.dd_mem_gb}g -eoom) &>{log} ''' + #(dedupe.sh \ + #in1={input.fwd_clean} in2={input.rev_clean} \ + #out=stdout.fq \ + #outd={output.fastq_duplicates} \ + #ac=f minidentity=99 \ + #-Xmx{params.dd_mem_gb}g| \ + #reformat.sh \ + #int=t in=stdin.fq \ + #out1={output.fwd_clean_dedup} \ + #out2={output.rev_clean_dedup} \ + #threads={threads}) &>{log} + #''' # After removal of adapters, human reads, and duplicates, # the reads' 3'end are quality trimmed (cut by quality score) with fastp # Notice that adapter- and quality- filtering are disabled because it was done by rule trim_adapters