diff --git a/rules/preprocess.smk b/rules/preprocess.smk index 4fd7c7dec23dd4c2f17c30fa4ecda7ef25aaad3b..0124820009e0688eb82e70a22451985173074410 100644 --- a/rules/preprocess.smk +++ b/rules/preprocess.smk @@ -30,24 +30,24 @@ BBMAP_REF_DIR = config['preprocess']['bbmap_ref_dir'] # It provides a report with quality check, before and after processing rule trim_adapters: input: - fwd=lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) + fwd = lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file], #RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R1.fastq.gz', - rev=lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) + rev = lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file], #RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R2.fastq.gz' output: - fwd_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'), - rev_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'), - report1=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html', - report2=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json' + fwd_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'), + rev_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'), + report1 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html', + report2 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json' log: LOGS_DIR + '/preprocess/{dataset}_atrimmed/{fastq_file}.log' params: - fastp_dir=PRE_PROC_DIR, + fastp_dir = PRE_PROC_DIR, adapter = config['preprocess']['adapter'], min_length = config['preprocess']['min_length'] - threads: int(cpus_avail/4) + threads: cpus_avail singularity: singularity_img group: 'preprocess' shell: @@ -71,9 +71,9 @@ rule trim_adapters: # rule filter_human: input: - human_ref=BBMAP_REF_DIR, - fwd_tr=rules.trim_adapters.output.fwd_tr, - rev_tr=rules.trim_adapters.output.rev_tr + human_ref = BBMAP_REF_DIR, + fwd_tr = rules.trim_adapters.output.fwd_tr, + rev_tr = rules.trim_adapters.output.rev_tr #fwd_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz', #rev_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz' output: @@ -174,13 +174,13 @@ rule trim3end_dedupe: fwd_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R1.clean.nodup.fastq.gz', rev_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz' params: - fastp_dir=PRE_PROC_DIR, + fastp_dir = PRE_PROC_DIR, min_length = config['preprocess']['min_length'] output: - fwd_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz', - rev_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz', - report1=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html', - report2=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json' + fwd_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz', + rev_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz', + report1 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html', + report2 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json' threads: int(cpus_avail/4) singularity: singularity_img group: 'preprocess' @@ -199,10 +199,12 @@ rule trim3end_dedupe: rule multiqc_preprocess_listing_files: input: # - atrimmed=lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix]) + atrimmed = lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format( + PRE_PROC_DIR, value, FASTQS[ix]) for ix,value in enumerate(DATASETSX) if value==wildcards.dataset], # - dfinaltrim=lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix]) + dfinaltrim = lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format( + PRE_PROC_DIR, value, FASTQS[ix]) for ix,value in enumerate(DATASETSX) if value==wildcards.dataset] output: multiqc_input_list = PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt' @@ -221,7 +223,7 @@ rule multiqc_preprocess: input: PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt' output: - multiqc_report=report(PRE_PROC_REPORT+'/{dataset}_multiqc.html', + multiqc_report = report(PRE_PROC_REPORT+'/{dataset}_multiqc.html', category='preprocess') singularity: singularity_img shell: