Skip to content
Snippets Groups Projects
Commit f6d69c9c authored by Ticlla Ccenhua Monica Roxana
Browse files

allow rule trim_adapters to use all CPUs available

parent 74376ab6
Branches
No related tags found
No related merge requests found
......@@ -30,24 +30,24 @@ BBMAP_REF_DIR = config['preprocess']['bbmap_ref_dir']
# It provides a report with quality check, before and after processing
rule trim_adapters:
input:
fwd=lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
fwd = lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file],
#RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R1.fastq.gz',
rev=lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
rev = lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file],
#RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R2.fastq.gz'
output:
fwd_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'),
rev_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'),
report1=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html',
report2=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json'
fwd_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'),
rev_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'),
report1 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html',
report2 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json'
log:
LOGS_DIR + '/preprocess/{dataset}_atrimmed/{fastq_file}.log'
params:
fastp_dir=PRE_PROC_DIR,
fastp_dir = PRE_PROC_DIR,
adapter = config['preprocess']['adapter'],
min_length = config['preprocess']['min_length']
threads: int(cpus_avail/4)
threads: cpus_avail
singularity: singularity_img
group: 'preprocess'
shell:
......@@ -71,9 +71,9 @@ rule trim_adapters:
#
rule filter_human:
input:
human_ref=BBMAP_REF_DIR,
fwd_tr=rules.trim_adapters.output.fwd_tr,
rev_tr=rules.trim_adapters.output.rev_tr
human_ref = BBMAP_REF_DIR,
fwd_tr = rules.trim_adapters.output.fwd_tr,
rev_tr = rules.trim_adapters.output.rev_tr
#fwd_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz',
#rev_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'
output:
......@@ -174,13 +174,13 @@ rule trim3end_dedupe:
fwd_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R1.clean.nodup.fastq.gz',
rev_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz'
params:
fastp_dir=PRE_PROC_DIR,
fastp_dir = PRE_PROC_DIR,
min_length = config['preprocess']['min_length']
output:
fwd_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz',
rev_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz',
report1=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html',
report2=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json'
fwd_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz',
rev_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz',
report1 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html',
report2 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json'
threads: int(cpus_avail/4)
singularity: singularity_img
group: 'preprocess'
......@@ -199,10 +199,12 @@ rule trim3end_dedupe:
rule multiqc_preprocess_listing_files:
input:
#
atrimmed=lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix])
atrimmed = lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format(
PRE_PROC_DIR, value, FASTQS[ix])
for ix,value in enumerate(DATASETSX) if value==wildcards.dataset],
#
dfinaltrim=lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix])
dfinaltrim = lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format(
PRE_PROC_DIR, value, FASTQS[ix])
for ix,value in enumerate(DATASETSX) if value==wildcards.dataset]
output:
multiqc_input_list = PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt'
......@@ -221,7 +223,7 @@ rule multiqc_preprocess:
input:
PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt'
output:
multiqc_report=report(PRE_PROC_REPORT+'/{dataset}_multiqc.html',
multiqc_report = report(PRE_PROC_REPORT+'/{dataset}_multiqc.html',
category='preprocess')
singularity: singularity_img
shell:
......
0% Loading Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment