Skip to content
Snippets Groups Projects
Commit f6d69c9c authored by Ticlla Ccenhua Monica Roxana
Browse files

allow rule trim_adapters to use all CPUs available

parent 74376ab6
No related branches found
No related tags found
No related merge requests found
...@@ -30,24 +30,24 @@ BBMAP_REF_DIR = config['preprocess']['bbmap_ref_dir'] ...@@ -30,24 +30,24 @@ BBMAP_REF_DIR = config['preprocess']['bbmap_ref_dir']
# It provides a report with quality check, before and after processing # It provides a report with quality check, before and after processing
rule trim_adapters: rule trim_adapters:
input: input:
fwd=lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) fwd = lambda wildcards: ['{}/{}/{}-R1.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file], for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file],
#RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R1.fastq.gz', #RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R1.fastq.gz',
rev=lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value) rev = lambda wildcards: ['{}/{}/{}-R2.fastq.gz'.format(RAW_FASTQ_DIR, DATASETSX[ix], value)
for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file], for ix,value in enumerate(FASTQS) if value==wildcards.fastq_file],
#RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R2.fastq.gz' #RAW_FASTQ_DIR+'/{dataset}/{fastq_file}-R2.fastq.gz'
output: output:
fwd_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'), fwd_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz'),
rev_tr=temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'), rev_tr = temp(PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'),
report1=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html', report1 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.html',
report2=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json' report2 = PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}.fastp.json'
log: log:
LOGS_DIR + '/preprocess/{dataset}_atrimmed/{fastq_file}.log' LOGS_DIR + '/preprocess/{dataset}_atrimmed/{fastq_file}.log'
params: params:
fastp_dir=PRE_PROC_DIR, fastp_dir = PRE_PROC_DIR,
adapter = config['preprocess']['adapter'], adapter = config['preprocess']['adapter'],
min_length = config['preprocess']['min_length'] min_length = config['preprocess']['min_length']
threads: int(cpus_avail/4) threads: cpus_avail
singularity: singularity_img singularity: singularity_img
group: 'preprocess' group: 'preprocess'
shell: shell:
...@@ -71,9 +71,9 @@ rule trim_adapters: ...@@ -71,9 +71,9 @@ rule trim_adapters:
# #
rule filter_human: rule filter_human:
input: input:
human_ref=BBMAP_REF_DIR, human_ref = BBMAP_REF_DIR,
fwd_tr=rules.trim_adapters.output.fwd_tr, fwd_tr = rules.trim_adapters.output.fwd_tr,
rev_tr=rules.trim_adapters.output.rev_tr rev_tr = rules.trim_adapters.output.rev_tr
#fwd_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz', #fwd_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R1.fastp.fastq.gz',
#rev_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz' #rev_tr=PRE_PROC_DIR+'/{dataset}_atrimmed/{fastq_file}-R2.fastp.fastq.gz'
output: output:
...@@ -174,13 +174,13 @@ rule trim3end_dedupe: ...@@ -174,13 +174,13 @@ rule trim3end_dedupe:
fwd_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R1.clean.nodup.fastq.gz', fwd_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R1.clean.nodup.fastq.gz',
rev_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz' rev_clean_dedup = PRE_PROC_DIR+'/{dataset}_cdedupe/{fastq_file}-R2.clean.nodup.fastq.gz'
params: params:
fastp_dir=PRE_PROC_DIR, fastp_dir = PRE_PROC_DIR,
min_length = config['preprocess']['min_length'] min_length = config['preprocess']['min_length']
output: output:
fwd_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz', fwd_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R1.clean.nodup.fastp.fastq.gz',
rev_tr=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz', rev_tr = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}-R2.clean.nodup.fastp.fastq.gz',
report1=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html', report1 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.html',
report2=PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json' report2 = PRE_PROC_DIR+'/{dataset}_dfinaltrim/{fastq_file}.clean.nodup.fastp.json'
threads: int(cpus_avail/4) threads: int(cpus_avail/4)
singularity: singularity_img singularity: singularity_img
group: 'preprocess' group: 'preprocess'
...@@ -199,10 +199,12 @@ rule trim3end_dedupe: ...@@ -199,10 +199,12 @@ rule trim3end_dedupe:
rule multiqc_preprocess_listing_files: rule multiqc_preprocess_listing_files:
input: input:
# #
atrimmed=lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix]) atrimmed = lambda wildcards: ['{}/{}_atrimmed/{}.fastp.json'.format(
PRE_PROC_DIR, value, FASTQS[ix])
for ix,value in enumerate(DATASETSX) if value==wildcards.dataset], for ix,value in enumerate(DATASETSX) if value==wildcards.dataset],
# #
dfinaltrim=lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format(PRE_PROC_DIR, value, FASTQS[ix]) dfinaltrim = lambda wildcards: ['{}/{}_dfinaltrim/{}.clean.nodup.fastp.json'.format(
PRE_PROC_DIR, value, FASTQS[ix])
for ix,value in enumerate(DATASETSX) if value==wildcards.dataset] for ix,value in enumerate(DATASETSX) if value==wildcards.dataset]
output: output:
multiqc_input_list = PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt' multiqc_input_list = PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt'
...@@ -221,7 +223,7 @@ rule multiqc_preprocess: ...@@ -221,7 +223,7 @@ rule multiqc_preprocess:
input: input:
PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt' PRE_PROC_REPORT+'/{dataset}_multiqc_inputs.txt'
output: output:
multiqc_report=report(PRE_PROC_REPORT+'/{dataset}_multiqc.html', multiqc_report = report(PRE_PROC_REPORT+'/{dataset}_multiqc.html',
category='preprocess') category='preprocess')
singularity: singularity_img singularity: singularity_img
shell: shell:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment