Newer
Older
current_rule = 'pe_remove_adapters_cutadapt'
BIOPZ-Katsantoni Maria
committed
'''
BIOPZ-Katsantoni Maria
committed
'''
BIOPZ-Katsantoni Maria
committed
config["output_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
BIOPZ-Katsantoni Maria
committed
reads2 = os.path.join(
config["output_dir"],
"samples",
BIOPZ-Katsantoni Maria
committed
"{sample}",
BIOPZ-Katsantoni Maria
committed
reads1 = temp(os.path.join(
config["output_dir"],
"{sample}.pe.remove_adapters_mate1.fastq.gz")),
reads2 = temp(os.path.join(
config["output_dir"],
"{sample}.pe.remove_adapters_mate2.fastq.gz"))
BIOPZ-Katsantoni Maria
committed
params:
adapter_3_mate1 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample('fq1_3p', search_id='index', search_value=wildcards.sample),
adapter_5_mate1 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample('fq1_5p', search_id='index', search_value=wildcards.sample),
adapter_3_mate2 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample('fq2_3p', search_id='index', search_value=wildcards.sample),
adapter_5_mate2 = lambda wildcards:
get_sample('fq2_5p', search_id='index', search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stderr.log"),
BIOPZ-Katsantoni Maria
committed
stdout = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stdout.log")
BIOPZ-Katsantoni Maria
committed
shell:
"(cutadapt \
-j {threads} \
-a {params.adapter_3_mate1} \
-g {params.adapter_5_mate1} \
-A {params.adapter_3_mate2} \
-G {params.adapter_5_mate2} \
{params.additional_params} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
1> {log.stdout} 2>{log.stderr}"
# NOTE(review): the "BIOPZ-..." / "committed" lines throughout this rule are
# not Snakemake code; they look like residue from a blame view -- confirm
# against the real file before relying on this listing.
# Name of the rule defined next. It is reused below as the lookup key passed
# to parse_rule_config() and as the stem of the log file names.
current_rule = 'pe_remove_polya_cutadapt'
BIOPZ-Gypas Foivos
committed
rule pe_remove_polya_cutadapt:
BIOPZ-Katsantoni Maria
committed
'''
Remove polyA tails
'''
# Inputs: the two mate files named "*.pe.remove_adapters_mate{1,2}.fastq.gz"
# under config["output_dir"].
# NOTE(review): the reads1 path shows fewer path components than reads2 in
# this view -- possibly missing lines; confirm.
input:
reads1 = os.path.join(
config["output_dir"],
"{sample}.pe.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"{sample}",
"{sample}.pe.remove_adapters_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
# Presumably the rule's outputs (the directive line is not visible here; the
# command below writes to {output.reads1}/{output.reads2}). Both paths are
# wrapped in Snakemake's temp().
reads1 = temp(os.path.join(
config["output_dir"],
"{sample}",
"{sample}.pe.remove_polya_mate1.fastq.gz")),
reads2 = temp(os.path.join(
config["output_dir"],
"{sample}",
"{sample}.pe.remove_polya_mate2.fastq.gz"))
BIOPZ-Katsantoni Maria
committed
# Parameters resolved per sample. get_sample() is defined elsewhere;
# presumably it returns the named field for the sample whose 'index' equals
# the {sample} wildcard -- TODO confirm.
params:
# Passed to cutadapt as -a (see command below).
polya_3_mate1 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'fq1_polya_3p',
search_id='index',
search_value=wildcards.sample),
BIOPZ-Katsantoni Maria
committed
# NOTE(review): the parameter name this lookup is assigned to is not visible
# in this view.
get_sample(
'fq1_polya_5p',
search_id='index',
search_value=wildcards.sample),
# Passed to cutadapt as -A (see command below).
polya_3_mate2 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'fq2_polya_3p',
search_id='index',
search_value=wildcards.sample),
BIOPZ-Katsantoni Maria
committed
# NOTE(review): the parameter name this lookup is assigned to is not visible
# in this view.
get_sample(
'fq2_polya_5p',
search_id='index',
search_value=wildcards.sample),
# Extra command-line options for this rule taken from rule_config. The flags
# listed in `immutable` are handed to parse_rule_config(); presumably they
# cannot be overridden by the user -- confirm in parse_rule_config().
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
BIOPZ-Katsantoni Maria
committed
# Container image: cutadapt 3.4 from quay.io/biocontainers.
singularity:
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
# Log file paths (referenced as {log.stderr}/{log.stdout} in the command),
# built from config["log_dir"], the sample name and current_rule.
stderr = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stderr.log"),
BIOPZ-Katsantoni Maria
committed
stdout = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stdout.log")
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
# cutadapt command: -j takes the rule's thread count, -a/-A the per-mate
# polyA parameters, -o/-p the two output files; stdout and stderr are
# redirected to the log files.
# NOTE(review): only {input.reads1} is visible as a positional input here;
# the second mate may be on a line missing from this view -- confirm.
"(cutadapt \
-j {threads} \
-a {params.polya_3_mate1} \
-A {params.polya_3_mate2} \
{params.additional_params} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
1> {log.stdout} 2>{log.stderr}"
# NOTE(review): the "BIOPZ-..." / "committed" lines throughout this rule are
# not Snakemake code; they look like residue from a blame view -- confirm
# against the real file before relying on this listing.
# Name of the rule defined next. It is reused below as the lookup key passed
# to parse_rule_config() and as the stem of the log file name.
current_rule = 'pe_map_genome_star'
BIOPZ-Gypas Foivos
committed
rule pe_map_genome_star:
BIOPZ-Katsantoni Maria
committed
'''
Map to genome using STAR
'''
input:
# Path to "chrNameLength.txt" inside the STAR index directory, built from
# config["star_indexes"] plus the sample's 'organism' and 'index_size' values.
# get_sample() is defined elsewhere; presumably it returns the named field
# for the sample whose 'index' equals the {sample} wildcard -- TODO confirm.
index = lambda wildcards:
os.path.join(
config["star_indexes"],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
get_sample(
'index_size',
search_id='index',
search_value=wildcards.sample),
"STAR_index",
"chrNameLength.txt"),
# The two mate files given to --readFilesIn.
# NOTE(review): both path expressions are cut off in this view (no file name,
# no closing parenthesis).
reads1 = os.path.join(
config["output_dir"],
"{sample}",
reads2 = os.path.join(
config["output_dir"],
"{sample}",
BIOPZ-Katsantoni Maria
committed
output:
# Coordinate-sorted BAM; the command below redirects STAR's stdout here.
bam = os.path.join(
config["output_dir"],
"{sample}",
"map_genome",
"{sample}.pe.Aligned.sortedByCoord.out.bam"),
# NOTE(review): the logfile path is cut off in this view.
logfile = os.path.join(
config["output_dir"],
"{sample}",
"map_genome",
BIOPZ-Katsantoni Maria
committed
params:
# Sample name, used in the read-group line (SM:) of the command.
sample_id = "{sample}",
# Absolute path used for --genomeDir, built from config["star_indexes"] and
# the sample's 'organism' and 'index_size' values (expression cut off here).
index = lambda wildcards:
os.path.abspath(os.path.join(
config["star_indexes"],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
get_sample(
'index_size',
search_id='index',
search_value=wildcards.sample),
# Value for --outFileNamePrefix (expression cut off in this view).
outFileNamePrefix = os.path.join(
config["output_dir"],
"{sample}",
"map_genome",
# Value for --outFilterMultimapNmax.
multimappers = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'multimappers',
search_id='index',
search_value=wildcards.sample),
# Value for --alignEndsType.
soft_clip = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'soft_clip',
search_id='index',
search_value=wildcards.sample),
# Value for --twopassMode.
pass_mode = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'pass_mode',
search_id='index',
search_value=wildcards.sample),
# Extra command-line options for this rule taken from rule_config. The
# options listed in `immutable` are the ones the command below sets itself;
# presumably parse_rule_config() refuses user overrides of them -- confirm.
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--twopassMode',
'--genomeDir',
'--readFilesIn',
'--readFilesCommand',
'--outFilterMultimapNmax',
'--outFileNamePrefix',
'--outSAMattributes',
'--outStd',
'--outSAMtype',
'--outSAMattrRGline',
'--alignEndsType',
)
)
# Container image: STAR 2.7.8a from quay.io/biocontainers.
singularity:
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
# Thread count, passed to STAR as --runThreadN.
threads: 12
BIOPZ-Katsantoni Maria
committed
# Log file path (referenced as {log.stderr} in the command), built from
# config["log_dir"], the sample name and current_rule.
stderr = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stderr.log")
# STAR command: input files are read through zcat (--readFilesCommand), the
# sorted BAM is emitted on stdout (--outStd BAM_SortedByCoordinate) and
# redirected into {output.bam}; stderr goes to the log file.
shell:
"(STAR \
--twopassMode {params.pass_mode} \
--runThreadN {threads} \
--genomeDir {params.index} \
--readFilesIn {input.reads1} {input.reads2} \
--readFilesCommand zcat \
--outFilterMultimapNmax {params.multimappers} \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
--alignEndsType {params.soft_clip} \
{params.additional_params} \
> {output.bam};) \
BIOPZ-Katsantoni Maria
committed
2> {log.stderr}"
# NOTE(review): the "BIOPZ-..." / "committed" lines throughout this rule are
# not Snakemake code; they look like residue from a blame view -- confirm
# against the real file before relying on this listing.
# Name of the rule defined next. It is reused below as the lookup key passed
# to parse_rule_config() and as the stem of the log file names.
current_rule = 'pe_quantification_salmon'
BIOPZ-Gypas Foivos
committed
rule pe_quantification_salmon:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Salmon
'''
input:
# The two mate files given to salmon as -1 / -2.
# NOTE(review): both path expressions are cut off in this view (no file name,
# no closing parenthesis).
reads1 = os.path.join(
config["output_dir"],
"{sample}",
reads2 = os.path.join(
config["output_dir"],
"{sample}",
# Absolute path of the sample's 'gtf' value; passed to salmon as --geneMap.
# get_sample() is defined elsewhere; presumably it returns the named field
# for the sample whose 'index' equals the {sample} wildcard -- TODO confirm.
gtf = lambda wildcards:
os.path.abspath(get_sample(
BIOPZ-Katsantoni Maria
committed
'gtf',
search_id='index',
search_value=wildcards.sample)),
# Salmon index path built from config["salmon_indexes"] and the sample's
# 'organism' and 'kmer' values (expression cut off in this view); passed to
# salmon as --index.
index = lambda wildcards:
os.path.join(
config["salmon_indexes"],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
get_sample(
'kmer',
search_id='index',
search_value=wildcards.sample),
BIOPZ-Katsantoni Maria
committed
# Declared output files: gene-level and transcript-level estimates plus two
# auxiliary files (meta_info.json, flenDist.txt).
# NOTE(review): gn_estimates/tr_estimates show fewer path components than
# meta_info/flenDist in this view -- possibly missing lines; confirm.
output:
gn_estimates = os.path.join(
config["output_dir"],
"{sample}",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"{sample}",
"quant.sf"),
meta_info = os.path.join(
config["output_dir"],
"samples",
"{sample}",
"{sample}.salmon.pe",
"aux_info",
"meta_info.json"),
flenDist = os.path.join(
config["output_dir"],
"samples",
"{sample}",
"{sample}.salmon.pe",
"libParams",
"flenDist.txt")
# Snakemake shadow directive, "minimal" mode.
shadow: "minimal"
BIOPZ-Katsantoni Maria
committed
params:
# Directory passed to salmon as -o (expression cut off in this view).
output_dir = os.path.join(
config["output_dir"],
"{sample}",
# Value for --libType, taken from the sample's 'libtype' field.
libType = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'libtype',
search_id='index',
search_value=wildcards.sample),
# Extra command-line options for this rule taken from rule_config. The
# options listed in `immutable` are handed to parse_rule_config(); presumably
# they cannot be overridden by the user -- confirm in parse_rule_config().
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--libType',
'--fldMean',
'--fldSD',
'--index',
'--geneMap',
'-1',
'-2',
'-o',
)
)
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
# Log file paths (referenced as {log.stderr}/{log.stdout} in the command),
# built from config["log_dir"], the sample name and current_rule.
stderr = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stderr.log"),
BIOPZ-Katsantoni Maria
committed
stdout = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stdout.log"),
BIOPZ-Katsantoni Maria
committed
# Thread count, passed to salmon as --threads.
threads: 6
# Container image: salmon 1.4.0 from quay.io/biocontainers.
singularity:
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
BIOPZ-Katsantoni Maria
committed
# salmon quant command; stdout and stderr are redirected to the log files.
shell:
"(salmon quant \
--libType {params.libType} \
--threads {threads} \
{params.additional_params} \
--index {input.index} \
--geneMap {input.gtf} \
-1 {input.reads1} \
-2 {input.reads2} \
-o {params.output_dir}; \
) 1> {log.stdout} 2> {log.stderr}"
current_rule = 'pe_genome_quantification_kallisto'
BIOPZ-Gypas Foivos
committed
rule pe_genome_quantification_kallisto:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads1 = os.path.join(
config["output_dir"],
"{sample}",
reads2 = os.path.join(
config["output_dir"],
"{sample}",
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
BIOPZ-Katsantoni Maria
committed
output:
pseudoalignment = os.path.join(
config["output_dir"],
"{sample}",
"quant_kallisto",
"{sample}.pe.kallisto.pseudo.sam"),
abundances = os.path.join(
config["output_dir"],
"samples",
"{sample}",
"quant_kallisto",
"abundance.h5")
shadow: "minimal"
BIOPZ-Katsantoni Maria
committed
params:
output_dir = os.path.join(
BIOPZ-Katsantoni Maria
committed
config["output_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
"quant_kallisto"),
directionality = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
get_sample(
'kallisto_directionality',
search_id='index',
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--single',
'-i',
'-o',
'-l',
'-s',
'--pseudobam',
'--fr-stranded',
'--rf-stranded',
)
)
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2"
BIOPZ-Katsantoni Maria
committed
threads: 8
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
BIOPZ-Katsantoni Maria
committed
"{sample}",
current_rule + ".stderr.log")
BIOPZ-Katsantoni Maria
committed
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
{params.additional_params} \
--pseudobam \
BIOPZ-Katsantoni Maria
committed
{input.reads1} {input.reads2} > {output.pseudoalignment}) \