Newer
Older
BIOPZ-Iborra de Toledo Paula
committed
"abundance.h5"),
sample=[i for i in pd.unique(samples_table.index.values)])),
sample_name_list = ','.join(expand(
"{sample}",
sample=pd.unique(samples_table.index.values))),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--input',
'--names',
'--txOut',
'--output',
)
)
BIOPZ-Iborra de Toledo Paula
committed
log:
stderr = os.path.join(
config["log_dir"],
current_rule + ".stderr.log"),
BIOPZ-Iborra de Toledo Paula
committed
stdout = os.path.join(
config["log_dir"],
current_rule + ".stdout.log")
BIOPZ-Iborra de Toledo Paula
committed
threads: 1
singularity:
"docker://zavolab/merge_kallisto:0.6"
shell:
"(merge_kallisto.R \
--input {params.tables} \
--names {params.sample_name_list} \
--output {params.dir_out} \
{params.additional_params}) \
BIOPZ-Iborra de Toledo Paula
committed
1> {log.stdout} 2> {log.stderr}"
current_rule = 'pca_salmon'


rule pca_salmon:
    '''
        Run zpca-tpm on the Salmon quantmerge TPM table of one molecule type

        The {molecule} wildcard selects the input table
        ("{molecule}_tpm.tsv") and names the output directory.
    '''
    input:
        tpm = os.path.join(
            config["output_dir"], "summary_salmon", "quantmerge",
            "{molecule}_tpm.tsv"),

    output:
        # The whole directory is the declared output, not individual files.
        out = directory(os.path.join(
            config["output_dir"], "zpca", "pca_salmon_{molecule}"))

    threads: 1

    singularity:
        "docker://zavolab/zpca:0.8.3-1"

    log:
        stdout = os.path.join(
            config["log_dir"], current_rule + "_{molecule}.stdout.log"),
        stderr = os.path.join(
            config["log_dir"], current_rule + "_{molecule}.stderr.log")

    params:
        # '--tpm' and '--out' are set explicitly in the shell command below;
        # they are handed to parse_rule_config as `immutable` (presumably so
        # user-supplied rule config cannot override them -- confirm in
        # parse_rule_config).
        additional_params = parse_rule_config(
            rule_config,
            current_rule=current_rule,
            immutable=('--tpm', '--out'))

    shell:
        "(zpca-tpm \
        --tpm {input.tpm} \
        --out {output.out} \
        {params.additional_params}) \
        1> {log.stdout} 2> {log.stderr}"
current_rule = 'pca_kallisto'


rule pca_kallisto:
    '''
        Run zpca-tpm on the kallisto summary TPM table of one molecule type

        The {molecule} wildcard selects the input table
        ("{molecule}_tpm.tsv") and names the output directory.
    '''
    input:
        tpm = os.path.join(
            config["output_dir"], "summary_kallisto", "{molecule}_tpm.tsv")

    output:
        # The whole directory is the declared output, not individual files.
        out = directory(os.path.join(
            config["output_dir"], "zpca", "pca_kallisto_{molecule}"))

    threads: 1

    singularity:
        "docker://zavolab/zpca:0.8.3-1"

    log:
        stdout = os.path.join(
            config["log_dir"], current_rule + "_{molecule}.stdout.log"),
        stderr = os.path.join(
            config["log_dir"], current_rule + "_{molecule}.stderr.log")

    params:
        # '--tpm' and '--out' are set explicitly in the shell command below;
        # they are handed to parse_rule_config as `immutable` (presumably so
        # user-supplied rule config cannot override them -- confirm in
        # parse_rule_config).
        additional_params = parse_rule_config(
            rule_config,
            current_rule=current_rule,
            immutable=('--tpm', '--out'))

    shell:
        "(zpca-tpm \
        --tpm {input.tpm} \
        --out {output.out} \
        {params.additional_params}) \
        1> {log.stdout} 2> {log.stderr}"
current_rule = 'star_rpm'


rule star_rpm:
    '''
        Create stranded bedGraph coverage with STAR's RPM normalisation

        Re-runs STAR in "inputAlignmentsFromBAM" mode on the sorted genome
        alignment of one sample and writes four bedGraph signal files
        (Unique / UniqueMultiple, str1 / str2) into the sample's
        "STAR_coverage" directory.
    '''
    input:
        # The sequencing mode is part of the BAM file name; it is looked up
        # per sample with get_sample().
        bam = lambda wildcards:
            expand(
                os.path.join(
                    config["output_dir"],
                    "samples",
                    "{sample}",
                    "map_genome",
                    "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam"),
                sample=wildcards.sample,
                seqmode=get_sample(
                    'seqmode',
                    search_id='index',
                    search_value=wildcards.sample)),
        # The index is not referenced in the shell command; it is declared
        # as an input alongside the BAM file.
        bai = lambda wildcards:
            expand(
                os.path.join(
                    config["output_dir"],
                    "samples",
                    "{sample}",
                    "map_genome",
                    "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai"),
                sample=wildcards.sample,
                seqmode=get_sample(
                    'seqmode',
                    search_id='index',
                    search_value=wildcards.sample))

    output:
        # NOTE: the keys str1..str4 are plain labels and do not match the
        # "str1"/"str2" suffix of the files (str2 -> UniqueMultiple.str1,
        # str3 -> Unique.str2).
        str1 = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "STAR_coverage",
            "{sample}_Signal.Unique.str1.out.bg")),
        str2 = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "STAR_coverage",
            "{sample}_Signal.UniqueMultiple.str1.out.bg")),
        str3 = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "STAR_coverage",
            "{sample}_Signal.Unique.str2.out.bg")),
        str4 = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "STAR_coverage",
            "{sample}_Signal.UniqueMultiple.str2.out.bg"))

    shadow: "full"

    params:
        out_dir = lambda wildcards, output:
            os.path.dirname(output.str1),
        # STAR appends its own file names to this prefix, giving
        # "<out_dir>/<sample>_Signal...." as declared under `output`.
        prefix = lambda wildcards, output:
            os.path.join(
                os.path.dirname(output.str1),
                str(wildcards.sample) + "_"),
        # The options set explicitly in the shell command are passed as
        # `immutable` to parse_rule_config.
        additional_params = parse_rule_config(
            rule_config,
            current_rule=current_rule,
            immutable=(
                '--runMode',
                '--inputBAMfile',
                '--outWigType',
                '--outFileNamePrefix',
                )
            )

    singularity:
        "docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"

    log:
        stderr = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            current_rule + ".stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            current_rule + ".stdout.log")

    threads: 4

    # {params.additional_params} must stay inside the parentheses: words
    # placed after the closing ")" of a subshell are a shell syntax error
    # and would never be passed to STAR.
    shell:
        "(mkdir -p {params.out_dir}; \
        chmod -R 777 {params.out_dir}; \
        STAR \
        --runMode inputAlignmentsFromBAM \
        --runThreadN {threads} \
        --inputBAMfile {input.bam} \
        --outWigType bedGraph \
        --outFileNamePrefix {params.prefix} \
        {params.additional_params}) \
        1> {log.stdout} 2> {log.stderr}"
current_rule = 'rename_star_rpm_for_alfa'


rule rename_star_rpm_for_alfa:
    '''
        Copy the STAR coverage bedGraphs to the plus/minus names under ALFA/

        Which STAR signal file is used as "plus" and which as "minus" is
        taken from the sample's 'alfa_plus' / 'alfa_minus' values, looked up
        with get_sample().
    '''
    input:
        plus = lambda wildcards:
            expand(
                os.path.join(
                    config["output_dir"],
                    "samples",
                    "{sample}",
                    "STAR_coverage",
                    "{sample}_Signal.{unique}.{plus}.out.bg"),
                sample=wildcards.sample,
                unique=wildcards.unique,
                plus=get_sample(
                    'alfa_plus',
                    search_id='index',
                    search_value=wildcards.sample)),
        minus = lambda wildcards:
            expand(
                os.path.join(
                    config["output_dir"],
                    "samples",
                    "{sample}",
                    "STAR_coverage",
                    "{sample}_Signal.{unique}.{minus}.out.bg"),
                sample=wildcards.sample,
                unique=wildcards.unique,
                minus=get_sample(
                    'alfa_minus',
                    search_id='index',
                    search_value=wildcards.sample))

    output:
        plus = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "ALFA",
            "{unique}",
            "{sample}.{unique}.plus.bg")),
        minus = temp(os.path.join(
            config["output_dir"],
            "samples",
            "{sample}",
            "ALFA",
            "{unique}",
            "{sample}.{unique}.minus.bg"))

    log:
        stderr = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            current_rule + "_{unique}.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "samples",
            "{sample}",
            current_rule + "_{unique}.stdout.log")

    singularity:
        "docker://ubuntu:focal-20210416"

    shell:
        "(cp {input.plus} {output.plus}; \
        cp {input.minus} {output.minus};) \
        1>{log.stdout} 2>{log.stderr}"
# Rule generate_alfa_index: runs `alfa -a <gtf> -g sorted_genes
# --chr_len <chrNameLength.txt> -p <threads> -o <out_dir>` and sends all
# command output to one log file per {organism}/{index_size}.
# Declared outputs are a stranded and an unstranded ALFA index below
# config["alfa_indexes"]/{organism}/{index_size}/ALFA.
#
# NOTE(review): this block looks damaged in the present view -- confirm
# against the repository before editing:
#   * the first input has no key, although the shell command reads
#     {input.gtf} and the expression uses `wildcards` (a
#     `gtf = lambda wildcards:` line seems to be missing);
#   * `index_stranded` has no file name before `index_unstranded` starts;
#   * the `os.path.dirname(output.index_stranded)` param has no key,
#     although the shell command reads {params.out_dir};
#   * the `singularity:`, `threads:` and `shell:` keywords are not visible;
#   * the "BIOPZ-... / committed" lines look like blame-view residue, not
#     code.
current_rule = 'generate_alfa_index'
rule generate_alfa_index:
''' Generate ALFA index files from sorted GTF file '''
input:
os.path.abspath(get_sample(
BIOPZ-Katsantoni Maria
committed
'gtf',
search_id='organism',
search_value=wildcards.organism)),
chr_len = os.path.join(
config["star_indexes"],
"{organism}",
"{index_size}",
"STAR_index",
"chrNameLength.txt"),
output:
index_stranded = os.path.join(
config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
index_unstranded = os.path.join(
config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
"sorted_genes.unstranded.ALFA_index")
params:
genome_index = "sorted_genes",
os.path.dirname(output.index_stranded),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-g',
'--chr_len',
'-o',
)
)
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0"
log:
os.path.join(
config["log_dir"],
current_rule + "_{organism}_{index_size}.log")
"(alfa -a {input.gtf} \
-g {params.genome_index} \
--chr_len {input.chr_len} \
-p {threads} \
-o {params.out_dir} \
{params.additional_params}) \
&> {log}"
# Rule alfa_qc (per its docstring: run ALFA from stranded bedgraph files).
# The shell command changes into {params.out_dir} and calls `alfa` with
# -g {params.genome_index}, --bedgraph {params.plus} {params.minus}
# {params.name} and -s {params.alfa_orientation}; all command output goes
# to {log}.
# The `gtf` input points at "sorted_genes.stranded.ALFA_index" below
# config["alfa_indexes"]/<organism>/<index_size>/ALFA, with organism and
# index size looked up per sample through get_sample(); `genome_index` is
# the absolute path of "sorted_genes" in that same directory.
#
# NOTE(review): this block looks heavily damaged in the present view --
# confirm against the repository before editing:
#   * the `rule alfa_qc:` header line is not visible;
#   * the `plus` input path is cut off after config["output_dir"] and no
#     `minus` input is visible;
#   * the `output:`, `params:`, `singularity:`, `log:` and `shell:`
#     keywords are not visible, and the `biotypes` / `categories` paths are
#     reduced to their file names;
#   * params `plus`, `minus`, `name` and `alfa_orientation` are read by the
#     shell command but not defined in the visible lines (the
#     get_sample('alfa_directionality', ...) call has no key);
#   * the "BIOPZ-... / committed" lines look like blame-view residue, not
#     code.
current_rule = 'alfa_qc'
'''
Run ALFA from stranded bedgraph files
'''
input:
plus = os.path.join(
config["output_dir"],
gtf = lambda wildcards:
os.path.join(
config["alfa_indexes"],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
get_sample(
'index_size',
search_id='index',
search_value=wildcards.sample),
"ALFA",
"sorted_genes.stranded.ALFA_index")
biotypes = temp(os.path.join(
"ALFA_plots.Biotypes.pdf")),
categories = temp(os.path.join(
"ALFA_plots.Categories.pdf")),
out_dir = lambda wildcards, output:
os.path.dirname(output.biotypes),
genome_index = lambda wildcards, input:
os.path.abspath(
os.path.join(
os.path.dirname(input.gtf),
"sorted_genes")),
BIOPZ-Katsantoni Maria
committed
get_sample(
'alfa_directionality',
search_id='index',
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-g',
'--bedgraph',
'-s',
)
)
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0"
current_rule + ".{unique}.log")
"(cd {params.out_dir}; \
alfa \
-g {params.genome_index} \
--bedgraph {params.plus} {params.minus} {params.name} \
-s {params.alfa_orientation} \
{params.additional_params}) \
&> {log}"
# Rule prepare_multiqc_config (per its docstring: prepare the config for
# MultiQC). It declares a script below <workflow.basedir>/workflow/scripts
# as input and config["output_dir"]/multiqc_config.yaml as output. The logo
# path, intro text and URL are read from config['report_logo'],
# config['report_description'] and config['report_url'] and passed on the
# command line as --custom-logo, --intro-text and --url.
#
# NOTE(review): this block looks damaged in the present view -- confirm
# against the repository before editing:
#   * the script file name that should close the `script` input is missing;
#   * the `params:` and `shell:` keywords are not visible;
#   * the shell command starts mid-way (its opening, including the
#     `--config` option listed as immutable, is missing) and its closing
#     redirection/quote is missing as well.
current_rule = 'prepare_multiqc_config'
rule prepare_multiqc_config:
'''
Prepare config for the MultiQC
'''
input:
script = os.path.join(
workflow.basedir,
"workflow",
"scripts",
output:
multiqc_config = os.path.join(
config["output_dir"],
"multiqc_config.yaml")
logo_path = config['report_logo'],
multiqc_intro_text = config['report_description'],
url = config['report_url'],
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--config',
'--intro-text',
'--custom-logo',
'--url',
)
)
log:
stderr = os.path.join(
config["log_dir"],
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
current_rule + ".stdout.log")
--intro-text '{params.multiqc_intro_text}' \
--custom-logo {params.logo_path} \
--url '{params.url}' \
{params.additional_params}) \
# Rule multiqc_report: runs `multiqc --outdir {output.multiqc_report}
# --config {input.multiqc_config}` over {params.results_dir}
# (config["output_dir"]) and {params.log_dir} (config["log_dir"]).
# The visible inputs are per-sample fastqc directories, TIN_score.tsv
# files, ALFA feature-count tables for "Unique" and "UniqueMultiple", and
# the zpca output directories for "genes" and "transcripts" (Salmon and
# kallisto). The output is declared as a directory, "multiqc_summary".
#
# NOTE(review): this block looks damaged in the present view -- confirm
# against the repository before editing:
#   * the `rule multiqc_report:` header and the `input:` keyword are not
#     visible, and the first input (ending in `mate="fq1"`) starts mid-way;
#   * the `pseudoalignment` path is cut off after config['output_dir'];
#   * the key and opening call of the multiqc_config.yaml input are
#     missing, although the shell command reads {input.multiqc_config};
#   * the `params:`, `log:` and `singularity:` keywords are not visible;
#   * the "BIOPZ-... / committed" lines look like blame-view residue, not
#     code.
current_rule = 'multiqc_report'
BIOPZ-Katsantoni Maria
committed
sample=pd.unique(samples_table.index.values),
mate="fq1"),
fastqc_pe = expand(
os.path.join(
config['output_dir'],
"samples",
"{sample}",
"fastqc",
"{mate}"),
BIOPZ-Katsantoni Maria
committed
sample=[i for i in pd.unique(
samples_table[samples_table['seqmode'] == 'pe'].index.values)],
mate="fq2"),
pseudoalignment = expand(
os.path.join(
config['output_dir'],
BIOPZ-Katsantoni Maria
committed
sample=[i for i in pd.unique(samples_table.index.values)],
seqmode=[get_sample('seqmode', search_id='index', search_value=i)
for i in pd.unique(samples_table.index.values)]),
TIN_score = expand(
os.path.join(
config['output_dir'],
"samples",
"{sample}",
"TIN",
"TIN_score.tsv"),
sample=pd.unique(samples_table.index.values)),
tables = lambda wildcards:
expand(
os.path.join(
config["output_dir"],
"samples",
"{sample}",
"ALFA",
"{unique}",
"{sample}.ALFA_feature_counts.tsv"),
sample=pd.unique(samples_table.index.values),
unique=["Unique", "UniqueMultiple"]),
zpca_salmon = expand(os.path.join(
config["output_dir"],
"zpca",
"pca_salmon_{molecule}"),
molecule=["genes", "transcripts"]),
zpca_kallisto = expand(os.path.join(
config["output_dir"],
"zpca",
"pca_kallisto_{molecule}"),
molecule=["genes", "transcripts"]
),
config["output_dir"],
"multiqc_config.yaml")
output:
multiqc_report = directory(
os.path.join(
config["output_dir"],
"multiqc_summary"))
results_dir = os.path.join(
config["output_dir"]),
log_dir = config["log_dir"],
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--outdir',
'--config',
)
)
stderr = os.path.join(
config["log_dir"],
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
current_rule + ".stdout.log")
"docker://zavolab/multiqc-plugins:1.2.1"
shell:
"(multiqc \
--outdir {output.multiqc_report} \
--config {input.multiqc_config} \
{params.additional_params} \
{params.results_dir} \
{params.log_dir};) \
1> {log.stdout} 2> {log.stderr}"
# Rule sort_bed_4_big (per its description: sort bedGraphs so they work
# with bedGraphToBigWig). The visible end of the shell command passes
# `-i {input.bg}` plus {params.additional_params}, writes stdout to
# {output.sorted_bg} and stderr to {log.stderr}. The output is a temp()
# file "{sample}_{unique}_{strand}.sorted.bg" in the sample's
# bigWig/{unique} directory.
#
# NOTE(review): this block looks damaged in the present view -- confirm
# against the repository before editing:
#   * the `rule sort_bed_4_big:` header and the quotes around the
#     description line are not visible;
#   * the `input:`, `output:`, `singularity:`, `log:` and `shell:` keywords
#     are not visible; the input path has neither its `bg =` key nor its
#     file name, and the start of the shell command (the program being
#     called) is missing;
#   * the "BIOPZ-... / committed" lines look like blame-view residue, not
#     code.
current_rule = 'sort_bed_4_big'
sort bedGraphs in order to work with bedGraphtobigWig
config["output_dir"],
"samples",
"{sample}",
"ALFA",
"{unique}",
sorted_bg = temp(os.path.join(
config["output_dir"],
"samples",
"{sample}",
"bigWig",
"{unique}",
"{sample}_{unique}_{strand}.sorted.bg"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-i',
)
)
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/bedtools:2.27.1--h9a82719_5"
stderr = os.path.join(
config["log_dir"],
current_rule + "_{unique}_{strand}.stderr.log")
-i {input.bg} \
{params.additional_params} \
> {output.sorted_bg};) 2> {log.stderr}"
# Rule prepare_bigWig (per its description: bedGraphToBigWig, for viewing
# in genome browsers). The visible end of the shell command passes
# {params.additional_params}, {input.sorted_bg}, {input.chr_sizes} and
# {output.bigWig}, in that order, and redirects stdout/stderr to the two
# log files.
# `chr_sizes` is the "chrNameLength.txt" file of the STAR index below
# config['star_indexes']/<organism>/<index_size>/STAR_index, with organism
# and index size looked up per sample through get_sample().
# `immutable` is an empty tuple here, i.e. no option is passed to
# parse_rule_config as immutable.
#
# NOTE(review): this block looks damaged in the present view -- confirm
# against the repository before editing:
#   * the `rule prepare_bigWig:` header and the quotes around the
#     description line are not visible;
#   * the `input:`, `output:`, `singularity:`, `log:` and `shell:` keywords
#     are not visible, and the start of the shell command (the program
#     being called) is missing;
#   * the "BIOPZ-... / committed" lines look like blame-view residue, not
#     code.
current_rule = 'prepare_bigWig'
bedGraphtobigWig, for viewing in genome browsers
sorted_bg = os.path.join(
config["output_dir"],
"samples",
"{sample}",
"bigWig",
"{unique}",
"{sample}_{unique}_{strand}.sorted.bg"),
chr_sizes = lambda wildcards:
os.path.join(
config['star_indexes'],
BIOPZ-Katsantoni Maria
committed
get_sample(
'organism',
search_id='index',
search_value=wildcards.sample),
get_sample(
'index_size',
search_id='index',
search_value=wildcards.sample),
"STAR_index",
"chrNameLength.txt")
bigWig = os.path.join(
config["output_dir"],
"samples",
"{sample}",
"bigWig",
"{unique}",
"{sample}_{unique}_{strand}.bw")
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=()
)
BIOPZ-Gypas Foivos
committed
"docker://quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h0b8a92a_2"
stderr = os.path.join(
config["log_dir"],
current_rule + "_{unique}_{strand}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
current_rule + "_{unique}_{strand}.stdout.log")
{params.additional_params} \
{input.sorted_bg} \
{input.chr_sizes} \
{output.bigWig};) \
1> {log.stdout} 2> {log.stderr}"