Skip to content
Snippets Groups Projects
Commit 4f7447ac authored by Dominik Burri's avatar Dominik Burri
Browse files

alfa for all samples

parent f54ed2c0
No related branches found
No related tags found
1 merge request!39Alfa qc
Pipeline #10316 passed
......@@ -6,3 +6,4 @@ channels:
- defaults
dependencies:
- alfa=1.1.1
- rename=1.600
......@@ -12,11 +12,13 @@ import pandas as pd
samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t")
directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"}
rename_files = {"fr-firststrand": ("STAR_Signal.UniqueMultiple.out.plus.bg",
"STAR_Signal.UniqueMultiple.out.minus.bg"),
"fr-secondstrand": ("STAR_Signal.UniqueMultiple.out.minus.bg",
"STAR_Signal.UniqueMultiple.out.plus.bg")}
rename_files = {"fr-firststrand": ("Signal.UniqueMultiple.out.plus.bg",
"Signal.UniqueMultiple.out.minus.bg"),
"fr-secondstrand": ("Signal.UniqueMultiple.out.minus.bg",
"Signal.UniqueMultiple.out.plus.bg")}
rename_expr = {"fr-firststrand": ("out.plus", "out.minus"),
"fr-secondstrand": ("out.minus", "out.plus")}
localrules: finish
##################################################################################
......@@ -60,7 +62,11 @@ rule finish:
"ALFA_plots.Biotypes.pdf"),
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)])
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]),
alfa = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Biotypes.pdf")
rule create_index_star:
......@@ -193,3 +199,72 @@ rule generate_alfa_index:
-p {threads} \
-o {params.out_dir} &> {log}"
# TODO: add rule that executes ALFA on all samples; should be on the same genome & same orientation
rule alfa_bg_all_samples:
''' Run ALFA from stranded bedgraph files '''
input:
str1 = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.str1.out.bg"),
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]),
str2 = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.str2.out.bg"),
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]),
gtf = [os.path.join(config["alfa_indexes"], samples_table.loc[sample1, "organism"],
str(samples_table.loc[sample1, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") for sample1 in list(samples_table.index.values)]
output:
biotypes = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Categories.pdf")
params:
str1 = lambda wildcards, input: [os.path.abspath(i) for i in input.str1],
str2 = lambda wildcards, input: [os.path.abspath(i) for i in input.str2],
out_dir = lambda wildcards, output: directory(os.path.dirname(output.biotypes)),
in_file_str1 = lambda wildcards, input: [os.path.basename(inp) for inp in input.str1], # copy files into out_dir and create with expand construct of bg + name
rename_str1 = [rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][0] for sample1 in list(samples_table.index.values)][0],
in_file_str2 = lambda wildcards, input: [os.path.basename(inp) for inp in input.str2],
rename_str2 = [rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][1] for sample1 in list(samples_table.index.values)][0],
genome_index = [os.path.abspath(os.path.join(
config["alfa_indexes"],
samples_table.loc[sample1, "organism"],
str(samples_table.loc[sample1, "index_size"]),
"ALFA", "sorted_genes")) for sample1 in list(samples_table.index.values)][0],
orientation = [directionality[samples_table.loc[sample1, "kallisto_directionality"]] for sample1 in list(samples_table.index.values)][0],
bg_and_sample_name = [sample1 + "_Signal.UniqueMultiple." +
rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][0] + ".bg " +
sample1 + "_Signal.UniqueMultiple." + rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][1] + ".bg " + sample1
for sample1 in list(samples_table.index.values)]
# singularity:
# "docker://zavolab/alfa:1.1.1"
conda: os.path.abspath("../../envs/env_alfa.yaml")
log: os.path.abspath(os.path.join(config["local_log"], "ALFA", "alfa_bg_all_samples.log"))
shell:
"""
mkdir -p {params.out_dir}; \
cd {params.out_dir}; \
cp {params.str1} . ; \
cp {params.str2} . ; \
rename 's/str1.out/{params.rename_str1}/' *str1.out.bg; \
rename 's/str2.out/{params.rename_str2}/' *str2.out.bg; \
(alfa -g {params.genome_index} \
--bedgraph {params.bg_and_sample_name} \
-s {params.orientation}) &> {log}; \
rm *{params.rename_str1}.bg *{params.rename_str2}.bg
"""
\ No newline at end of file
......@@ -354,25 +354,25 @@ rule star_rpm_paired_end:
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.Unique.str1.out.bg"),
"{sample}_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")),
"{sample}_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.Unique.str2.out.bg"),
"{sample}_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"))
"{sample}_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = directory(os.path.join(config["output_dir"],
"paired_end",
......@@ -381,7 +381,7 @@ rule star_rpm_paired_end:
prefix = os.path.join(config["output_dir"],
"paired_end",
"{sample}",
"ALFA", "STAR_"),
"ALFA", "{sample}_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.6.0a"
......@@ -409,13 +409,13 @@ rule alfa_bg_paired_end:
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg"),
"{sample}_Signal.UniqueMultiple.str1.out.bg"),
str2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"),
"{sample}_Signal.UniqueMultiple.str2.out.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index")
output:
......@@ -438,12 +438,11 @@ rule alfa_bg_paired_end:
"{sample}",
"ALFA")),
orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]],
in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
rename_str1 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0],
in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
rename_str2 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1],
genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")),
in_file_str1 = lambda wildcards, input: os.path.basename(input.str1),
rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0],
in_file_str2 = lambda wildcards, input: os.path.basename(input.str2),
rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1],
genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")),
name = "{sample}"
singularity:
"docker://zavolab/alfa:1.1.1"
......
......@@ -280,25 +280,25 @@ rule star_rpm_single_end:
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.Unique.str1.out.bg"),
"{sample}_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")),
"{sample}_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.Unique.str2.out.bg"),
"{sample}_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"))
"{sample}_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = directory(os.path.join(config["output_dir"],
"single_end",
......@@ -307,7 +307,7 @@ rule star_rpm_single_end:
prefix = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"ALFA", "STAR_"),
"ALFA", "{sample}_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.6.0a"
......@@ -336,13 +336,13 @@ rule alfa_bg_single_end:
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg"),
"{sample}_Signal.UniqueMultiple.str1.out.bg"),
str2 = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"),
"{sample}_Signal.UniqueMultiple.str2.out.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index")
output:
......@@ -365,12 +365,11 @@ rule alfa_bg_single_end:
"{sample}",
"ALFA")),
orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]],
in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
rename_str1 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0],
in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
rename_str2 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1],
genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")),
in_file_str1 = lambda wildcards, input: os.path.basename(input.str1),
rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0],
in_file_str2 = lambda wildcards, input: os.path.basename(input.str2),
rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1],
genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")),
name = "{sample}"
singularity:
"docker://zavolab/alfa:1.1.1"
......
......@@ -20,7 +20,8 @@ snakemake \
--printshellcmds \
--rerun-incomplete \
--use-singularity \
--singularity-args="--bind ${PWD}"
--singularity-args="--bind ${PWD}" \
--use-conda
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
md5sum --check "expected_output.md5"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment