Skip to content
Snippets Groups Projects
Commit cbd41cd0 authored by Dominik Burri's avatar Dominik Burri
Browse files

alfa biotypes integrated

parent 8c9ef971
No related branches found
No related tags found
1 merge request: !39 "Alfa qc"
Pipeline #10232 passed
...@@ -11,6 +11,7 @@ import pandas as pd ...@@ -11,6 +11,7 @@ import pandas as pd
############################ ############################
samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t") samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t")
directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"}
localrules: finish localrules: finish
...@@ -43,7 +44,7 @@ rule finish: ...@@ -43,7 +44,7 @@ rule finish:
zip, zip,
sample= [i for i in list(samples_table.index.values)], sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]),
alfa = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"), alfa_index = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"),
zip, zip,
organism= list(set([samples_table.loc[i,"organism"] for i in list(samples_table.index.values)])), organism= list(set([samples_table.loc[i,"organism"] for i in list(samples_table.index.values)])),
index_size= list(set([samples_table.loc[i,"index_size"] for i in list(samples_table.index.values)]))), index_size= list(set([samples_table.loc[i,"index_size"] for i in list(samples_table.index.values)]))),
...@@ -51,8 +52,8 @@ rule finish: ...@@ -51,8 +52,8 @@ rule finish:
config["output_dir"], config["output_dir"],
"{seqmode}", "{seqmode}",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.Unique.str1.out.bg"), "ALFA_plots.Biotypes.pdf"),
zip, zip,
sample= [i for i in list(samples_table.index.values)], sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]) seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)])
......
...@@ -353,35 +353,35 @@ rule star_rpm_paired_end: ...@@ -353,35 +353,35 @@ rule star_rpm_paired_end:
config["output_dir"], config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.Unique.str1.out.bg"), "STAR_Signal.Unique.str1.out.bg"),
os.path.join( os.path.join(
config["output_dir"], config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")), "STAR_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join( str2 = (os.path.join(
config["output_dir"], config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.Unique.str2.out.bg"), "STAR_Signal.Unique.str2.out.bg"),
os.path.join( os.path.join(
config["output_dir"], config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg")) "STAR_Signal.UniqueMultiple.str2.out.bg"))
params: params:
out_dir = directory(os.path.join(config["output_dir"], out_dir = directory(os.path.join(config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm")), "ALFA")),
prefix = os.path.join(config["output_dir"], prefix = os.path.join(config["output_dir"],
"paired_end", "paired_end",
"{sample}", "{sample}",
"star_rpm", "STAR_"), "ALFA", "STAR_"),
stranded = "Stranded" stranded = "Stranded"
singularity: singularity:
"docker://zavolab/star:2.6.0a" "docker://zavolab/star:2.6.0a"
...@@ -402,6 +402,64 @@ rule star_rpm_paired_end: ...@@ -402,6 +402,64 @@ rule star_rpm_paired_end:
""" """
rule alfa_bg_paired_end:
    '''
    Run ALFA from stranded bedgraph files (paired-end samples).

    Consumes the two "UniqueMultiple" strand-specific bedgraph files found
    in the sample's ALFA directory together with the organism- and index
    size-specific ALFA index, and declares the ALFA biotype and category
    plots (PDF) as outputs.
    '''
    input:
        str1 = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "ALFA",
            "STAR_Signal.UniqueMultiple.str1.out.bg"),
        str2 = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "ALFA",
            "STAR_Signal.UniqueMultiple.str2.out.bg"),
        # Despite its name, this input points at the stranded ALFA index file;
        # the name is kept so that existing references to it keep working.
        gtf = lambda wildcards: os.path.join(
            config["alfa_indexes"],
            samples_table.loc[wildcards.sample, "organism"],
            str(samples_table.loc[wildcards.sample, "index_size"]),
            "ALFA",
            "sorted_genes.stranded.ALFA_index")
    output:
        biotypes = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "ALFA",
            "ALFA_plots.Biotypes.pdf"),
        categories = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "ALFA",
            "ALFA_plots.Categories.pdf")
    params:
        # Plain path: the directory() flag is meant for outputs, not params.
        out_dir = os.path.join(
            config["output_dir"],
            "paired_end",
            "{sample}",
            "ALFA"),
        # File names are relative to out_dir because the shell command
        # changes into that directory before calling ALFA.
        in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
        rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg",
        in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
        rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg",
        # Absolute index prefix (without the ".stranded.ALFA_index" suffix),
        # so it stays valid after the `cd` in the shell command.
        genome_index = lambda wildcards: os.path.abspath(os.path.join(
            config["alfa_indexes"],
            samples_table.loc[wildcards.sample, "organism"],
            str(samples_table.loc[wildcards.sample, "index_size"]),
            "ALFA",
            "sorted_genes")),
        name = "{sample}",
        # Translate the sample's kallisto directionality flag ("--fr"/"--rv")
        # into the strandedness value passed to `alfa -s`.
        orientation = lambda wildcards: directionality[
            samples_table.loc[wildcards.sample, "kallisto_directionality"]]
    singularity:
        "docker://zavolab/alfa:1.1.1"
    log:
        # Absolute path: the shell command runs from inside out_dir.
        os.path.abspath(os.path.join(
            config["local_log"],
            "paired_end",
            "{sample}",
            "alfa_bg_paired_end.log"))
    shell:
        # The bedgraphs are copied to *.plus.bg / *.minus.bg names before the
        # ALFA call (presumably the naming ALFA expects for stranded input --
        # TODO confirm) and the copies are removed afterwards. Steps are
        # chained with && so a failing step stops the chain and fails the job.
        """
        cd {params.out_dir} && \
        cp {params.in_file_str1} {params.rename_str1} && \
        cp {params.in_file_str2} {params.rename_str2} && \
        (alfa -g {params.genome_index} \
            --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \
            -s {params.orientation}) &> {log} && \
        rm {params.rename_str1} {params.rename_str2}
        """
......
...@@ -279,35 +279,35 @@ rule star_rpm_single_end: ...@@ -279,35 +279,35 @@ rule star_rpm_single_end:
config["output_dir"], config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.Unique.str1.out.bg"), "STAR_Signal.Unique.str1.out.bg"),
os.path.join( os.path.join(
config["output_dir"], config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")), "STAR_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join( str2 = (os.path.join(
config["output_dir"], config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.Unique.str2.out.bg"), "STAR_Signal.Unique.str2.out.bg"),
os.path.join( os.path.join(
config["output_dir"], config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm", "ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg")) "STAR_Signal.UniqueMultiple.str2.out.bg"))
params: params:
out_dir = directory(os.path.join(config["output_dir"], out_dir = directory(os.path.join(config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm")), "ALFA")),
prefix = os.path.join(config["output_dir"], prefix = os.path.join(config["output_dir"],
"single_end", "single_end",
"{sample}", "{sample}",
"star_rpm", "STAR_"), "ALFA", "STAR_"),
stranded = "Stranded" stranded = "Stranded"
singularity: singularity:
"docker://zavolab/star:2.6.0a" "docker://zavolab/star:2.6.0a"
...@@ -325,4 +325,63 @@ rule star_rpm_single_end: ...@@ -325,4 +325,63 @@ rule star_rpm_single_end:
--outWigStrand {params.stranded} \ --outWigStrand {params.stranded} \
--outWigNorm "RPM" \ --outWigNorm "RPM" \
--outFileNamePrefix {params.prefix}) &> {log} --outFileNamePrefix {params.prefix}) &> {log}
"""
rule alfa_bg_single_end:
    '''
    Run ALFA from stranded bedgraph files (single-end samples).

    Consumes the two "UniqueMultiple" strand-specific bedgraph files found
    in the sample's ALFA directory together with the organism- and index
    size-specific ALFA index, and declares the ALFA biotype and category
    plots (PDF) as outputs.
    '''
    input:
        str1 = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "ALFA",
            "STAR_Signal.UniqueMultiple.str1.out.bg"),
        str2 = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "ALFA",
            "STAR_Signal.UniqueMultiple.str2.out.bg"),
        # Despite its name, this input points at the stranded ALFA index file;
        # the name is kept so that existing references to it keep working.
        gtf = lambda wildcards: os.path.join(
            config["alfa_indexes"],
            samples_table.loc[wildcards.sample, "organism"],
            str(samples_table.loc[wildcards.sample, "index_size"]),
            "ALFA",
            "sorted_genes.stranded.ALFA_index")
    output:
        biotypes = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "ALFA",
            "ALFA_plots.Biotypes.pdf"),
        categories = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "ALFA",
            "ALFA_plots.Categories.pdf")
    params:
        # Plain path: the directory() flag is meant for outputs, not params.
        out_dir = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "ALFA"),
        # File names are relative to out_dir because the shell command
        # changes into that directory before calling ALFA.
        in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
        rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg",
        in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
        rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg",
        # Absolute index prefix (without the ".stranded.ALFA_index" suffix),
        # so it stays valid after the `cd` in the shell command.
        genome_index = lambda wildcards: os.path.abspath(os.path.join(
            config["alfa_indexes"],
            samples_table.loc[wildcards.sample, "organism"],
            str(samples_table.loc[wildcards.sample, "index_size"]),
            "ALFA",
            "sorted_genes")),
        name = "{sample}",
        # Translate the sample's kallisto directionality flag ("--fr"/"--rv")
        # into the strandedness value passed to `alfa -s`.
        orientation = lambda wildcards: directionality[
            samples_table.loc[wildcards.sample, "kallisto_directionality"]]
    singularity:
        "docker://zavolab/alfa:1.1.1"
    log:
        # Absolute path: the shell command runs from inside out_dir.
        os.path.abspath(os.path.join(
            config["local_log"],
            "single_end",
            "{sample}",
            "alfa_bg_single_end.log"))
    shell:
        # The bedgraphs are copied to *.plus.bg / *.minus.bg names before the
        # ALFA call (presumably the naming ALFA expects for stranded input --
        # TODO confirm) and the copies are removed afterwards. Steps are
        # chained with && so a failing step stops the chain and fails the job.
        """
        cd {params.out_dir} && \
        cp {params.in_file_str1} {params.rename_str1} && \
        cp {params.in_file_str2} {params.rename_str2} && \
        (alfa -g {params.genome_index} \
            --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \
            -s {params.orientation}) &> {log} && \
        rm {params.rename_str1} {params.rename_str2}
        """
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment