Skip to content
Snippets Groups Projects
Commit cbd41cd0 authored by Dominik Burri's avatar Dominik Burri
Browse files

alfa biotypes integrated

parent 8c9ef971
No related branches found
No related tags found
1 merge request!39Alfa qc
Pipeline #10232 passed
......@@ -11,6 +11,7 @@ import pandas as pd
############################
samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t")
directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"}
localrules: finish
......@@ -43,7 +44,7 @@ rule finish:
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]),
alfa = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"),
alfa_index = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"),
zip,
organism= list(set([samples_table.loc[i,"organism"] for i in list(samples_table.index.values)])),
index_size= list(set([samples_table.loc[i,"index_size"] for i in list(samples_table.index.values)]))),
......@@ -51,8 +52,8 @@ rule finish:
config["output_dir"],
"{seqmode}",
"{sample}",
"star_rpm",
"STAR_Signal.Unique.str1.out.bg"),
"ALFA",
"ALFA_plots.Biotypes.pdf"),
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)])
......
......@@ -353,35 +353,35 @@ rule star_rpm_paired_end:
config["output_dir"],
"paired_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = directory(os.path.join(config["output_dir"],
"paired_end",
"{sample}",
"star_rpm")),
"ALFA")),
prefix = os.path.join(config["output_dir"],
"paired_end",
"{sample}",
"star_rpm", "STAR_"),
"ALFA", "STAR_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.6.0a"
......@@ -402,6 +402,64 @@ rule star_rpm_paired_end:
"""
rule alfa_bg_paired_end:
''' Run ALFA from stranded bedgraph files '''
input:
str1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg"),
str2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index")
output:
biotypes = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA",
"ALFA_plots.Categories.pdf")
params:
out_dir = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"ALFA")),
in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg",
in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg",
genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")),
name = "{sample}",
orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]
singularity:
"docker://zavolab/alfa:1.1.1"
log: os.path.abspath(os.path.join(config["local_log"], "paired_end", "{sample}", "alfa_bg_paired_end.log"))
shell:
"""
cd {params.out_dir}; \
cp {params.in_file_str1} {params.rename_str1}; \
cp {params.in_file_str2} {params.rename_str2}; \
(alfa -g {params.genome_index} \
--bedgraph {params.rename_str1} {params.rename_str2} {params.name} \
-s {params.orientation}) &> {log}; \
rm {params.rename_str1} {params.rename_str2}
"""
......
......@@ -279,35 +279,35 @@ rule star_rpm_single_end:
config["output_dir"],
"single_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"star_rpm",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = directory(os.path.join(config["output_dir"],
"single_end",
"{sample}",
"star_rpm")),
"ALFA")),
prefix = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"star_rpm", "STAR_"),
"ALFA", "STAR_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.6.0a"
......@@ -325,4 +325,63 @@ rule star_rpm_single_end:
--outWigStrand {params.stranded} \
--outWigNorm "RPM" \
--outFileNamePrefix {params.prefix}) &> {log}
"""
rule alfa_bg_single_end:
''' Run ALFA from stranded bedgraph files '''
input:
str1 = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str1.out.bg"),
str2 = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"STAR_Signal.UniqueMultiple.str2.out.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index")
output:
biotypes = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA",
"ALFA_plots.Categories.pdf")
params:
out_dir = directory(os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"ALFA")),
in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg",
rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg",
in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg",
rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg",
genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")),
name = "{sample}",
orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]
singularity:
"docker://zavolab/alfa:1.1.1"
log: os.path.abspath(os.path.join(config["local_log"], "single_end", "{sample}", "alfa_bg_single_end.log"))
shell:
"""
cd {params.out_dir}; \
cp {params.in_file_str1} {params.rename_str1}; \
cp {params.in_file_str2} {params.rename_str2}; \
(alfa -g {params.genome_index} \
--bedgraph {params.rename_str1} {params.rename_str2} {params.name} \
-s {params.orientation}) &> {log}; \
rm {params.rename_str1} {params.rename_str2}
"""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment