From cbd41cd02a7a57aeb5d0cac9e96ea82874174e16 Mon Sep 17 00:00:00 2001 From: burri0000 <dominik.burri@unibas.ch> Date: Fri, 14 Feb 2020 17:22:40 +0100 Subject: [PATCH] alfa biotypes integrated --- snakemake/Snakefile | 7 +-- snakemake/paired_end.snakefile.smk | 70 ++++++++++++++++++++++++++--- snakemake/single_end.snakefile.smk | 71 +++++++++++++++++++++++++++--- 3 files changed, 133 insertions(+), 15 deletions(-) diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 554e9bf..4f83c6d 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -11,6 +11,7 @@ import pandas as pd ############################ samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t") +directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"} localrules: finish @@ -43,7 +44,7 @@ rule finish: zip, sample= [i for i in list(samples_table.index.values)], seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), - alfa = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"), + alfa_index = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"), zip, organism= list(set([samples_table.loc[i,"organism"] for i in list(samples_table.index.values)])), index_size= list(set([samples_table.loc[i,"index_size"] for i in list(samples_table.index.values)]))), @@ -51,8 +52,8 @@ rule finish: config["output_dir"], "{seqmode}", "{sample}", - "star_rpm", - "STAR_Signal.Unique.str1.out.bg"), + "ALFA", + "ALFA_plots.Biotypes.pdf"), zip, sample= [i for i in list(samples_table.index.values)], seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]) diff --git a/snakemake/paired_end.snakefile.smk b/snakemake/paired_end.snakefile.smk index d527a16..f733ca7 100644 --- a/snakemake/paired_end.snakefile.smk +++ b/snakemake/paired_end.snakefile.smk @@ -353,35 +353,35 @@ rule star_rpm_paired_end: config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", - "star_rpm")), + "ALFA")), prefix = os.path.join(config["output_dir"], "paired_end", "{sample}", - "star_rpm", "STAR_"), + "ALFA", "STAR_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -402,6 +402,64 @@ rule star_rpm_paired_end: """ +rule alfa_bg_paired_end: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Categories.pdf") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA")), + in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", + rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg", + in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", + rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg", + genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + name = "{sample}", + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]] + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["local_log"], "paired_end", "{sample}", "alfa_bg_paired_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} + """ + diff --git a/snakemake/single_end.snakefile.smk b/snakemake/single_end.snakefile.smk index aaf940a..a51806f 100644 --- a/snakemake/single_end.snakefile.smk +++ b/snakemake/single_end.snakefile.smk @@ -279,35 +279,35 @@ rule star_rpm_single_end: config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", - "star_rpm")), + "ALFA")), prefix = os.path.join(config["output_dir"], "single_end", "{sample}", - "star_rpm", "STAR_"), + "ALFA", "STAR_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -325,4 +325,63 @@ rule star_rpm_single_end: --outWigStrand {params.stranded} \ --outWigNorm "RPM" \ --outFileNamePrefix {params.prefix}) &> {log} + """ + + +rule alfa_bg_single_end: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "ALFA_plots.Categories.pdf") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA")), + in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", + rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg", + in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", + rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg", + genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + name = "{sample}", + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]] + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["local_log"], "single_end", "{sample}", "alfa_bg_single_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} """ \ No newline at end of file -- GitLab