diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 554e9bf90cbc5600504c9195d88f9af48feb8768..4f83c6d5ae0c9106cbdf496146cf3cbf9f2a12d7 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -11,6 +11,7 @@ import pandas as pd ############################ samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t") +directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"} localrules: finish @@ -43,7 +44,7 @@ rule finish: zip, sample= [i for i in list(samples_table.index.values)], seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), - alfa = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"), + alfa_index = expand(os.path.join(config["alfa_indexes"],"{organism}","{index_size}","ALFA", "sorted_genes.stranded.ALFA_index"), zip, organism= list(set([samples_table.loc[i,"organism"] for i in list(samples_table.index.values)])), index_size= list(set([samples_table.loc[i,"index_size"] for i in list(samples_table.index.values)]))), @@ -51,8 +52,8 @@ rule finish: config["output_dir"], "{seqmode}", "{sample}", - "star_rpm", - "STAR_Signal.Unique.str1.out.bg"), + "ALFA", + "ALFA_plots.Biotypes.pdf"), zip, sample= [i for i in list(samples_table.index.values)], seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]) diff --git a/snakemake/paired_end.snakefile.smk b/snakemake/paired_end.snakefile.smk index d527a162a145250d541473163e544f5208c57931..f733ca7e38d885bc1ed77687bcc554831e4cfb6a 100644 --- a/snakemake/paired_end.snakefile.smk +++ b/snakemake/paired_end.snakefile.smk @@ -353,35 +353,35 @@ rule star_rpm_paired_end: config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", - "star_rpm")), + "ALFA")), prefix = os.path.join(config["output_dir"], "paired_end", "{sample}", - "star_rpm", "STAR_"), + "ALFA", "STAR_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -402,6 +402,64 @@ rule star_rpm_paired_end: """ +rule alfa_bg_paired_end: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Categories.pdf") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA")), + in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", + rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg", + in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", + rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg", + genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + name = "{sample}", + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]] + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["local_log"], "paired_end", "{sample}", "alfa_bg_paired_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} + """ + diff --git a/snakemake/single_end.snakefile.smk b/snakemake/single_end.snakefile.smk index aaf940afa2e7cbf33febdad89ca166198c76d32d..a51806fa13eb4804080734f7f5aeada96ea2a948 100644 --- a/snakemake/single_end.snakefile.smk +++ b/snakemake/single_end.snakefile.smk @@ -279,35 +279,35 @@ rule star_rpm_single_end: config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", - "star_rpm", + "ALFA", "STAR_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", - "star_rpm")), + "ALFA")), prefix = os.path.join(config["output_dir"], "single_end", "{sample}", - "star_rpm", "STAR_"), + "ALFA", "STAR_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -325,4 +325,63 @@ rule star_rpm_single_end: --outWigStrand {params.stranded} \ --outWigNorm "RPM" \ --outFileNamePrefix {params.prefix}) &> {log} + """ + + +rule alfa_bg_single_end: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "STAR_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "ALFA_plots.Categories.pdf") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA")), + in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", + rename_str1 = "STAR_Signal.UniqueMultiple.out.plus.bg", + in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", + rename_str2 = "STAR_Signal.UniqueMultiple.out.minus.bg", + genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + name = "{sample}", + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]] + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["local_log"], "single_end", "{sample}", "alfa_bg_single_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} """ \ No newline at end of file