diff --git a/envs/env_alfa.yaml b/envs/env_alfa.yaml index 0faa646d7cd22384c8b7facee776cbb49c067ba6..9dd3b4535e35534ed791dde7f872aa589aa5930d 100644 --- a/envs/env_alfa.yaml +++ b/envs/env_alfa.yaml @@ -6,3 +6,4 @@ channels: - defaults dependencies: - alfa=1.1.1 + - rename=1.600 diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 1e81a7e88645b3adf668692a4617ab6ec447856e..1758062dfd6de24a338c9d4456a8cfbb4ebae3ca 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -12,11 +12,13 @@ import pandas as pd samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t") directionality = {"--fr": "fr-firststrand", "--rv": "fr-secondstrand"} -rename_files = {"fr-firststrand": ("STAR_Signal.UniqueMultiple.out.plus.bg", - "STAR_Signal.UniqueMultiple.out.minus.bg"), - "fr-secondstrand": ("STAR_Signal.UniqueMultiple.out.minus.bg", - "STAR_Signal.UniqueMultiple.out.plus.bg")} - +rename_files = {"fr-firststrand": ("Signal.UniqueMultiple.out.plus.bg", + "Signal.UniqueMultiple.out.minus.bg"), + "fr-secondstrand": ("Signal.UniqueMultiple.out.minus.bg", + "Signal.UniqueMultiple.out.plus.bg")} +rename_expr = {"fr-firststrand": ("out.plus", "out.minus"), + "fr-secondstrand": ("out.minus", "out.plus")} + localrules: finish ################################################################################## @@ -60,7 +62,11 @@ rule finish: "ALFA_plots.Biotypes.pdf"), zip, sample= [i for i in list(samples_table.index.values)], - seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]) + seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), + alfa = os.path.join( + config["output_dir"], + "ALFA", + "ALFA_plots.Biotypes.pdf") rule create_index_star: @@ -193,3 +199,72 @@ rule generate_alfa_index: -p {threads} \ -o {params.out_dir} &> {log}" + +# TODO: add rule that executes ALFA on all samples; should be on the same genome & same orientation +rule alfa_bg_all_samples: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = expand(os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str1.out.bg"), + zip, + sample= [i for i in list(samples_table.index.values)], + seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), + str2 = expand(os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str2.out.bg"), + zip, + sample= [i for i in list(samples_table.index.values)], + seqmode= [samples_table.loc[i,"seqmode"] for i in list(samples_table.index.values)]), + gtf = [os.path.join(config["alfa_indexes"], samples_table.loc[sample1, "organism"], + str(samples_table.loc[sample1, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") for sample1 in list(samples_table.index.values)] + output: + biotypes = os.path.join( + config["output_dir"], + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "ALFA", + "ALFA_plots.Categories.pdf") + params: + str1 = lambda wildcards, input: [os.path.abspath(i) for i in input.str1], + str2 = lambda wildcards, input: [os.path.abspath(i) for i in input.str2], + out_dir = lambda wildcards, output: directory(os.path.dirname(output.biotypes)), + in_file_str1 = lambda wildcards, input: [os.path.basename(inp) for inp in input.str1], # copy files into out_dir and create with expand construct of bg + name + rename_str1 = [rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][0] for sample1 in list(samples_table.index.values)][0], + in_file_str2 = lambda wildcards, input: [os.path.basename(inp) for inp in input.str2], + rename_str2 = [rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][1] for sample1 in list(samples_table.index.values)][0], + genome_index = [os.path.abspath(os.path.join( + config["alfa_indexes"], + samples_table.loc[sample1, "organism"], + str(samples_table.loc[sample1, "index_size"]), + "ALFA", "sorted_genes")) for sample1 in list(samples_table.index.values)][0], + orientation = [directionality[samples_table.loc[sample1, "kallisto_directionality"]] for sample1 in list(samples_table.index.values)][0], + bg_and_sample_name = [sample1 + "_Signal.UniqueMultiple." + + rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][0] + ".bg " + + sample1 + "_Signal.UniqueMultiple." + rename_expr[directionality[samples_table.loc[sample1, "kallisto_directionality"]]][1] + ".bg " + sample1 + for sample1 in list(samples_table.index.values)] + # singularity: + # "docker://zavolab/alfa:1.1.1" + conda: os.path.abspath("../../envs/env_alfa.yaml") + log: os.path.abspath(os.path.join(config["local_log"], "ALFA", "alfa_bg_all_samples.log")) + shell: + """ + mkdir -p {params.out_dir}; \ + cd {params.out_dir}; \ + cp {params.str1} . ; \ + cp {params.str2} . ; \ + rename 's/str1.out/{params.rename_str1}/' *str1.out.bg; \ + rename 's/str2.out/{params.rename_str2}/' *str2.out.bg; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.bg_and_sample_name} \ + -s {params.orientation}) &> {log}; \ + rm *{params.rename_str1}.bg *{params.rename_str2}.bg + """ \ No newline at end of file diff --git a/snakemake/paired_end.snakefile.smk b/snakemake/paired_end.snakefile.smk index 61d830960a3419afec248a78d7de3b460b3301ce..b92a2038ade28b78716b4b3f5abbfcac2621a45b 100644 --- a/snakemake/paired_end.snakefile.smk +++ b/snakemake/paired_end.snakefile.smk @@ -354,25 +354,25 @@ rule star_rpm_paired_end: "paired_end", "{sample}", "ALFA", - "STAR_Signal.Unique.str1.out.bg"), + "{sample}_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str1.out.bg")), + "{sample}_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "paired_end", "{sample}", "ALFA", - "STAR_Signal.Unique.str2.out.bg"), + "{sample}_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "paired_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str2.out.bg")) + "{sample}_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "paired_end", @@ -381,7 +381,7 @@ rule star_rpm_paired_end: prefix = os.path.join(config["output_dir"], "paired_end", "{sample}", - "ALFA", "STAR_"), + "ALFA", "{sample}_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -409,13 +409,13 @@ rule alfa_bg_paired_end: "paired_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str1.out.bg"), + "{sample}_Signal.UniqueMultiple.str1.out.bg"), str2 = os.path.join( config["output_dir"], "paired_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str2.out.bg"), + "{sample}_Signal.UniqueMultiple.str2.out.bg"), gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") output: @@ -438,12 +438,11 @@ rule alfa_bg_paired_end: "{sample}", "ALFA")), orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], - in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", - rename_str1 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], - in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", - rename_str2 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], - genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), + rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], + in_file_str2 = lambda wildcards, input: os.path.basename(input.str2), + rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], + genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), name = "{sample}" singularity: "docker://zavolab/alfa:1.1.1" diff --git a/snakemake/single_end.snakefile.smk b/snakemake/single_end.snakefile.smk index 280d4ea7b0f7c6c86c624842444c64c6c2037bea..4d5d2311710c0614abe6a0cd4677c18568ee70ce 100644 --- a/snakemake/single_end.snakefile.smk +++ b/snakemake/single_end.snakefile.smk @@ -280,25 +280,25 @@ rule star_rpm_single_end: "single_end", "{sample}", "ALFA", - "STAR_Signal.Unique.str1.out.bg"), + "{sample}_Signal.Unique.str1.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str1.out.bg")), + "{sample}_Signal.UniqueMultiple.str1.out.bg")), str2 = (os.path.join( config["output_dir"], "single_end", "{sample}", "ALFA", - "STAR_Signal.Unique.str2.out.bg"), + "{sample}_Signal.Unique.str2.out.bg"), os.path.join( config["output_dir"], "single_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str2.out.bg")) + "{sample}_Signal.UniqueMultiple.str2.out.bg")) params: out_dir = directory(os.path.join(config["output_dir"], "single_end", @@ -307,7 +307,7 @@ rule star_rpm_single_end: prefix = os.path.join(config["output_dir"], "single_end", "{sample}", - "ALFA", "STAR_"), + "ALFA", "{sample}_"), stranded = "Stranded" singularity: "docker://zavolab/star:2.6.0a" @@ -336,13 +336,13 @@ rule alfa_bg_single_end: "single_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str1.out.bg"), + "{sample}_Signal.UniqueMultiple.str1.out.bg"), str2 = os.path.join( config["output_dir"], "single_end", "{sample}", "ALFA", - "STAR_Signal.UniqueMultiple.str2.out.bg"), + "{sample}_Signal.UniqueMultiple.str2.out.bg"), gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") output: @@ -365,12 +365,11 @@ rule alfa_bg_single_end: "{sample}", "ALFA")), orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], - in_file_str1 = "STAR_Signal.UniqueMultiple.str1.out.bg", - rename_str1 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], - in_file_str2 = "STAR_Signal.UniqueMultiple.str2.out.bg", - rename_str2 = lambda wildcards: rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], - genome_index = lambda wildcards: os.path.abspath(os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes")), + in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), + rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], + in_file_str2 = lambda wildcards, input: os.path.basename(input.str2), + rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], + genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), name = "{sample}" singularity: "docker://zavolab/alfa:1.1.1" diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index 6bcf8d68dabf4492282ed9b039b4387c758cadbb..f26856cdb42fb2d327a6300c06124f9e429f9288 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -20,7 +20,8 @@ snakemake \ --printshellcmds \ --rerun-incomplete \ --use-singularity \ - --singularity-args="--bind ${PWD}" + --singularity-args="--bind ${PWD}" \ + --use-conda find results/ -type f -name \*\.gz -exec gunzip '{}' \; find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; md5sum --check "expected_output.md5"