Skip to content
Snippets Groups Projects
Commit f5e2f6ac authored by Dominik Burri's avatar Dominik Burri Committed by Alex Kanitz
Browse files

extend ALFA functionality

- generate nucleotide distribution for unique reads only
- new rule to generate PNG image for MultiQC
parent 05e5a95a
No related branches found
No related tags found
No related merge requests found
......@@ -53,6 +53,7 @@ rule finish:
"multiqc_summary"),
output_dir=config["output_dir"])
rule create_index_star:
"""
Create index for STAR alignments
......@@ -398,13 +399,13 @@ rule rename_star_rpm_for_alfa:
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str1.out.bg"),
"{sample}_Signal.{unique}.str1.out.bg"),
str2 = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str2.out.bg")
"{sample}_Signal.{unique}.str2.out.bg")
output:
plus = os.path.join(
......@@ -412,13 +413,15 @@ rule rename_star_rpm_for_alfa:
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.out.plus.bg"),
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.out.minus.bg")
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg")
params:
orientation = lambda wildcards: samples_table.loc[wildcards.sample, "kallisto_directionality"]
......@@ -763,13 +766,15 @@ rule alfa_qc:
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.out.plus.bg"),
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}_Signal.UniqueMultiple.out.minus.bg"),
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"],
samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]),
......@@ -782,18 +787,21 @@ rule alfa_qc:
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"ALFA_plots.Categories.pdf"),
table = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}.ALFA_feature_counts.tsv")
params:
......@@ -812,7 +820,7 @@ rule alfa_qc:
config["log_dir"],
"{seqmode}",
"{sample}",
"alfa_qc.log"))
"alfa_qc.{unique}.log"))
shell:
"""
......@@ -831,6 +839,7 @@ rule alfa_qc_all_samples:
samples_table.loc[sample1, "seqmode"],
str(sample1),
"ALFA",
"{unique}",
sample1 + ".ALFA_feature_counts.tsv")
for sample1 in list(samples_table.index.values)]
......@@ -838,10 +847,12 @@ rule alfa_qc_all_samples:
biotypes = os.path.join(
config["output_dir"],
"ALFA",
"{unique}",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"ALFA",
"{unique}",
"ALFA_plots.Categories.pdf")
params:
......@@ -850,7 +861,7 @@ rule alfa_qc_all_samples:
log:
os.path.abspath(
os.path.join(config["log_dir"],
"alfa_qc_all_samples.log"))
"alfa_qc_all_samples.{unique}.log"))
singularity:
"docker://zavolab/alfa:1.1.1"
......@@ -861,6 +872,40 @@ rule alfa_qc_all_samples:
"""
rule alfa_concat_results:
    """
    Concatenate the ALFA summary plots into a single PNG image for MultiQC

    Takes the "Categories" and "Biotypes" PDF plots found under
    <output_dir>/ALFA/ for both the "Unique" and "UniqueMultiple" read sets
    and stacks them top-to-bottom into one image.
    """
    input:
        expand(
            os.path.join(
                config["output_dir"],
                "ALFA",
                "{unique_type}",
                "ALFA_plots.{annotation}.pdf"),
            unique_type=["Unique", "UniqueMultiple"],
            annotation=["Categories", "Biotypes"])

    output:
        # Single fixed path without wildcards, so no expand() is needed
        os.path.join(
            config["output_dir"],
            "ALFA",
            "ALFA_plots.concat.png")

    params:
        # Resolution passed to ImageMagick's -density when reading the PDFs
        density = 300

    log:
        # NOTE(review): file name carries the "alfa_qc_all_samples" prefix
        # rather than this rule's name; kept as-is to not move existing logs
        os.path.abspath(
            os.path.join(
                config["log_dir"],
                "alfa_qc_all_samples.concat.log"))

    singularity:
        "docker://zavolab/imagemagick:7.0.8"

    shell:
        # ImageMagick order: settings (-density) first so they apply while
        # the inputs are read, then the images, then the -append operator
        # that joins the loaded image sequence vertically.
        """
        convert -density {params.density} \
        {input} -append {output} &> {log}
        """
rule prepare_files_for_report:
'''
Re-structure the results and add comments for MultiQC parsing
......@@ -918,21 +963,10 @@ rule prepare_files_for_report:
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
alfa_reports = expand(os.path.join(
alfa_concat_out = expand(os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"ALFA_plots.Biotypes.pdf"),
zip,
sample= [i for i in list(samples_table.index.values)],
seqmode= [
samples_table.loc[i,"seqmode"]
for i in list(samples_table.index.values)]),
alfa_all_samples = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Categories.pdf")
"ALFA_plots.concat.png"))
output:
samples_dir = directory(os.path.join(
......@@ -1232,4 +1266,4 @@ rule MULTIQC_report:
{params.results_dir} \
{params.log_dir} \
&> {log.LOG_local_log};
"""
\ No newline at end of file
"""
This diff is collapsed.
This diff is collapsed.
90e42aa46890e9cd0a47800428699fbf results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
ccc3cf5a57fddb0d469e597d4376b1bf results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_sense/ALFA/UniqueMultiple/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_antisense/ALFA/UniqueMultiple/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.plus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_sense/ALFA/UniqueMultiple/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.plus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense_Signal.Unique.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_antisense/ALFA/Unique/paired_end_R1_on_minus_antisense_Signal.Unique.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense_Signal.Unique.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_sense/ALFA/Unique/paired_end_R1_on_minus_sense_Signal.Unique.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense_Signal.Unique.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_antisense/ALFA/Unique/paired_end_R1_on_plus_antisense_Signal.Unique.out.plus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense_Signal.Unique.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_sense/ALFA/Unique/paired_end_R1_on_plus_sense_Signal.Unique.out.plus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.str1.out.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.str2.out.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_antisense/STAR_coverage/paired_end_R1_on_minus_antisense_Signal.Unique.str1.out.bg
......@@ -16,15 +40,3 @@ ccc3cf5a57fddb0d469e597d4376b1bf results/alfa_indexes/homo_sapiens/75/ALFA/sort
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.str2.out.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.Unique.str1.out.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_sense/STAR_coverage/paired_end_R1_on_plus_sense_Signal.Unique.str2.out.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense_Signal.UniqueMultiple.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense_Signal.UniqueMultiple.out.plus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.minus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense_Signal.UniqueMultiple.out.plus.bg
5e90c760710980f4f4866dbe9aa32c6c results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.minus.bg
8e23d52d7f635d927e292174f33168eb results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense_Signal.UniqueMultiple.out.plus.bg
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_minus_antisense/ALFA/paired_end_R1_on_minus_antisense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_minus_sense/ALFA/paired_end_R1_on_minus_sense.ALFA_feature_counts.tsv
c406c1800d3690dd774aaa7e3c190523 results/paired_end/paired_end_R1_on_plus_antisense/ALFA/paired_end_R1_on_plus_antisense.ALFA_feature_counts.tsv
e5959524a2daf35da9249fb313920315 results/paired_end/paired_end_R1_on_plus_sense/ALFA/paired_end_R1_on_plus_sense.ALFA_feature_counts.tsv
......@@ -33,7 +33,7 @@ snakemake \
--use-singularity \
--singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \
--verbose \
results/ALFA/ALFA_plots.Categories.pdf
results/ALFA/ALFA_plots.concat.png
# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
......
......@@ -89,9 +89,15 @@ bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synt
3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa
6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index
53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg
ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg
\ No newline at end of file
bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.minus.bg
ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.out.plus.bg
bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.out.minus.bg
ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.out.plus.bg
90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.minus.bg
16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.out.plus.bg
90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.out.minus.bg
16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.out.plus.bg
c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv
53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv
\ No newline at end of file
......@@ -7,7 +7,7 @@ cleanup () {
rm -rf .java/
rm -rf .snakemake/
rm -rf logs/
# rm -rf results/
rm -rf results/
cd $user_dir
echo "Exit status: $rc"
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment