Skip to content
Snippets Groups Projects
Commit c2ed5f3d authored by Dominik Burri's avatar Dominik Burri
Browse files

Merge branch 'dev' into results_dir

incorporate changes of updated dev
parents bc168805 736d1a3d
No related branches found
No related tags found
1 merge request: !78 "Remove unnecessary files in results directory"
Pipeline #12111 passed
This commit is part of merge request !78. Comments created here will be created in the context of that merge request.
......@@ -13,8 +13,8 @@ test:
# add unit tests here
# add script tests here
- bash tests/test_scripts_prepare_inputs_table/test.sh
- bash tests/test_scripts_prepare_inputs_labkey/test.sh
- bash tests/test_alfa/test.sh
#- bash tests/test_scripts_prepare_inputs_labkey/test.sh
#- bash tests/test_alfa/test.sh
# add integration tests here
- bash tests/test_create_dag_image/test.sh
- bash tests/test_create_rule_graph/test.sh
......
......@@ -552,101 +552,6 @@ rule calculate_TIN_scores:
-n {threads} > {output.TIN_score};) 2> {log.stderr}"
rule merge_TIN_scores:
    """
    Merge the per-sample TIN score tables into a single table.

    Calls tin_score_merge.py with a comma-separated list of every
    per-sample TIN_score.tsv file and writes the merged table to
    TIN_scores_merged.tsv in the output directory.
    """
    input:
        # One TIN score table per unique sample ID of the samples table.
        TIN_score = expand(
            os.path.join(
                config['output_dir'],
                "samples",
                "{sample}",
                "TIN",
                "TIN_score.tsv"),
            sample=pd.unique(samples_table.index.values)),

    output:
        TIN_scores_merged = os.path.join(
            config['output_dir'],
            "TIN_scores_merged.tsv")

    log:
        stderr = os.path.join(
            config['log_dir'],
            "merge_TIN_scores.stderr.log"),
        stdout = os.path.join(
            config["log_dir"],
            "merge_TIN_scores.stdout.log")

    params:
        # tin_score_merge.py receives its inputs as one comma-separated
        # string. Derive it from the declared inputs so the list is defined
        # in a single place; the previous version re-expanded the same
        # pattern and computed a `seqmode` value per sample that the
        # pattern never used.
        TIN_score_merged_paths = lambda wildcards, input:
            ",".join(input.TIN_score)

    threads: 1

    singularity:
        "docker://zavolab/tin_score_calculation:0.2.0-slim"

    shell:
        "(tin_score_merge.py \
        --input-files {params.TIN_score_merged_paths} \
        --output-file {output.TIN_scores_merged}) \
        1> {log.stdout} 2> {log.stderr}"
rule plot_TIN_scores:
"""
Generate box plots of TIN scores from the merged TIN score table.

Runs tin_score_plot.py on TIN_scores_merged.tsv and declares one PNG and
one PDF plot in the output directory as results.
"""
input:
# Merged TIN score table; same path as the output of merge_TIN_scores.
TIN_scores_merged = os.path.join(
config['output_dir'],
"TIN_scores_merged.tsv"),
output:
# NOTE(review): the "_mqc" suffix presumably lets MultiQC pick the plot up
# as custom content -- confirm against the MultiQC configuration.
TIN_boxplot_PNG = os.path.join(
config['output_dir'],
"TIN_scores_boxplot_mqc.png"),
TIN_boxplot_PDF = os.path.join(
config['output_dir'],
"TIN_scores_boxplot_mqc.pdf")
params:
# Extension-less form of the two output paths; handed to the script as
# --output-file-prefix. Presumably the script appends ".png"/".pdf"
# itself -- keep this in sync with the declared outputs above.
TIN_boxplot_prefix = os.path.join(
config['output_dir'],
"TIN_scores_boxplot_mqc")
log:
# stdout and stderr of the script are captured in separate files.
stderr = os.path.join(
config['log_dir'],
"plot_TIN_scores.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"plot_TIN_scores.stdout.log")
threads: 1
singularity:
"docker://zavolab/tin_score_calculation:0.2.0-slim"
shell:
"(tin_score_plot.py \
--input-file {input.TIN_scores_merged} \
--output-file-prefix {params.TIN_boxplot_prefix}) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_genes:
'''
Merge gene quantifications into a single file
......@@ -1326,83 +1231,6 @@ rule alfa_qc:
-s {params.alfa_orientation}) &> {log}"
rule alfa_qc_all_samples:
'''
Run ALFA on the per-sample feature count tables of all samples

Produces one biotypes plot and one categories plot (PDF) per value of
the {unique} wildcard.
'''
input:
# Input function: collects the ALFA feature count table of every unique
# sample ID, restricted to the {unique} value of the current job.
tables = lambda wildcards:
expand(
os.path.join(
config["output_dir"],
"samples",
"{sample}",
"ALFA",
"{unique}",
"{sample}.ALFA_feature_counts.tsv"),
sample=pd.unique(samples_table.index.values),
unique=wildcards.unique)
output:
biotypes = os.path.join(
config["output_dir"],
"ALFA",
"{unique}",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"ALFA",
"{unique}",
"ALFA_plots.Categories.pdf")
params:
# Directory holding both output plots; passed to `alfa -o`.
out_dir = lambda wildcards, output:
os.path.dirname(output.biotypes)
log:
# Single log file per {unique} value; receives stdout and stderr (&>).
os.path.join(
config["log_dir"],
"alfa_qc_all_samples.{unique}.log")
singularity:
"docker://zavolab/alfa:1.1.1-slim"
shell:
"(alfa -c {input.tables} -o {params.out_dir}) &> {log}"
# Combine the ALFA summary plots into a single PNG with ImageMagick
# (`convert -append`).
rule alfa_concat_results:
input:
# All four ALFA summary PDFs: every combination of
# unique in {Unique, UniqueMultiple} and
# annotation in {Categories, Biotypes}.
expand(
os.path.join(
config["output_dir"],
"ALFA",
"{unique}",
"ALFA_plots.{annotation}.pdf"),
unique=["Unique", "UniqueMultiple"],
annotation=["Categories", "Biotypes"])
output:
# NOTE(review): the "_mqc" suffix presumably lets MultiQC pick the image up
# as custom content -- confirm against the MultiQC configuration.
os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots_mqc.png")
params:
# Value passed to `convert -density`.
density = 300
log:
# Receives both stdout and stderr of the command (&>).
os.path.join(
config["log_dir"],
"alfa_qc_all_samples.concat.log")
singularity:
"docker://zavolab/imagemagick:7.0.8"
shell:
"(convert -append -density {params.density} \
{input} {output}) &> {log}"
rule prepare_multiqc_config:
'''
Prepare config for the MultiQC
......@@ -1479,18 +1307,26 @@ rule multiqc_report:
seqmode=[get_sample('seqmode', search_id='index', search_value=i)
for i in pd.unique(samples_table.index.values)]),
TIN_boxplot_PNG = os.path.join(
config['output_dir'],
"TIN_scores_boxplot_mqc.png"),
TIN_boxplot_PDF = os.path.join(
config['output_dir'],
"TIN_scores_boxplot_mqc.pdf"),
TIN_score = expand(
os.path.join(
config['output_dir'],
"samples",
"{sample}",
"TIN",
"TIN_score.tsv"),
sample=pd.unique(samples_table.index.values)),
alfa_concat_out = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots_mqc.png"),
tables = lambda wildcards:
expand(
os.path.join(
config["output_dir"],
"samples",
"{sample}",
"ALFA",
"{unique}",
"{sample}.ALFA_feature_counts.tsv"),
sample=pd.unique(samples_table.index.values),
unique=["Unique", "UniqueMultiple"]),
zpca_salmon = expand(os.path.join(
config["output_dir"],
......@@ -1529,7 +1365,7 @@ rule multiqc_report:
"multiqc_report.stdout.log")
singularity:
"docker://ewels/multiqc:1.7"
"docker://zavolab/multiqc-plugins:1.0.0"
shell:
"(multiqc \
......
......@@ -26,8 +26,6 @@ on installation and usage please see [here](README.md).
- [**sort_bed_4_big**](#sort_bed_4_big)
- [**prepare_bigWig**](#prepare_bigwig)
- [**calculate_TIN_scores**](#calculate_tin_scores)
- [**merge_TIN_scores**](#merge_tin_scores)
- [**plot_TIN_scores**](#plot_tin_scores)
- [**salmon_quantmerge_genes**](#salmon_quantmerge_genes)
- [**salmon_quantmerge_transcripts**](#salmon_quantmerge_transcripts)
- [**kallisto_merge_genes**](#kallisto_merge_genes)
......@@ -36,8 +34,6 @@ on installation and usage please see [here](README.md).
- [**pca_salmon**](#pca_salmon)
- [**generate_alfa_index**](#generate_alfa_index)
- [**alfa_qc**](#alfa_qc)
- [**alfa_qc_all_samples**](#alfa_qc_all_samples)
- [**alfa_concat_results**](#alfa_concat_results)
- [**prepare_multiqc_config**](#prepare_multiqc_config)
- [**multiqc_report**](#multiqc_report)
- [**finish**](#finish)
......@@ -365,29 +361,6 @@ Calculates the Transcript Integrity Number (TIN) for each transcript with
- TIN score table (custom `tsv`); used in
[**merge_TIN_scores**](#merge_tin_scores)
#### `merge_TIN_scores`
Merges TIN score tables for all samples with [custom script][custom-script-tin].
- **Input**
- TIN score table (custom `tsv`); per sample; from
[**calculate_TIN_scores**](#calculate_tin_scores)
- **Output**
- TIN score table (custom `tsv`); for all samples; used in
[**plot_TIN_scores**](#plot_tin_scores)
#### `plot_TIN_scores`
Generate sample-wise [box plots](https://en.wikipedia.org/wiki/Box_plot) of
TIN scores with [custom script][custom-script-tin].
- **Input**
- TIN score table (custom `tsv`); for all samples; from
[**merge_TIN_scores**](#merge_tin_scores)
- **Output**
- TIN score box plots (`.pdf` and `.png`); used in
[**multiqc_report**](#multiqc_report)
#### `salmon_quantmerge_genes`
Merge gene-level expression estimates for all samples with
......@@ -502,27 +475,6 @@ Annotate alignments with [**ALFA**](#third-party-software-used).
- Feature counts table (custom `.tsv`); used in
[**alfa_qc_all_samples**](#alfa_qc_all_samples)
#### `alfa_qc_all_samples`
Combines output of all samples with [**ALFA**](#third-party-software-used).
- **Input**
- Feature counts table (custom `.tsv`); from [**alfa_qc**](#alfa_qc)
- **Output**
- Figures for biotypes and feature categories (`.pdf`); summarized for all
samples together; used in [**alfa_concat_results**](#alfa_concat_results)
#### `alfa_concat_results`
Concatenate and convert ALFA output plots into single plot with
[**ImageMagick**](#third-party-software-used).
- **Input**
- Figures for biotypes and feature categories (`.pdf`); for individual and
summarized for all samples
- **Output**
- ALFA plot (`.png`), combined; used in [**multiqc_report**](#multiqc_report)
#### `prepare_multiqc_config`
Prepare config file for [**MultiQC**](#third-party-software-used).
......
......@@ -103,6 +103,7 @@ results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Uniqu
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
results/multiqc_summary/multiqc_data/multiqc_fastqc.txt
results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
results/multiqc_summary/multiqc_data/multiqc_star.txt
results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
results/multiqc_summary/multiqc_data/multiqc_general_stats.txt
......
......@@ -84,7 +84,8 @@ ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synt
69e2bf688165e9fb7c9c49a8763f5632 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw
ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
ba090b1b4a2473891de97493d3244956 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt
3e4db5fad83e162bcc19abbe81333a95 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
d8118d944149eecc691d182448696e7f results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
a127fabda5c3aad9d95414dc4fbc11c3 results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
0c6363588cf6ff74d49f27c164185918 results/multiqc_summary/multiqc_data/multiqc_star.txt
dd81441ca97912a62292d317af2c107c results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
0703b4cb7ec2abfab13ccd5f58c2d536 results/multiqc_summary/multiqc_data/multiqc_general_stats.txt
......
......@@ -83,7 +83,8 @@ ed3428feeb7257b0a69ead76a417e339 results/samples/synthetic_10_reads_mate_1_synt
ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_UniqueMultiple_plus.bw
69e2bf688165e9fb7c9c49a8763f5632 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw
ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw
3e4db5fad83e162bcc19abbe81333a95 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
d8118d944149eecc691d182448696e7f results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt
a127fabda5c3aad9d95414dc4fbc11c3 results/multiqc_summary/multiqc_data/multiqc_cutadapt_1.txt
0c6363588cf6ff74d49f27c164185918 results/multiqc_summary/multiqc_data/multiqc_star.txt
dd81441ca97912a62292d317af2c107c results/multiqc_summary/multiqc_data/multiqc_kallisto.txt
ba090b1b4a2473891de97493d3244956 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt
......
......@@ -89,7 +89,7 @@ report_header_info:
- Analysis Author: "{author_name}"
- Contact E-mail: "{author_email}"
top_modules:
module_order:
- fastqc:
path_filters:
......@@ -109,17 +109,13 @@ top_modules:
path_filters:
- "*/*/map_genome/*"
- alfa:
name: "ALFA"
anchor: "ALFA"
- ALFA:
path_filters:
- "*/ALFA_plots.concat_mqc.png"
- "*/*/ALFA/*/*ALFA_feature_counts.tsv"
- TIN_scores:
name: "TIN_scores"
anchor: "TIN_scores"
- tin-score:
path_filters:
- "*/TIN_scores_boxplot_mqc.png"
- "*/*/TIN/TIN_score.tsv"
- salmon:
path_filters:
......@@ -143,8 +139,6 @@ fn_clean_exts:
- '.pseudo'
- '.salmon'
- '.sam'
- 'mqc'
- '.png'
..."""
with open(options.config, "w") as config:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment