diff --git a/Snakefile b/Snakefile index 9dd1b512e49e266112b9196a0c60bac950dd9904..efb213f6974ec15b6e6787e15476e91eb1180316 100644 --- a/Snakefile +++ b/Snakefile @@ -153,11 +153,11 @@ rule MetaPhlAn2: expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png', dataset=set(DATASETS)) rule StrainPhlAn: input: - expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS)) + expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv', dataset=set(DATASETS)) rule PhlAnProf: input: expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png', dataset=set(DATASETS)), - expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS)) + expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv', dataset=set(DATASETS)) ##----------------------------------------------------------------------------## ## Rules to make reports @@ -195,7 +195,7 @@ rule preQC_make_report: --report {params.report_path} \ -s {params.workflow_dir}/Snakefile preQC) &>{log} ''' - + rule MetaPhlAn2_make_report: output: temp(touch(OUT_DIR+'/logs/metaphlan2_make_report.done')) @@ -212,7 +212,7 @@ rule MetaPhlAn2_make_report: --report {params.report_path} \ -s {params.workflow_dir}/Snakefile MetaPhlAn2) &>{log} ''' - + rule PhlAnProf_make_report: output: temp(touch(OUT_DIR+'/logs/phlanprof_make_report.done')) diff --git a/images/PhlAnProf_dag.png b/images/PhlAnProf_dag.png index cf6cbd8b1b0b7897feb9d32b3eaecc66177d663a..986f798415142cc15dd16bd62764e6d695520630 100644 Binary files a/images/PhlAnProf_dag.png and b/images/PhlAnProf_dag.png differ diff --git a/rules/phlanprof.smk b/rules/phlanprof.smk index d25dac313be53b6b2d88510317600ce87a1a2784..ad107e5d724cbc5b1a341fcdea58814a0dd60b03 100644 --- a/rules/phlanprof.smk +++ b/rules/phlanprof.smk @@ -16,7 +16,7 @@ sequencing. localrules: metaphlan2_merge, metaphlan2_visualize, - strphlan_check + strphlan_summary ##----------------------------------------------------------------------------## ## Local variables ##----------------------------------------------------------------------------## @@ -82,31 +82,31 @@ rule metaphlan2_merge: output: merged_abund = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_abundances_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_abund.rst'), merged_counts = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_counts_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_counts.rst'), merged_abund_sp = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_abundances_sp_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_abund_sp.rst'), merged_counts_sp = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_counts_sp_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_counts_sp.rst'), merged_abund_sp_clean = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_abund_sp_cleaned.rst'), merged_counts_sp_clean = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_cleaned_counts_sp_table.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_counts_sp_cleaned.rst'), samples_removed = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_samples_removed.txt', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_samples_removed.rst') log: @@ -146,7 +146,7 @@ rule metaphlan2_visualize: merged_abund_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt' output: merged_abund_sp_png = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png', - category='PhlAnProf:Metaphlan2', + category='PhlAnProf:Metaphlan2', caption='../report/PhlAnProf/metaphlan2_abund_sp_heatmap.rst') log: OUT_DIR + '/{dataset}/PhlAnProf/logs/metaphlan/merged_abundances_sp_heatmap.log' @@ -203,8 +203,8 @@ checkpoint strphlan_clades_detection: merged_abundances_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' + '/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt' output: - clades_list = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades.txt', - category='PhlAnProf:StrainPhlAn', + clades_list = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades.txt', + category='PhlAnProf:StrainPhlAn', caption='../report/PhlAnProf/strphlan_clades_selected.rst'), clades_detected_dir = directory("{}/{{dataset}}/PhlAnProf/strphlan/clades_detected".format(OUT_DIR)) log: @@ -320,16 +320,55 @@ def aggregate_clade_profiles(wildcards): dataset=wildcards.dataset, clade=glob_wildcards(os.path.join(checkpoint_output, '{clade}.clade')).clade) -rule strphlan_check: +rule strphlan_summary: input: aggregate_clade_profiles output: - combined = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done' - shell: - ''' - touch {output.combined} - ''' + #combined = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', + clades_profiled = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv', + category='PhlAnProf:StrainPhlAn', + caption='../report/PhlAnProf/strphlan_clades_profiled.rst') + log: + OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/strphlan_summary.log' + run: + import os + from pathlib import Path + import pandas as pd + import collections + import sys + + sys.stdout = open(log[0], 'w') -# TODO: -# -rule to agreggate reports from strphlan_clade_profiling, -# it needs to parse the {clade}.info files + clades_info_list = list() + try: + for clade_dir in input: + clade_name = str(clade_dir).split('/')[-1] + clade_info_file = str(clade_dir)+'/{}.info'.format(clade_name) + if os.path.exists(clade_info_file): + with open(clade_info_file, mode='r') as f: + clade_info_dict = collections.OrderedDict((line.strip().split(':')[0], + line.strip().split(':')[1].strip()) + for line in f) + clade_raxml_tree_png = '{}/RAxML_bestTree.{}.tree.metadata.png'.format(str(clade_dir), + clade_name) + if os.path.exists(clade_raxml_tree_png): + clade_info_dict['raxml_tree_png'] = clade_raxml_tree_png + clade_raxml_ggtree_1 = '{}/RAxML_bestTree.{}.tree_1.png'.format(str(clade_dir), + clade_name) + if os.path.exists(clade_raxml_ggtree_1): + clade_info_dict['raxml_ggtree_1_png'] = clade_raxml_ggtree_1 + clade_raxml_ggtree_2 = '{}/RAxML_bestTree.{}.tree_2.png'.format(str(clade_dir), + clade_name) + if os.path.exists(clade_raxml_ggtree_2): + clade_info_dict['raxml_ggtree_2_png'] = clade_raxml_ggtree_2 + clades_info_list.append(clade_info_dict) + except (TypeError, ValueError) as e: + print(e) + else: + clades_info_table = pd.DataFrame(clades_info_list) + clades_info_table.to_csv(output.clades_profiled, sep='\t', header=True, index=False) + finally: + sys.stdout.close() + #''' + #touch {output.combined} + #'''