Skip to content
Snippets Groups Projects
Commit cf78ba0c authored by Ticlla Ccenhua Monica Roxana's avatar Ticlla Ccenhua Monica Roxana
Browse files

replace rule strphlan_check with strphlan_summary to agregate info files from...

replace rule strphlan_check with strphlan_summary to agregate info files from StrainPhlAn into a report table
parent 1ec25cbb
Branches
No related tags found
No related merge requests found
......@@ -153,11 +153,11 @@ rule MetaPhlAn2:
expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png', dataset=set(DATASETS))
rule StrainPhlAn:
input:
expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS))
expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv', dataset=set(DATASETS))
rule PhlAnProf:
input:
expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png', dataset=set(DATASETS)),
expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS))
expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv', dataset=set(DATASETS))
##----------------------------------------------------------------------------##
## Rules to make reports
......@@ -195,7 +195,7 @@ rule preQC_make_report:
--report {params.report_path} \
-s {params.workflow_dir}/Snakefile preQC) &>{log}
'''
rule MetaPhlAn2_make_report:
output:
temp(touch(OUT_DIR+'/logs/metaphlan2_make_report.done'))
......@@ -212,7 +212,7 @@ rule MetaPhlAn2_make_report:
--report {params.report_path} \
-s {params.workflow_dir}/Snakefile MetaPhlAn2) &>{log}
'''
rule PhlAnProf_make_report:
output:
temp(touch(OUT_DIR+'/logs/phlanprof_make_report.done'))
......
images/PhlAnProf_dag.png

59.7 KiB | W: | H:

images/PhlAnProf_dag.png

59.8 KiB | W: | H:

images/PhlAnProf_dag.png
images/PhlAnProf_dag.png
images/PhlAnProf_dag.png
images/PhlAnProf_dag.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -16,7 +16,7 @@ sequencing.
localrules:
metaphlan2_merge,
metaphlan2_visualize,
strphlan_check
strphlan_summary
##----------------------------------------------------------------------------##
## Local variables
##----------------------------------------------------------------------------##
......@@ -82,31 +82,31 @@ rule metaphlan2_merge:
output:
merged_abund = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_abundances_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_abund.rst'),
merged_counts = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_counts_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_counts.rst'),
merged_abund_sp = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_abundances_sp_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_abund_sp.rst'),
merged_counts_sp = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_counts_sp_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_counts_sp.rst'),
merged_abund_sp_clean = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_abund_sp_cleaned.rst'),
merged_counts_sp_clean = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_cleaned_counts_sp_table.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_counts_sp_cleaned.rst'),
samples_removed = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_samples_removed.txt',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_samples_removed.rst')
log:
......@@ -146,7 +146,7 @@ rule metaphlan2_visualize:
merged_abund_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt'
output:
merged_abund_sp_png = report(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_merged/{dataset}_abundances_sp_heatmap.png',
category='PhlAnProf:Metaphlan2',
category='PhlAnProf:Metaphlan2',
caption='../report/PhlAnProf/metaphlan2_abund_sp_heatmap.rst')
log:
OUT_DIR + '/{dataset}/PhlAnProf/logs/metaphlan/merged_abundances_sp_heatmap.log'
......@@ -203,8 +203,8 @@ checkpoint strphlan_clades_detection:
merged_abundances_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan' +
'/profiles_merged/{dataset}_cleaned_abundances_sp_table.txt'
output:
clades_list = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades.txt',
category='PhlAnProf:StrainPhlAn',
clades_list = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades.txt',
category='PhlAnProf:StrainPhlAn',
caption='../report/PhlAnProf/strphlan_clades_selected.rst'),
clades_detected_dir = directory("{}/{{dataset}}/PhlAnProf/strphlan/clades_detected".format(OUT_DIR))
log:
......@@ -320,16 +320,55 @@ def aggregate_clade_profiles(wildcards):
dataset=wildcards.dataset,
clade=glob_wildcards(os.path.join(checkpoint_output, '{clade}.clade')).clade)
rule strphlan_check:
rule strphlan_summary:
input:
aggregate_clade_profiles
output:
combined = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done'
shell:
'''
touch {output.combined}
'''
#combined = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done',
clades_profiled = report(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/{dataset}_clades_profiled.tsv',
category='PhlAnProf:StrainPhlAn',
caption='../report/PhlAnProf/strphlan_clades_profiled.rst')
log:
OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/strphlan_summary.log'
run:
import os
from pathlib import Path
import pandas as pd
import collections
import sys
sys.stdout = open(log[0], 'w')
# TODO:
# -rule to agreggate reports from strphlan_clade_profiling,
# it needs to parse the {clade}.info files
clades_info_list = list()
try:
for clade_dir in input:
clade_name = str(clade_dir).split('/')[-1]
clade_info_file = str(clade_dir)+'/{}.info'.format(clade_name)
if os.path.exists(clade_info_file):
with open(clade_info_file, mode='r') as f:
clade_info_dict = collections.OrderedDict((line.strip().split(':')[0],
line.strip().split(':')[1].strip())
for line in f)
clade_raxml_tree_png = '{}/RAxML_bestTree.{}.tree.metadata.png'.format(str(clade_dir),
clade_name)
if os.path.exists(clade_raxml_tree_png):
clade_info_dict['raxml_tree_png'] = clade_raxml_tree_png
clade_raxml_ggtree_1 = '{}/RAxML_bestTree.{}.tree_1.png'.format(str(clade_dir),
clade_name)
if os.path.exists(clade_raxml_ggtree_1):
clade_info_dict['raxml_ggtree_1_png'] = clade_raxml_ggtree_1
clade_raxml_ggtree_2 = '{}/RAxML_bestTree.{}.tree_2.png'.format(str(clade_dir),
clade_name)
if os.path.exists(clade_raxml_ggtree_2):
clade_info_dict['raxml_ggtree_2_png'] = clade_raxml_ggtree_2
clades_info_list.append(clade_info_dict)
except (TypeError, ValueError) as e:
print(e)
else:
clades_info_table = pd.DataFrame(clades_info_list)
clades_info_table.to_csv(output.clades_profiled, sep='\t', header=True, index=False)
finally:
sys.stdout.close()
#'''
#touch {output.combined}
#'''
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment