diff --git a/README.md b/README.md
index b255cc7e60b8c28c0d57a8d35c3284896649a670..c846de18b970921582c96f65fac12c5f52d5419a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # MetaSnk
 
 [![Snakemake](https://img.shields.io/badge/snakemake-≥5.4.4-brightgreen.svg)](https://snakemake.bitbucket.io)
+[![Singularity](https://img.shields.io/badge/singularity-≥2.6.0-blue.svg)](https://sylabs.io/guides/3.5/user-guide/)
 [![Build Status](https://travis-ci.org/snakemake-workflows/metagenomicsnake.svg?branch=master)](https://travis-ci.org/snakemake-workflows/metagenomicsnake)
 
 ## Description
@@ -10,23 +11,40 @@ MetaSnk is a modularized Snakemake workflow for the analysis of metagenomic data
 ### Modules:
  - [rawQC](README_rawQC.md)
  - [preQC](README_preQC.md)
- - taxProf
+ - [PhlAnProf](README_phlanprof.md)
 
-## Authors
+### Authors
 
 * Monica R. Ticlla (@mticllacc)
 
-## Requirements
+### Requirements
 
-### Datasets
-- Paired-end Illumina sequences
-
-### Dependencies
+#### Dependencies
 - Snakemake >= 5.5.0
 - Singularity >= 2.6
 - python >= 3.6.8
 - conda >= 4.6
 
+#### Datasets
+- Paired-end Illumina sequences in fastq files named as follows:
+
+  ```
+  sampleID-RUN_LANE-R1.fastq.gz
+  sampleID-RUN_LANE-R2.fastq.gz
+  ```
+- MetaSnk expects to find the raw fastq files in a directory (set in the configuration file, [see below](#basic-configuration)) where they are grouped into one or more datasets. Each dataset directory (named at the user's discretion) must contain a directory named 'fastq', holding the fastq files, accompanied by a sample_metadata.tsv file.
+
+  ```
+  $RAW_DIR
+  ├── dataset_test_1
+  │   ├── fastq
+  │   │   ├── sampleID-RUN_LANE-R1.fastq.gz
+  │   │   └── sampleID-RUN_LANE-R2.fastq.gz
+  │   └── sample_metadata.tsv
+  ```
+
+  Notice that you can have multiple paired fastq files per sample, but each sampleID-RUN_LANE combination must be unique, as in the example below.
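+
+  For example, a sample sequenced on two lanes of the same run provides two
+  read pairs (hypothetical file names):
+
+  ```
+  sampleA-run1_L001-R1.fastq.gz
+  sampleA-run1_L001-R2.fastq.gz
+  sampleA-run1_L002-R1.fastq.gz
+  sampleA-run1_L002-R2.fastq.gz
+  ```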
+
 ## Usage
 
 ### Simple
@@ -35,16 +53,39 @@ MetaSnk is a modularized Snakemake workflow for the analysis of metagenomic data
 
 If you simply want to use this workflow, download and extract the [latest release](https://github.com/snakemake-workflows/metagenomicsnake/releases).
 
-    git clone https://git.scicore.unibas.ch/TBRU/MetagenomicSnake.git path/to/MetaSnk
-    cd path/to/MetaSnk
+    git clone https://git.scicore.unibas.ch/TBRU/MetagenomicSnake.git <path/to/MetaSnk>
+    cd <path/to/MetaSnk>
     echo -e "#MetaSnk directory\nmetasnk=$(pwd)\nexport metasnk">>$HOME/.bashrc
+    export METASNK_DBS=$HOME/MetaSnk_dbs
+    mkdir -p $METASNK_DBS
+    echo -e "#MetaSnk DBs directory\nMETASNK_DBS=$HOME/MetaSnk_dbs\nexport METASNK_DBS">>$HOME/.bashrc
     source $HOME/.bashrc
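+
+As a quick check, print both variables; they should show the two paths:
+
+    echo "$metasnk" "$METASNK_DBS"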
 
 If you intend to modify and further extend this workflow or want to work under version control, fork this repository as outlined in [Advanced](#advanced). The latter way is recommended.
 
 In any case, if you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this repository and, if available, its DOI (see above).
 
-#### Step 2: Create minimal environment
+##### Download singularity containers and reference databases
+MetaSnk wraps its system requirements and software dependencies in singularity containers.
+Download these containers by running the rule 'pullSIFS':
+
+<div style="text-align:center">
+  <img src="./images/pullSIFS_dag.png"  width="250" height="150" />
+</div>
+
+    snakemake --profile ./profiles/local pullSIFS
+
+The singularity image files (.sif) will be stored in $METASNK_DBS/singularity.
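+
+For example, after a successful pull you should see something like:
+
+    $ ls $METASNK_DBS/singularity
+    MetagenomicSnake_preqc_v0_1.sif  OmicSingularities_metaprof.sif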
+
+MetaSnk uses reference databases that need to be downloaded to the $METASNK_DBS directory:
+
+<div style="text-align:center">
+  <img src="./images/buildDBS_dag.png"  width="500" height="200" />
+</div>
+
+    snakemake --profile ./profiles/local buildDBS
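+
+After both rules complete, $METASNK_DBS should contain, roughly:
+
+```
+$METASNK_DBS
+├── singularity
+├── metaphlan_databases
+├── strainphlan_markers
+└── humann2_databases
+    ├── chocophlan
+    ├── uniref
+    └── utility_mapping
+```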
+
+##### Create minimal environment
 
 Some rules will use this environment.
 
@@ -55,10 +96,42 @@ Some rules will use this environment.
 Configure the workflow according to your needs via editing the file `config.yaml`.
 
 ##### Basic configuration
-- Make a copy of the config.yaml:
+- Make a copy of the config.yaml (recommended) and place it in the working directory that MetaSnk will use:
 ```
-  cp ./config.yaml path/to/my_config.yaml
+  cp ./config.yaml <path_to/my_working_directory/config.yaml>
 ```
+- Open the copied config.yaml and set RAW_DIR and OUT_DIR; both must be absolute paths (see the example after this list).
+
+  - The **RAW_DIR** should point to a directory where MetaSnk expects to find raw fastq data. This directory must have the following structure:
+  ```
+    $RAW_DIR
+    ├── dataset_test_1
+    │   ├── fastq
+    │   └── sample_metadata.tsv
+    └── dataset_test_2
+        ├── fastq
+        └── sample_metadata.tsv
+  ```
+  - The **OUT_DIR** is the directory where MetaSnk will save the outputs of the
+  workflow under the following structure:
+  ```
+    $OUT_DIR
+    ├── dataset_test_1
+    │   ├── PhlAnProf
+    │   ├── preQC
+    │   └── rawQC
+    ├── dataset_test_2
+    │   ├── PhlAnProf
+    │   ├── preQC
+    │   └── rawQC
+    ├── logs
+    │   ├── preQC_make_report.log
+    │   ├── rawQC_make_report.log
+    │   └── ref_indexing.log
+    ├── preQC_report.html
+    └── rawQC_report.html
+  ```
+
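+A minimal edit of the copied config.yaml looks like this (the paths are examples):
+
+```
+# Absolute PATH to folder where to find fastq files
+RAW_DIR: '/home/user/data/raw'
+# Absolute PATH to folder where to place output files
+OUT_DIR: '/home/user/data/MetaSnk_out'
+```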
 
 #### Step 3: Execute workflow
 Activate the environment via
@@ -67,34 +140,44 @@ Activate the environment via
 
 Test your configuration by performing a dry-run via
 
-    snakemake --use-singularity -n
-
-Execute the workflow locally via
-
-    snakemake --use-singularity --cores $N
-
-using `$N` cores or run it in a cluster environment controlled by SGE (Sun Grid Engine) via
+    snakemake --directory='<path_to/my_working_directory>' -s $metasnk/Snakefile -n
 
-    snakemake --use-singularity --cluster qsub --jobs 100
+Execute the workflow **locally** via
 
-or
+    snakemake \
+    --profile $metasnk/profiles/local \
+    --cores $N \
+    --directory='<path_to/my_working_directory>' \
+    -s $metasnk/Snakefile <METASNK_MODULE>
 
-    snakemake --use-singularity --drmaa --jobs 100
+using `$N` cores and specifying a **working directory**. Keep in mind that the working directory is where MetaSnk will look for your configuration file, and it is also where snakemake will store the files it uses to track the status of a running MetaSnk workflow.
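+
+For example, to run the rawQC module on 4 cores from an illustrative working directory:
+
+    snakemake \
+    --profile $metasnk/profiles/local \
+    --cores 4 \
+    --directory="$HOME/projects/study1" \
+    -s $metasnk/Snakefile rawQC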
 
-or, in a cluster environment controlled by SLURM workload manager via
+or, **in a cluster environment controlled by SLURM** workload manager via
 
-    snakemake --profile ./profiles/slurm --use-singularity
+    snakemake \
+    --profile $metasnk/profiles/slurm \
+    --cores $N \
+    --cluster-config $metasnk/slurm_cluster.json \
+    --directory='<path_to/my_working_directory>' \
+    -s $metasnk/Snakefile <METASNK_MODULE>
 
-in combination with any of the modes above.
 See the [Snakemake documentation](https://snakemake.readthedocs.io/en/stable/executable.html) for further details.
 
 #### Step 4: Investigate results
 
 After successful execution, you can create a self-contained interactive HTML report with all results via:
 
-    snakemake --report report.html
+    snakemake \
+    --directory='<path_to/my_working_directory>' \
+    -s $metasnk/Snakefile rawQC_make_report
+
+or
 
-This report can, e.g., be forwarded to your collaborators.
+    snakemake \
+    --directory='<path_to/my_working_directory>' \
+    -s $metasnk/Snakefile preQC_make_report
+
+These reports can, e.g., be forwarded to your collaborators.
 
 ### Advanced
 
@@ -115,5 +198,3 @@ The following recipe provides established best practices for running and extendi
 ## Testing
 
 Test cases are in the subfolder `.test`. They are automatically executed via continuous integration with Travis CI.
-
-snakemake --use-singularity -n -s Snakefile_test
diff --git a/README_phlanprof.md b/README_phlanprof.md
new file mode 100644
index 0000000000000000000000000000000000000000..ab8857cd9a0cf6828dc1237f725250e70ceff6e3
--- /dev/null
+++ b/README_phlanprof.md
@@ -0,0 +1,33 @@
+# MetaSnk: PhlAnProf
+[![Snakemake](https://img.shields.io/badge/snakemake-≥5.4.4-brightgreen.svg)](https://snakemake.bitbucket.io)
+[![Singularity](https://img.shields.io/badge/singularity-≥2.6.0-blue.svg)](https://sylabs.io/guides/3.5/user-guide/)
+[![Build Status](https://travis-ci.org/snakemake-workflows/metagenomicsnake.svg?branch=master)](https://travis-ci.org/snakemake-workflows/metagenomicsnake)
+
+## Description
+PhlAnProf performs taxonomic and strain-level profiling with MetaPhlAn2 and StrainPhlAn. If pre-processing was not performed, PhlAnProf will trigger execution of preQC.
+
+<div style="text-align:center">
+  <img src="./images/PhlAnProf_dag.png"  width="315" height="650" />
+</div>
+
+## Usage
+
+### Case 1:
+From within the MetaSnk directory (i.e., where it was installed and where the Snakefile is located):
+
+    snakemake --profile ./profiles/local --directory='path/to/workdir' -n PhlAnProf
+
+--directory : specifies the working directory and it is where snakemake will store its files for tracking the status of the workflow before/during/after execution. You should change the working directory depending on your current working project.
+
+After PhlAnProf has completed, we can generate an HTML report:
+
+    snakemake --directory='path/to/workdir' PhlAnProf_make_report
+
+The report will be created in the path specified for OUT_DIR in the configuration file.
+
+### Case 2:
+Execute PhlAnProf from a working directory outside the MetaSnk directory:
+
+    snakemake --profile $metasnk/profiles/local -s $metasnk/Snakefile -n PhlAnProf
diff --git a/README_preQC.md b/README_preQC.md
index 85d68d827b36205ac666694bdc586ed75109e499..8077257c08d28b42014ca63bbaa2bb16dc504444 100644
--- a/README_preQC.md
+++ b/README_preQC.md
@@ -30,7 +30,7 @@ rule runs a multi-step pre-processing of the paired fastq files, it includes:
 ### Case 1:
 From within the MetaSnk directory (i.e., where it was installed and where the Snakefile is located):
 
-    snakemake --use-singularity --directory='path/to/workdir' -n preQC
+    snakemake --profile ./profiles/local --directory='path/to/workdir' -n preQC
 
 --directory : specifies the working directory and it is where snakemake will store its files for tracking the status of the workflow before/during/after execution.
 
@@ -43,4 +43,4 @@ The report will be created in the path specified for OUT_DIR in the configuratio
 ### Case 2:
 Execute preQC from a working directory outside the MetaSnk directory:
 
-    snakemake --use-singularity -s path/to/MetaSnk/Snakefile -n preQC
+    snakemake --profile $metasnk/profiles/local -s $metasnk/Snakefile -n preQC
diff --git a/README_rawQC.md b/README_rawQC.md
index e1b7a53f35faf9c8f3bce667c936ad449adc5e42..eb653df28856c86d261fa030b161e93152c518a8 100644
--- a/README_rawQC.md
+++ b/README_rawQC.md
@@ -14,7 +14,7 @@ rawQC is an independent module, its output files are not required as inputs in o
 ### Case 1:
 From within the MetaSnk directory (i.e., where it was installed and where the Snakefile is located):
 
-    snakemake --use-singularity --directory='path/to/workdir' -n rawQC
+    snakemake --profile ./profiles/local --directory='path/to/workdir' -n rawQC
 
 --directory : specifies the working directory and it is where snakemake will store its files for tracking the status of the workflow before/during/after execution.
 
@@ -27,4 +27,4 @@ The report will be created in the path specified for OUT_DIR in the configuratio
 ### Case 2:
 Execute rawQC from a working directory outside the MetaSnk directory:
 
-    snakemake --use-singularity -s path/to/MetaSnk/Snakefile -n rawQC
+    snakemake --profile $metasnk/profiles/local -s $metasnk/Snakefile -n rawQC
diff --git a/Snakefile b/Snakefile
index fb7a30eece936a2ff38a17c96ec6221b713b7cec..9f6bf598aa6e333ac89a1e4960208df682222d95 100644
--- a/Snakefile
+++ b/Snakefile
@@ -11,6 +11,17 @@ import os
 from multiprocessing import cpu_count
 cpus_avail = cpu_count()
 print('CPUs available: {}'.format(cpus_avail), file=sys.stderr)
+
+##----------------------------------------------------------------------------##
+## Check existence of expected variables in the environment
+##----------------------------------------------------------------------------##
+try:
+    print('MetaSnk will store reference data/databases at {}'.format(os.environ["METASNK_DBS"]),
+          file=sys.stderr)
+except KeyError:
+    print("Environment variable METASNK_DBS is not set.", file=sys.stderr)
+    exit(1)
+
 ##----------------------------------------------------------------------------##
 ## Re-set working directory
 ##----------------------------------------------------------------------------##
@@ -36,7 +47,7 @@ else:
     print("Working directory:{}".format(workdir_path), file=sys.stderr)
 
 ##----------------------------------------------------------------------------##
-## Configuration of MetagenomicSnake
+## Configuration of MetaSnk
 ##----------------------------------------------------------------------------##
 try:
     configfile_path = config['configfile_path']
@@ -57,21 +68,25 @@ except:
         print("... Configuration file: {}".format(configfile_path), file=sys.stderr)
 
 ##----------------------------------------------------------------------------##
-## Define paths
+## Define paths and local variables
 ##----------------------------------------------------------------------------##
 RAW_FASTQ_DIR = config['RAW_DIR']
 OUT_DIR = config['OUT_DIR']
-
-#RESULTS_DIR = config['OUT_DIR'] +'/results'
-#LOGS_DIR = config['OUT_DIR'] +'/logs'
-#REPORTS_DIR = config['OUT_DIR'] +'/reports'
-
+METASNK_DB_DIR = os.environ["METASNK_DBS"]
+SHUB_PREQC_SIF = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
+SHUB_METAPROF_SIF = 'shub://mticlla/OmicSingularities:metaprof'
+PREQC_SIF = METASNK_DB_DIR+'/singularity/MetagenomicSnake_preqc_v0_1.sif'
+METAPROF_SIF = METASNK_DB_DIR+'/singularity/OmicSingularities_metaprof.sif'
 ##----------------------------------------------------------------------------##
 ## Fastq files to be processed
 ##----------------------------------------------------------------------------##
 if config['SAMPLE_UNITS']['auto']:
     (DATASETS, SAMPLES, RUNS, LANES) = glob_wildcards(RAW_FASTQ_DIR+'/{dataset}/fastq/{sample}-{run}_{lane}-R1.fastq.gz')
     (DATASETSX, FASTQS) = glob_wildcards(RAW_FASTQ_DIR+'/{dataset}/fastq/{fastq_file}-R1.fastq.gz')
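+    # Map each dataset to its unique sample IDs,
+    # e.g. {'dataset_test_1': ['sampleA', 'sampleB']}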
+    DATASETS_SAMPLES_DICT = dict()
+    for dataset in set(DATASETS):
+        samples = list({SAMPLES[ix] for ix,value in enumerate(DATASETS) if value==dataset})
+        DATASETS_SAMPLES_DICT[dataset] = samples
 else:
     # TODO:
     pass
@@ -79,8 +94,15 @@ else:
 ## Local rules
 ##----------------------------------------------------------------------------##
 localrules:
+    pullSIFS,
+    buildDBS,
     rawQC,
-    preQC
+    preQC,
+    PhlAnProf,
+    MetaPhlAn2,
+    StrainPhlAn,
+    rawQC_make_report,
+    preQC_make_report
 ##----------------------------------------------------------------------------##
 ## Run entire workflow
 ##----------------------------------------------------------------------------##
@@ -93,6 +115,18 @@ rule all:
 ##----------------------------------------------------------------------------##
 ## Modules
 ##----------------------------------------------------------------------------##
+rule pullSIFS:
+    input:
+        PREQC_SIF, 
+        METAPROF_SIF
+    
+rule buildDBS:
+    input:
+        METASNK_DB_DIR+'/strainphlan_markers/all_markers.fasta',
+        METASNK_DB_DIR+'/humann2_databases/chocophlan',
+        METASNK_DB_DIR+'/humann2_databases/uniref',
+        METASNK_DB_DIR+'/humann2_databases/utility_mapping'
+        
 rule rawQC:
     input:
         expand(OUT_DIR + '/{dataset}/rawQC/summary_stats/{dataset}_samples_stats.txt', dataset=set(DATASETS)),
@@ -112,6 +146,16 @@ rule preQC:
         expand(OUT_DIR + '/{dataset}/preQC/summary_stats/{dataset}_preqc_units_pct_barchart.svg', dataset=set(DATASETS)),
         expand(OUT_DIR + '/{dataset}/preQC/summary_stats/{dataset}_preqc_samples_pct_barchart.svg', dataset=set(DATASETS)),
         expand(OUT_DIR + '/{dataset}/preQC/summary_stats/{dataset}_concatenation_all.done', dataset=set(DATASETS))
+rule MetaPhlAn2:
+    input:
+        expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_heatmap.png', dataset=set(DATASETS))
+rule StrainPhlAn:
+    input:
+        expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS))
+rule PhlAnProf:
+    input:
+        expand(OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_heatmap.png', dataset=set(DATASETS)),
+        expand(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done', dataset=set(DATASETS))
 
 ##----------------------------------------------------------------------------##
 ## Rules to make reports
@@ -143,6 +187,7 @@ rule preQC_make_report:
         '''
         (snakemake --directory={params.workdir} --report {params.report_path} -s {params.workflow_dir}/Snakefile preQC) &>{log}
         '''
-
+include: "rules/setup_rules.smk"
 include: "rules/rawQC.smk"
 include: "rules/preprocess.smk"
+include: "rules/phlanprof.smk"
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
index 1f62c397eb2d15b6d499e8626804bd45f9526e1b..4bfe9cfc6ec68aa4fe9e522d4f3c077cb5ee03e8 100644
--- a/config.yaml
+++ b/config.yaml
@@ -9,16 +9,11 @@
 #   Singularity's --bind/-B option can be specified multiple times, 
 #   or a comma-delimited string of bind path specifications can be used.
 #   
-# PATH to config file for SnakeMake
-# configfile_path: ''
-# PATH to MetaSnk_db where MetaSnk will download reference databases
-METASNKDB_DIR: '$HOME/MetaSnk_db'
+
 # Absolute PATH to folder where to find fastq files
 RAW_DIR: '/home/mticlla/Documents/Git_repositories/metagenomicsnake_testdata/data/raw'
 # Absolute PATH to folder where to place output files
 OUT_DIR: '/home/mticlla/Documents/Git_repositories/metagenomicsnake_testdata/data/interim/MetaSnk'
-# PATH to folder where MetagenomicSnake will store results
-# PREFIX: 'MetaSnk'
 
 #
 SAMPLE_UNITS:
diff --git a/images/buildDBS_dag.png b/images/buildDBS_dag.png
new file mode 100644
index 0000000000000000000000000000000000000000..98b56ebd4a6191fb72d2be93e7be2a71bc2eaede
Binary files /dev/null and b/images/buildDBS_dag.png differ
diff --git a/images/preQC_dag.png b/images/preQC_dag.png
index 5825ba4dcec9410ec1eaad7df86dd423c2ef8499..1b7d3128ac657895d9a341735380d56bf2199f33 100644
Binary files a/images/preQC_dag.png and b/images/preQC_dag.png differ
diff --git a/images/pullSIFS_dag.png b/images/pullSIFS_dag.png
new file mode 100644
index 0000000000000000000000000000000000000000..f21ba8699a5517e6d24385126b4958d5b5b0c6fa
Binary files /dev/null and b/images/pullSIFS_dag.png differ
diff --git a/profiles/local/config.yaml b/profiles/local/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..174eacb7d0a048dda08388f025a3c2d8d8fee35e
--- /dev/null
+++ b/profiles/local/config.yaml
@@ -0,0 +1,6 @@
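+# Snakemake profile for local execution; adjust 'cores' to your machine.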
+cores: 4
+restart-times: 3
+latency-wait: 15
+use-singularity: true
+singularity-args: '--bind $HOME'
+keep-going: true
\ No newline at end of file
diff --git a/profiles/slurm/config.yaml b/profiles/slurm/config.yaml
index cb7d3d2c8862e7ffc72a409f28b38f6672634c63..7001cda3b60e29c6196b76c95446fff899bcaad6 100644
--- a/profiles/slurm/config.yaml
+++ b/profiles/slurm/config.yaml
@@ -1,5 +1,7 @@
 restart-times: 3
 latency-wait: 15
+use-singularity: true
+singularity-args: '--bind $HOME'
 jobscript: "slurm-jobscript.sh"
 #cluster: "slurm-submit-advanced.py"
 cluster: "sbatch -n {cluster.ntasks} --nodes {cluster.nodes} --time {cluster.time} --mem {cluster.mem} --qos {cluster.qos}"
@@ -7,3 +9,4 @@ cluster: "sbatch -n {cluster.ntasks} --nodes {cluster.nodes} --time {cluster.tim
 max-jobs-per-second: 1
 max-status-checks-per-second: 10
 local-cores: 1
+keep-going: true
diff --git a/rules/phlanprof.smk b/rules/phlanprof.smk
new file mode 100644
index 0000000000000000000000000000000000000000..e95430237902b6ed7a6200d35777b16ac3dbd0d2
--- /dev/null
+++ b/rules/phlanprof.smk
@@ -0,0 +1,269 @@
+'''
+Author: Monica R. Ticlla
+Affiliation(s): SIB, SwissTPH, UNIBAS
+Description: rules for taxonomic and strain-level profiling of paired-end
+shotgun metagenomic sequencing data with MetaPhlAn2 and StrainPhlAn.
+'''
+
+##----------------------------------------------------------------------------##
+## Import modules
+##----------------------------------------------------------------------------##
+
+
+##----------------------------------------------------------------------------##
+## Local rules
+##----------------------------------------------------------------------------##
+localrules:
+    metaphlan2_merge,
+    metaphlan2_visualize,
+    strphlan_check
+##----------------------------------------------------------------------------##
+## Local variables
+##----------------------------------------------------------------------------##
+singularity_metaprof = METAPROF_SIF
+
+##----------------------------------------------------------------------------##
+## Helper functions
+##----------------------------------------------------------------------------##
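+# Each helper below maps the {dataset} wildcard to the per-sample files that
+# the corresponding upstream rule is expected to produce for that dataset.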
+def get_all_samples_profiles(wildcards):
+    SAMPLES_W_PROF = DATASETS_SAMPLES_DICT[wildcards.dataset]
+    return expand('{out_dir}/{dataset}/PhlAnProf/metaphlan/profiles_abund/{sample}.txt', 
+                  out_dir=OUT_DIR, dataset=wildcards.dataset, sample=SAMPLES_W_PROF)
+
+def get_all_samples_prof_counts(wildcards):
+    SAMPLES_W_PROF_COUNTS = DATASETS_SAMPLES_DICT[wildcards.dataset]
+    return expand('{out_dir}/{dataset}/PhlAnProf/metaphlan/profiles_counts/{sample}.txt', 
+                  out_dir=OUT_DIR, dataset=wildcards.dataset, sample=SAMPLES_W_PROF_COUNTS)
+
+def get_all_samples_markers(wildcards):
+    SAMPLES_W_MARKERS = DATASETS_SAMPLES_DICT[wildcards.dataset]
+    return expand('{out_dir}/{dataset}/PhlAnProf/strphlan/markers/{sample}.markers', 
+                  out_dir=OUT_DIR, dataset=wildcards.dataset, sample=SAMPLES_W_MARKERS)
+
+##----------------------------------------------------------------------------##
+## Rules with target files
+##----------------------------------------------------------------------------##
+rule metaphlan2_profile:
+    input:
+        sample_fwd = OUT_DIR + '/{dataset}/preQC/emerged/{sample}-R1.fastq.gz',
+        sample_rev = OUT_DIR + '/{dataset}/preQC/emerged/{sample}-R2.fastq.gz'
+    output:
+        profile_w_stats = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_stats/{sample}.txt',
+        profile_counts = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_counts/{sample}.txt',
+        profile_abund = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/profiles_abund/{sample}.txt',
+        bowtie2out = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/bowtie2out/{sample}.bowtie2out.bz2',
+        samout = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/samout/{sample}.sam.bz2'
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/metaphlan/{sample}.log'
+    params:
+        mpa_db = METASNK_DB_DIR + '/metaphlan_databases',
+        proc = cpus_avail
+    wildcard_constraints:
+        sample = r'\w+'
+    singularity:singularity_metaprof
+    shell:
+        '''
+        (metaphlan2.py {input.sample_fwd},{input.sample_rev} \
+        --input_type fastq \
+        --bowtie2db {params.mpa_db} \
+        --index v20_m200 \
+        -t rel_ab_w_read_stats \
+        --bowtie2out {output.bowtie2out} \
+        --samout {output.samout} \
+        --output_file {output.profile_w_stats} \
+        --nproc {params.proc};
+        cut -f 1,5  {output.profile_w_stats} >{output.profile_counts}; \
+        cut -f 1,2  {output.profile_w_stats} >{output.profile_abund}) &>{log}
+        '''
+rule metaphlan2_merge:
+    input:
+        profiles_w_stats = get_all_samples_profiles,
+        profiles_counts = get_all_samples_prof_counts
+    output:
+        merged_abund = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_table.txt',
+        merged_counts = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_counts_table.txt',
+        merged_abund_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_table.txt',
+        merged_counts_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_counts_sp_table.txt'
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/metaphlan/merged.log'
+    singularity:singularity_metaprof
+    group:'metaphlan2_merging'
+    shell:
+        '''
+        (merge_metaphlan_tables.py {input.profiles_w_stats} > {output.merged_abund};
+        merge_metaphlan_tables.py {input.profiles_counts} > {output.merged_counts};
+        grep -E "(s__)|(^ID)" {output.merged_abund} | grep -v "t__" | sed 's/^.*s__//g' > {output.merged_abund_sp};
+        grep -E "(s__)|(^ID)" {output.merged_counts} | grep -v "t__" | sed 's/^.*s__//g' > {output.merged_counts_sp}) &>{log}
+        '''
+rule metaphlan2_visualize:
+    input:
+        merged_abund_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_table.txt'
+    output:
+        merged_abund_sp_png = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_heatmap.png'
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/metaphlan/merged_abundances_sp_heatmap.log'
+    singularity:singularity_metaprof
+    group:'metaphlan2_merging'
+    shell:
+        '''
+        (if [[ "$(head -n1 {input.merged_abund_sp} | awk '{{print NF}}')" -gt 2 ]];
+            then
+                hclust2.py \
+                -i {input.merged_abund_sp} \
+                -o {output.merged_abund_sp_png} \
+                --ftop 25 \
+                --f_dist_f braycurtis \
+                --s_dist_f braycurtis \
+                --cell_aspect_ratio 0.5 -l \
+                --flabel_size 6 \
+                --slabel_size 6 \
+                --max_flabel_len 100 \
+                --max_slabel_len 100 \
+                --minv 0.1 \
+                --dpi 300 \
+                -c 'bbcyr'
+            else
+                echo 'hclust2.py expects at least two samples! Generating an empty file instead.'; \
+                touch {output.merged_abund_sp_png}
+        fi) &>{log}
+        '''
+# Get the consensus of unique marker genes for each species found in the sample
+rule strphlan_get_sample_markers:
+    input:
+        sample_sam = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/samout/{sample}.sam.bz2'
+    output:
+        sample_markers = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/markers/{sample}.markers'
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/markers/{sample}.log'
+    singularity:singularity_metaprof
+    threads:cpus_avail
+    shell:
+        '''
+        (bash -c "source activate py27strphlan && sample2markers.py \
+        --ifn_samples {input.sample_sam} \
+        --input_type sam \
+        --output_dir $(dirname {output.sample_markers}) \
+        --nprocs {threads}") &>{log}
+        '''
+# Preliminary detection of clades present across samples, suitable for strain-tracking analysis:
+# - first, run strainphlan to detect clades
+# - keep only clades also detected by metaphlan, and
+# - keep only those above a user-defined minimal abundance threshold
+checkpoint strphlan_clades_detection:
+    input:
+        sample_markers = get_all_samples_markers,
+        merged_abundances_sp = OUT_DIR + '/{dataset}/PhlAnProf/metaphlan/merged_abundances_sp_table.txt'
+    output:
+        clades_list = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades.txt',
+        clades_detected_dir = directory("{}/{{dataset}}/PhlAnProf/strphlan/clades_detected".format(OUT_DIR))
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/clade_detection.log'
+    params:
+        mpa_pkl = METASNK_DB_DIR + '/metaphlan_databases/mpa_v20_m200.pkl'
+    threads:cpus_avail
+    singularity:singularity_metaprof
+    shell:
+        '''
+        (bash -c 'if [[ "$(head -n1 {input.merged_abundances_sp} | awk "{{print NF}}")" -gt 2 ]]; then \
+        source activate py27strphlan && strainphlan.py \
+        --ifn_samples {input.sample_markers} \
+        --mpa_pkl {params.mpa_pkl} \
+        --output_dir $(dirname {output.clades_list}) \
+        --nprocs_main {threads} \
+        --print_clades_only | sort | \
+        comm -12 - <(echo $(tail -n +2 {input.merged_abundances_sp} |  cut -f1 | sed "s/^/s__/g") | tr " " "\n"| sort) \
+        >{output.clades_list}; \
+        mkdir {output.clades_detected_dir}; \
+        for clade in $(cat {output.clades_list}) ; do \
+        touch {output.clades_detected_dir}/${{clade}}.clade; done; \
+        else \
+        echo "strainphlan.py expects at least two samples! It will generate and empty file!"; \
+        > {output.clades_list};
+        mkdir -p {output.clades_detected_dir}; \
+        fi; echo $? ') &>{log}
+        '''
+rule strphlan_clades_markers_extraction:
+    input:
+        clade_name = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades_detected/{clade}.clade',
+        clade_dir = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades_detected'
+    output:
+        clade_ref = touch(OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades_extracted/{clade}.done')
+    log:
+        OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/clades_extracted/{clade}.log'
+    params:
+        mpa_pkl = METASNK_DB_DIR + '/metaphlan_databases/mpa_v20_m200.pkl',
+        strpa_dir = METASNK_DB_DIR+'/strainphlan_markers',
+        all_markers = METASNK_DB_DIR+'/strainphlan_markers/all_markers.fasta'
+    singularity:singularity_metaprof
+    group: 'strphlan_clade_analysis'
+    shell:
+        '''
+        (bash -c 'if [ -f {params.strpa_dir}/{wildcards.clade}.markers.fasta ]; then \
+        echo "{wildcards.clade} already extracted!"; \
+        else \
+        source activate py27strphlan && extract_markers.py \
+        --mpa_pkl {params.mpa_pkl} \
+        --ifn_markers {params.all_markers} \
+        --clade {wildcards.clade} \
+        --ofn_markers {params.strpa_dir}/{wildcards.clade}.markers.fasta ; \
+        fi') &>{log}
+        '''
+
+rule strphlan_clade_profiling:
+    input:
+        samples_markers = get_all_samples_markers,
+        clade_ref_markers = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/clades_extracted/{clade}.done',
+        samples_metadata = RAW_FASTQ_DIR + '/{dataset}/sample_metadata.tsv'
+    output:
+        clade_result_dir = directory('{}/{{dataset}}/PhlAnProf/strphlan/clades_profiled/{{clade}}'.format(OUT_DIR))
+    log:
+        clade_results = OUT_DIR + '/{dataset}/PhlAnProf/logs/strphlan/clades_profiled/{clade}.log'
+    params:
+        mpa_pkl = METASNK_DB_DIR + '/metaphlan_databases/mpa_v20_m200.pkl',
+        strpa_dir = METASNK_DB_DIR+'/strainphlan_markers'
+    threads: cpus_avail
+    singularity:singularity_metaprof
+    group: 'strphlan_clade_analysis'
+    shell:
+        '''
+        (bash -c 'source activate py27strphlan && mkdir -p {output.clade_result_dir}; \
+        strainphlan.py \
+        --ifn_samples {input.samples_markers} \
+        --mpa_pkl {params.mpa_pkl} \
+        --ifn_markers {params.strpa_dir}/{wildcards.clade}.markers.fasta \
+        --clades {wildcards.clade} \
+        --output_dir {output.clade_result_dir} \
+        --nprocs_main {threads} \
+        --relaxed_parameters; \
+        if [ -f {output.clade_result_dir}/RAxML_bestTree.{wildcards.clade}.tree ]; then \
+        add_metadata_tree.py \
+        --ifn_trees {output.clade_result_dir}/RAxML_bestTree.{wildcards.clade}.tree \
+        --ifn_metadatas {input.samples_metadata} \
+        --metadatas SubjectID; \
+        plot_tree_graphlan.py \
+        --ifn_tree {output.clade_result_dir}/RAxML_bestTree.{wildcards.clade}.tree.metadata \
+        --colorized_metadata SubjectID \
+        --leaf_marker_size 60 \
+        --legend_marker_size 60; fi') &>{log}
+        '''
+
+def aggregate_clade_profiles(wildcards):
+    '''
+    aggregate the file names of all the files
+    generated at the strphlan_clades_detection checkpoint
+    '''
+    checkpoint_output = checkpoints.strphlan_clades_detection.get(**wildcards).output[1]
+    return expand('{out_dir}/{dataset}/PhlAnProf/strphlan/clades_profiled/{clade}', 
+                  out_dir=OUT_DIR,
+                  dataset=wildcards.dataset,
+                  clade=glob_wildcards(os.path.join(checkpoint_output, '{clade}.clade')).clade)
+
+rule strphlan_check:
+    input:
+        aggregate_clade_profiles
+    output:
+        combined = OUT_DIR + '/{dataset}/PhlAnProf/strphlan/all.done'
+    shell:
+        '''
+        touch {output.combined}
+        '''
diff --git a/rules/preprocess.smk b/rules/preprocess.smk
index c4e6e6d95791634c0378b1aa5b6fa40b749a80a7..2938d73d589f3a9fa5ee804ce26edd754121157f 100644
--- a/rules/preprocess.smk
+++ b/rules/preprocess.smk
@@ -25,7 +25,8 @@ localrules:
 ##----------------------------------------------------------------------------##
 ## Local variables
 ##----------------------------------------------------------------------------##
-singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
+#singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
+singularity_preqc = PREQC_SIF
 BBMAP_REF = config['preprocess']['filter_human']['bbmap_reference']
 BBMAP_REF_DIR = config['preprocess']['filter_human']['bbmap_ref_dir']
 
@@ -116,7 +117,7 @@ rule trim_adapters:
         min_length = config['preprocess']['trim_adapters']['min_length'],
         trim_tails = config['preprocess']['trim_adapters']['trim_tails']
     threads: cpus_avail
-    singularity: singularity_img
+    singularity: singularity_preqc
     group: 'preprocess'
     message: "Running trim_adapters with {threads} cores."
     shell:
@@ -155,7 +156,7 @@ rule filter_human:
         # in minutes
         runtime = lambda wildcards, attempt: 120*attempt
     threads:cpus_avail
-    singularity: singularity_img
+    singularity: singularity_preqc
     group: 'preprocess'
     message: "Running filter_human with {threads} cores."
     shell:
@@ -198,7 +199,7 @@ rule index_human_ref:
         usemodulo=config['preprocess']['filter_human']['bbmap_usemodulo'],
         mem_gb = config['preprocess']['filter_human']['bbmap_mem']
     threads: cpus_avail
-    singularity: singularity_img
+    singularity: singularity_preqc
     message: "Running index_human_ref with {threads} cores."
     shell:
         '''
@@ -222,7 +223,7 @@ rule dedupe:
     threads:cpus_avail
     params:
         dd_mem_gb = config['preprocess']['filter_human']['bbmap_mem']
-    singularity: singularity_img
+    singularity: singularity_preqc
     group: 'preprocess'
     message: "Running dedupe with {threads} cores."
     shell:
@@ -264,7 +265,7 @@ rule trim_3end:
     log:
         OUT_DIR + '/{dataset}/preQC/logs/dfinaltrim/{fastq_file}.log'
     threads: cpus_avail
-    singularity: singularity_img
+    singularity: singularity_preqc
     group: 'preprocess'
     message: "Running trim3end_dedupe with {threads} cores ..."
     shell:
@@ -392,7 +393,7 @@ rule mqc_trim_adapters:
                                 '/{dataset}/preQC/multiqc/atrimmed' + 
                                 '/{dataset}_atrimmed_mqc.html', 
                                 category='preQC_step1:trim_adapters')
-    singularity: singularity_img
+    singularity: singularity_preqc
     shell:
         '''
         multiqc -f --file-list {input} --filename {output.multiqc_report}
@@ -424,7 +425,7 @@ rule multiqc_trim_3end:
                                 '/{dataset}/preQC/multiqc/dfinaltrim'+
                                 '/{dataset}_dfinaltrim_mqc.html',
                                 category='preQC_step4:trim_3end')
-    singularity: singularity_img
+    singularity: singularity_preqc
     shell:
         '''
         multiqc -f --file-list {input} --filename {output.multiqc_report}
diff --git a/rules/rawQC.smk b/rules/rawQC.smk
index 3ba8d34e0481b713a0b4a2f091fa9e0de420a5ae..75e55e9f29d63ab292bce6cec265706b20f6acfc 100644
--- a/rules/rawQC.smk
+++ b/rules/rawQC.smk
@@ -12,8 +12,8 @@ localrules:
 ##---------------------------------------------------------------------------##
 ## Local variables
 ##----------------------------------------------------------------------------##
-singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
-
+#singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
+singularity_img = PREQC_SIF
 ##----------------------------------------------------------------------------##
 ## Rules with target files
 ##----------------------------------------------------------------------------##
diff --git a/rules/setup_rules.smk b/rules/setup_rules.smk
new file mode 100644
index 0000000000000000000000000000000000000000..f84fd4ab134099491aa12cae13c1e8e5d337ffd2
--- /dev/null
+++ b/rules/setup_rules.smk
@@ -0,0 +1,125 @@
+# Setup rules: download the singularity containers and build/download the
+# reference databases used by MetaSnk (see rules pullSIFS and buildDBS).
+
+##----------------------------------------------------------------------------##
+## Local variables
+##----------------------------------------------------------------------------##
+# Variables already defined in the Snakefile
+#METASNK_DB_DIR = os.environ["METASNK_DBS"]
+#SHUB_PREQC_SIF = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
+#SHUB_METAPROF_SIF = 'shub://mticlla/OmicSingularities:metaprof'
+#PREQC_SIF = METASNK_DB_DIR+'/singularity/MetagenomicSnake_preqc_v0_1.sif'
+#METAPROF_SIF = METASNK_DB_DIR+'/singularity/OmicSingularities_metaprof.sif'
+
+##----------------------------------------------------------------------------##
+## Local rules
+##----------------------------------------------------------------------------##
+localrules:
+    pull_preqc_sif,
+    pull_metaprof_sif,
+    get_metaphlan_db,
+    get_strphlan_db,
+    get_panphlan_db,
+    get_humann2_chocophlan,
+    get_humann2_uniref,
+    get_humann2_utility,
+    get_humann2_dbs
+    
+##----------------------------------------------------------------------------##
+## Rules to download/build container/databases used by MetaSnk
+##----------------------------------------------------------------------------##
+# These rules require internet access
+# Download singularity containers
+rule pull_preqc_sif:
+    output:
+        # Singularity image for rawQC and preQC (about 800M)
+        local_sif = PREQC_SIF
+    params:
+        shub_sif = SHUB_PREQC_SIF
+    shell:
+        '''
+        mkdir -p $(dirname {output.local_sif});
+        singularity pull \
+        --dir $(dirname {output.local_sif}) \
+        $(basename {output}) \
+        {params.shub_sif}
+        '''
+rule pull_metaprof_sif:
+    output:
+        # Singularity image for PhlAnProf (about 3G)
+        local_sif = METAPROF_SIF
+    params:
+        shub_sif = SHUB_METAPROF_SIF
+    shell:
+        '''
+        mkdir -p $(dirname {output.local_sif});
+        singularity pull \
+        --dir $(dirname {output.local_sif}) \
+        $(basename {output}) \
+        {params.shub_sif}
+        '''
+rule get_metaphlan_db:
+    output:
+        directory(METASNK_DB_DIR+'/metaphlan_databases')
+    singularity:
+        METAPROF_SIF
+    shell:
+        '''
+        metaphlan2.py --install --bowtie2db {output}
+        '''    
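+# Reconstruct the full MetaPhlAn2 marker FASTA from the bowtie2 index; this is
+# the marker file later passed to extract_markers.py via --ifn_markers.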
+rule get_strphlan_db:
+    input:
+        metaphlan_db_dir = METASNK_DB_DIR+'/metaphlan_databases'
+    output:
+        METASNK_DB_DIR+'/strainphlan_markers/all_markers.fasta'
+    singularity:METAPROF_SIF
+    shell:
+        '''
+        bowtie2-inspect {input}/mpa_v20_m200 > {output}
+        '''
+rule get_humann2_chocophlan:
+    output:
+        humann2_choco_dir = directory(METASNK_DB_DIR+'/humann2_databases/chocophlan')
+    singularity:METAPROF_SIF   
+    shell:
+        '''
+        bash -c 'source activate humann2 && mkdir -p $(dirname {output.humann2_choco_dir}); \
+        humann2_databases \
+        --download chocophlan full $(dirname {output.humann2_choco_dir}) \
+        --update-config no; \
+        rm -f {output.humann2_choco_dir}/.snakemake_timestamp'
+        '''
+# Downloads a translated search database for HUMAnN2:
+# the EC-filtered UniRef90 database
+rule get_humann2_uniref:
+    output:
+        humann2_uniref_dir = directory(METASNK_DB_DIR+'/humann2_databases/uniref')
+    singularity:METAPROF_SIF 
+    shell:
+        '''
+        bash -c 'source activate humann2 && mkdir -p $(dirname {output.humann2_uniref_dir}); \
+        humann2_databases \
+        --download uniref uniref90_ec_filtered_diamond $(dirname {output.humann2_uniref_dir}) \
+        --update-config no'
+        '''
+rule get_humann2_utility:
+    output:
+        utility_mapping_dir = directory(METASNK_DB_DIR+'/humann2_databases/utility_mapping')
+    singularity:METAPROF_SIF
+    shell:
+        '''
+        bash -c 'source activate humann2 && mkdir -p $(dirname {output.utility_mapping_dir}); \
+        humann2_databases \
+        --download utility_mapping full $(dirname {output.utility_mapping_dir}) \
+        --update-config no'
+        '''
+rule get_humann2_dbs:
+    input:
+        METASNK_DB_DIR+'/humann2_databases/chocophlan',
+        METASNK_DB_DIR+'/humann2_databases/uniref',
+        METASNK_DB_DIR+'/humann2_databases/utility_mapping'
+
+# Placeholder: the PanPhlAn database download is not implemented yet
+rule get_panphlan_db:
+    output:
+        directory(METASNK_DB_DIR+'/panphlan_databases')
\ No newline at end of file
diff --git a/slurm_cluster.json b/slurm_cluster.json
index 55878aa4184936ed9e73d37ce02d829649e839c2..310796afd67f5ce4bad89c2061a41baa91baca39 100644
--- a/slurm_cluster.json
+++ b/slurm_cluster.json
@@ -3,7 +3,7 @@
         "nodes":1,
         "ntasks":4,
         "time":"30",
-        "mem" : 8000,       
+        "mem" : 8000,
         "qos":"30min"
     },
     "preprocess":{
@@ -13,7 +13,7 @@
         "ntasks":20,
         "qos":"6hours",
         "mem":32000,
-        "constraint":"mem32G"    
+        "constraint":"mem32G"
     },
     "filter_human":{
         "nodes":1,
@@ -22,7 +22,7 @@
         "ntasks":20,
         "qos":"6hours",
         "mem":32000,
-        "constraint":"mem32G"  
+        "constraint":"mem32G"
     },
     "index_human_ref":{
         "nodes":1,
@@ -38,5 +38,34 @@
         "ntasks":1,
         "mem" : 10000,
         "constraint":"mem10G"
+    },
+    "metaphlan2_profile":{
+      "nodes":1,
+      "time":"120",
+      "ntasks":20,
+      "qos":"6hours",
+      "mem":32000
+    },
+    "metaphlan2_merging":{
+      "nodes":1
+    },
+    "strphlan_get_sample_markers":{
+      "nodes":1,
+      "time":"120",
+      "ntasks":20,
+      "mem":32000,
+      "qos":"6hours"
+    },
+    "strphlan_clades_detection":{
+      "time":"120",
+      "ntasks":20,
+      "mem":32000,
+      "qos":"6hours"
+    },
+    "strphlan_clade_analysis":{
+      "time":"120",
+      "ntasks":20,
+      "mem":32000,
+      "qos":"6hours"
     }
 }