diff --git a/Snakefile b/Snakefile
index efb213f6974ec15b6e6787e15476e91eb1180316..d56cc9beecd7835fca8e1e32e874175a98875241 100644
--- a/Snakefile
+++ b/Snakefile
@@ -77,6 +77,8 @@ SHUB_PREQC_SIF = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
 SHUB_METAPROF_SIF = 'shub://mticlla/OmicSingularities:metaprof'
 PREQC_SIF = METASNK_DB_DIR+'/singularity/MetagenomicSnake_preqc_v0_1.sif'
 METAPROF_SIF = METASNK_DB_DIR+'/singularity/OmicSingularities_metaprof.sif'
+HUMAN_REF = eval(config['preprocess']['filter_human']['bbmap_reference'])
+
 ##----------------------------------------------------------------------------##
 ## Fastq files to be processed
 ##----------------------------------------------------------------------------##
@@ -127,7 +129,9 @@ rule buildDBS:
         METASNK_DB_DIR+'/strainphlan_markers/all_markers.fasta',
         METASNK_DB_DIR+'/humann2_databases/chocophlan',
         METASNK_DB_DIR+'/humann2_databases/uniref',
-        METASNK_DB_DIR+'/humann2_databases/utility_mapping'
+        METASNK_DB_DIR+'/humann2_databases/utility_mapping',
+        HUMAN_REF
+        #METASNK_DB_DIR+'/ref_genomes/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz'
 
 rule rawQC:
     input:
diff --git a/config.yaml b/config.yaml
index a165debec251c77e18fa8d54fd233333745de79f..e2a29fd54bdc9a2c4f2d21e657ca3021e10b1339 100644
--- a/config.yaml
+++ b/config.yaml
@@ -3,12 +3,12 @@
 # one row per sample. It can be parsed easily via pandas.
 
 # Set PATHS
-# WARNING: 
+# WARNING:
 #   MetagenomicSnake uses singularity containers to run the workflow.
 #   Bind access to those paths if needed
-#   Singularity's --bind/-B option can be specified multiple times, 
+#   Singularity's --bind/-B option can be specified multiple times,
 #   or a comma-delimited string of bind path specifications can be used.
-#   
+#
 
 # Absolute PATH to folder where to find fastq files
 RAW_DIR: '/home/mticlla/Documents/Git_repositories/metagenomicsnake_testdata/data/raw'
@@ -39,16 +39,21 @@ preprocess:
     bbmap_usemodulo: t
     bbmap_mem: 10
 
-    # PATH to reference(e.g human) genome
-    # -----------------------------------
-    # Masked hg19 kindly provided by Brian Brushnell and 
-    # manually dowloaded from
+    # PATH to reference(e.g human) genome to be removed
+    # -------------------------------------------------
+    # MetaSnk uses by default the masked hg19 genome, kindly provided by Brian Brushnell
+     # and dowloaded from
     # https://drive.google.com/open?id=0B3llHR93L14wd0pSSnFULUlhcUk
     # How the human genome was masked is explained in this SEQanswers entry
     # http://seqanswers.com/forums/archive/index.php/t-42552.html
-    bbmap_reference: '/home/mticlla/Documents/Git_repositories/metagenomicsnake/ref/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz'
-    bbmap_ref_dir: '/home/mticlla/Documents/Git_repositories/metagenomicsnake/ref/ref_bbsplit'
-    
+    # If you want to use a different human reference genome, provide the absolute path below
+    bbmap_reference: "METASNK_DB_DIR+'/ref_genomes/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz'"
+
+    # PATH to the folder where to place the indexed human reference genomes
+    # ---------------------------------------------------------------------
+    # The reference genome needs to be indexed, by default the indexed files are
+    # placed in your $METASNK_DBS dir (set during the installation steps of MetaSnk)
+    bbmap_ref_dir: "METASNK_DB_DIR+'/ref_genomes/ref_bbsplit'"
 phlanprof:
     strphlan_clade_profiling:
         metadatas:
diff --git a/images/buildDBS_dag.png b/images/buildDBS_dag.png
index 98b56ebd4a6191fb72d2be93e7be2a71bc2eaede..a00062d63045f65f7a72bd23e4ca24426ac60cdb 100644
Binary files a/images/buildDBS_dag.png and b/images/buildDBS_dag.png differ
diff --git a/rules/preprocess.smk b/rules/preprocess.smk
index 2938d73d589f3a9fa5ee804ce26edd754121157f..0066ba55ab264375e9a5a84bea6a452c2966cdbd 100644
--- a/rules/preprocess.smk
+++ b/rules/preprocess.smk
@@ -25,50 +25,48 @@ localrules:
 ##----------------------------------------------------------------------------##
 ## Local variables
 ##----------------------------------------------------------------------------##
-#singularity_img = 'shub://mticlla/MetagenomicSnake:preqc_v0_1'
 singularity_preqc = PREQC_SIF
-BBMAP_REF = config['preprocess']['filter_human']['bbmap_reference']
-BBMAP_REF_DIR = config['preprocess']['filter_human']['bbmap_ref_dir']
-
+BBMAP_REF = HUMAN_REF
+BBMAP_REF_DIR = eval(config['preprocess']['filter_human']['bbmap_ref_dir'])
 ##----------------------------------------------------------------------------##
 ## Helper functions
 ##----------------------------------------------------------------------------##
 # Helper functions to create list of files for MultiQC
 
 def mqc_trim_adapters_inputs(wildcards):
-    list_files = ['{}/{}/preQC/atrimmed/{}.fastp.json'.format(OUT_DIR, 
-                                                            value, 
-                                                            FASTQS[ix]) 
-                for ix,value in enumerate(DATASETSX) 
+    list_files = ['{}/{}/preQC/atrimmed/{}.fastp.json'.format(OUT_DIR,
+                                                            value,
+                                                            FASTQS[ix])
+                for ix,value in enumerate(DATASETSX)
                 if value==wildcards.dataset]
     return(list_files)
 
 def mqc_filter_human_inputs(wildcards):
-    list_files = ['{}/{}/preQC/bfiltered/{}.clean.stats'.format(OUT_DIR, 
-                                                            value, 
-                                                            FASTQS[ix]) 
-                for ix,value in enumerate(DATASETSX) 
+    list_files = ['{}/{}/preQC/bfiltered/{}.clean.stats'.format(OUT_DIR,
+                                                            value,
+                                                            FASTQS[ix])
+                for ix,value in enumerate(DATASETSX)
                 if value==wildcards.dataset]
     return(list_files)
 
 def mqc_trim_3end_inputs(wildcards):
-    list_files = ['{}/{}/preQC/dfinaltrim/{}.clean.nodup.fastp.json'.format(OUT_DIR, 
-                                                                            value, 
+    list_files = ['{}/{}/preQC/dfinaltrim/{}.clean.nodup.fastp.json'.format(OUT_DIR,
+                                                                            value,
                                                                             FASTQS[ix])
-                  for ix,value in enumerate(DATASETSX) 
+                  for ix,value in enumerate(DATASETSX)
                   if value==wildcards.dataset]
     return(list_files)
 
 def list_concatenated_r1_fastqs(wildcards):
     list_r1 = ['{}/{}/preQC/emerged/{}-R1.fastq.gz'.format(OUT_DIR,
                                                           wildcards.dataset,
-                                                          value) 
+                                                          value)
                for ix,value in enumerate(SAMPLES) if DATASETS[ix]==wildcards.dataset]
     return(list_r1)
 def list_concatenated_r2_fastqs(wildcards):
     list_r2 = ['{}/{}/preQC/emerged/{}-R2.fastq.gz'.format(OUT_DIR,
                                                           wildcards.dataset,
-                                                          value) 
+                                                          value)
                for ix,value in enumerate(SAMPLES) if DATASETS[ix]==wildcards.dataset]
     return(list_r2)
 ##----------------------------------------------------------------------------##
@@ -83,14 +81,14 @@ def list_concatenated_r2_fastqs(wildcards):
 #   - limiting the N gase number (-n)
 #   - percentage of unqualified bases (-u)
 #   - average quality score (-e, default 0 means no requirement)
-# - remove adapters: enabled by default, fastp detects adapters by 
-#   per-read overlap analysis (seeks for the overlap of each read pair). 
-#   If fastp fails to find an overlap, It usess the provided the adapter 
-#   sequences (e.g Nextera XT adapters). 
+# - remove adapters: enabled by default, fastp detects adapters by
+#   per-read overlap analysis (seeks for the overlap of each read pair).
+#   If fastp fails to find an overlap, It usess the provided the adapter
+#   sequences (e.g Nextera XT adapters).
 #   Check this page from Illumina to know which adapters to remove:
 #   https://support.illumina.com/bulletins/2016/12/what-sequences-do-i-use-for-adapter-trimming.html
 # - base correction in overlapped regions
-# - trimming of the last base(s) for every read pair(this step preceeds 
+# - trimming of the last base(s) for every read pair(this step preceeds
 #   adapter removal)
 # - length filtering: discard reads shorter than a minimum length, after trimming
 #
@@ -187,8 +185,8 @@ rule filter_human:
 #
 rule index_human_ref:
     input:
-        # Masked hg19 kindly provided by Brian Brushnell and 
-        # manually dowloaded from
+        # Masked hg19 kindly provided by Brian Brushnell and
+        # dowloaded from
         # https://drive.google.com/open?id=0B3llHR93L14wd0pSSnFULUlhcUk
         reference = BBMAP_REF
     output:
@@ -281,7 +279,7 @@ rule trim_3end:
         --thread {threads}) &>{log}
         '''
 ###**THESE TARGET FILES ARE THE FINAL CLEAN**###
-# concatenate quality/length filtered, adapter-trimmed, cleaned, deduplicated and 
+# concatenate quality/length filtered, adapter-trimmed, cleaned, deduplicated and
 # quality-trimmed fastqs from the same samples
 rule concatenate_fastqs:
     input:
@@ -310,15 +308,15 @@ rule check_concatenation:
         inputs_r1 = list_concatenated_r1_fastqs,
         inputs_r2 = list_concatenated_r2_fastqs
     output:
-        concatenated_fastqs_list = OUT_DIR + '/{dataset}/preQC/summary_stats/' + 
+        concatenated_fastqs_list = OUT_DIR + '/{dataset}/preQC/summary_stats/' +
                                     '{dataset}_concatenation_all.done'
     #conda:'../envs/rawQC.yaml'
     run:
         import os
         from pathlib import Path
-        
+
         try:
-            sample_fastqs_exist = [os.path.exists(r1_file) and os.path.exists(r2_file) 
+            sample_fastqs_exist = [os.path.exists(r1_file) and os.path.exists(r2_file)
                                    for r1_file, r2_file in zip(input.inputs_r1,input.inputs_r2)]
             if all(sample_fastqs_exist):
                 Path(output.concatenated_fastqs_list).touch()
@@ -386,12 +384,12 @@ rule mqc_trim_adapters:
     input:
         OUT_DIR +'/{dataset}/preQC/multiqc/atrimmed/multiqc_inputs.txt'
     output:
-        multiqc_data_dir = OUT_DIR + 
+        multiqc_data_dir = OUT_DIR +
                             '/{dataset}/preQC/multiqc/atrimmed'+
                             '/{dataset}_atrimmed_mqc_data/multiqc_data.json',
-        multiqc_report = report(OUT_DIR + 
-                                '/{dataset}/preQC/multiqc/atrimmed' + 
-                                '/{dataset}_atrimmed_mqc.html', 
+        multiqc_report = report(OUT_DIR +
+                                '/{dataset}/preQC/multiqc/atrimmed' +
+                                '/{dataset}_atrimmed_mqc.html',
                                 category='preQC_step1:trim_adapters')
     singularity: singularity_preqc
     shell:
@@ -403,25 +401,25 @@ rule mqc_filter_human:
     input:
         OUT_DIR +'/{dataset}/preQC/multiqc/bfiltered/multiqc_inputs.txt'
     output:
-        mqc_report = report(OUT_DIR + 
-                            '/{dataset}/preQC/multiqc/bfiltered' + 
-                            '/{dataset}_bfiltered_stats.tsv', 
+        mqc_report = report(OUT_DIR +
+                            '/{dataset}/preQC/multiqc/bfiltered' +
+                            '/{dataset}_bfiltered_stats.tsv',
                             category='preQC_step2:filter_human')
     #conda:'../envs/rawQC.yaml'
     run:
         from utils import summarize_filter_human_step
         mqc_stats_data = summarize_filter_human_step('{}'.format(input))
-        mqc_stats_data.to_csv(output.mqc_report, sep='\t', header=True, 
+        mqc_stats_data.to_csv(output.mqc_report, sep='\t', header=True,
                               index=True, index_label='Unit')
 #
 rule multiqc_trim_3end:
     input:
         OUT_DIR+'/{dataset}/preQC/multiqc/dfinaltrim/multiqc_inputs.txt'
     output:
-        multiqc_fastqc = OUT_DIR + 
+        multiqc_fastqc = OUT_DIR +
                             '/{dataset}/preQC/multiqc/dfinaltrim'+
                             '/{dataset}_dfinaltrim_mqc_data/multiqc_data.json',
-        multiqc_report = report(OUT_DIR + 
+        multiqc_report = report(OUT_DIR +
                                 '/{dataset}/preQC/multiqc/dfinaltrim'+
                                 '/{dataset}_dfinaltrim_mqc.html',
                                 category='preQC_step4:trim_3end')
@@ -436,64 +434,64 @@ rule summarize_preQC:
     input:
         trim_adapters_mqc_json = OUT_DIR + '/{dataset}/preQC/multiqc/atrimmed'+
                                  '/{dataset}_atrimmed_mqc_data/multiqc_data.json',
-        filter_human_mqc_stats = OUT_DIR + '/{dataset}/preQC/multiqc/bfiltered' + 
+        filter_human_mqc_stats = OUT_DIR + '/{dataset}/preQC/multiqc/bfiltered' +
                                  '/{dataset}_bfiltered_stats.tsv',
         trim3end_mqc_json = OUT_DIR + '/{dataset}/preQC/multiqc/dfinaltrim'+
                             '/{dataset}_dfinaltrim_mqc_data/multiqc_data.json'
     log:
         OUT_DIR + '/{dataset}/preQC/logs/summarize_preqc.log'
     output:
-        units_summary = report(OUT_DIR + 
-                               '/{dataset}/preQC/summary_stats' + 
-                               '/{dataset}_preqc_units_summary.tsv', 
+        units_summary = report(OUT_DIR +
+                               '/{dataset}/preQC/summary_stats' +
+                               '/{dataset}_preqc_units_summary.tsv',
                                category='preQC:summaries'),
-        samples_summary = report(OUT_DIR + 
+        samples_summary = report(OUT_DIR +
                                  '/{dataset}/preQC/summary_stats' +
-                                 '/{dataset}_preqc_samples_summary.tsv', 
+                                 '/{dataset}_preqc_samples_summary.tsv',
                                  category='preQC:summaries'),
-        units_summary_plot = report(OUT_DIR + 
-                                    '/{dataset}/preQC/summary_stats' + 
-                                    '/{dataset}_preqc_units_barchart.svg', 
+        units_summary_plot = report(OUT_DIR +
+                                    '/{dataset}/preQC/summary_stats' +
+                                    '/{dataset}_preqc_units_barchart.svg',
                                     category='preQC:summaries'),
-        samples_summary_plot = report(OUT_DIR + 
-                                      '/{dataset}/preQC/summary_stats' + 
-                                      '/{dataset}_preqc_samples_barchart.svg', 
-                                      category='preQC:summaries'), 
-        units_summary_pct_plot = report(OUT_DIR + 
-                                        '/{dataset}/preQC/summary_stats' + 
-                                        '/{dataset}_preqc_units_pct_barchart.svg', 
+        samples_summary_plot = report(OUT_DIR +
+                                      '/{dataset}/preQC/summary_stats' +
+                                      '/{dataset}_preqc_samples_barchart.svg',
+                                      category='preQC:summaries'),
+        units_summary_pct_plot = report(OUT_DIR +
+                                        '/{dataset}/preQC/summary_stats' +
+                                        '/{dataset}_preqc_units_pct_barchart.svg',
                                         category='preQC:summaries'),
-        samples_summary_pct_plot = report(OUT_DIR + 
-                                          '/{dataset}/preQC/summary_stats'+ 
-                                          '/{dataset}_preqc_samples_pct_barchart.svg', 
+        samples_summary_pct_plot = report(OUT_DIR +
+                                          '/{dataset}/preQC/summary_stats'+
+                                          '/{dataset}_preqc_samples_pct_barchart.svg',
                                           category='preQC:summaries')
     #conda:'../envs/rawQC.yaml'
     run:
         from utils import summarize_preqc
         from utils import plot_preqc_summary
         import sys
-        
+
         sys.stdout = open(log[0], 'w')
-        
-        
+
+
         try:
             trim_adapters_mqc_json = '{}'.format(input.trim_adapters_mqc_json)
             filter_human_mqc_stats = '{}'.format(input.filter_human_mqc_stats)
             trim3end_mqc_json = '{}'.format(input.trim3end_mqc_json)
             # units summary
-            units_summary_df = summarize_preqc(trim_adapters_mqc_json, filter_human_mqc_stats, 
+            units_summary_df = summarize_preqc(trim_adapters_mqc_json, filter_human_mqc_stats,
                                                trim3end_mqc_json)
         except (TypeError, ValueError) as e:
-            print(e)                            
+            print(e)
         else:
-            units_summary_df.to_csv(output.units_summary, sep='\t', header=True, index=True, 
+            units_summary_df.to_csv(output.units_summary, sep='\t', header=True, index=True,
                                     index_label='Unit')
             # samples summary
             samples_summary_df = units_summary_df.groupby(['Sample']).agg('sum')
-            pct_columns = ['trim_adapters_pct_disc', 'filter_human_pct_disc', 'dedupe_pct_disc', 
+            pct_columns = ['trim_adapters_pct_disc', 'filter_human_pct_disc', 'dedupe_pct_disc',
                            'trim_3end_pct_disc', 'total_pct_disc', 'total_pct_kept']
             samples_summary_df[pct_columns] = units_summary_df[['Sample']+pct_columns].groupby(['Sample']).agg('mean')
-            samples_summary_df.to_csv(output.samples_summary, sep='\t', header=True, index=True, 
+            samples_summary_df.to_csv(output.samples_summary, sep='\t', header=True, index=True,
                                       index_label='Sample')
             # radial barcharts
             #
@@ -510,4 +508,3 @@ rule summarize_preQC:
             fig4.savefig(output.samples_summary_pct_plot, bbox_inches='tight', format='svg')
         finally:
             sys.stdout.close()
- 
\ No newline at end of file
diff --git a/rules/setup_rules.smk b/rules/setup_rules.smk
index f84fd4ab134099491aa12cae13c1e8e5d337ffd2..29a61f7593ce74181f2a7f1d88119b4e9109a51b 100644
--- a/rules/setup_rules.smk
+++ b/rules/setup_rules.smk
@@ -24,8 +24,9 @@ localrules:
     get_humann2_chocophlan,
     get_humann2_uniref,
     get_humann2_utility,
-    get_humann2_dbs
-    
+    get_humann2_dbs,
+    get_human_ref
+
 ##----------------------------------------------------------------------------##
 ## Rules to download/build container/databases used by MetaSnk
 ##----------------------------------------------------------------------------##
@@ -59,6 +60,17 @@ rule pull_metaprof_sif:
         $(basename {output}) \
         {params.shub_sif}
         '''
+rule get_human_ref:
+    output:
+        # Human reference genome for removal of human DNA
+        human_ref = HUMAN_REF
+    message: "Downloading fasta file of human reference genome ..."
+    shell:
+        '''
+        [ ! -d $(dirname {output.human_ref}) ] && mkdir $(dirname {output.human_ref});
+        cd $(dirname {output.human_ref})
+        wget https://github.com/mticlla/MetaSnk_data/raw/master/ref/hg19_main_mask_ribo_animal_allplant_allfungus.fa.gz
+        '''
 rule get_metaphlan_db:
     output:
         directory(METASNK_DB_DIR+'/metaphlan_databases')
@@ -67,7 +79,7 @@ rule get_metaphlan_db:
     shell:
         '''
         metaphlan2.py --install --bowtie2db {output}
-        '''    
+        '''
 rule get_strphlan_db:
     input:
         metaphlan_db_dir = METASNK_DB_DIR+'/metaphlan_databases'
@@ -81,7 +93,7 @@ rule get_strphlan_db:
 rule get_humann2_chocophlan:
     output:
         humann2_choco_dir = directory(METASNK_DB_DIR+'/humann2_databases/chocophlan')
-    singularity:METAPROF_SIF   
+    singularity:METAPROF_SIF
     shell:
         '''
         bash -c 'source activate humann2 && mkdir -p $(dirname {output.humann2_choco_dir}); \
@@ -95,7 +107,7 @@ rule get_humann2_chocophlan:
 rule get_humann2_uniref:
     output:
         humann2_uniref_dir = directory(METASNK_DB_DIR+'/humann2_databases/uniref')
-    singularity:METAPROF_SIF 
+    singularity:METAPROF_SIF
     shell:
         '''
         bash -c 'source activate humann2 && mkdir -p $(dirname {output.humann2_uniref_dir}); \
@@ -122,4 +134,4 @@ rule get_humann2_dbs:
 
 rule get_panphlan_db:
     output:
-        directory(METASNK_DB_DIR+'/panphlan_databases')
\ No newline at end of file
+        directory(METASNK_DB_DIR+'/panphlan_databases')