From d19f7b4638561dd223fd24470736c3a8f8755a9a Mon Sep 17 00:00:00 2001
From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch>
Date: Mon, 19 Nov 2018 11:59:43 +0100
Subject: [PATCH] Addition of rule that removes multimappers in the process
 data pipeline

---
 snakemake/process_data/Snakefile   | 19 +++++++++++++++++--
 snakemake/process_data/config.yaml |  4 +++-
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile
index df87399..3136843 100644
--- a/snakemake/process_data/Snakefile
+++ b/snakemake/process_data/Snakefile
@@ -1,7 +1,7 @@
 configfile: "config.yaml"
 #from snakemake.utils import listfiles
 
-localrules: create_output_and_log_directories, finish
+localrules: create_output_and_log_directories, remove_multimappers, finish
 
 #################################################################################
 ### Finish rule
@@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, finish
 
 rule finish:
 	input:
-		sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"), sample=config["sample"])
+		sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"])
 
 #################################################################################
 ### Create output and log directories
@@ -210,3 +210,18 @@ rule map_to_transcripts:
 		--threads {threads} \
 		-o {output.sam} \
 		-u {output.reads} ) &> {log}"
+
+################################################################################
+### Remove multimappers (keep SAM header lines and alignments tagged NH:i:1)
+################################################################################
+# NH:i:1 may be the last field of a SAM line, so match it before a tab or end-of-line.
+rule remove_multimappers:
+	input:
+		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
+	output:
+		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
+	log:
+		os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
+	threads:	1
+	shell:
+		"(grep -P '^@|\tNH:i:1(\t|$)' {input.sam} > {output.sam}) &> {log}"
diff --git a/snakemake/process_data/config.yaml b/snakemake/process_data/config.yaml
index 6a99137..fb70bb0 100644
--- a/snakemake/process_data/config.yaml
+++ b/snakemake/process_data/config.yaml
@@ -18,7 +18,9 @@
   ##############################################################################
   input_dir: "samples"
   input_reads_pattern: ".fastq.gz"
-  sample: ["example", "example2"]
+  sample: ["example", "example2", "SRR1536304", "SRR1536305"]
   example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33}
   example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
+  SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
+  SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
 ...
-- 
GitLab