From d19f7b4638561dd223fd24470736c3a8f8755a9a Mon Sep 17 00:00:00 2001
From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch>
Date: Mon, 19 Nov 2018 11:59:43 +0100
Subject: [PATCH] Addition of rule that removes multimappers in the process data pipeline

---
 snakemake/process_data/Snakefile   | 22 ++++++++++++++++++++--
 snakemake/process_data/config.yaml |  4 +++-
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile
index df87399..3136843 100644
--- a/snakemake/process_data/Snakefile
+++ b/snakemake/process_data/Snakefile
@@ -1,7 +1,7 @@
 configfile: "config.yaml"
 #from snakemake.utils import listfiles
 
-localrules: create_output_and_log_directories, finish
+localrules: create_output_and_log_directories, remove_multimappers, finish
 
 #################################################################################
 ### Finish rule
@@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, finish
 
 rule finish:
     input:
-        sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"), sample=config["sample"])
+        sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"])
 
 #################################################################################
 ### Create output and log directories
@@ -210,3 +210,21 @@ rule map_to_transcripts:
         --threads {threads} \
         -o {output.sam} \
         -u {output.reads} ) &> {log}"
+
+################################################################################
+### Remove multimappers
+################################################################################
+
+# Keep the SAM header lines (starting with "@") and the alignment records that
+# carry the tag NH:i:1 (NH = number of reported alignments for the read).
+# The tag may be followed by another field or be the last field of the line.
+rule remove_multimappers:
+    input:
+        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
+    output:
+        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
+    log:
+        os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
+    threads: 1
+    shell:
+        "(grep -P '^@|\tNH:i:1(\t|$)' {input.sam} > {output.sam}) &> {log}"
diff --git a/snakemake/process_data/config.yaml b/snakemake/process_data/config.yaml
index 6a99137..fb70bb0 100644
--- a/snakemake/process_data/config.yaml
+++ b/snakemake/process_data/config.yaml
@@ -18,7 +18,9 @@
 ##############################################################################
   input_dir: "samples"
   input_reads_pattern: ".fastq.gz"
-  sample: ["example", "example2"]
+  sample: ["example", "example2", "SRR1536304", "SRR1536305"]
   example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33}
   example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
+  SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
+  SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
 ...
-- 
GitLab