diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile
index df87399b07484041f9ff1f55a4f65a7d8d3b505b..31368434ebfd08c2e135c48afb480ab3783d7e32 100644
--- a/snakemake/process_data/Snakefile
+++ b/snakemake/process_data/Snakefile
@@ -1,7 +1,7 @@
 configfile: "config.yaml"
 #from snakemake.utils import listfiles
 
-localrules: create_output_and_log_directories, finish
+localrules: create_output_and_log_directories, remove_multimappers, finish
 
 #################################################################################
 ### Finish rule
@@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, finish
 
 rule finish:
     input:
-        sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"), sample=config["sample"])
+        sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"])
 
 #################################################################################
 ### Create output and log directories
@@ -210,3 +210,22 @@ rule map_to_transcripts:
         --threads {threads} \
         -o {output.sam} \
         -u {output.reads} ) &> {log}"
+
+################################################################################
+### Remove multimappers
+################################################################################
+
+# Keep the SAM header lines (those starting with "@") and only the alignment
+# records tagged NH:i:1. The tag is matched whether it is followed by another
+# field or ends the line, because the SAM format does not fix the order of
+# optional fields. The pattern is single-quoted so the shell leaves it alone.
+rule remove_multimappers:
+    input:
+        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
+    output:
+        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
+    log:
+        os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
+    threads: 1
+    shell:
+        "(grep -P '^@|\\tNH:i:1(\\t|$)' {input.sam} > {output.sam}) &> {log}"
diff --git a/snakemake/process_data/config.yaml b/snakemake/process_data/config.yaml
index 6a991377a4069e94913a5b7ebccc677a4f77fcc8..fb70bb03e05d73ba69ffcb1d0b554f2303782a17 100644
--- a/snakemake/process_data/config.yaml
+++ b/snakemake/process_data/config.yaml
@@ -18,7 +18,9 @@
   ##############################################################################
   input_dir: "samples"
   input_reads_pattern: ".fastq.gz"
-  sample: ["example", "example2"]
+  sample: ["example", "example2", "SRR1536304", "SRR1536305"]
   example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33}
   example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64}
+  SRR1536304: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
+  SRR1536305: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 33}
 ...