# Workflow-wide settings are loaded from config.yaml into the global `config` dict.
configfile: "config.yaml"
#from snakemake.utils import listfiles

# Rules listed here are declared as local rules (not submitted as cluster jobs).
localrules: create_output_and_log_directories, remove_multimappers, finish
#################################################################################
### Finish rule
#################################################################################
# Target rule: requests, for every sample listed in config["sample"], the SAM file
# <output_dir>/<sample>/transcripts.mapped.unique.sam. It has no output or command of
# its own; it only pulls in the rules that produce those files.
rule finish:
    input:
        sam = expand(
            os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"),
            sample=config["sample"]
        )
#################################################################################
### Create output and log directories
#################################################################################
# Creates the output directory, both log directories and one sub-directory per sample,
# then touches the flag file config["dir_created"] that downstream rules can depend on.
# NOTE(review): several outputs here are directories; recent Snakemake releases expect
# such outputs to be wrapped in directory() - confirm against the Snakemake version in use.
rule create_output_and_log_directories:
    output:
        output_dir = config["output_dir"],
        cluster_log = config["cluster_log"],
        local_log = config["local_log"],
        # One directory per configured sample, located under output_dir.
        sample_dir = expand(
            os.path.join(config["output_dir"], "{sample}"),
            sample=config["sample"]
        ),
        flag = config["dir_created"]
    threads: 1
    shell:
        # Adjacent literals are concatenated into a single command line.
        "mkdir -p {output.output_dir}; "
        "mkdir -p {output.cluster_log}; "
        "mkdir -p {output.local_log}; "
        "mkdir -p {output.sample_dir}; "
        "touch {output.flag};"
#################################################################################
### Clipping reads
#################################################################################
# Runs fastx_clipper on a sample's gzipped reads (streamed through zcat) with the
# per-sample adapter and writes <output_dir>/<sample>/pro.clipped.fastq.gz.
# NOTE(review): this rule looks incomplete in this view. The shell command reads
# {params.v}, {params.n}, {params.l} and {params.adapter}, but no `params:` header and
# no v/n/l entries are visible; as written, adapter/z/cluster_log sit under `output:`.
# Confirm against the complete file before relying on it.
rule clip_reads:
input:
# Flag file touched by create_output_and_log_directories.
flag = config["dir_created"],
# Raw reads: <input_dir>/<sample> followed by config["input_reads_pattern"].
reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]),
output:
reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
# NOTE(review): the next three entries are presumably `params:` entries (the shell
# uses {params.adapter}) - TODO confirm. `z` is not referenced by the visible command.
adapter = lambda wildcards: config[wildcards.sample]['adapter'],
z = "-z",
cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log")
log:
os.path.join(config["local_log"], "clip_reads_{sample}.log")
singularity:
# NOTE(review): the next two lines are not Snakemake syntax; they look like
# version-control blame residue and would have to be removed for the file to parse.
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastx_clipper \
{params.v} \
{params.n} \
-l {params.l} \
-a {params.adapter} \
-i <(zcat {input.reads}) \
-o {output.reads}) &> {log}"
#################################################################################
### Trimming reads
#################################################################################
# Runs fastq_quality_trimmer on the clipped reads (streamed through zcat) and writes
# the result to {output.reads}.
# NOTE(review): this rule looks incomplete in this view. No `output:`, `params:`, `log:`
# or `singularity:` header is visible, so `reads` appears twice under `input:`, and the
# shell command reads {params.v}, {params.l} and {params.z}, none of which are defined
# below. Confirm against the complete file before relying on it.
rule trim_reads:
input:
reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz")
# NOTE(review): presumably the `output:` entry (the shell writes to {output.reads}) - TODO confirm.
reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
# NOTE(review): presumably `params:` entries (the shell uses {params.t} and {params.Q}) - TODO confirm.
# Both values are looked up per sample in the config.
t = lambda wildcards: config[wildcards.sample]['minimum_quality'],
Q = lambda wildcards: config[wildcards.sample]['quality_type'],
cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log")
# NOTE(review): presumably the `log:` entry (the shell redirects to {log}) - TODO confirm.
os.path.join(config["local_log"], "trim_reads_{sample}.log")
# NOTE(review): the next two lines are not Snakemake syntax; they look like
# version-control blame residue. The image string after them is presumably the
# `singularity:` entry, as in the sibling rules - TODO confirm.
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_quality_trimmer \
{params.v} \
-l {params.l} \
-t {params.t} \
-Q {params.Q} \
{params.z} \
-i <(zcat {input.reads}) \
-o {output.reads}) &> {log}"
#################################################################################
### Filtering reads
#################################################################################
# Runs fastq_quality_filter on the trimmed reads (streamed through zcat) and writes
# the result to {output.reads}.
# NOTE(review): this rule looks incomplete in this view. No `output:`, `params:`, `log:`
# or `singularity:` header is visible, so `reads` appears twice under `input:`, and the
# shell command reads {params.v} and {params.p}, which are not defined below.
# Confirm against the complete file before relying on it.
rule filter_reads:
input:
reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
# NOTE(review): presumably the `output:` entry (the shell writes to {output.reads}) - TODO confirm.
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
# NOTE(review): presumably `params:` entries (the shell uses {params.q}, {params.z}
# and {params.Q}) - TODO confirm. q and Q are looked up per sample in the config.
q = lambda wildcards: config[wildcards.sample]['minimum_quality'],
z = "-z",
Q = lambda wildcards: config[wildcards.sample]['quality_type'],
cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log")
# NOTE(review): presumably the `log:` entry (the shell redirects to {log}) - TODO confirm.
os.path.join(config["local_log"], "filter_reads_{sample}.log")
# NOTE(review): the next two lines are not Snakemake syntax; they look like
# version-control blame residue. The image string after them is presumably the
# `singularity:` entry, as in the sibling rules - TODO confirm.
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_quality_filter \
{params.v} \
-q {params.q} \
-p {params.p} \
-Q {params.Q} \
{params.z} \
-i <(zcat {input.reads}) \
-o {output.reads}) &> {log}"
#################################################################################
### Convert fastq to fasta
#################################################################################
# Invokes fastq_to_fasta for a sample's filtered reads.
# NOTE(review): this rule looks incomplete in this view. No `output:`, `params:`, `log:`
# or `singularity:` header is visible, so `reads` appears twice under `input:`; the
# shell command reads {params.v}, {params.n} and {params.r}, none of which are defined
# below; and the shell string stops after {params.r} without input/output arguments or
# a closing quote. Confirm against the complete file before relying on it.
rule fastq_to_fasta:
input:
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
# NOTE(review): presumably the `output:` entry (a FASTA file in the sample directory) - TODO confirm.
reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
# NOTE(review): presumably a `params:` entry, as in the sibling rules - TODO confirm.
cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log")
# NOTE(review): presumably the `log:` entry - TODO confirm.
os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
# NOTE(review): the next two lines are not Snakemake syntax; they look like
# version-control blame residue. The image string after them is presumably the
# `singularity:` entry, as in the sibling rules - TODO confirm.
BIOPZ-Gypas Foivos
committed
"docker://zavolab/fastx:0.0.14"
shell:
"(fastq_to_fasta \
{params.v} \
{params.n} \
{params.r} \
#################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
#################################################################################
# Aligns a sample's filtered reads against the "other RNAs" reference with segemehl.
# Alignments go to other_genes.mapped.sam (-o) and the reads passed to -u go to
# other_genes.unmapped.fasta, which is the input of map_to_transcripts.
rule map_to_other_genes:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
        index = config["other_RNAs_index"],
        sequence = config["other_RNAs_sequence"]
    output:
        sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta")
    params:
        silent = "--silent",
        accuracy = "90",
        # Not referenced by the command below; presumably consumed by the cluster
        # submission configuration - TODO confirm.
        cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log")
    log:
        os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
    threads: 8
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    shell:
        # Adjacent literals are concatenated into a single command line; stdout and
        # stderr of the whole subshell are redirected to the rule's log file.
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {output.reads} ) &> {log}"
#################################################################################
### Map reads to transcripts
#################################################################################
# Aligns the reads left over from map_to_other_genes (other_genes.unmapped.fasta)
# against the transcript reference with segemehl. Alignments go to
# transcripts.mapped.sam (-o) and the reads passed to -u go to
# transcripts.unmapped.fasta.
rule map_to_transcripts:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
        index = config["transcripts_index"],
        sequence = config["transcripts_sequence"]
    output:
        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta")
    params:
        silent = "--silent",
        accuracy = "90",
        # Not referenced by the command below; presumably consumed by the cluster
        # submission configuration - TODO confirm.
        cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log")
    log:
        os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
    threads: 8
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    shell:
        # Adjacent literals are concatenated into a single command line; stdout and
        # stderr of the whole subshell are redirected to the rule's log file.
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {output.reads} ) &> {log}"
################################################################################
### Remove multimappers
################################################################################
# Filters a sample's transcript alignments with grep, keeping only lines that start
# with "@" (SAM header lines) or that contain the tag NH:i:1 delimited by tabs on both
# sides, and writes them to transcripts.mapped.unique.sam.
# NOTE(review): an NH:i:1 tag that is the last field of a line has no trailing tab and
# would not match this pattern - confirm this cannot occur in the mapper's output.
rule remove_multimappers:
    input:
        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
    output:
        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
    log:
        os.path.join(config["local_log"], "remove_multimappers_{sample}.log")
    threads: 1
    shell:
        # The \t escapes are expanded by Python, so grep receives literal tab characters.
        "(grep -P \"^@|\tNH:i:1\t\" {input.sam} "
        "> {output.sam}) &> {log}"