# os.path.join is used throughout the rules below; import it explicitly
# instead of relying on whatever names the Snakemake runtime exposes.
import os
#from snakemake.utils import listfiles

# Paths, sample names and per-sample settings are read from this file.
configfile: "config.yaml"

# Rules that are run on the submitting host rather than as cluster jobs.
# NOTE(review): concat_samples is not defined in the visible part of this
# file -- confirm it exists elsewhere or drop it from the list.
localrules: create_output_and_log_directories, concat_samples, finish
#################################################################################
### Finish rule
#################################################################################
# Target rule: requests the transcript alignment (SAM) of every sample listed
# under config["sample"], which pulls in the rest of the pipeline.
rule finish:
    input:
        sam = expand(
            os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
            sample=config["sample"],
        )

#################################################################################
### Create output and log directories
#################################################################################
# Creates the output directory, both log directories and one sub-directory per
# sample, then touches a flag file that later rules can list as an input.
# NOTE(review): recent Snakemake releases expect directory outputs to be
# wrapped in directory() -- confirm against the Snakemake version in use.
rule create_output_and_log_directories:
    output:
        output_dir = config["output_dir"],
        cluster_log = config["cluster_log"],
        local_log = config["local_log"],
        sample_dir = expand(
            os.path.join(config["output_dir"], "{sample}"),
            sample=config["sample"],
        ),
        flag = config["dir_created"],
    threads: 1
    shell:
        """
        mkdir -p {output.output_dir}
        mkdir -p {output.cluster_log}
        mkdir -p {output.local_log}
        mkdir -p {output.sample_dir}
        touch {output.flag}
        """

#################################################################################
### Clipping reads
#################################################################################
# Clips the per-sample adapter (config[<sample>]["adapter"]) from the raw
# gzipped reads with fastx_clipper and writes gzipped FASTQ.
rule clip_reads:
    input:
        # Flag written by create_output_and_log_directories, so the output and
        # log directories exist before this rule runs.
        flag = config["dir_created"],
        reads = os.path.join(
            config["input_dir"],
            "{sample}" + config["input_reads_pattern"],
        ),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
    params:
        v = "-v",          # verbose: report read counts
        n = "-n",          # keep reads containing unknown (N) nucleotides
        l = "20",          # discard reads shorter than this after clipping
        qual = "-Q33",     # Phred+33 quality encoding
        z = "-z",          # gzip-compress the output
        adapter = lambda wildcards: config[wildcards.sample]["adapter"],
        cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log"),
    log:
        os.path.join(config["local_log"], "clip_reads_{sample}.log")
    singularity:
        "docker://zavolab/fastx:0.0.14"
    shell:
        # The input is decompressed on the fly via bash process substitution.
        "(fastx_clipper "
        "{params.v} "
        "{params.n} "
        "-l {params.l} "
        "{params.qual} "
        "-a {params.adapter} "
        "{params.z} "
        "-i <(zcat {input.reads}) "
        "-o {output.reads}) &> {log}"

#################################################################################
### Trimming reads
#################################################################################
# Quality-trims the clipped reads with fastq_quality_trimmer and writes
# gzipped FASTQ.
rule trim_reads:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
    params:
        v = "-v",          # verbose: report read counts
        l = "20",          # discard reads shorter than this after trimming
        t = "20",          # quality threshold below which bases are trimmed
        qual = "-Q33",     # Phred+33 quality encoding
        z = "-z",          # gzip-compress the output
        # Cluster log lives under config["cluster_log"] and the rule log under
        # config["local_log"], matching clip_reads and the mapping rules (the
        # two directories were previously swapped here).
        cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log"),
    log:
        os.path.join(config["local_log"], "trim_reads_{sample}.log")
    singularity:
        "docker://zavolab/fastx:0.0.14"
    shell:
        # The input is decompressed on the fly via bash process substitution.
        "(fastq_quality_trimmer "
        "{params.v} "
        "-l {params.l} "
        "-t {params.t} "
        "{params.qual} "
        "{params.z} "
        "-i <(zcat {input.reads}) "
        "-o {output.reads}) &> {log}"

#################################################################################
### Filtering reads
#################################################################################
# Drops low-quality reads from the trimmed set with fastq_quality_filter and
# writes gzipped FASTQ.
rule filter_reads:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
    params:
        v = "-v",          # verbose: report read counts
        q = "20",          # minimum quality score a base must reach
        p = "90",          # minimum percentage of bases that must reach -q
        qual = "-Q33",     # Phred+33 quality encoding
        z = "-z",          # gzip-compress the output
        # Cluster log lives under config["cluster_log"] and the rule log under
        # config["local_log"], matching clip_reads and the mapping rules (the
        # two directories were previously swapped here).
        cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log"),
    log:
        os.path.join(config["local_log"], "filter_reads_{sample}.log")
    singularity:
        "docker://zavolab/fastx:0.0.14"
    shell:
        # The input is decompressed on the fly via bash process substitution.
        "(fastq_quality_filter "
        "{params.v} "
        "-q {params.q} "
        "-p {params.p} "
        "{params.qual} "
        "{params.z} "
        "-i <(zcat {input.reads}) "
        "-o {output.reads}) &> {log}"

#################################################################################
### Convert fastq to fasta
#################################################################################
# Converts the filtered, gzipped FASTQ reads to an uncompressed FASTA file
# with fastq_to_fasta.
rule fastq_to_fasta:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"),
    output:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
    params:
        v = "-v",          # verbose: report read counts
        qual = "-Q33",     # Phred+33 quality encoding
        n = "-n",          # keep reads containing unknown (N) nucleotides
        r = "-r",          # rename sequence identifiers to numbers
        # No "-z" param: the output is a plain .fasta file, and the flag was
        # never passed to the command.
        # Cluster log lives under config["cluster_log"] and the rule log under
        # config["local_log"], matching clip_reads and the mapping rules (the
        # two directories were previously swapped here).
        cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log"),
    log:
        os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log")
    singularity:
        "docker://zavolab/fastx:0.0.14"
    shell:
        # The input is decompressed on the fly via bash process substitution.
        "(fastq_to_fasta "
        "{params.v} "
        "{params.qual} "
        "{params.n} "
        "{params.r} "
        "-i <(zcat {input.reads}) "
        "-o {output.reads}) &> {log}"

#################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
#################################################################################
# Aligns the filtered reads against the "other RNAs" reference with segemehl.
# Aligned reads go to a SAM file; reads without a match are written to a
# separate FASTA file.
rule map_to_other_genes:
    input:
        reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"),
        index = config["other_RNAs_index"],
        sequence = config["other_RNAs_sequence"],
    output:
        sam = os.path.join(config["output_dir"], "{sample}/other_genes.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
    params:
        silent = "--silent",
        accuracy = "90",   # value passed to segemehl --accuracy (percent)
        cluster_log = os.path.join(config["cluster_log"], "map_to_other_genes_{sample}.log"),
    log:
        os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
    threads: 8
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    shell:
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {output.reads} ) &> {log}"

#################################################################################
### Map reads to transcripts
#################################################################################
# Aligns reads against the transcript reference with segemehl. Aligned reads
# go to a SAM file; reads without a match are written to a separate FASTA file.
rule map_to_transcripts:
    input:
        # Consume the reads that did NOT align in map_to_other_genes. Reading
        # pro.filtered.fasta here (as before) left map_to_other_genes outside
        # the dependency graph of `finish`, so it never ran and its reads were
        # never removed before transcript mapping.
        # NOTE(review): confirm this chaining is the intended pipeline order.
        reads = os.path.join(config["output_dir"], "{sample}/other_genes.unmapped.fasta"),
        index = config["transcripts_index"],
        sequence = config["transcripts_sequence"],
    output:
        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam"),
        reads = os.path.join(config["output_dir"], "{sample}/transcripts.unmapped.fasta"),
    params:
        silent = "--silent",
        accuracy = "90",   # value passed to segemehl --accuracy (percent)
        cluster_log = os.path.join(config["cluster_log"], "map_to_transcripts_{sample}.log"),
    log:
        os.path.join(config["local_log"], "map_to_transcripts_{sample}.log")
    threads: 8
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    shell:
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {output.reads} ) &> {log}"