diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile index c9a07d8556260e4a37810ea17cd6ac897d8627e5..df87399b07484041f9ff1f55a4f65a7d8d3b505b 100644 --- a/snakemake/process_data/Snakefile +++ b/snakemake/process_data/Snakefile @@ -39,14 +39,13 @@ rule clip_reads: flag = config["dir_created"], reads = os.path.join(config["input_dir"], "{sample}" + config["input_reads_pattern"]), output: - reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq"), + reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz"), params: v = "-v", n = "-n", l = "20", - qual = "-Q33", + adapter = lambda wildcards: config[wildcards.sample]['adapter'], z = "-z", - adapter = lambda wildcards: config[ wildcards.sample ]['adapter'], cluster_log = os.path.join(config["cluster_log"], "clip_reads_{sample}.log") log: os.path.join(config["local_log"], "clip_reads_{sample}.log") @@ -57,7 +56,7 @@ rule clip_reads: {params.v} \ {params.n} \ -l {params.l} \ - {params.qual} \ + {params.z} \ -a {params.adapter} \ -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" @@ -68,14 +67,14 @@ rule clip_reads: rule trim_reads: input: - reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq") + reads = os.path.join(config["output_dir"], "{sample}/pro.clipped.fastq.gz") output: - reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq"), + reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"), params: v = "-v", l = "20", - t = "20", - qual = "-Q33", + t = lambda wildcards: config[wildcards.sample]['minimum_quality'], + Q = lambda wildcards: config[wildcards.sample]['quality_type'], z = "-z", cluster_log = os.path.join(config["cluster_log"], "trim_reads_{sample}.log") log: @@ -87,8 +86,9 @@ rule trim_reads: {params.v} \ -l {params.l} \ -t {params.t} \ - {params.qual} \ - -i {input.reads} \ + -Q {params.Q} \ + {params.z} \ + -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################# @@ -97,14 +97,15 @@ rule trim_reads: rule filter_reads: input: - reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq"), + reads = os.path.join(config["output_dir"], "{sample}/pro.trimmed.fastq.gz"), output: - reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq"), + reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"), params: v = "-v", - q = "20", + q = lambda wildcards: config[wildcards.sample]['minimum_quality'], p = "90", - qual = "-Q33", + z = "-z", + Q = lambda wildcards: config[wildcards.sample]['quality_type'], cluster_log = os.path.join(config["cluster_log"], "filter_reads_{sample}.log") log: os.path.join(config["local_log"], "filter_reads_{sample}.log") @@ -115,8 +116,9 @@ rule filter_reads: {params.v} \ -q {params.q} \ -p {params.p} \ - {params.qual} \ - -i {input.reads} \ + -Q {params.Q} \ + {params.z} \ + -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################# @@ -125,15 +127,13 @@ rule filter_reads: rule fastq_to_fasta: input: - reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq"), + reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fastq.gz"), output: reads = os.path.join(config["output_dir"], "{sample}/pro.filtered.fasta"), params: v = "-v", - qual = "-Q33", n = "-n", r = "-r", - z = "-z", cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log") log: os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log") @@ -142,10 +142,9 @@ rule fastq_to_fasta: shell: "(fastq_to_fasta \ {params.v} \ - {params.qual} \ {params.n} \ {params.r} \ - -i {input.reads} \ + -i <(zcat {input.reads}) \ -o {output.reads}) &> {log}" ################################################################################# diff --git a/snakemake/process_data/config.yaml b/snakemake/process_data/config.yaml index ea7df86e192bddeaddf427ee50483ae8289d522c..6a991377a4069e94913a5b7ebccc677a4f77fcc8 100644 --- a/snakemake/process_data/config.yaml +++ b/snakemake/process_data/config.yaml @@ -18,8 +18,7 @@ ############################################################################## input_dir: "samples" input_reads_pattern: ".fastq.gz" - sample: ["example", "s_ribseq_r1"] - example: {adapter: GATCGGAAGAGCACA} - m_ribseq_r2: {adapter: CTGTAGGCACCATCA} - s_ribseq_r1: {adapter: CTGTAGGCACCATCA} + sample: ["example", "example2"] + example: {adapter: GATCGGAAGAGCACA, minimum_quality: 20, quality_type: 33} + example2: {adapter: CTGTAGGCACCATCA, minimum_quality: 20, quality_type: 64} ...