Skip to content
Snippets Groups Projects
Commit 44d3f3e3 authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Rename snakemake/paired_end.snakemake to snakemake/paired_end.snakefile. Fix...

Rename snakemake/paired_end.snakemake to snakemake/paired_end.snakefile. Fix wiring of rules. Add fake tests.
parent 5094be70
Branches
Tags
1 merge request!4Subpipelines
configfile: "config.yaml"
################################################################################
### python modules
################################################################################
import os
import sys
import pandas as pd
configfile: "config.yaml"
############################
samples_table = pd.read_csv(config["samples"], header=0, index_col=0, comment='#', engine='python', sep="\t")
localrules: finish
##################################################################################
# Execution dependent on sequencing mode
##################################################################################
include: 'paired_end.snakefile'
include: 'single_end.snakefile'
#################################################################################
### Final rule
#################################################################################
rule finish:
    '''Target rule: request the FastQC output directories of both mates for every sample.'''
    input:
        # One FastQC directory per sample and mate; the sample names are the
        # index of the samples table.
        outdir1 = expand(
            os.path.join(config["output_dir"], "paired_end", "{sample}", "mate1_fastqc"),
            sample=samples_table.index.values),
        outdir2 = expand(
            os.path.join(config["output_dir"], "paired_end", "{sample}", "mate2_fastqc"),
            sample=samples_table.index.values)
##################################################################################
# Execution dependent on sequencing mode
##################################################################################
include: 'paired_end.snakefile'
include: 'single_end.snakefile'
rule create_index_star:
    ''' Create index using STAR'''
    input:
        # This rule is keyed on the {organism} and {index_size} wildcards of
        # its outputs; it has no {sample} wildcard. The reference files are
        # therefore taken from the first samples-table row whose "organism"
        # column matches the requested organism.
        genome = lambda wildcards:
            samples_table.loc[
                samples_table["organism"] == wildcards.organism,
                "genome"].iloc[0],
        gtf = lambda wildcards:
            samples_table.loc[
                samples_table["organism"] == wildcards.organism,
                "gtf"].iloc[0]
    output:
        chromosome_info = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrNameLength.txt"),
        chromosomes_names = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index",
            "chrName.txt")
    params:
        output_dir = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index"),
        outFileNamePrefix = os.path.join(
            config["star_indexes"],
            "{organism}",
            "{index_size}",
            "STAR_index/STAR_"),
        # The overhang is the index size encoded in the output path, so it is
        # read straight from the wildcard instead of the samples table.
        sjdbOverhang = "{index_size}"
    singularity:
        "docker://zavolab/star:2.6.0a"
    threads: 12
    log:
        os.path.join( config["local_log"], "{organism}_{index_size}_create_index_star.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
        STAR \
        --runMode genomeGenerate \
        --sjdbOverhang {params.sjdbOverhang} \
        --genomeDir {params.output_dir} \
        --genomeFastaFiles {input.genome} \
        --runThreadN {threads} \
        --outFileNamePrefix {params.outFileNamePrefix} \
        --sjdbGTFfile {input.gtf}) &> {log}"
rule create_index_salmon:
    '''Create index for salmon quantification'''
    input:
        # The only wildcard of this rule is {organism}; look the transcriptome
        # up by organism (first matching samples-table row), not by sample.
        transcriptome = lambda wildcards:
            samples_table.loc[
                samples_table["organism"] == wildcards.organism,
                "tr_fasta_filtered"].iloc[0]
    output:
        # salmon writes its index as a directory, so it is declared as one.
        index = directory(os.path.join(
            config["salmon_indexes"],
            "{organism}",
            "salmon.idx"))
    params:
        kmerLen = lambda wildcards:
            samples_table.loc[
                samples_table["organism"] == wildcards.organism,
                "kmer"].iloc[0]
    singularity:
        "docker://zavolab/salmon:0.11.0"
    log:
        os.path.join(config["local_log"], "{organism}_create_index_salmon.log")
    threads: 8
    shell:
        # Full long option names are used: "--t", "--i" and "--k" are not
        # salmon options.
        "(salmon index \
        --transcripts {input.transcriptome} \
        --index {output.index} \
        --kmerLen {params.kmerLen} \
        --threads {threads}) &> {log}"
rule create_index_kallisto:
    '''Create index for running Kallisto'''
    input:
        # The only wildcard of this rule is {organism}; look the transcriptome
        # up by organism (first matching samples-table row), not by sample.
        transcriptome = lambda wildcards:
            samples_table.loc[
                samples_table["organism"] == wildcards.organism,
                "tr_fasta_filtered"].iloc[0]
    output:
        index = os.path.join(
            config["kallisto_indexes"],
            "{organism}",
            "kallisto.idx")
    params:
        # Parent directory of the index file, built from the same wildcard as
        # the output path so the two always agree.
        output_dir = os.path.join(
            config["kallisto_indexes"],
            "{organism}")
    singularity:
        "docker://zavolab/kallisto:0.9"
    log:
        os.path.join(config["local_log"], "{organism}_create_index_kallisto.log")
    shell:
        "(mkdir -p {params.output_dir}; \
        chmod -R 777 {params.output_dir}; \
        kallisto index -i {output.index} {input.transcriptome}) &> {log}"
......@@ -16,12 +16,14 @@
database_path: "/scicore/home/zavolan/GROUP/Rna_Seq_pipeline/Blabla"
STAR_idx_folder: "STAR_indices"
output_dir: "results"
star_indexes: "results"
salmon_indexes: "results"
kallisto_indexes: "results"
local_log: "logs/local_log"
cluster_log: "logs/cluster_log"
##############################################################################
### Sample info
##############################################################################
input_dir: "samples"
sample: ["test"]
test: {libType: A, fldMean: 300, fldSD: 100}
samples: "../tests/samples.tsv"
...
rule fastqc:
rule pe_fastqc:
'''A quality control tool for high throughput sequence data'''
input:
reads1 = os.path.join(get_fq_1("{sample}")),
reads2 = os.path.join(get_fq_2("{sample}"))
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
output:
outdir1 = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate1_fastqc")),
outdir2 = directory(os.path.join(config["output_dir"], "paired_end", "{sample}", "mate2_fastqc"))
threads:
2
singularity:
"docker://zavolab/fastqc:0.11.8"
log:
......@@ -15,15 +17,14 @@ rule fastqc:
shell:
    # The two FastQC calls run one after the other. A "; &" separator is a
    # shell syntax error, and backgrounding the first call would hide its
    # exit status from Snakemake.
    "(mkdir -p {output.outdir1}; \
    mkdir -p {output.outdir2}; \
    fastqc --outdir {output.outdir1} {input.reads1}; \
    fastqc --outdir {output.outdir2} {input.reads2}) &> {log}"
rule htseq_qa:
rule pe_htseq_qa:
'''Assess the technical quality of a run'''
input:
reads1 = os.path.join(get_fq_1("{sample}")),
reads2 = os.path.join(get_fq_2("{sample}"))
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
output:
qual_pdf_mate1 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate1.pdf"),
qual_pdf_mate2 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate2.pdf")
......@@ -38,11 +39,11 @@ rule htseq_qa:
htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; ) &> {log}"
rule remove_adapters_cutadapt:
rule pe_remove_adapters_cutadapt:
'''Remove adapters'''
input:
reads1 = os.path.join(get_fq_1("{sample}")),
reads2 = os.path.join(get_fq_2("{sample}"))
reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"]
output:
reads1 = os.path.join(
config["output_dir"],
......@@ -56,13 +57,13 @@ rule remove_adapters_cutadapt:
"{sample}.remove_adapters_mate2.fastq.gz")
params:
adapter_3_mate1 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_3p'],
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapter_5_mate1 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_5p'],
samples_table.loc[wildcards.sample, 'fq1_5p'],
adapter_3_mate2 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq2_3p'],
samples_table.loc[wildcards.sample, 'fq2_3p'],
adapter_5_mate2 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq2_5p']
samples_table.loc[wildcards.sample, 'fq2_5p']
singularity:
"docker://zavolab/cutadapt:1.16"
threads: 8
......@@ -85,7 +86,7 @@ rule remove_adapters_cutadapt:
{input.reads2}) &> {log}"
rule remove_polya_cutadapt:
rule pe_remove_polya_cutadapt:
'''Remove polyA tails'''
input:
reads1 = os.path.join(
......@@ -111,9 +112,9 @@ rule remove_polya_cutadapt:
"{sample}.remove_polya_mate2.fastq.gz")
params:
polya_3_mate1 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_polya'],
samples_table.loc[wildcards.sample, 'fq1_polya'],
polya_3_mate2 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq2_polya'],
samples_table.loc[wildcards.sample, 'fq2_polya'],
singularity:
"docker://zavolab/cutadapt:1.16"
threads: 8
......@@ -137,65 +138,16 @@ rule remove_polya_cutadapt:
{input.reads2}) &> {log}'
rule create_index_star:
''' Create index using STAR'''
rule pe_map_genome_star:
'''Map to genome using STAR'''
input:
genome = sample_table.loc["{sample}", 'genome'],
gtf = sample_table.loc["{sample}", 'gtf']
output:
chromosome_info = os.path.join(
config["star_indexes"],
sample_table.loc[wildcards.sample, "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index",
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index",
"chrName.txt")
params:
output_dir = lambda wildcards:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc[wildcards.sample, "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = os.path.join(
config["star_indexes"],
sample_table.loc[wildcards.sample, "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index/STAR_"),
sjdbOverhang = lambda wildcards:
sample_table.loc[wildcards.sample, "index_size"]
singularity:
"docker://zavolab/star:2.6.0a"
threads: 12
log:
os.path.join( config["local_log"], "paired_end", "{sample}", "create_index_star.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) &> {log}"
rule map_genome_star:
'''Map to genome using STAR'''
input:
index = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index",
"chrNameLength.txt"),
samples_table.loc[wildcards.sample, "organism"],
samples_table.loc[wildcards.sample, "index_size"],
"STAR_index",
"chrNameLength.txt"),
reads1 = os.path.join(
config["output_dir"],
"paired_end",
......@@ -224,7 +176,7 @@ rule map_genome_star:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc[wildcards.sample, "index_size"],
samples_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = os.path.join(
config["output_dir"],
......@@ -233,11 +185,11 @@ rule map_genome_star:
"map_genome",
"{sample}_"),
multimappers = lambda wildcards:
sample_table.loc[wildcards.sample, "mulitmappers"],
samples_table.loc[wildcards.sample, "mulitmappers"],
soft_clip = lambda wildcards:
sample_table.loc[wildcards.sample, "soft_clip"],
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
sample_table.loc[wildcards.sample, "pass_mode"]
samples_table.loc[wildcards.sample, "pass_mode"]
singularity:
"docker://zavolab/star:2.6.0a"
......@@ -271,7 +223,7 @@ rule map_genome_star:
--alignEndsType {params.soft_clip}} > {output.bam};) &> {log}"
rule index_genomic_alignment_samtools:
rule pe_index_genomic_alignment_samtools:
'''Index the genomic alignment'''
input:
bam = os.path.join(
......@@ -296,32 +248,7 @@ rule index_genomic_alignment_samtools:
"(samtools index {input.bam} {output.bai};) &> {log}"
rule create_index_salmon:
'''Create index for salmon quantification'''
input:
transcriptome = sample_table.loc["{sample}", 'tr_fasta_filtered']
output:
index = os.path.join(
config["salmon_indexes"],
sample_table["{sample}", 'organism'],
"salmon.idx")
params:
kmerLen = lambda wildcards:
sample_table.loc[wildcards.sample, 'kmer']
singularity:
"docker://zavolab/salmon:0.11.0"
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "create_index_salmon.log")
threads: 8
shell:
"(salmon index \
--t {input.transcriptome} \
--i {output.index} \
--k {params.kmerLen} \
--threads {threads}) &> {log}"
rule quantification_salmon:
rule pe_quantification_salmon:
'''Quantification at transcript and gene level using Salmon'''
input:
reads1 = os.path.join(
......@@ -334,11 +261,13 @@ rule quantification_salmon:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
gtf = sample_table["{sample}", 'gtf_filtered'],
index = os.path.join(
config["salmon_indexes"],
sample_table["{sample}", 'organism'],
"salmon.idx")
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, 'gtf_filtered'],
index = lambda wildcards:
os.path.join(
config["salmon_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"salmon.idx")
output:
gn_estimates = os.path.join(
config["output_dir"],
......@@ -359,7 +288,7 @@ rule quantification_salmon:
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
sample_table.loc[wildcards.sample, 'libtype']
samples_table.loc[wildcards.sample, 'libtype']
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "genome_quantification_salmon.log")
threads: 6
......@@ -379,31 +308,7 @@ rule quantification_salmon:
-o {params.output_dir}) &> {log}"
rule create_index_kallisto:
'''Create index for running Kallisto'''
input:
transcriptome = sample_table[,"{sample}", 'tr_fasta_filtered']
output:
index = os.path.join(
config["kallisto_indexes"],
sample_table["{sample}", 'organism'],
"kallisto.idx")
params:
output_dir = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
sample_table[wildcards.sample, 'organism'])
singularity:
"docker://zavolab/kallisto:0.9"
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "create_index_kallisto.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
kallisto index -i {output.index} {input.transcriptome}) &> {log}"
rule genome_quantification_kallisto:
rule pe_genome_quantification_kallisto:
'''Quantification at transcript and gene level using Kallisto'''
input:
reads1 = os.path.join(
......@@ -416,10 +321,11 @@ rule genome_quantification_kallisto:
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
index = os.path.join(
config["kallisto_indexes"],
sample_table["{sample}", 'organism'],
"kallisto.idx")
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
......@@ -435,7 +341,7 @@ rule genome_quantification_kallisto:
wildcards.sample,
"quant_kallisto"),
directionality = lambda wildcards:
sample_table.loc[wildcards.sample, "kallisto_directionality"]
samples_table.loc[wildcards.sample, "kallisto_directionality"]
singularity:
"docker://zavolab/kallisto:0.9"
threads: 8
......
import os
rule fastqc:
''' A quality control tool for high throughput sequence data. '''
input:
reads = os.path.join(get_fq_1("{sample}"))
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"],
output:
outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "fastqc"))
singularity:
......@@ -19,7 +19,7 @@ rule fastqc:
rule htseq_qa:
''' Assess the technical quality of a run. '''
input:
reads = os.path.join(get_fq_1("{sample}"))
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
output:
qual_pdf = os.path.join(config["output_dir"], "single_end", "{sample}", "htseq_quality.pdf")
singularity:
......@@ -36,14 +36,14 @@ rule htseq_qa:
rule remove_adapters_cutadapt:
''' Remove adapters '''
input:
reads = os.path.join(get_fq_1("{sample}"))
reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"]
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters.fastq.gz")
params:
adapters_3 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_3p']
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapters_5 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_5p']
samples_table.loc[wildcards.sample, 'fq1_5p']
singularity:
"docker://zavolab/cutadapt:1.16"
......@@ -66,13 +66,12 @@ rule remove_adapters_cutadapt:
rule remove_polya_cutadapt:
''' Remove ployA tails'''
input:
reads = os.path.join(get_fq_1("{sample}"))
reads = lambda wildcards: samples_table[wildcards.sample, "fq1"]
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
params:
polya_3 =
adapters_3 = lambda wildcards:
sample_table.loc[wildcards.sample, "fq1_polya"]
polya_3 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1_polya"]
singularity:
"docker://zavolab/cutadapt:1.16"
threads: 8
......@@ -92,63 +91,15 @@ rule remove_polya_cutadapt:
{input.reads}) &> {log}"
rule create_index_star:
''' Create index using STAR. '''
input:
genome = sample_table.loc["{sample}", "genome"],
gtf = sample_table.loc["{sample}", "gtf"]
output:
chromosome_info = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index","chrNameLength.txt"),
chromosome_names = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index", "chrName.txt")
params:
output_dir = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index/STAR_"),
sjdbOverhang = lambda wildcards:
sample_table.loc[wildcards.sample, "index_size"]
singularity:
"docker://zavolab/star:2.6.0a"
threads: 12
log:
os.path.join(config["local_log"], "single_end", "{sample}", "create_index_star.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) &> {log}"
rule map_genome_star:
''' Map to genome using STAR. '''
input:
index = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index","chrNameLength.txt"),
index = lambda wildcards:
os.path.join(
config["star_indexes"],
samples_table.loc[wildcards.sample, "organism"],
samples_table.loc[wildcards.sample, "index_size"],
"STAR_index","chrNameLength.txt"),
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
output:
bam = os.path.join(config["output_dir"], "single_end",
......@@ -160,12 +111,12 @@ rule map_genome_star:
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}"
sample_id = "{sample}",
index = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
samples_table.loc["{sample}", "organism"],
samples_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = lambda wildcards:
os.path.join(
......@@ -173,11 +124,11 @@ rule map_genome_star:
"single_end",
"{sample}", "map_genome", "{sample}_"),
multimappers = lambda wildcards:
sample_table.loc[wildcards.sample, "multimappers"],
samples_table.loc[wildcards.sample, "multimappers"],
soft_clip = lambda wildcards:
sample_table.loc[wildcards.sample, "soft_clip"],
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
sample_table.loc[wildcards.sample, "pass_mode"],
samples_table.loc[wildcards.sample, "pass_mode"],
singularity:
"docker://zavolab/star:2.6.0a"
threads: 12
......@@ -208,128 +159,80 @@ rule map_genome_star:
rule index_genomic_alignment_samtools:
    '''Index genome bamfile using samtools.'''
    input:
        # Coordinate-sorted genome alignment of the sample.
        bam = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam")
    output:
        # The index is written next to the BAM file.
        bai = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "map_genome",
            "{sample}_Aligned.sortedByCoord.out.bam.bai")
    singularity:
        "docker://zavolab/samtools:1.8"
    threads: 1
    log:
        os.path.join(config["local_log"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
    shell:
        "(samtools index {input.bam} {output.bai};) &> {log}"
rule quantification_salmon:
    ''' Quantification at transcript and gene level using Salmon. '''
    input:
        reads = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "{sample}.remove_polya.fastq.gz"),
        # Per-sample lookups go through .loc[row, column]; plain
        # samples_table[row, column] would be treated as a column key.
        index = lambda wildcards:
            os.path.join(
                config["salmon_indexes"],
                samples_table.loc[wildcards.sample, 'organism'],
                "salmon.idx"),
        gtf = lambda wildcards:
            samples_table.loc[wildcards.sample, "gtf_filtered"]
    output:
        gn_estimates = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant",
            "quant.genes.sf"),
        tr_estimates = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant",
            "quant.sf")
    params:
        # Directory that holds both estimate files declared under output.
        output_dir = os.path.join(
            config["output_dir"],
            "single_end",
            "{sample}",
            "salmon_quant"),
        libType = lambda wildcards:
            samples_table.loc[wildcards.sample, "libtype"]
    log:
        os.path.join(config["local_log"], "single_end", "{sample}", "quantification_salmon.log")
    threads: 12
    conda:
        "envs/salmon.yaml"
    shell:
        "(salmon quant \
        --libType {params.libType} \
        --seqBias \
        --validateMappings \
        --threads {threads} \
        --writeUnmappedNames \
        --index {input.index} \
        --geneMap {input.gtf} \
        --unmatedReads {input.reads} \
        -o {params.output_dir}) &> {log}"
rule genome_quantification_kallisto:
''' Quantification at transcript and gene level using Kallisto. '''
......@@ -339,10 +242,11 @@ rule genome_quantification_kallisto:
"single_end",
"{sample}",
"{sample}.remove_polya.fastq.gz"),
index = os.path.join(
config["kallisto_indexes"],
sample_table.loc["{sample}", "organism"],
"kallisto.idx")
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, "organism"],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
......@@ -356,9 +260,9 @@ rule genome_quantification_kallisto:
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards: sample_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards: sample_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards: sample_table.loc[wildcards.sample, 'kallisto_directionality']
fraglen = lambda wildcards: samples_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards: samples_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
log:
os.path.join(config["local_log"],"kallisto_align_{sample}.log")
......
sample fq1 fq2
LN18C_rep1 /Users/foivosgypas/Desktop/samples/BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R1_001_MM_1.fastq.gz /Users/foivosgypas/Desktop/samples/BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R2_001_MM_1.fastq.gz
LN18C_rep2 /Users/foivosgypas/Desktop/samples/BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R1_001_MM_1.fastq.gz /Users/foivosgypas/Desktop/samples/BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R2_001_MM_1.fastq.gz
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment