"tests/test_scripts_prepare_inputs_table/test.sh" did not exist on "1e52fa5604db6ca59f353dbf3c9c18d12cad665d"
Newer
Older
BIOPZ-Gypas Foivos
committed
rule pe_fastqc:
BIOPZ-Katsantoni Maria
committed
'''
A quality control tool for high throughput sequence data
'''
BIOPZ-Katsantoni Maria
committed
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
BIOPZ-Katsantoni Maria
committed
outdir1 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate1_fastqc")),
outdir2 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate2_fastqc"))
threads: 2
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir1}; \
mkdir -p {output.outdir2}; \
BIOPZ-Katsantoni Maria
committed
fastqc --outdir {output.outdir1} {input.reads1}; \
fastqc --outdir {output.outdir2} {input.reads2}); \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_remove_adapters_cutadapt:
BIOPZ-Katsantoni Maria
committed
'''
Remove adapters
'''
BIOPZ-Katsantoni Maria
committed
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
params:
adapter_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapter_5_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_5p'],
adapter_3_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_3p'],
adapter_5_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_5p']
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
-j {threads} \
--pair-filter=both \
-m 10 \
-n 3 \
-a {params.adapter_3_mate1} \
-g {params.adapter_5_mate1} \
-A {params.adapter_3_mate2} \
-G {params.adapter_5_mate2} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
{input.reads2}); \
1> {log.stdout} 2>{log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_remove_polya_cutadapt:
BIOPZ-Katsantoni Maria
committed
'''
Remove polyA tails
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
params:
polya_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_polya'],
polya_3_mate2 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
samples_table.loc[wildcards.sample, 'fq2_polya']
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
BIOPZ-Katsantoni Maria
committed
"(cutadapt \
--match-read-wildcards \
-j {threads} \
--pair-filter=both \
-m 10 \
-n 2 \
-e 0.1 \
-q 6 \
-m 10 \
-a {params.polya_3_mate1} \
-A {params.polya_3_mate2} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
{input.reads2};) \
1> {log.stdout} 2>{log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_map_genome_star:
BIOPZ-Katsantoni Maria
committed
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index",
"chrNameLength.txt"),
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
output:
bam = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
logfile = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Log.final.out")
BIOPZ-Katsantoni Maria
committed
params:
sample_id = "{sample}",
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index"),
outFileNamePrefix = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_"),
multimappers = lambda wildcards:
str(samples_table.loc[wildcards.sample, "multimappers"]),
soft_clip = lambda wildcards:
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
samples_table.loc[wildcards.sample, "pass_mode"]
singularity:
threads: 12
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
--runMode alignReads \
--twopassMode {params.pass_mode} \
--runThreadN {threads} \
--genomeDir {params.index} \
--readFilesIn {input.reads1} {input.reads2} \
--readFilesCommand zcat \
--outSAMunmapped None \
--outFilterMultimapNmax {params.multimappers} \
--outFilterMultimapScoreRange 1 \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outFilterMismatchNoverLmax 0.04 \
--outFilterScoreMinOverLread 0.3 \
--outFilterMatchNminOverLread 0.3 \
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
BIOPZ-Katsantoni Maria
committed
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_quantification_salmon:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, 'gtf_filtered'],
index = lambda wildcards:
os.path.join(
config["salmon_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx")
BIOPZ-Katsantoni Maria
committed
output:
gn_estimates = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant",
"quant.sf")
BIOPZ-Katsantoni Maria
committed
params:
output_dir = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, 'libtype']
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stdout.log"),
threads: 6
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quant \
--libType {params.libType} \
--seqBias \
--validateMappings \
--threads {threads} \
--writeUnmappedNames \
--index {input.index} \
--geneMap {input.gtf} \
-1 {input.reads1} \
-2 {input.reads2} \
BIOPZ-Katsantoni Maria
committed
-o {params.output_dir}) 1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_genome_quantification_kallisto:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"kallisto.idx")
BIOPZ-Katsantoni Maria
committed
output:
pseudoalignment = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
BIOPZ-Katsantoni Maria
committed
params:
output_dir = os.path.join(
BIOPZ-Katsantoni Maria
committed
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, "kallisto_directionality"]
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Katsantoni Maria
committed
threads: 8
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--pseudobam \
{params.directionality} \
BIOPZ-Katsantoni Maria
committed
{input.reads1} {input.reads2} > {output.pseudoalignment}) \
2> {log.stderr}"