Newer
Older
BIOPZ-Gypas Foivos
committed
rule pe_fastqc:
BIOPZ-Katsantoni Maria
committed
'''
A quality control tool for high throughput sequence data
'''
BIOPZ-Katsantoni Maria
committed
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
BIOPZ-Katsantoni Maria
committed
outdir1 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate1_fastqc")),
outdir2 = directory(os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"mate2_fastqc"))
threads: 2
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"fastqc.stdout.log")
shell:
"(mkdir -p {output.outdir1}; \
mkdir -p {output.outdir2}; \
BIOPZ-Katsantoni Maria
committed
fastqc --outdir {output.outdir1} {input.reads1}; \
fastqc --outdir {output.outdir2} {input.reads2}); \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_remove_adapters_cutadapt:
BIOPZ-Katsantoni Maria
committed
'''
Remove adapters
'''
BIOPZ-Katsantoni Maria
committed
reads1 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq1"],
reads2 = lambda wildcards:
samples_table.loc[wildcards.sample, "fq2"]
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
params:
adapter_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_3p'],
adapter_5_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_5p'],
adapter_3_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_3p'],
adapter_5_mate2 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq2_5p']
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
shell:
"(cutadapt \
-e 0.1 \
-j {threads} \
--pair-filter=both \
-m 10 \
-n 3 \
-a {params.adapter_3_mate1} \
-g {params.adapter_5_mate1} \
-A {params.adapter_3_mate2} \
-G {params.adapter_5_mate2} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
{input.reads2}); \
1> {log.stdout} 2>{log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_remove_polya_cutadapt:
BIOPZ-Katsantoni Maria
committed
'''
Remove polyA tails
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_adapters_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
output:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
params:
polya_3_mate1 = lambda wildcards:
samples_table.loc[wildcards.sample, 'fq1_polya'],
polya_3_mate2 = lambda wildcards:
BIOPZ-Katsantoni Maria
committed
samples_table.loc[wildcards.sample, 'fq2_polya']
singularity:
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_polya_cutadapt.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"remove_adapters_cutadapt.stdout.log")
BIOPZ-Katsantoni Maria
committed
"(cutadapt \
--match-read-wildcards \
-j {threads} \
--pair-filter=both \
-m 10 \
-n 2 \
-e 0.1 \
-q 6 \
-m 10 \
-a {params.polya_3_mate1} \
-A {params.polya_3_mate2} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
BIOPZ-Katsantoni Maria
committed
{input.reads2};) \
1> {log.stdout} 2>{log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_map_genome_star:
BIOPZ-Katsantoni Maria
committed
'''
Map to genome using STAR
'''
input:
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index",
"chrNameLength.txt"),
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz")
BIOPZ-Katsantoni Maria
committed
output:
bam = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
logfile = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Log.final.out")
BIOPZ-Katsantoni Maria
committed
params:
sample_id = "{sample}",
index = lambda wildcards:
os.path.join(
config["star_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "index_size"]),
"STAR_index"),
outFileNamePrefix = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_"),
multimappers = lambda wildcards:
str(samples_table.loc[wildcards.sample, "multimappers"]),
soft_clip = lambda wildcards:
samples_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
samples_table.loc[wildcards.sample, "pass_mode"]
singularity:
threads: 12
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"map_genome_star.stderr.log")
shell:
"(STAR \
--runMode alignReads \
--twopassMode {params.pass_mode} \
--runThreadN {threads} \
--genomeDir {params.index} \
--readFilesIn {input.reads1} {input.reads2} \
--readFilesCommand zcat \
--outSAMunmapped None \
--outFilterMultimapNmax {params.multimappers} \
--outFilterMultimapScoreRange 1 \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outFilterMismatchNoverLmax 0.04 \
--outFilterScoreMinOverLread 0.3 \
--outFilterMatchNminOverLread 0.3 \
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
BIOPZ-Katsantoni Maria
committed
--alignEndsType {params.soft_clip} > {output.bam};) \
2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_index_genomic_alignment_samtools:
BIOPZ-Katsantoni Maria
committed
'''
Index the genomic alignment
'''
input:
bam = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
output:
bai = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
BIOPZ-Katsantoni Maria
committed
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_quantification_salmon:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Salmon
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, 'gtf_filtered'],
index = lambda wildcards:
os.path.join(
config["salmon_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx")
BIOPZ-Katsantoni Maria
committed
output:
gn_estimates = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant",
"quant.sf")
BIOPZ-Katsantoni Maria
committed
params:
output_dir = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
samples_table.loc[wildcards.sample, 'libtype']
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_salmon.stdout.log"),
threads: 6
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quant \
--libType {params.libType} \
--seqBias \
--validateMappings \
--threads {threads} \
--writeUnmappedNames \
--index {input.index} \
--geneMap {input.gtf} \
-1 {input.reads1} \
-2 {input.reads2} \
BIOPZ-Katsantoni Maria
committed
-o {params.output_dir}) 1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule pe_genome_quantification_kallisto:
BIOPZ-Katsantoni Maria
committed
'''
Quantification at transcript and gene level using Kallisto
'''
input:
reads1 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate1.fastq.gz"),
reads2 = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"{sample}.remove_polya_mate2.fastq.gz"),
index = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
samples_table.loc[wildcards.sample, 'organism'],
"kallisto.idx")
BIOPZ-Katsantoni Maria
committed
output:
pseudoalignment = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam")
BIOPZ-Katsantoni Maria
committed
params:
output_dir = os.path.join(
BIOPZ-Katsantoni Maria
committed
config["output_dir"],
"paired_end",
"{sample}",
"quant_kallisto"),
directionality = lambda wildcards:
samples_table.loc[wildcards.sample, "kallisto_directionality"]
BIOPZ-Katsantoni Maria
committed
singularity:
BIOPZ-Katsantoni Maria
committed
threads: 8
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"genome_quantification_kallisto.stderr.log")
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--pseudobam \
{params.directionality} \
BIOPZ-Katsantoni Maria
committed
{input.reads1} {input.reads2} > {output.pseudoalignment}) \
2> {log.stderr}"
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
rule star_rpm_paired_end:
''' Create stranded bedgraph coverage with STARs RPM normalisation '''
input:
bam = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
bai = os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
output:
str1 = (os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"paired_end",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = lambda wildcards, output: os.path.dirname(output.str1[0]),
prefix = lambda wildcards, output: os.path.join(os.path.dirname(output.str1[0]),
str(wildcards.sample) + "_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.7.3a-slim"
log:
stderr = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"star_rpm_paired_end.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"paired_end",
"{sample}",
"star_rpm_paired_end.stdout.log")
threads: 4
shell:
"""
(mkdir -p {params.out_dir}; \
chmod -R 777 {params.out_dir}; \
STAR \
--runMode inputAlignmentsFromBAM \
--runThreadN {threads} \
--inputBAMfile {input.bam} \
--outWigType "bedGraph" \
--outWigStrand "{params.stranded}" \
--outWigNorm "RPM" \
--outFileNamePrefix {params.prefix}) \
1> {log.stdout} 2> {log.stderr}
"""