Newer
Older
"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab"""
BIOPZ-Gypas Foivos
committed
import os
import sys
BIOPZ-Gypas Foivos
committed
# Get sample table
samples_table = pd.read_csv(
config['samples'],
header=0,
index_col=0,
comment='#',
engine='python',
sep="\t",
)
localrules: finish, rename_star_rpm_for_alfa, prepare_files_for_report, \
prepare_MultiQC_config
# Create log directories
os.makedirs(
os.path.join(
os.getcwd(),
config['log_dir'],
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
BIOPZ-Gypas Foivos
committed
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
BIOPZ-Katsantoni Maria
committed
"""
Rule for collecting outputs
"""
os.path.join(
config['output_dir'],
BIOPZ-Iborra de Toledo Paula
committed
BIOPZ-Gypas Foivos
committed
rule create_index_star:
BIOPZ-Katsantoni Maria
committed
"""
Create index for STAR alignments
"""
BIOPZ-Katsantoni Maria
committed
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
BIOPZ-Katsantoni Maria
committed
samples_table['gtf']
[samples_table['organism'] == wildcards.organism]
[0]
output:
chromosome_info = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrName.txt")
params:
output_dir = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index"),
outFileNamePrefix = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index/STAR_"),
sjdbOverhang = "{index_size}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_{index_size}_create_index_star.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
BIOPZ-Katsantoni Maria
committed
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Iborra de Toledo Paula
committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
rule extract_transcriptome:
""" Create transcriptome from genome and gene annotations """
input:
genome = lambda wildcards:
samples_table['genome'][
samples_table['organism'] == wildcards.organism
][0],
gtf = lambda wildcards:
samples_table['gtf'][
samples_table['organism'] == wildcards.organism
][0]
output:
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log")
singularity:
BIOPZ-Iborra de Toledo Paula
committed
shell:
"(gffread \
-w {output.transcriptome} \
-g {input.genome} {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule extract_decoys_salmon:
BIOPZ-Katsantoni Maria
committed
"""
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
Extract names of the genome targets
"""
input:
genome = lambda wildcards:
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
output:
decoys = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"decoys.txt")
singularity:
"docker://bash:5.0.16"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_extract_decoys_salmon.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_extract_decoys_salmon.stdout.log")
threads: 1
shell:
"""
(grep "^>" <{input.genome} \
| cut -d " " -f 1 > {output.decoys} && \
sed -i.bak -e 's/>//g' {output.decoys}) \
1> {log.stdout} 2> {log.stderr}
"""
rule concatenate_transcriptome_and_genome:
"""
Concatenate genome and transcriptome
BIOPZ-Katsantoni Maria
committed
"""
BIOPZ-Iborra de Toledo Paula
committed
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
),
genome = lambda wildcards:
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
output:
genome_transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"genome_transcriptome.fa",
BIOPZ-Iborra de Toledo Paula
committed
)
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
singularity:
"docker://bash:5.0.16"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_concatenate_transcriptome_and_genome.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_concatenate_transcriptome_and_genome.stdout.log")
shell:
"(cat {input.transcriptome} {input.genome} \
1> {output.genome_transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_salmon:
"""
Create index for Salmon quantification
"""
input:
genome_transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"genome_transcriptome.fa",
),
decoys = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"decoys.txt")
output:
index = directory(
os.path.join(
config['salmon_indexes'],
"{organism}",
BIOPZ-Katsantoni Maria
committed
"salmon.idx"))
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
"{organism}_{kmer}_create_index_salmon.stderr.log"),
BIOPZ-Katsantoni Maria
committed
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
--transcripts {input.genome_transcriptome} \
--decoys {input.decoys} \
BIOPZ-Katsantoni Maria
committed
--threads {threads}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule create_index_kallisto:
BIOPZ-Katsantoni Maria
committed
"""
Create index for Kallisto quantification
"""
BIOPZ-Iborra de Toledo Paula
committed
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
output:
index = os.path.join(
config['kallisto_indexes'],
"{organism}",
BIOPZ-Katsantoni Maria
committed
"kallisto.idx")
params:
output_dir = os.path.join(
config['kallisto_indexes'],
BIOPZ-Katsantoni Maria
committed
"{organism}")
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_create_index_kallisto.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
BIOPZ-Katsantoni Maria
committed
kallisto index -i {output.index} {input.transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Katsantoni Maria
committed
"""
Convert transcripts to BED12 format
"""
BIOPZ-Katsantoni Maria
committed
samples_table['gtf']
[0]
output:
bed12 = os.path.join(
config['output_dir'],
BIOPZ-Katsantoni Maria
committed
"full_transcripts_protein_coding.bed")
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"extract_transcripts_as_bed12.stderr.log")
BIOPZ-Katsantoni Maria
committed
"(gtf_transcript_type_to_bed12.pl \
BIOPZ-Katsantoni Maria
committed
--type=protein_coding > {output.bed12}); \
2> {log.stderr}"
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
rule index_genomic_alignment_samtools:
'''
Index genome bamfile using samtools
'''
input:
bam = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam")
output:
bai = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
singularity:
"docker://zavolab/samtools:1.10-slim"
threads: 1
log:
stderr = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule star_rpm:
''' Create stranded bedgraph coverage with STARs RPM normalisation '''
input:
bam = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
bai = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
output:
str1 = (os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = lambda wildcards, output: os.path.dirname(output.str1[0]),
prefix = lambda wildcards, output: os.path.join(os.path.dirname(output.str1[0]),
str(wildcards.sample) + "_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.7.3a-slim"
log:
stderr = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"star_rpm_single_end.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"star_rpm_single_end.stdout.log")
threads: 4
shell:
"""
(mkdir -p {params.out_dir}; \
chmod -R 777 {params.out_dir}; \
STAR \
--runMode inputAlignmentsFromBAM \
--runThreadN {threads} \
--inputBAMfile {input.bam} \
--outWigType "bedGraph" \
--outWigStrand {params.stranded} \
--outWigNorm "RPM" \
--outFileNamePrefix {params.prefix}) \
1> {log.stdout} 2> {log.stderr}
"""
rule rename_star_rpm_for_alfa:
input:
str1 = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
str2 = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
output:
plus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg")
params:
orientation = lambda wildcards: samples_table.loc[wildcards.sample, "kallisto_directionality"]
run:
if params['orientation'] == "--fr":
shutil.copy2(input['str1'], output['plus'])
shutil.copy2(input['str2'], output['minus'])
elif params['orientation'] == "--rf":
shutil.copy2(input['str1'], output['minus'])
shutil.copy2(input['str2'], output['plus'])
BIOPZ-Katsantoni Maria
committed
"""
Caluclate transcript integrity (TIN) score
"""
bam = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
bai = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
BIOPZ-Katsantoni Maria
committed
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
transcripts_bed12 = os.path.join(
config['output_dir'],
BIOPZ-Katsantoni Maria
committed
"full_transcripts_protein_coding.bed")
output:
TIN_score = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
BIOPZ-Katsantoni Maria
committed
"TIN_score.tsv")
BIOPZ-Katsantoni Maria
committed
sample = "{sample}"
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config['log_dir'],
"{seqmode}",
"{sample}",
BIOPZ-Katsantoni Maria
committed
"calculate_TIN_scores.log")
BIOPZ-Katsantoni Maria
committed
"docker://zavolab/tin_score_calculation:0.2.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
"(tin_score_calculation.py \
-i {input.bam} \
-r {input.transcripts_bed12} \
-c 0 \
--names {params.sample} \
BIOPZ-Katsantoni Maria
committed
-n 100 > {output.TIN_score};) 2> {log.stderr}"
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
rule merge_TIN_scores:
"""
Merge TIN scores tables
"""
input:
TIN_score = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)])
output:
TIN_scores_merged = os.path.join(
config['output_dir'],
"TIN_scores_merged.tsv")
params:
TIN_score_merged_paths = ",".join(expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]))
log:
stderr = os.path.join(
config['log_dir'],
"merge_TIN_scores.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"merge_TIN_scores.stdout.log")
threads: 1
singularity:
"docker://zavolab/tin_score_calculation:0.2.0-slim"
shell:
"(tin_score_merge.py \
--input-files {params.TIN_score_merged_paths} \
--output-file {output.TIN_scores_merged}) \
1> {log.stdout} 2> {log.stderr}"
rule plot_TIN_scores:
"""
Generate TIN scores boxplots
"""
input:
TIN_scores_merged = os.path.join(
config['output_dir'],
"TIN_scores_merged.tsv"),
output:
TIN_boxplot_PNG = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.png"),
TIN_boxplot_PDF = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.pdf")
params:
TIN_boxplot_prefix = os.path.join(
config['output_dir'],
"TIN_scores_boxplot")
log:
stderr = os.path.join(
config['log_dir'],
"plot_TIN_scores.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"plot_TIN_scores.stdout.log")
threads: 1
singularity:
"docker://zavolab/tin_score_calculation:0.2.0-slim"
shell:
"(tin_score_plot.py \
--input-file {input.TIN_scores_merged} \
--output-file-prefix {params.TIN_boxplot_prefix}) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_genes:
BIOPZ-Katsantoni Maria
committed
'''
Merge gene quantifications into a single file
'''
BIOPZ-Katsantoni Maria
committed
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"]))
BIOPZ-Katsantoni Maria
committed
salmon_out = os.path.join(
BIOPZ-Katsantoni Maria
committed
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--genes \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
BIOPZ-Katsantoni Maria
committed
--output {output.salmon_out};) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_transcripts:
BIOPZ-Katsantoni Maria
committed
'''
Merge gene quantifications into a single file
'''
BIOPZ-Katsantoni Maria
committed
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
BIOPZ-Katsantoni Maria
committed
salmon_out = os.path.join(
BIOPZ-Katsantoni Maria
committed
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
BIOPZ-Katsantoni Maria
committed
--output {output.salmon_out}) \
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
1> {log.stdout} 2> {log.stderr}"
#################################################################################
### ALFA: Annotation Landscape For Aligned reads
#################################################################################
directionality = {"--fr": "fr-firststrand", "--rf": "fr-secondstrand"}
rule generate_alfa_index:
''' Generate ALFA index files from sorted GTF file '''
input:
gtf = lambda wildcards: samples_table["gtf"][samples_table["organism"]==wildcards.organism][0],
chr_len = os.path.join(
config["star_indexes"],
"{organism}",
"{index_size}",
"STAR_index",
"chrNameLength.txt"),
output:
index_stranded = os.path.join(config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
"sorted_genes.stranded.ALFA_index"),
index_unstranded = os.path.join(config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
"sorted_genes.unstranded.ALFA_index")
params:
genome_index = "sorted_genes",
out_dir = lambda wildcards, output: os.path.dirname(output.index_stranded)
threads: 4
singularity:
log:
os.path.join(config["log_dir"], "{organism}_{index_size}_generate_alfa_index.log")
shell:
"""
alfa -a {input.gtf} \
-g {params.genome_index} \
--chr_len {input.chr_len} \
-p {threads} \
-o {params.out_dir} &> {log}
"""
rule alfa_qc:
''' Run ALFA from stranded bedgraph files '''
input:
plus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"],
samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]),
"ALFA",
"sorted_genes.stranded.ALFA_index")
output:
biotypes = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"ALFA_plots.Categories.pdf"),
table = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}.ALFA_feature_counts.tsv")
params:
out_dir = lambda wildcards, output: os.path.dirname(output.biotypes),
alfa_orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]],
in_file_plus = lambda wildcards, input: os.path.basename(input.plus),
in_file_minus = lambda wildcards, input: os.path.basename(input.minus),
genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")),
name = "{sample}"
singularity:
log:
os.path.abspath(os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
shell:
"""
cd {params.out_dir}; \
(alfa -g {params.genome_index} \
--bedgraph {params.in_file_plus} {params.in_file_minus} {params.name} \
-s {params.alfa_orientation}) &> {log}
"""
rule alfa_qc_all_samples:
''' Run ALFA from stranded bedgraph files on all samples '''
input:
tables = [os.path.join(
config["output_dir"],
samples_table.loc[sample1, "seqmode"],
str(sample1),
"ALFA",
sample1 + ".ALFA_feature_counts.tsv")
for sample1 in list(samples_table.index.values)]
output:
biotypes = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Categories.pdf")
params:
out_dir = lambda wildcards, output: os.path.dirname(output.biotypes)
log:
os.path.abspath(
os.path.join(config["log_dir"],
shell:
"""
(alfa -c {input.tables} -o {params.out_dir}) &> {log}
"""
rule alfa_concat_results:
input:
expand(os.path.join(
config["output_dir"],
"ALFA",
"{unique_type}",
"ALFA_plots.{annotation}.pdf"),
unique_type = ["Unique", "UniqueMultiple"],
annotation = ["Categories", "Biotypes"])
output:
expand(os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.concat.png"))
params:
density = 300
log:
os.path.abspath(
os.path.join(config["log_dir"],
"alfa_qc_all_samples.concat.log"))
singularity:
"docker://zavolab/imagemagick:7.0.8"
shell:
"""
convert -append -density {params.density} \
{input} {output} &> {log}
"""