Newer
Older
"""General purpose RNA-Seq analysis pipeline developed by the Zavolan Lab"""
BIOPZ-Gypas Foivos
committed
import os
import sys
BIOPZ-Gypas Foivos
committed
# Get sample table
samples_table = pd.read_csv(
config['samples'],
header=0,
index_col=0,
comment='#',
engine='python',
sep="\t",
)
localrules: finish, rename_star_rpm_for_alfa, prepare_files_for_report, \
prepare_MultiQC_config
# Create log directories
os.makedirs(
os.path.join(
os.getcwd(),
config['log_dir'],
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
BIOPZ-Katsantoni Maria
committed
exist_ok=True)
BIOPZ-Gypas Foivos
committed
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
BIOPZ-Katsantoni Maria
committed
"""
Rule for collecting outputs
"""
os.path.join(
config['output_dir'],
BIOPZ-Iborra de Toledo Paula
committed
BIOPZ-Gypas Foivos
committed
rule create_index_star:
BIOPZ-Katsantoni Maria
committed
"""
Create index for STAR alignments
"""
BIOPZ-Katsantoni Maria
committed
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
BIOPZ-Katsantoni Maria
committed
samples_table['gtf']
[samples_table['organism'] == wildcards.organism]
[0]
output:
chromosome_info = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrNameLength.txt"),
chromosomes_names = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
"STAR_index",
BIOPZ-Katsantoni Maria
committed
"chrName.txt")
params:
output_dir = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index"),
outFileNamePrefix = os.path.join(
config['star_indexes'],
"{organism}",
"{index_size}",
BIOPZ-Katsantoni Maria
committed
"STAR_index/STAR_"),
sjdbOverhang = "{index_size}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_{index_size}_create_index_star.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
BIOPZ-Katsantoni Maria
committed
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Iborra de Toledo Paula
committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
rule extract_transcriptome:
""" Create transcriptome from genome and gene annotations """
input:
genome = lambda wildcards:
samples_table['genome'][
samples_table['organism'] == wildcards.organism
][0],
gtf = lambda wildcards:
samples_table['gtf'][
samples_table['organism'] == wildcards.organism
][0]
output:
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log")
singularity:
BIOPZ-Iborra de Toledo Paula
committed
shell:
"(gffread \
-w {output.transcriptome} \
-g {input.genome} {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule create_index_salmon:
BIOPZ-Katsantoni Maria
committed
"""
Create index for Salmon quantification
"""
BIOPZ-Iborra de Toledo Paula
committed
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
output:
index = directory(
os.path.join(
config['salmon_indexes'],
"{organism}",
"{kmer}",
BIOPZ-Katsantoni Maria
committed
"salmon.idx"))
BIOPZ-Katsantoni Maria
committed
kmerLen = "{kmer}"
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_{kmer}_create_index_salmon.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon index \
--transcripts {input.transcriptome} \
--index {output.index} \
--kmerLen {params.kmerLen} \
BIOPZ-Katsantoni Maria
committed
--threads {threads}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
rule create_index_kallisto:
BIOPZ-Katsantoni Maria
committed
"""
Create index for Kallisto quantification
"""
BIOPZ-Iborra de Toledo Paula
committed
transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"transcriptome.fa",
)
output:
index = os.path.join(
config['kallisto_indexes'],
"{organism}",
BIOPZ-Katsantoni Maria
committed
"kallisto.idx")
params:
output_dir = os.path.join(
config['kallisto_indexes'],
BIOPZ-Katsantoni Maria
committed
"{organism}")
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"{organism}_create_index_kallisto.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
BIOPZ-Katsantoni Maria
committed
kallisto index -i {output.index} {input.transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
BIOPZ-Gypas Foivos
committed
BIOPZ-Katsantoni Maria
committed
"""
Convert transcripts to BED12 format
"""
BIOPZ-Katsantoni Maria
committed
samples_table['gtf']
[0]
output:
bed12 = os.path.join(
config['output_dir'],
BIOPZ-Katsantoni Maria
committed
"full_transcripts_protein_coding.bed")
"docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
BIOPZ-Katsantoni Maria
committed
"extract_transcripts_as_bed12.stderr.log")
BIOPZ-Katsantoni Maria
committed
"(gtf_transcript_type_to_bed12.pl \
BIOPZ-Katsantoni Maria
committed
--type=protein_coding > {output.bed12}); \
2> {log.stderr}"
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
rule index_genomic_alignment_samtools:
'''
Index genome bamfile using samtools
'''
input:
bam = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam")
output:
bai = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
singularity:
"docker://zavolab/samtools:1.10-slim"
threads: 1
log:
stderr = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"index_genomic_alignment_samtools.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"index_genomic_alignment_samtools.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
rule star_rpm:
''' Create stranded bedgraph coverage with STARs RPM normalisation '''
input:
bam = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
bai = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
output:
str1 = (os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str1.out.bg"),
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str1.out.bg")),
str2 = (os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.Unique.str2.out.bg"),
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str2.out.bg"))
params:
out_dir = lambda wildcards, output: os.path.dirname(output.str1[0]),
prefix = lambda wildcards, output: os.path.join(os.path.dirname(output.str1[0]),
str(wildcards.sample) + "_"),
stranded = "Stranded"
singularity:
"docker://zavolab/star:2.7.3a-slim"
log:
stderr = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"star_rpm_single_end.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
"star_rpm_single_end.stdout.log")
threads: 4
shell:
"""
(mkdir -p {params.out_dir}; \
chmod -R 777 {params.out_dir}; \
STAR \
--runMode inputAlignmentsFromBAM \
--runThreadN {threads} \
--inputBAMfile {input.bam} \
--outWigType "bedGraph" \
--outWigStrand {params.stranded} \
--outWigNorm "RPM" \
--outFileNamePrefix {params.prefix}) \
1> {log.stdout} 2> {log.stderr}
"""
rule rename_star_rpm_for_alfa:
input:
str1 = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
str2 = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
output:
plus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg")
params:
orientation = lambda wildcards: samples_table.loc[wildcards.sample, "kallisto_directionality"]
run:
if params['orientation'] == "--fr":
shutil.copy2(input['str1'], output['plus'])
shutil.copy2(input['str2'], output['minus'])
elif params['orientation'] == "--rf":
shutil.copy2(input['str1'], output['minus'])
shutil.copy2(input['str2'], output['plus'])
BIOPZ-Katsantoni Maria
committed
"""
Caluclate transcript integrity (TIN) score
"""
bam = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
bai = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"map_genome",
BIOPZ-Katsantoni Maria
committed
"{sample}_Aligned.sortedByCoord.out.bam.bai"),
transcripts_bed12 = os.path.join(
config['output_dir'],
BIOPZ-Katsantoni Maria
committed
"full_transcripts_protein_coding.bed")
output:
TIN_score = os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
BIOPZ-Katsantoni Maria
committed
"TIN_score.tsv")
BIOPZ-Katsantoni Maria
committed
sample = "{sample}"
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config['log_dir'],
"{seqmode}",
"{sample}",
BIOPZ-Katsantoni Maria
committed
"calculate_TIN_scores.log")
BIOPZ-Katsantoni Maria
committed
"docker://zavolab/tin_score_calculation:0.2.0-slim"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
"(tin_score_calculation.py \
-i {input.bam} \
-r {input.transcripts_bed12} \
-c 0 \
--names {params.sample} \
BIOPZ-Katsantoni Maria
committed
-n 100 > {output.TIN_score};) 2> {log.stderr}"
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
rule merge_TIN_scores:
"""
Merge TIN scores tables
"""
input:
TIN_score = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)])
output:
TIN_scores_merged = os.path.join(
config['output_dir'],
"TIN_scores_merged.tsv")
params:
TIN_score_merged_paths = ",".join(expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"TIN",
"TIN_score.tsv"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]))
log:
stderr = os.path.join(
config['log_dir'],
"merge_TIN_scores.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"merge_TIN_scores.stdout.log")
threads: 1
singularity:
"docker://zavolab/tin_score_calculation:0.2.0-slim"
shell:
"(tin_score_merge.py \
--input-files {params.TIN_score_merged_paths} \
--output-file {output.TIN_scores_merged}) \
1> {log.stdout} 2> {log.stderr}"
rule plot_TIN_scores:
"""
Generate TIN scores boxplots
"""
input:
TIN_scores_merged = os.path.join(
config['output_dir'],
"TIN_scores_merged.tsv"),
output:
TIN_boxplot_PNG = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.png"),
TIN_boxplot_PDF = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.pdf")
params:
TIN_boxplot_prefix = os.path.join(
config['output_dir'],
"TIN_scores_boxplot")
log:
stderr = os.path.join(
config['log_dir'],
"plot_TIN_scores.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"plot_TIN_scores.stdout.log")
threads: 1
singularity:
"docker://zavolab/tin_score_calculation:0.2.0-slim"
shell:
"(tin_score_plot.py \
--input-file {input.TIN_scores_merged} \
--output-file-prefix {params.TIN_boxplot_prefix}) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_genes:
BIOPZ-Katsantoni Maria
committed
'''
Merge gene quantifications into a single file
'''
BIOPZ-Katsantoni Maria
committed
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"]))
BIOPZ-Katsantoni Maria
committed
salmon_out = os.path.join(
BIOPZ-Katsantoni Maria
committed
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--genes \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
BIOPZ-Katsantoni Maria
committed
--output {output.salmon_out};) \
1> {log.stdout} 2> {log.stderr}"
rule salmon_quantmerge_transcripts:
BIOPZ-Katsantoni Maria
committed
'''
Merge gene quantifications into a single file
'''
BIOPZ-Katsantoni Maria
committed
salmon_in = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant",
"quant.sf"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
BIOPZ-Katsantoni Maria
committed
salmon_out = os.path.join(
BIOPZ-Katsantoni Maria
committed
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv")
params:
salmon_dir = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"salmon_quant"),
BIOPZ-Katsantoni Maria
committed
sample=list(samples_table.index.values),
seqmode=list(samples_table["seqmode"])),
sample_name_list = expand(
"{sample}",
sample=list(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
BIOPZ-Katsantoni Maria
committed
BIOPZ-Katsantoni Maria
committed
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
BIOPZ-Katsantoni Maria
committed
shell:
"(salmon quantmerge \
--quants {params.salmon_dir} \
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
BIOPZ-Katsantoni Maria
committed
--output {output.salmon_out}) \
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
1> {log.stdout} 2> {log.stderr}"
#################################################################################
### ALFA: Annotation Landscape For Aligned reads
#################################################################################
directionality = {"--fr": "fr-firststrand", "--rf": "fr-secondstrand"}
rule generate_alfa_index:
''' Generate ALFA index files from sorted GTF file '''
input:
gtf = lambda wildcards: samples_table["gtf"][samples_table["organism"]==wildcards.organism][0],
chr_len = os.path.join(
config["star_indexes"],
"{organism}",
"{index_size}",
"STAR_index",
"chrNameLength.txt"),
output:
index_stranded = os.path.join(config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
"sorted_genes.stranded.ALFA_index"),
index_unstranded = os.path.join(config["alfa_indexes"],
"{organism}",
"{index_size}",
"ALFA",
"sorted_genes.unstranded.ALFA_index")
params:
genome_index = "sorted_genes",
out_dir = lambda wildcards, output: os.path.dirname(output.index_stranded)
threads: 4
singularity:
log:
os.path.join(config["log_dir"], "{organism}_{index_size}_generate_alfa_index.log")
shell:
"""
alfa -a {input.gtf} \
-g {params.genome_index} \
--chr_len {input.chr_len} \
-p {threads} \
-o {params.out_dir} &> {log}
"""
rule alfa_qc:
''' Run ALFA from stranded bedgraph files '''
input:
plus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.plus.bg"),
minus = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{unique}",
"{sample}_Signal.{unique}.out.minus.bg"),
gtf = lambda wildcards: os.path.join(config["alfa_indexes"],
samples_table.loc[wildcards.sample, "organism"],
str(samples_table.loc[wildcards.sample, "index_size"]),
"ALFA",
"sorted_genes.stranded.ALFA_index")
output:
biotypes = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"ALFA_plots.Categories.pdf"),
table = os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"ALFA",
"{sample}.ALFA_feature_counts.tsv")
params:
out_dir = lambda wildcards, output: os.path.dirname(output.biotypes),
alfa_orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]],
in_file_plus = lambda wildcards, input: os.path.basename(input.plus),
in_file_minus = lambda wildcards, input: os.path.basename(input.minus),
genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")),
name = "{sample}"
singularity:
log:
os.path.abspath(os.path.join(
config["log_dir"],
"{seqmode}",
"{sample}",
shell:
"""
cd {params.out_dir}; \
(alfa -g {params.genome_index} \
--bedgraph {params.in_file_plus} {params.in_file_minus} {params.name} \
-s {params.alfa_orientation}) &> {log}
"""
rule alfa_qc_all_samples:
''' Run ALFA from stranded bedgraph files on all samples '''
input:
tables = [os.path.join(
config["output_dir"],
samples_table.loc[sample1, "seqmode"],
str(sample1),
"ALFA",
sample1 + ".ALFA_feature_counts.tsv")
for sample1 in list(samples_table.index.values)]
output:
biotypes = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Biotypes.pdf"),
categories = os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.Categories.pdf")
params:
out_dir = lambda wildcards, output: os.path.dirname(output.biotypes)
log:
os.path.abspath(
os.path.join(config["log_dir"],
shell:
"""
(alfa -c {input.tables} -o {params.out_dir}) &> {log}
"""
rule alfa_concat_results:
input:
expand(os.path.join(
config["output_dir"],
"ALFA",
"{unique_type}",
"ALFA_plots.{annotation}.pdf"),
unique_type = ["Unique", "UniqueMultiple"],
annotation = ["Categories", "Biotypes"])
output:
expand(os.path.join(
config["output_dir"],
"ALFA",
"ALFA_plots.concat.png"))
params:
density = 300
log:
os.path.abspath(
os.path.join(config["log_dir"],
"alfa_qc_all_samples.concat.log"))
singularity:
shell:
"""
convert -append -density {params.density} \
{input} {output} &> {log}
"""
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
rule prepare_files_for_report:
'''
Re-structure the results and add comments for MultiQC parsing
'''
input:
outdir1 = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"mate1_fastqc"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
pseudoalignment = expand(
os.path.join(
config['output_dir'],
"{seqmode}",
"{sample}",
"quant_kallisto",
"{sample}.kallisto.pseudo.sam"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
TIN_boxplot_PNG = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.png"),
TIN_boxplot_PDF = os.path.join(
config['output_dir'],
"TIN_scores_boxplot.pdf"),
salmon_merge_genes = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"genes_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"]),
salmon_merge_transcripts = expand(
os.path.join(
config["output_dir"],
"summary_salmon",
"quantmerge",
"transcripts_{salmon_merge_on}.tsv"),
salmon_merge_on=["tpm", "numreads"]),
star_rpm = expand(
os.path.join(
config["output_dir"],
"{seqmode}",
"{sample}",
"STAR_coverage",
"{sample}_Signal.UniqueMultiple.str1.out.bg"),
zip,
sample=[i for i in list(samples_table.index.values)],
seqmode=[samples_table.loc[i, 'seqmode']
for i in list(samples_table.index.values)]),
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
output:
samples_dir = directory(os.path.join(
"{output_dir}",
"samples"))
params:
results_dir = config["output_dir"],
log_dir = config["log_dir"],
log_samples_dir = os.path.join(
config["log_dir"],
"samples")
log:
LOG_local_log = \
os.path.join("{output_dir}", "local_log", \
"prepare_files_for_report.log")
run:
# remove "single/paired end" from the results directories
os.mkdir(output.samples_dir)
# move paired end results
paired_end_dir = glob.glob(
os.path.join(
params.results_dir,
"paired_end",
"*"))
for s in paired_end_dir:
sample_name = s.split("/")[-1]
shutil.copytree(
s, \
os.path.join(
params.results_dir,