diff --git a/Snakefile b/Snakefile index 283403578f98200ab045856e651c0d5b9d3be042..4e3d7c76b74e0e7f9c83619991ec0af08dd31b5e 100644 --- a/Snakefile +++ b/Snakefile @@ -24,84 +24,82 @@ os.makedirs( os.getcwd(), config['log_dir'], ), - exist_ok=True, -) + exist_ok=True) + if cluster_config: os.makedirs( os.path.join( os.getcwd(), os.path.dirname(cluster_config['__default__']['out']), ), - exist_ok=True, - ) + exist_ok=True) # Include subworkflows include: os.path.join("workflow", "rules", "paired_end.snakefile.smk") include: os.path.join("workflow", "rules", "single_end.snakefile.smk") -# Final rule rule finish: - """Rule for collecting outputs""" + """ + Rule for collecting outputs + """ input: outdir1 = expand( os.path.join( config['output_dir'], "{seqmode}", "{sample}", - "mate1_fastqc", - ), + "mate1_fastqc"), zip, sample=[i for i in list(samples_table.index.values)], - seqmode=[ - samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values) - ] - ), - salmon_gn_estimates = expand( - os.path.join( - config['output_dir'], - "{seqmode}", - "{sample}", - "salmon_quant", - "quant.genes.sf", - ), - zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[ - samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values) - ] - ), + seqmode=[samples_table.loc[i, 'seqmode'] + for i in list(samples_table.index.values)]), pseudoalignment = expand( os.path.join( config['output_dir'], "{seqmode}", "{sample}", "quant_kallisto", - "{sample}.kallisto.pseudo.sam", - ), + "{sample}.kallisto.pseudo.sam"), zip, sample=[i for i in list(samples_table.index.values)], - seqmode=[ - samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values) - ] - ), + seqmode=[samples_table.loc[i, 'seqmode'] + for i in list(samples_table.index.values)]), TIN_score = expand( os.path.join( config['output_dir'], "{seqmode}", "{sample}", "TIN", - "TIN_score.tsv", - ), + "TIN_score.tsv"), zip, sample=[i for i in list(samples_table.index.values)], - 
seqmode=[ - samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values) - ] - ), + seqmode=[samples_table.loc[i, 'seqmode'] + for i in list(samples_table.index.values)]), + salmon_merge_genes = expand( + os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "genes_{salmon_merge_on}.tsv"), + salmon_merge_on=["tpm", "numreads"]), + salmon_merge_transcripts = expand( + os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "transcripts_{salmon_merge_on}.tsv"), + salmon_merge_on=["tpm", "numreads"]), + star_rpm = expand( + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "STAR_coverage", + "{sample}_Signal.UniqueMultiple.str1.out.bg"), + zip, + sample=[i for i in list(samples_table.index.values)], + seqmode=[samples_table.loc[i, 'seqmode'] + for i in list(samples_table.index.values)]), alfa_reports = expand(os.path.join( config["output_dir"], "{seqmode}", @@ -112,62 +110,68 @@ rule finish: sample= [i for i in list(samples_table.index.values)], seqmode= [ samples_table.loc[i,"seqmode"] - for i in list(samples_table.index.values)] - ), + for i in list(samples_table.index.values)]), alfa_all_samples = os.path.join( config["output_dir"], "ALFA", - "ALFA_plots.Categories.pdf"), + "ALFA_plots.Categories.pdf") + rule create_index_star: - """Create index for STAR alignments""" + """ + Create index for STAR alignments + """ input: genome = lambda wildcards: - samples_table['genome'][ - samples_table['organism'] == wildcards.organism - ][0], + samples_table['genome'] + [samples_table['organism'] == wildcards.organism] + [0], gtf = lambda wildcards: - samples_table['gtf'][ - samples_table['organism'] == wildcards.organism - ][0], + samples_table['gtf'] + [samples_table['organism'] == wildcards.organism] + [0] + output: chromosome_info = os.path.join( config['star_indexes'], "{organism}", "{index_size}", "STAR_index", - "chrNameLength.txt", - ), + "chrNameLength.txt"), chromosomes_names = os.path.join( 
config['star_indexes'], "{organism}", "{index_size}", "STAR_index", - "chrName.txt", - ), + "chrName.txt") + params: output_dir = os.path.join( config['star_indexes'], "{organism}", "{index_size}", - "STAR_index", - ), + "STAR_index"), outFileNamePrefix = os.path.join( config['star_indexes'], "{organism}", "{index_size}", - "STAR_index/STAR_", - ), - sjdbOverhang = "{index_size}", + "STAR_index/STAR_"), + sjdbOverhang = "{index_size}" + singularity: - "docker://zavolab/star:2.6.0a" + "docker://zavolab/star:2.7.3a-slim" + threads: 12 + log: - os.path.join( + stderr = os.path.join( + config['log_dir'], + "{organism}_{index_size}_create_index_star.stderr.log"), + stdout = os.path.join( config['log_dir'], - "{organism}_{index_size}_create_index_star.log", - ) + "{organism}_{index_size}_create_index_star.stdout.log") + shell: "(mkdir -p {params.output_dir}; \ chmod -R 777 {params.output_dir}; \ @@ -178,150 +182,453 @@ rule create_index_star: --genomeFastaFiles {input.genome} \ --runThreadN {threads} \ --outFileNamePrefix {params.outFileNamePrefix} \ - --sjdbGTFfile {input.gtf}) &> {log}" + --sjdbGTFfile {input.gtf}) \ + 1> {log.stdout} 2> {log.stderr}" +rule extract_transcriptome: + """ Create transcriptome from genome and gene annotations """ + input: + genome = lambda wildcards: + samples_table['genome'][ + samples_table['organism'] == wildcards.organism + ][0], + gtf = lambda wildcards: + samples_table['gtf'][ + samples_table['organism'] == wildcards.organism + ][0] + output: + transcriptome = os.path.join( + config['output_dir'], + "transcriptome", + "{organism}", + "transcriptome.fa", + ) + log: + stderr = os.path.join( + config['log_dir'], + "{organism}_extract_transcriptome.log"), + stdout = os.path.join( + config['log_dir'], + "{organism}_extract_transcriptome.log") + singularity: + "docker://zavolab/gffread:0.11.7" + shell: + "(gffread \ + -w {output.transcriptome} \ + -g {input.genome} {input.gtf}) \ + 1> {log.stdout} 2> {log.stderr}" rule create_index_salmon: 
- """Create index for Salmon quantification""" + """ + Create index for Salmon quantification + """ input: - transcriptome = lambda wildcards: - samples_table['tr_fasta_filtered'][ - samples_table['organism'] == wildcards.organism - ][0] + transcriptome = os.path.join( + config['output_dir'], + "transcriptome", + "{organism}", + "transcriptome.fa", + ) output: index = directory( os.path.join( config['salmon_indexes'], "{organism}", "{kmer}", - "salmon.idx", - ) - ), + "salmon.idx")) + params: - kmerLen = "{kmer}", + kmerLen = "{kmer}" + singularity: - "docker://zavolab/salmon:0.11.0" + "docker://zavolab/salmon:1.1.0-slim" + log: - os.path.join( + stderr = os.path.join( config['log_dir'], - "{organism}_{kmer}_create_index_salmon.log" - ) + "{organism}_{kmer}_create_index_salmon.stderr.log"), + stdout = os.path.join( + config['log_dir'], + "{organism}_{kmer}_create_index_salmon.stdout.log") + threads: 8 + shell: "(salmon index \ --transcripts {input.transcriptome} \ --index {output.index} \ --kmerLen {params.kmerLen} \ - --threads {threads}) &> {log}" + --threads {threads}) \ + 1> {log.stdout} 2> {log.stderr}" rule create_index_kallisto: - """Create index for Kallisto quantification""" + """ + Create index for Kallisto quantification + """ input: - transcriptome = lambda wildcards: - samples_table['tr_fasta_filtered'][ - samples_table['organism'] == wildcards.organism - ][0], + transcriptome = os.path.join( + config['output_dir'], + "transcriptome", + "{organism}", + "transcriptome.fa", + ) output: index = os.path.join( config['kallisto_indexes'], "{organism}", - "kallisto.idx", - ), + "kallisto.idx") + params: output_dir = os.path.join( config['kallisto_indexes'], - "{organism}", - ), + "{organism}") + singularity: - "docker://zavolab/kallisto:0.46.1" + "docker://zavolab/kallisto:0.46.1-slim" + log: - os.path.join( + stderr = os.path.join( config['log_dir'], - "{organism}_create_index_kallisto.log" - ) + "{organism}_create_index_kallisto.stderr.log"), + stdout = 
os.path.join( + config['log_dir'], + "{organism}_create_index_kallisto.stdout.log") + shell: "(mkdir -p {params.output_dir}; \ chmod -R 777 {params.output_dir}; \ - kallisto index -i {output.index} {input.transcriptome}) &> {log}" + kallisto index -i {output.index} {input.transcriptome}) \ + 1> {log.stdout} 2> {log.stderr}" rule extract_transcripts_as_bed12: - """Convert transcripts to BED12 format""" + """ + Convert transcripts to BED12 format + """ input: gtf = lambda wildcards: - samples_table['gtf'][0], + samples_table['gtf'] + [0] + output: bed12 = os.path.join( config['output_dir'], - "full_transcripts_protein_coding.bed", - ), + "full_transcripts_protein_coding.bed") + singularity: - "docker://zavolab/gtf_transcript_type_to_bed12:0.1.0" + "docker://zavolab/gtf_transcript_type_to_bed12:0.1.0-slim" + threads: 1 + log: - os.path.join( + stderr = os.path.join( config['log_dir'], - "extract_transcripts_as_bed12.log", - ) + "extract_transcripts_as_bed12.stderr.log") + shell: - "gtf_transcript_type_to_bed12.pl \ + "(gtf_transcript_type_to_bed12.pl \ --anno={input.gtf} \ - --type=protein_coding \ - 1> {output.bed12} \ - 2> {log}" + --type=protein_coding > {output.bed12}); \ + 2> {log.stderr}" + + +rule index_genomic_alignment_samtools: + ''' + Index genome bamfile using samtools + ''' + input: + bam = os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam") + + output: + bai = os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam.bai") + + singularity: + "docker://zavolab/samtools:1.10-slim" + + threads: 1 + + log: + stderr = os.path.join( + config["log_dir"], + "{seqmode}", + "{sample}", + "index_genomic_alignment_samtools.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "{seqmode}", + "{sample}", + "index_genomic_alignment_samtools.stdout.log") + + shell: + "(samtools index {input.bam} {output.bai};) \ + 1> 
{log.stdout} 2> {log.stderr}" + + +rule star_rpm: + ''' Create stranded bedgraph coverage with STARs RPM normalisation ''' + input: + bam = os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam"), + bai = os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam.bai") + + output: + str1 = (os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "STAR_coverage", + "{sample}_Signal.Unique.str1.out.bg"), + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "STAR_coverage", + "{sample}_Signal.UniqueMultiple.str1.out.bg")), + str2 = (os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "STAR_coverage", + "{sample}_Signal.Unique.str2.out.bg"), + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "STAR_coverage", + "{sample}_Signal.UniqueMultiple.str2.out.bg")) + + params: + out_dir = lambda wildcards, output: os.path.dirname(output.str1[0]), + prefix = lambda wildcards, output: os.path.join(os.path.dirname(output.str1[0]), + str(wildcards.sample) + "_"), + stranded = "Stranded" + + singularity: + "docker://zavolab/star:2.7.3a-slim" + + log: + stderr = os.path.join( + config["log_dir"], + "{seqmode}", + "{sample}", + "star_rpm_single_end.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "{seqmode}", + "{sample}", + "star_rpm_single_end.stdout.log") + + threads: 4 + + shell: + """ + (mkdir -p {params.out_dir}; \ + chmod -R 777 {params.out_dir}; \ + STAR \ + --runMode inputAlignmentsFromBAM \ + --runThreadN {threads} \ + --inputBAMfile {input.bam} \ + --outWigType "bedGraph" \ + --outWigStrand {params.stranded} \ + --outWigNorm "RPM" \ + --outFileNamePrefix {params.prefix}) \ + 1> {log.stdout} 2> {log.stderr} + """ rule calculate_TIN_scores: - """Caluclate transcript integrity (TIN) score""" + """ + Caluclate transcript integrity (TIN) score + """ input: + bam = 
os.path.join( + config['output_dir'], + "{seqmode}", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam"), bai = os.path.join( config['output_dir'], "{seqmode}", "{sample}", "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam.bai" - ), + "{sample}_Aligned.sortedByCoord.out.bam.bai"), transcripts_bed12 = os.path.join( config['output_dir'], - "full_transcripts_protein_coding.bed" - ), + "full_transcripts_protein_coding.bed") + output: TIN_score = os.path.join( config['output_dir'], "{seqmode}", "{sample}", "TIN", - "TIN_score.tsv", - ), + "TIN_score.tsv") + params: - bam = os.path.join( - config['output_dir'], - "{seqmode}", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam" - ), - sample = "{sample}", + sample = "{sample}" + log: - os.path.join( + stderr = os.path.join( config['log_dir'], "{seqmode}", "{sample}", - "calculate_TIN_scores.log", - ) + "calculate_TIN_scores.log") + threads: 8 + singularity: - "docker://zavolab/tin_score_calculation:0.1.0" + "docker://zavolab/tin_score_calculation:0.1.0-slim" + shell: - "tin_score_calculation.py \ - -i {params.bam} \ + "(tin_score_calculation.py \ + -i {input.bam} \ -r {input.transcripts_bed12} \ -c 0 \ --names {params.sample} \ - -n 100 \ - 1> {output.TIN_score} \ - 2> {log}" + -n 100 > {output.TIN_score};) 2> {log.stderr}" + + +rule salmon_quantmerge_genes: + ''' + Merge gene quantifications into a single file + ''' + input: + salmon_in = expand( + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "salmon_quant", + "quant.genes.sf"), + zip, + sample=list(samples_table.index.values), + seqmode=list(samples_table["seqmode"])) + + output: + salmon_out = os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "genes_{salmon_merge_on}.tsv") + + params: + salmon_dir = expand( + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "salmon_quant"), + zip, + sample=list(samples_table.index.values), + 
seqmode=list(samples_table["seqmode"])), + sample_name_list = expand( + "{sample}", + sample=list(samples_table.index.values)), + salmon_merge_on = "{salmon_merge_on}" + + log: + stderr = os.path.join( + config["log_dir"], + "salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "salmon_quantmerge_genes_{salmon_merge_on}.stdout.log") + + threads: 1 + + singularity: + "docker://zavolab/salmon:1.1.0-slim" + + shell: + "(salmon quantmerge \ + --quants {params.salmon_dir} \ + --genes \ + --names {params.sample_name_list} \ + --column {params.salmon_merge_on} \ + --output {output.salmon_out};) \ + 1> {log.stdout} 2> {log.stderr}" + + +rule salmon_quantmerge_transcripts: + ''' + Merge gene quantifications into a single file + ''' + input: + salmon_in = expand( + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "salmon_quant", + "quant.sf"), + zip, + sample=list(samples_table.index.values), + seqmode=list(samples_table["seqmode"])), + + output: + salmon_out = os.path.join( + config["output_dir"], + "summary_salmon", + "quantmerge", + "transcripts_{salmon_merge_on}.tsv") + + params: + salmon_dir = expand( + os.path.join( + config["output_dir"], + "{seqmode}", + "{sample}", + "salmon_quant"), + zip, + sample=list(samples_table.index.values), + seqmode=list(samples_table["seqmode"])), + sample_name_list = expand( + "{sample}", + sample=list(samples_table.index.values)), + salmon_merge_on = "{salmon_merge_on}" + + log: + stderr = os.path.join( + config["log_dir"], + "salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log") + + threads: 1 + + singularity: + "docker://zavolab/salmon:1.1.0-slim" + + shell: + "(salmon quantmerge \ + --quants {params.salmon_dir} \ + --names {params.sample_name_list} \ + --column {params.salmon_merge_on} \ + --output {output.salmon_out}) \ + 1> {log.stdout} 2> 
{log.stderr}" + ################################################################################# ### ALFA: Annotation Landscape For Aligned reads diff --git a/images/dag_test_workflow.svg b/images/dag_test_workflow.svg index 6b992f3ac567c7b0bb1e670089e234ef8db3b360..0c0c25a7a513e3dd8c1fd0299cde5aada907fa4d 100644 --- a/images/dag_test_workflow.svg +++ b/images/dag_test_workflow.svg @@ -4,266 +4,404 @@ <!-- Generated by graphviz version 2.38.0 (20140413.2041) --> <!-- Title: snakemake_dag Pages: 1 --> -<svg width="1338pt" height="409pt" - viewBox="0.00 0.00 1338.00 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<svg width="1858pt" height="409pt" + viewBox="0.00 0.00 1857.85 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 405)"> <title>snakemake_dag</title> -<polygon fill="white" stroke="none" points="-4,4 -4,-405 1334,-405 1334,4 -4,4"/> +<polygon fill="white" stroke="none" points="-4,4 -4,-405 1853.85,-405 1853.85,4 -4,4"/> <!-- 0 --> <g id="node1" class="node"><title>0</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M714,-36C714,-36 684,-36 684,-36 678,-36 672,-30 672,-24 672,-24 672,-12 672,-12 672,-6 678,-0 684,-0 684,-0 714,-0 714,-0 720,-0 726,-6 726,-12 726,-12 726,-24 726,-24 726,-30 720,-36 714,-36"/> -<text text-anchor="middle" x="699" y="-15.5" font-family="sans" font-size="10.00">finish</text> +<path fill="none" stroke="#d88556" stroke-width="2" d="M1050,-36C1050,-36 1020,-36 1020,-36 1014,-36 1008,-30 1008,-24 1008,-24 1008,-12 1008,-12 1008,-6 1014,-0 1020,-0 1020,-0 1050,-0 1050,-0 1056,-0 1062,-6 1062,-12 1062,-12 1062,-24 1062,-24 1062,-30 1056,-36 1050,-36"/> +<text text-anchor="middle" x="1035" y="-15.5" font-family="sans" font-size="10.00">finish</text> </g> <!-- 1 --> <g id="node2" class="node"><title>1</title> -<path fill="none" stroke="#d8a456" stroke-width="2" 
d="M280,-108C280,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 280,-72 280,-72 286,-72 292,-78 292,-84 292,-84 292,-96 292,-96 292,-102 286,-108 280,-108"/> +<path fill="none" stroke="#56d8a2" stroke-width="2" d="M280,-108C280,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 280,-72 280,-72 286,-72 292,-78 292,-84 292,-84 292,-96 292,-96 292,-102 286,-108 280,-108"/> <text text-anchor="middle" x="146" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text> <text text-anchor="middle" x="146" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> </g> <!-- 1->0 --> <g id="edge1" class="edge"><title>1->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M280.572,-71.9656C406.068,-56.08 585.083,-33.4199 661.788,-23.7104"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="662.435,-27.1565 671.916,-22.4284 661.555,-20.212 662.435,-27.1565"/> +<path fill="none" stroke="grey" stroke-width="2" d="M292.017,-72.8323C295.037,-72.5474 298.034,-72.2696 301,-72 567.9,-47.736 890.22,-27.651 997.94,-21.1875"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="998.204,-24.6781 1007.98,-20.5875 997.786,-17.6906 998.204,-24.6781"/> </g> <!-- 2 --> <g id="node3" class="node"><title>2</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M602,-108C602,-108 322,-108 322,-108 316,-108 310,-102 310,-96 310,-96 310,-84 310,-84 310,-78 316,-72 322,-72 322,-72 602,-72 602,-72 608,-72 614,-78 614,-84 614,-84 614,-96 614,-96 614,-102 608,-108 602,-108"/> +<path fill="none" stroke="#d8c356" stroke-width="2" d="M602,-108C602,-108 322,-108 322,-108 316,-108 310,-102 310,-96 310,-96 310,-84 310,-84 310,-78 316,-72 322,-72 322,-72 602,-72 602,-72 608,-72 614,-78 614,-84 614,-84 614,-96 614,-96 614,-102 608,-108 602,-108"/> <text text-anchor="middle" x="462" y="-93" font-family="sans" font-size="10.00">fastqc</text> <text 
text-anchor="middle" x="462" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> </g> <!-- 2->0 --> <g id="edge2" class="edge"><title>2->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M519.674,-71.9656C564.073,-58.8519 624.103,-41.1216 662.193,-29.8712"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="663.253,-33.2076 671.852,-27.0183 661.27,-26.4943 663.253,-33.2076"/> +<path fill="none" stroke="grey" stroke-width="2" d="M601.439,-71.9656C732.107,-56.0027 918.769,-33.1992 997.595,-23.5696"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="998.483,-26.9872 1007.98,-22.3003 997.634,-20.0389 998.483,-26.9872"/> </g> <!-- 3 --> <g id="node4" class="node"><title>3</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M513.5,-252C513.5,-252 410.5,-252 410.5,-252 404.5,-252 398.5,-246 398.5,-240 398.5,-240 398.5,-228 398.5,-228 398.5,-222 404.5,-216 410.5,-216 410.5,-216 513.5,-216 513.5,-216 519.5,-216 525.5,-222 525.5,-228 525.5,-228 525.5,-240 525.5,-240 525.5,-246 519.5,-252 513.5,-252"/> -<text text-anchor="middle" x="462" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> +<path fill="none" stroke="#d89c56" stroke-width="2" d="M713,-180C713,-180 571,-180 571,-180 565,-180 559,-174 559,-168 559,-168 559,-156 559,-156 559,-150 565,-144 571,-144 571,-144 713,-144 713,-144 719,-144 725,-150 725,-156 725,-156 725,-168 725,-168 725,-174 719,-180 713,-180"/> +<text text-anchor="middle" x="642" y="-159.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> </g> <!-- 3->0 --> <g id="edge3" class="edge"><title>3->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M487.264,-215.826C520.088,-192.982 578.459,-150.513 623,-108 643.903,-88.0485 665.037,-62.797 679.694,-44.2918"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="682.537,-46.3381 685.948,-36.3074 677.026,-42.0215 
682.537,-46.3381"/> +<path fill="none" stroke="grey" stroke-width="2" d="M645.855,-143.817C651.431,-123.229 663.839,-89.3457 688,-72 737.154,-36.7105 918.852,-24.2782 997.883,-20.4767"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="998.057,-23.9725 1007.89,-20.0156 997.735,-16.9799 998.057,-23.9725"/> </g> <!-- 4 --> <g id="node5" class="node"><title>4</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M642.5,-252C642.5,-252 555.5,-252 555.5,-252 549.5,-252 543.5,-246 543.5,-240 543.5,-240 543.5,-228 543.5,-228 543.5,-222 549.5,-216 555.5,-216 555.5,-216 642.5,-216 642.5,-216 648.5,-216 654.5,-222 654.5,-228 654.5,-228 654.5,-240 654.5,-240 654.5,-246 648.5,-252 642.5,-252"/> -<text text-anchor="middle" x="599" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text> +<path fill="none" stroke="#56d8b9" stroke-width="2" d="M881,-180C881,-180 755,-180 755,-180 749,-180 743,-174 743,-168 743,-168 743,-156 743,-156 743,-150 749,-144 755,-144 755,-144 881,-144 881,-144 887,-144 893,-150 893,-156 893,-156 893,-168 893,-168 893,-174 887,-180 881,-180"/> +<text text-anchor="middle" x="818" y="-159.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> </g> <!-- 4->0 --> <g id="edge4" class="edge"><title>4->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M623.094,-215.785C634.597,-206.411 647.644,-193.897 656,-180 681.56,-137.492 692.17,-79.7097 696.391,-46.1773"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="699.874,-46.5286 697.548,-36.1923 692.92,-45.7231 699.874,-46.5286"/> +<path fill="none" stroke="grey" stroke-width="2" d="M746.115,-143.919C728.035,-136.04 710.736,-124.592 700,-108 691.308,-94.567 689.608,-84.1656 700,-72 737.975,-27.5452 918.617,-20.167 997.736,-19.0937"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="997.795,-22.5933 1007.76,-18.9844 997.719,-15.5937 997.795,-22.5933"/> </g> <!-- 5 --> <g id="node6" class="node"><title>5</title> 
-<path fill="none" stroke="#88d856" stroke-width="2" d="M827,-252C827,-252 685,-252 685,-252 679,-252 673,-246 673,-240 673,-240 673,-228 673,-228 673,-222 679,-216 685,-216 685,-216 827,-216 827,-216 833,-216 839,-222 839,-228 839,-228 839,-240 839,-240 839,-246 833,-252 827,-252"/> -<text text-anchor="middle" x="756" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> +<path fill="none" stroke="#8fd856" stroke-width="2" d="M1472,-108C1472,-108 1384,-108 1384,-108 1378,-108 1372,-102 1372,-96 1372,-96 1372,-84 1372,-84 1372,-78 1378,-72 1384,-72 1384,-72 1472,-72 1472,-72 1478,-72 1484,-78 1484,-84 1484,-84 1484,-96 1484,-96 1484,-102 1478,-108 1472,-108"/> +<text text-anchor="middle" x="1428" y="-87.5" font-family="sans" font-size="10.00">calculate_TIN_scores</text> </g> <!-- 5->0 --> <g id="edge5" class="edge"><title>5->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M751.432,-215.849C741.551,-178.753 718.127,-90.8101 706.219,-46.1027"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="709.546,-44.9947 703.59,-36.2325 702.782,-46.7964 709.546,-44.9947"/> +<path fill="none" stroke="grey" stroke-width="2" d="M1371.71,-73.9257C1368.77,-73.2489 1365.85,-72.6022 1363,-72 1258.43,-49.9293 1133.41,-32.0436 1072.32,-23.8462"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1072.75,-20.3727 1062.38,-22.5219 1071.83,-27.3114 1072.75,-20.3727"/> </g> <!-- 6 --> <g id="node7" class="node"><title>6</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M995,-252C995,-252 869,-252 869,-252 863,-252 857,-246 857,-240 857,-240 857,-228 857,-228 857,-222 863,-216 869,-216 869,-216 995,-216 995,-216 1001,-216 1007,-222 1007,-228 1007,-228 1007,-240 1007,-240 1007,-246 1001,-252 995,-252"/> -<text text-anchor="middle" x="932" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> +<path fill="none" stroke="#8fd856" stroke-width="2" d="M1676,-108C1676,-108 1588,-108 
1588,-108 1582,-108 1576,-102 1576,-96 1576,-96 1576,-84 1576,-84 1576,-78 1582,-72 1588,-72 1588,-72 1676,-72 1676,-72 1682,-72 1688,-78 1688,-84 1688,-84 1688,-96 1688,-96 1688,-102 1682,-108 1676,-108"/> +<text text-anchor="middle" x="1632" y="-87.5" font-family="sans" font-size="10.00">calculate_TIN_scores</text> </g> <!-- 6->0 --> <g id="edge6" class="edge"><title>6->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M863.1,-215.882C827.305,-205.814 788.309,-192.618 775,-180 737.506,-144.455 755.302,-118.111 732,-72 727.356,-62.8095 721.614,-53.1415 716.229,-44.6159"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="719.17,-42.7179 710.807,-36.2128 713.288,-46.5131 719.17,-42.7179"/> +<path fill="none" stroke="grey" stroke-width="2" d="M1575.75,-73.7128C1572.8,-73.0916 1569.87,-72.515 1567,-72 1383.05,-38.991 1159.35,-25.1198 1072.23,-20.7018"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1072.22,-17.1969 1062.05,-20.1976 1071.87,-24.1883 1072.22,-17.1969"/> </g> <!-- 7 --> <g id="node8" class="node"><title>7</title> -<path fill="none" stroke="#5673d8" stroke-width="2" d="M923,-108C923,-108 835,-108 835,-108 829,-108 823,-102 823,-96 823,-96 823,-84 823,-84 823,-78 829,-72 835,-72 835,-72 923,-72 923,-72 929,-72 935,-78 935,-84 935,-84 935,-96 935,-96 935,-102 929,-108 923,-108"/> -<text text-anchor="middle" x="879" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> -<text text-anchor="middle" x="879" y="-82" font-family="sans" font-size="10.00">seqmode: paired_end</text> +<path fill="none" stroke="#56b1d8" stroke-width="2" d="M1342,-108C1342,-108 1230,-108 1230,-108 1224,-108 1218,-102 1218,-96 1218,-96 1218,-84 1218,-84 1218,-78 1224,-72 1230,-72 1230,-72 1342,-72 1342,-72 1348,-72 1354,-78 1354,-84 1354,-84 1354,-96 1354,-96 1354,-102 1348,-108 1342,-108"/> +<text text-anchor="middle" x="1286" y="-93" font-family="sans" font-size="10.00">salmon_quantmerge_genes</text> +<text 
text-anchor="middle" x="1286" y="-82" font-family="sans" font-size="10.00">salmon_merge_on: tpm</text> </g> <!-- 7->0 --> <g id="edge7" class="edge"><title>7->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M834.967,-71.8761C804.523,-60.0365 764.485,-44.4665 735.753,-33.293"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="736.685,-29.8999 726.096,-29.5374 734.148,-36.424 736.685,-29.8999"/> +<path fill="none" stroke="grey" stroke-width="2" d="M1224.92,-71.9656C1177.03,-58.6095 1111.97,-40.4647 1071.77,-29.2542"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1072.68,-25.8757 1062.11,-26.5606 1070.8,-32.6184 1072.68,-25.8757"/> </g> <!-- 8 --> <g id="node9" class="node"><title>8</title> -<path fill="none" stroke="#5673d8" stroke-width="2" d="M1109,-108C1109,-108 1021,-108 1021,-108 1015,-108 1009,-102 1009,-96 1009,-96 1009,-84 1009,-84 1009,-78 1015,-72 1021,-72 1021,-72 1109,-72 1109,-72 1115,-72 1121,-78 1121,-84 1121,-84 1121,-96 1121,-96 1121,-102 1115,-108 1109,-108"/> -<text text-anchor="middle" x="1065" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> -<text text-anchor="middle" x="1065" y="-82" font-family="sans" font-size="10.00">seqmode: single_end</text> +<path fill="none" stroke="#56b1d8" stroke-width="2" d="M841.5,-108C841.5,-108 720.5,-108 720.5,-108 714.5,-108 708.5,-102 708.5,-96 708.5,-96 708.5,-84 708.5,-84 708.5,-78 714.5,-72 720.5,-72 720.5,-72 841.5,-72 841.5,-72 847.5,-72 853.5,-78 853.5,-84 853.5,-84 853.5,-96 853.5,-96 853.5,-102 847.5,-108 841.5,-108"/> +<text text-anchor="middle" x="781" y="-93" font-family="sans" font-size="10.00">salmon_quantmerge_genes</text> +<text text-anchor="middle" x="781" y="-82" font-family="sans" font-size="10.00">salmon_merge_on: numreads</text> </g> <!-- 8->0 --> <g id="edge8" class="edge"><title>8->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1008.61,-78.2159C933.016,-63.7572 800.875,-38.4843 736.452,-26.1629"/> -<polygon 
fill="grey" stroke="grey" stroke-width="2" points="736.823,-22.6705 726.343,-24.2296 735.508,-29.5459 736.823,-22.6705"/> +<path fill="none" stroke="grey" stroke-width="2" d="M842.811,-71.9656C891.409,-58.5722 957.482,-40.3633 998.132,-29.1604"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="999.183,-32.5015 1007.89,-26.4704 997.323,-25.7531 999.183,-32.5015"/> </g> <!-- 9 --> <g id="node10" class="node"><title>9</title> -<path fill="none" stroke="#bed856" stroke-width="2" d="M726,-326.5C726,-326.5 614,-326.5 614,-326.5 608,-326.5 602,-320.5 602,-314.5 602,-314.5 602,-302.5 602,-302.5 602,-296.5 608,-290.5 614,-290.5 614,-290.5 726,-290.5 726,-290.5 732,-290.5 738,-296.5 738,-302.5 738,-302.5 738,-314.5 738,-314.5 738,-320.5 732,-326.5 726,-326.5"/> -<text text-anchor="middle" x="670" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> +<path fill="none" stroke="#d85656" stroke-width="2" d="M1014.5,-108C1014.5,-108 883.5,-108 883.5,-108 877.5,-108 871.5,-102 871.5,-96 871.5,-96 871.5,-84 871.5,-84 871.5,-78 877.5,-72 883.5,-72 883.5,-72 1014.5,-72 1014.5,-72 1020.5,-72 1026.5,-78 1026.5,-84 1026.5,-84 1026.5,-96 1026.5,-96 1026.5,-102 1020.5,-108 1014.5,-108"/> +<text text-anchor="middle" x="949" y="-93" font-family="sans" font-size="10.00">salmon_quantmerge_transcripts</text> +<text text-anchor="middle" x="949" y="-82" font-family="sans" font-size="10.00">salmon_merge_on: tpm</text> </g> -<!-- 9->3 --> -<g id="edge9" class="edge"><title>9->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M620.97,-290.41C590.929,-279.939 552.3,-266.475 520.644,-255.441"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="521.655,-252.087 511.061,-252.1 519.351,-258.697 521.655,-252.087"/> -</g> -<!-- 9->5 --> -<g id="edge13" class="edge"><title>9->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M690.381,-290.319C701.627,-280.838 715.758,-268.925 728.024,-258.585"/> -<polygon fill="grey" stroke="grey" 
stroke-width="2" points="730.376,-261.18 735.765,-252.058 725.864,-255.828 730.376,-261.18"/> -</g> -<!-- 18 --> -<g id="node19" class="node"><title>18</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M1126.5,-252C1126.5,-252 1037.5,-252 1037.5,-252 1031.5,-252 1025.5,-246 1025.5,-240 1025.5,-240 1025.5,-228 1025.5,-228 1025.5,-222 1031.5,-216 1037.5,-216 1037.5,-216 1126.5,-216 1126.5,-216 1132.5,-216 1138.5,-222 1138.5,-228 1138.5,-228 1138.5,-240 1138.5,-240 1138.5,-246 1132.5,-252 1126.5,-252"/> -<text text-anchor="middle" x="1082" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> -</g> -<!-- 9->18 --> -<g id="edge26" class="edge"><title>9->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M738.35,-290.717C742.959,-289.749 747.541,-288.832 752,-288 865.226,-266.87 898.018,-274.364 1015.13,-252.03"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1016.06,-255.415 1025.21,-250.075 1014.73,-248.543 1016.06,-255.415"/> +<!-- 9->0 --> +<g id="edge9" class="edge"><title>9->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M970.258,-71.6966C981.089,-62.8807 994.418,-52.0321 1006.16,-42.4742"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1008.44,-45.1316 1013.99,-36.1043 1004.02,-39.7026 1008.44,-45.1316"/> </g> <!-- 10 --> <g id="node11" class="node"><title>10</title> -<path fill="none" stroke="#56d8a2" stroke-width="2" d="M541.5,-329C541.5,-329 440.5,-329 440.5,-329 434.5,-329 428.5,-323 428.5,-317 428.5,-317 428.5,-300 428.5,-300 428.5,-294 434.5,-288 440.5,-288 440.5,-288 541.5,-288 541.5,-288 547.5,-288 553.5,-294 553.5,-300 553.5,-300 553.5,-317 553.5,-317 553.5,-323 547.5,-329 541.5,-329"/> -<text text-anchor="middle" x="491" y="-317" font-family="sans" font-size="10.00">create_index_salmon</text> -<text text-anchor="middle" x="491" y="-306" font-family="sans" font-size="10.00">kmer: 31</text> -<text text-anchor="middle" x="491" y="-295" font-family="sans" 
font-size="10.00">organism: homo_sapiens</text> -</g> -<!-- 10->3 --> -<g id="edge10" class="edge"><title>10->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M483.076,-287.689C479.859,-279.647 476.094,-270.236 472.644,-261.61"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="475.891,-260.304 468.928,-252.319 469.392,-262.904 475.891,-260.304"/> +<path fill="none" stroke="#d85656" stroke-width="2" d="M1187.5,-108C1187.5,-108 1056.5,-108 1056.5,-108 1050.5,-108 1044.5,-102 1044.5,-96 1044.5,-96 1044.5,-84 1044.5,-84 1044.5,-78 1050.5,-72 1056.5,-72 1056.5,-72 1187.5,-72 1187.5,-72 1193.5,-72 1199.5,-78 1199.5,-84 1199.5,-84 1199.5,-96 1199.5,-96 1199.5,-102 1193.5,-108 1187.5,-108"/> +<text text-anchor="middle" x="1122" y="-93" font-family="sans" font-size="10.00">salmon_quantmerge_transcripts</text> +<text text-anchor="middle" x="1122" y="-82" font-family="sans" font-size="10.00">salmon_merge_on: numreads</text> </g> -<!-- 10->4 --> -<g id="edge12" class="edge"><title>10->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M520.226,-287.88C533.987,-278.643 550.489,-267.565 564.825,-257.941"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="567.076,-260.646 573.428,-252.166 563.174,-254.834 567.076,-260.646"/> +<!-- 10->0 --> +<g id="edge10" class="edge"><title>10->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1100.49,-71.6966C1089.54,-62.8807 1076.05,-52.0321 1064.18,-42.4742"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1066.24,-39.6462 1056.26,-36.1043 1061.86,-45.1001 1066.24,-39.6462"/> </g> <!-- 11 --> <g id="node12" class="node"><title>11</title> -<path fill="none" stroke="#d86e56" stroke-width="2" d="M1015,-326.5C1015,-326.5 919,-326.5 919,-326.5 913,-326.5 907,-320.5 907,-314.5 907,-314.5 907,-302.5 907,-302.5 907,-296.5 913,-290.5 919,-290.5 919,-290.5 1015,-290.5 1015,-290.5 1021,-290.5 1027,-296.5 1027,-302.5 1027,-302.5 1027,-314.5 1027,-314.5 1027,-320.5 1021,-326.5 
1015,-326.5"/> -<text text-anchor="middle" x="967" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> +<path fill="none" stroke="#56d863" stroke-width="2" d="M1545.5,-108C1545.5,-108 1514.5,-108 1514.5,-108 1508.5,-108 1502.5,-102 1502.5,-96 1502.5,-96 1502.5,-84 1502.5,-84 1502.5,-78 1508.5,-72 1514.5,-72 1514.5,-72 1545.5,-72 1545.5,-72 1551.5,-72 1557.5,-78 1557.5,-84 1557.5,-84 1557.5,-96 1557.5,-96 1557.5,-102 1551.5,-108 1545.5,-108"/> +<text text-anchor="middle" x="1530" y="-87.5" font-family="sans" font-size="10.00">star_rpm</text> </g> -<!-- 11->4 --> -<g id="edge11" class="edge"><title>11->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M906.62,-290.63C902.698,-289.695 898.8,-288.808 895,-288 796.444,-267.04 767.467,-272.788 664.451,-251.962"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="665.082,-248.518 654.582,-249.932 663.672,-255.375 665.082,-248.518"/> -</g> -<!-- 11->6 --> -<g id="edge15" class="edge"><title>11->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M958.705,-290.319C954.521,-281.651 949.355,-270.949 944.685,-261.276"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="947.734,-259.542 940.235,-252.058 941.431,-262.585 947.734,-259.542"/> -</g> -<!-- 19 --> -<g id="node20" class="node"><title>19</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M1241.5,-252C1241.5,-252 1168.5,-252 1168.5,-252 1162.5,-252 1156.5,-246 1156.5,-240 1156.5,-240 1156.5,-228 1156.5,-228 1156.5,-222 1162.5,-216 1168.5,-216 1168.5,-216 1241.5,-216 1241.5,-216 1247.5,-216 1253.5,-222 1253.5,-228 1253.5,-228 1253.5,-240 1253.5,-240 1253.5,-246 1247.5,-252 1241.5,-252"/> -<text text-anchor="middle" x="1205" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> -</g> -<!-- 11->19 --> -<g id="edge28" class="edge"><title>11->19</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1023.1,-290.41C1060.19,-279.112 1108.72,-264.329 1146.32,-252.874"/> 
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1147.58,-256.15 1156.13,-249.888 1145.54,-249.454 1147.58,-256.15"/> +<!-- 11->0 --> +<g id="edge11" class="edge"><title>11->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1502.48,-75.2191C1499.33,-73.9871 1496.13,-72.8766 1493,-72 1341.16,-29.4948 1151.82,-21.0221 1072.59,-19.3746"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1072.22,-15.8673 1062.16,-19.1846 1072.09,-22.8662 1072.22,-15.8673"/> </g> <!-- 12 --> <g id="node13" class="node"><title>12</title> -<path fill="none" stroke="#56d8d8" stroke-width="2" d="M873.5,-326.5C873.5,-326.5 772.5,-326.5 772.5,-326.5 766.5,-326.5 760.5,-320.5 760.5,-314.5 760.5,-314.5 760.5,-302.5 760.5,-302.5 760.5,-296.5 766.5,-290.5 772.5,-290.5 772.5,-290.5 873.5,-290.5 873.5,-290.5 879.5,-290.5 885.5,-296.5 885.5,-302.5 885.5,-302.5 885.5,-314.5 885.5,-314.5 885.5,-320.5 879.5,-326.5 873.5,-326.5"/> -<text text-anchor="middle" x="823" y="-311.5" font-family="sans" font-size="10.00">create_index_kallisto</text> -<text text-anchor="middle" x="823" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> -</g> -<!-- 12->5 --> -<g id="edge14" class="edge"><title>12->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M807.122,-290.319C798.611,-281.109 787.979,-269.604 778.618,-259.475"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="781.122,-257.027 771.764,-252.058 775.981,-261.778 781.122,-257.027"/> +<path fill="none" stroke="#56d863" stroke-width="2" d="M1762.5,-108C1762.5,-108 1731.5,-108 1731.5,-108 1725.5,-108 1719.5,-102 1719.5,-96 1719.5,-96 1719.5,-84 1719.5,-84 1719.5,-78 1725.5,-72 1731.5,-72 1731.5,-72 1762.5,-72 1762.5,-72 1768.5,-72 1774.5,-78 1774.5,-84 1774.5,-84 1774.5,-96 1774.5,-96 1774.5,-102 1768.5,-108 1762.5,-108"/> +<text text-anchor="middle" x="1747" y="-87.5" font-family="sans" font-size="10.00">star_rpm</text> </g> -<!-- 12->6 --> -<g id="edge16" 
class="edge"><title>12->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M848.831,-290.319C863.492,-280.567 882.022,-268.242 897.868,-257.702"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="899.966,-260.511 906.354,-252.058 896.089,-254.682 899.966,-260.511"/> +<!-- 12->0 --> +<g id="edge12" class="edge"><title>12->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1719.31,-78.2242C1712.14,-75.8128 1704.36,-73.5217 1697,-72 1462.81,-23.5966 1173.55,-18.8355 1072.18,-18.772"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1072.13,-15.2719 1062.14,-18.7811 1072.14,-22.2719 1072.13,-15.2719"/> </g> <!-- 13 --> <g id="node14" class="node"><title>13</title> -<path fill="none" stroke="#a7d856" stroke-width="2" d="M962,-180C962,-180 796,-180 796,-180 790,-180 784,-174 784,-168 784,-168 784,-156 784,-156 784,-150 790,-144 796,-144 796,-144 962,-144 962,-144 968,-144 974,-150 974,-156 974,-156 974,-168 974,-168 974,-174 968,-180 962,-180"/> -<text text-anchor="middle" x="879" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> +<path fill="none" stroke="#569ad8" stroke-width="2" d="M910,-326.5C910,-326.5 798,-326.5 798,-326.5 792,-326.5 786,-320.5 786,-314.5 786,-314.5 786,-302.5 786,-302.5 786,-296.5 792,-290.5 798,-290.5 798,-290.5 910,-290.5 910,-290.5 916,-290.5 922,-296.5 922,-302.5 922,-302.5 922,-314.5 922,-314.5 922,-320.5 916,-326.5 910,-326.5"/> +<text text-anchor="middle" x="854" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> +</g> +<!-- 13->3 --> +<g id="edge13" class="edge"><title>13->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M816.97,-290.282C796.361,-280.126 770.587,-266.455 749,-252 719.625,-232.331 688.877,-206.074 667.92,-187.184"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="670.143,-184.474 660.391,-180.334 665.433,-189.652 670.143,-184.474"/> +</g> +<!-- 16 --> +<g id="node17" 
class="node"><title>16</title> +<path fill="none" stroke="#56d8d0" stroke-width="2" d="M1359.5,-252C1359.5,-252 1270.5,-252 1270.5,-252 1264.5,-252 1258.5,-246 1258.5,-240 1258.5,-240 1258.5,-228 1258.5,-228 1258.5,-222 1264.5,-216 1270.5,-216 1270.5,-216 1359.5,-216 1359.5,-216 1365.5,-216 1371.5,-222 1371.5,-228 1371.5,-228 1371.5,-240 1371.5,-240 1371.5,-246 1365.5,-252 1359.5,-252"/> +<text text-anchor="middle" x="1315" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> +</g> +<!-- 13->16 --> +<g id="edge39" class="edge"><title>13->16</title> +<path fill="none" stroke="grey" stroke-width="2" d="M922.353,-290.735C926.962,-289.762 931.542,-288.84 936,-288 1044.5,-267.56 1171.85,-251.323 1247.85,-242.446"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1248.69,-245.871 1258.22,-241.242 1247.89,-238.918 1248.69,-245.871"/> </g> -<!-- 13->7 --> -<g id="edge17" class="edge"><title>13->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M879,-143.697C879,-135.983 879,-126.712 879,-118.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="882.5,-118.104 879,-108.104 875.5,-118.104 882.5,-118.104"/> +<!-- 21 --> +<g id="node22" class="node"><title>21</title> +<path fill="none" stroke="#d86e56" stroke-width="2" d="M1027.5,-180C1027.5,-180 924.5,-180 924.5,-180 918.5,-180 912.5,-174 912.5,-168 912.5,-168 912.5,-156 912.5,-156 912.5,-150 918.5,-144 924.5,-144 924.5,-144 1027.5,-144 1027.5,-144 1033.5,-144 1039.5,-150 1039.5,-156 1039.5,-156 1039.5,-168 1039.5,-168 1039.5,-174 1033.5,-180 1027.5,-180"/> +<text text-anchor="middle" x="976" y="-159.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> +</g> +<!-- 13->21 --> +<g id="edge44" class="edge"><title>13->21</title> +<path fill="none" stroke="grey" stroke-width="2" d="M868.474,-290.357C889.936,-264.936 930.317,-217.108 955.021,-187.848"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="957.768,-190.02 961.545,-180.121 
952.419,-185.504 957.768,-190.02"/> </g> <!-- 14 --> <g id="node15" class="node"><title>14</title> -<path fill="none" stroke="#56d873" stroke-width="2" d="M1125.5,-180C1125.5,-180 1004.5,-180 1004.5,-180 998.5,-180 992.5,-174 992.5,-168 992.5,-168 992.5,-156 992.5,-156 992.5,-150 998.5,-144 1004.5,-144 1004.5,-144 1125.5,-144 1125.5,-144 1131.5,-144 1137.5,-150 1137.5,-156 1137.5,-156 1137.5,-168 1137.5,-168 1137.5,-174 1131.5,-180 1125.5,-180"/> -<text text-anchor="middle" x="1065" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text> +<path fill="none" stroke="#d8b456" stroke-width="2" d="M854,-252C854,-252 770,-252 770,-252 764,-252 758,-246 758,-240 758,-240 758,-228 758,-228 758,-222 764,-216 770,-216 770,-216 854,-216 854,-216 860,-216 866,-222 866,-228 866,-228 866,-240 866,-240 866,-246 860,-252 854,-252"/> +<text text-anchor="middle" x="812" y="-231.5" font-family="sans" font-size="10.00">create_index_kallisto</text> </g> -<!-- 14->7 --> -<g id="edge18" class="edge"><title>14->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1019.5,-143.876C993.62,-134.137 961.033,-121.872 933.759,-111.608"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="934.945,-108.315 924.353,-108.068 932.479,-114.866 934.945,-108.315"/> +<!-- 14->3 --> +<g id="edge14" class="edge"><title>14->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M770.413,-215.876C746.967,-206.222 717.496,-194.087 692.703,-183.878"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="694.031,-180.639 683.452,-180.068 691.366,-187.112 694.031,-180.639"/> </g> -<!-- 14->8 --> -<g id="edge20" class="edge"><title>14->8</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1065,-143.697C1065,-135.983 1065,-126.712 1065,-118.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1068.5,-118.104 1065,-108.104 1061.5,-118.104 1068.5,-118.104"/> +<!-- 14->4 --> +<g id="edge16" class="edge"><title>14->4</title> 
+<path fill="none" stroke="grey" stroke-width="2" d="M813.483,-215.697C814.144,-207.983 814.939,-198.712 815.676,-190.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="819.167,-190.367 816.534,-180.104 812.193,-189.769 819.167,-190.367"/> </g> <!-- 15 --> <g id="node16" class="node"><title>15</title> -<path fill="none" stroke="#56c1d8" stroke-width="2" d="M1318,-180C1318,-180 1168,-180 1168,-180 1162,-180 1156,-174 1156,-168 1156,-168 1156,-156 1156,-156 1156,-150 1162,-144 1168,-144 1168,-144 1318,-144 1318,-144 1324,-144 1330,-150 1330,-156 1330,-156 1330,-168 1330,-168 1330,-174 1324,-180 1318,-180"/> -<text text-anchor="middle" x="1243" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> -</g> -<!-- 15->8 --> -<g id="edge19" class="edge"><title>15->8</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1199.46,-143.876C1174.8,-134.179 1143.78,-121.98 1117.75,-111.743"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1118.99,-108.471 1108.4,-108.068 1116.43,-114.985 1118.99,-108.471"/> +<path fill="none" stroke="#56d87b" stroke-width="2" d="M1202,-326.5C1202,-326.5 1106,-326.5 1106,-326.5 1100,-326.5 1094,-320.5 1094,-314.5 1094,-314.5 1094,-302.5 1094,-302.5 1094,-296.5 1100,-290.5 1106,-290.5 1106,-290.5 1202,-290.5 1202,-290.5 1208,-290.5 1214,-296.5 1214,-302.5 1214,-302.5 1214,-314.5 1214,-314.5 1214,-320.5 1208,-326.5 1202,-326.5"/> +<text text-anchor="middle" x="1154" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> </g> -<!-- 16 --> -<g id="node17" class="node"><title>16</title> -<path fill="none" stroke="#56a2d8" stroke-width="2" d="M791,-401C791,-401 523,-401 523,-401 517,-401 511,-395 511,-389 511,-389 511,-377 511,-377 511,-371 517,-365 523,-365 523,-365 791,-365 791,-365 797,-365 803,-371 803,-377 803,-377 803,-389 803,-389 803,-395 797,-401 791,-401"/> -<text text-anchor="middle" x="657" y="-386" font-family="sans" 
font-size="10.00">pe_remove_adapters_cutadapt</text> -<text text-anchor="middle" x="657" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> +<!-- 15->4 --> +<g id="edge15" class="edge"><title>15->4</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1093.95,-291.497C1036.84,-276.202 958.661,-255.033 952,-252 913.239,-234.354 873.108,-206.15 846.993,-186.228"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="849.08,-183.417 839.026,-180.075 844.801,-188.957 849.08,-183.417"/> </g> -<!-- 16->9 --> -<g id="edge21" class="edge"><title>16->9</title> -<path fill="none" stroke="grey" stroke-width="2" d="M660.081,-364.819C661.586,-356.422 663.434,-346.116 665.125,-336.686"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="668.621,-337.019 666.941,-326.558 661.731,-335.783 668.621,-337.019"/> +<!-- 19 --> +<g id="node20" class="node"><title>19</title> +<path fill="none" stroke="#56d892" stroke-width="2" d="M1668.5,-252C1668.5,-252 1595.5,-252 1595.5,-252 1589.5,-252 1583.5,-246 1583.5,-240 1583.5,-240 1583.5,-228 1583.5,-228 1583.5,-222 1589.5,-216 1595.5,-216 1595.5,-216 1668.5,-216 1668.5,-216 1674.5,-216 1680.5,-222 1680.5,-228 1680.5,-228 1680.5,-240 1680.5,-240 1680.5,-246 1674.5,-252 1668.5,-252"/> +<text text-anchor="middle" x="1632" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> +</g> +<!-- 15->19 --> +<g id="edge42" class="edge"><title>15->19</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1214.06,-298.391C1306.38,-284.388 1482.06,-257.742 1573.46,-243.879"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1574.01,-247.335 1583.38,-242.375 1572.96,-240.414 1574.01,-247.335"/> +</g> +<!-- 22 --> +<g id="node23" class="node"><title>22</title> +<path fill="none" stroke="#78d856" stroke-width="2" d="M1158.5,-180C1158.5,-180 1071.5,-180 1071.5,-180 1065.5,-180 1059.5,-174 1059.5,-168 1059.5,-168 1059.5,-156 1059.5,-156 
1059.5,-150 1065.5,-144 1071.5,-144 1071.5,-144 1158.5,-144 1158.5,-144 1164.5,-144 1170.5,-150 1170.5,-156 1170.5,-156 1170.5,-168 1170.5,-168 1170.5,-174 1164.5,-180 1158.5,-180"/> +<text text-anchor="middle" x="1115" y="-159.5" font-family="sans" font-size="10.00">quantification_salmon</text> +</g> +<!-- 15->22 --> +<g id="edge46" class="edge"><title>15->22</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1149.37,-290.357C1142.69,-265.588 1130.26,-219.546 1122.32,-190.135"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1125.61,-188.864 1119.62,-180.121 1118.85,-190.688 1125.61,-188.864"/> +</g> +<!-- 16->5 --> +<g id="edge17" class="edge"><title>16->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1261.78,-215.963C1245.11,-207.835 1228.58,-196.231 1219,-180 1210.87,-166.221 1209.15,-156.611 1219,-144 1257.7,-94.4397 1295.38,-121.499 1361.95,-107.991"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1363.03,-111.337 1371.99,-105.69 1361.46,-104.514 1363.03,-111.337"/> +</g> +<!-- 16->11 --> +<g id="edge31" class="edge"><title>16->11</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1350.89,-215.807C1369.48,-206.262 1392.16,-193.588 1411,-180 1430.13,-166.199 1431.44,-158.561 1450,-144 1463.79,-133.184 1479.75,-122.31 1493.77,-113.251"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1495.67,-116.192 1502.21,-107.861 1491.9,-110.292 1495.67,-116.192"/> </g> <!-- 17 --> <g id="node18" class="node"><title>17</title> -<path fill="none" stroke="#56d88a" stroke-width="2" d="M1113,-401C1113,-401 833,-401 833,-401 827,-401 821,-395 821,-389 821,-389 821,-377 821,-377 821,-371 827,-365 833,-365 833,-365 1113,-365 1113,-365 1119,-365 1125,-371 1125,-377 1125,-377 1125,-389 1125,-389 1125,-395 1119,-401 1113,-401"/> -<text text-anchor="middle" x="973" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> -<text text-anchor="middle" x="973" y="-375" font-family="sans" 
font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M1390,-180C1390,-180 1240,-180 1240,-180 1234,-180 1228,-174 1228,-168 1228,-168 1228,-156 1228,-156 1228,-150 1234,-144 1240,-144 1240,-144 1390,-144 1390,-144 1396,-144 1402,-150 1402,-156 1402,-156 1402,-168 1402,-168 1402,-174 1396,-180 1390,-180"/> +<text text-anchor="middle" x="1315" y="-165" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> +<text text-anchor="middle" x="1315" y="-154" font-family="sans" font-size="10.00">seqmode: paired_end</text> +</g> +<!-- 16->17 --> +<g id="edge40" class="edge"><title>16->17</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1315,-215.697C1315,-207.983 1315,-198.712 1315,-190.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1318.5,-190.104 1315,-180.104 1311.5,-190.104 1318.5,-190.104"/> +</g> +<!-- 17->5 --> +<g id="edge18" class="edge"><title>17->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1342.64,-143.876C1357.42,-134.724 1375.79,-123.342 1391.7,-113.485"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1393.59,-116.431 1400.25,-108.19 1389.91,-110.481 1393.59,-116.431"/> </g> <!-- 17->11 --> -<g id="edge22" class="edge"><title>17->11</title> -<path fill="none" stroke="grey" stroke-width="2" d="M971.578,-364.819C970.883,-356.422 970.03,-346.116 969.25,-336.686"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="972.725,-336.235 968.412,-326.558 965.748,-336.813 972.725,-336.235"/> +<g id="edge32" class="edge"><title>17->11</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1380.41,-143.913C1414.38,-134.442 1456.34,-121.802 1493,-108 1493.1,-107.964 1493.19,-107.928 1493.29,-107.892"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1494.35,-111.239 1502.3,-104.238 1491.72,-104.752 1494.35,-111.239"/> +</g> +<!-- 18 --> +<g id="node19" 
class="node"><title>18</title> +<path fill="none" stroke="#d6d856" stroke-width="2" d="M1591.5,-180C1591.5,-180 1470.5,-180 1470.5,-180 1464.5,-180 1458.5,-174 1458.5,-168 1458.5,-168 1458.5,-156 1458.5,-156 1458.5,-150 1464.5,-144 1470.5,-144 1470.5,-144 1591.5,-144 1591.5,-144 1597.5,-144 1603.5,-150 1603.5,-156 1603.5,-156 1603.5,-168 1603.5,-168 1603.5,-174 1597.5,-180 1591.5,-180"/> +<text text-anchor="middle" x="1531" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text> +</g> +<!-- 18->5 --> +<g id="edge19" class="edge"><title>18->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1505.8,-143.876C1492.59,-134.893 1476.21,-123.763 1461.89,-114.034"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1463.53,-110.916 1453.29,-108.19 1459.6,-116.705 1463.53,-110.916"/> +</g> +<!-- 18->6 --> +<g id="edge22" class="edge"><title>18->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1555.71,-143.876C1568.67,-134.893 1584.73,-123.763 1598.77,-114.034"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1600.97,-116.763 1607.2,-108.19 1596.98,-111.009 1600.97,-116.763"/> </g> -<!-- 18->13 --> -<g id="edge23" class="edge"><title>18->13</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1032.6,-215.966C1004.11,-206.142 968.114,-193.729 938.129,-183.389"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="939.216,-180.062 928.621,-180.111 936.934,-186.68 939.216,-180.062"/> +<!-- 19->6 --> +<g id="edge20" class="edge"><title>19->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1632,-215.871C1632,-191.67 1632,-147.211 1632,-118.393"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1635.5,-118.189 1632,-108.189 1628.5,-118.189 1635.5,-118.189"/> </g> -<!-- 19->15 --> -<g id="edge24" class="edge"><title>19->15</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1214.39,-215.697C1218.76,-207.644 1224.06,-197.894 1228.9,-188.982"/> 
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1232.02,-190.563 1233.71,-180.104 1225.87,-187.223 1232.02,-190.563"/> +<!-- 19->12 --> +<g id="edge33" class="edge"><title>19->12</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1680.75,-227.646C1736.69,-220.381 1823.26,-205.316 1843,-180 1852.84,-167.383 1851.13,-157.779 1843,-144 1830.39,-122.628 1805.73,-109.279 1784.56,-101.324"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1785.42,-97.9183 1774.83,-97.9398 1783.12,-104.53 1785.42,-97.9183"/> </g> <!-- 20 --> <g id="node21" class="node"><title>20</title> -<path fill="none" stroke="#d6d856" stroke-width="2" d="M1193.5,-329C1193.5,-329 1092.5,-329 1092.5,-329 1086.5,-329 1080.5,-323 1080.5,-317 1080.5,-317 1080.5,-300 1080.5,-300 1080.5,-294 1086.5,-288 1092.5,-288 1092.5,-288 1193.5,-288 1193.5,-288 1199.5,-288 1205.5,-294 1205.5,-300 1205.5,-300 1205.5,-317 1205.5,-317 1205.5,-323 1199.5,-329 1193.5,-329"/> -<text text-anchor="middle" x="1143" y="-317" font-family="sans" font-size="10.00">create_index_star</text> -<text text-anchor="middle" x="1143" y="-306" font-family="sans" font-size="10.00">index_size: 75</text> -<text text-anchor="middle" x="1143" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> -</g> -<!-- 20->18 --> -<g id="edge25" class="edge"><title>20->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1126.33,-287.689C1119.2,-279.215 1110.79,-269.22 1103.22,-260.226"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1105.69,-257.718 1096.57,-252.319 1100.33,-262.225 1105.69,-257.718"/> -</g> -<!-- 20->19 --> -<g id="edge27" class="edge"><title>20->19</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1159.94,-287.689C1167.26,-279.128 1175.91,-269.016 1183.66,-259.951"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1186.35,-262.194 1190.19,-252.319 1181.03,-257.645 1186.35,-262.194"/> +<path fill="none" stroke="#56c9d8" 
stroke-width="2" d="M1822,-180C1822,-180 1672,-180 1672,-180 1666,-180 1660,-174 1660,-168 1660,-168 1660,-156 1660,-156 1660,-150 1666,-144 1672,-144 1672,-144 1822,-144 1822,-144 1828,-144 1834,-150 1834,-156 1834,-156 1834,-168 1834,-168 1834,-174 1828,-180 1822,-180"/> +<text text-anchor="middle" x="1747" y="-165" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> +<text text-anchor="middle" x="1747" y="-154" font-family="sans" font-size="10.00">seqmode: single_end</text> +</g> +<!-- 19->20 --> +<g id="edge43" class="edge"><title>19->20</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1660.13,-215.876C1675.17,-206.724 1693.87,-195.342 1710.06,-185.485"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1712.04,-188.379 1718.76,-180.19 1708.4,-182.4 1712.04,-188.379"/> +</g> +<!-- 20->6 --> +<g id="edge21" class="edge"><title>20->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1718.87,-143.876C1703.83,-134.724 1685.13,-123.342 1668.94,-113.485"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1670.6,-110.4 1660.24,-108.19 1666.96,-116.379 1670.6,-110.4"/> +</g> +<!-- 20->12 --> +<g id="edge34" class="edge"><title>20->12</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1747,-143.697C1747,-135.983 1747,-126.712 1747,-118.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1750.5,-118.104 1747,-108.104 1743.5,-118.104 1750.5,-118.104"/> +</g> +<!-- 21->7 --> +<g id="edge23" class="edge"><title>21->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1039.83,-146.527C1043.61,-145.672 1047.35,-144.824 1051,-144 1103.35,-132.163 1162.27,-118.875 1208.02,-108.565"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1208.88,-111.958 1217.87,-106.345 1207.34,-105.13 1208.88,-111.958"/> +</g> +<!-- 21->8 --> +<g id="edge25" class="edge"><title>21->8</title> +<path fill="none" stroke="grey" stroke-width="2" 
d="M928.298,-143.876C901.048,-134.094 866.703,-121.765 838.034,-111.474"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="839.142,-108.153 828.547,-108.068 836.777,-114.741 839.142,-108.153"/> +</g> +<!-- 21->9 --> +<g id="edge27" class="edge"><title>21->9</title> +<path fill="none" stroke="grey" stroke-width="2" d="M969.326,-143.697C966.285,-135.813 962.617,-126.304 959.239,-117.546"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="962.462,-116.175 955.597,-108.104 955.931,-118.694 962.462,-116.175"/> +</g> +<!-- 21->10 --> +<g id="edge29" class="edge"><title>21->10</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1011.72,-143.876C1031.5,-134.392 1056.27,-122.513 1077.33,-112.419"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1078.9,-115.548 1086.4,-108.068 1075.87,-109.236 1078.9,-115.548"/> +</g> +<!-- 22->7 --> +<g id="edge24" class="edge"><title>22->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1156.83,-143.876C1180.42,-134.222 1210.06,-122.087 1235,-111.878"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1236.38,-115.096 1244.3,-108.068 1233.72,-108.618 1236.38,-115.096"/> +</g> +<!-- 22->8 --> +<g id="edge26" class="edge"><title>22->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1059.24,-146.384C1055.78,-145.556 1052.35,-144.754 1049,-144 969.907,-126.177 947.224,-124.641 863.791,-108.136"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="864.358,-104.68 853.867,-106.159 862.991,-111.545 864.358,-104.68"/> +</g> +<!-- 22->9 --> +<g id="edge28" class="edge"><title>22->9</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1074.39,-143.876C1051.6,-134.264 1022.97,-122.193 998.83,-112.013"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1000.05,-108.729 989.476,-108.068 997.331,-115.179 1000.05,-108.729"/> +</g> +<!-- 22->10 --> +<g id="edge30" class="edge"><title>22->10</title> +<path fill="none" stroke="grey" 
stroke-width="2" d="M1116.73,-143.697C1117.5,-135.983 1118.43,-126.712 1119.29,-118.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1122.78,-118.403 1120.29,-108.104 1115.81,-117.706 1122.78,-118.403"/> +</g> +<!-- 23 --> +<g id="node24" class="node"><title>23</title> +<path fill="none" stroke="#a7d856" stroke-width="2" d="M975,-401C975,-401 707,-401 707,-401 701,-401 695,-395 695,-389 695,-389 695,-377 695,-377 695,-371 701,-365 707,-365 707,-365 975,-365 975,-365 981,-365 987,-371 987,-377 987,-377 987,-389 987,-389 987,-395 981,-401 975,-401"/> +<text text-anchor="middle" x="841" y="-386" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> +<text text-anchor="middle" x="841" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> +</g> +<!-- 23->13 --> +<g id="edge35" class="edge"><title>23->13</title> +<path fill="none" stroke="grey" stroke-width="2" d="M844.081,-364.819C845.586,-356.422 847.434,-346.116 849.125,-336.686"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="852.621,-337.019 850.941,-326.558 845.731,-335.783 852.621,-337.019"/> +</g> +<!-- 24 --> +<g id="node25" class="node"><title>24</title> +<path fill="none" stroke="#61d856" stroke-width="2" d="M1057.5,-326.5C1057.5,-326.5 956.5,-326.5 956.5,-326.5 950.5,-326.5 944.5,-320.5 944.5,-314.5 944.5,-314.5 944.5,-302.5 944.5,-302.5 944.5,-296.5 950.5,-290.5 956.5,-290.5 956.5,-290.5 1057.5,-290.5 1057.5,-290.5 1063.5,-290.5 1069.5,-296.5 1069.5,-302.5 1069.5,-302.5 1069.5,-314.5 1069.5,-314.5 1069.5,-320.5 1063.5,-326.5 1057.5,-326.5"/> +<text text-anchor="middle" x="1007" y="-311.5" font-family="sans" font-size="10.00">extract_transcriptome</text> +<text text-anchor="middle" x="1007" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> +</g> +<!-- 24->14 --> +<g id="edge36" class="edge"><title>24->14</title> +<path fill="none" stroke="grey" stroke-width="2" 
d="M960.788,-290.319C932.86,-279.935 897.084,-266.634 867.613,-255.677"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="868.473,-252.263 857.881,-252.058 866.034,-258.824 868.473,-252.263"/> +</g> +<!-- 27 --> +<g id="node28" class="node"><title>27</title> +<path fill="none" stroke="#566bd8" stroke-width="2" d="M1057.5,-252C1057.5,-252 972.5,-252 972.5,-252 966.5,-252 960.5,-246 960.5,-240 960.5,-240 960.5,-228 960.5,-228 960.5,-222 966.5,-216 972.5,-216 972.5,-216 1057.5,-216 1057.5,-216 1063.5,-216 1069.5,-222 1069.5,-228 1069.5,-228 1069.5,-240 1069.5,-240 1069.5,-246 1063.5,-252 1057.5,-252"/> +<text text-anchor="middle" x="1015" y="-237" font-family="sans" font-size="10.00">create_index_salmon</text> +<text text-anchor="middle" x="1015" y="-226" font-family="sans" font-size="10.00">kmer: 31</text> +</g> +<!-- 24->27 --> +<g id="edge48" class="edge"><title>24->27</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1008.9,-290.319C1009.82,-281.922 1010.96,-271.616 1012,-262.186"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1015.5,-262.382 1013.12,-252.058 1008.54,-261.614 1015.5,-262.382"/> +</g> +<!-- 25 --> +<g id="node26" class="node"><title>25</title> +<path fill="none" stroke="#bed856" stroke-width="2" d="M1297,-401C1297,-401 1017,-401 1017,-401 1011,-401 1005,-395 1005,-389 1005,-389 1005,-377 1005,-377 1005,-371 1011,-365 1017,-365 1017,-365 1297,-365 1297,-365 1303,-365 1309,-371 1309,-377 1309,-377 1309,-389 1309,-389 1309,-395 1303,-401 1297,-401"/> +<text text-anchor="middle" x="1157" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> +<text text-anchor="middle" x="1157" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> +</g> +<!-- 25->15 --> +<g id="edge37" class="edge"><title>25->15</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1156.29,-364.819C1155.94,-356.422 1155.52,-346.116 1155.12,-336.686"/> 
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1158.62,-336.405 1154.71,-326.558 1151.62,-336.694 1158.62,-336.405"/> +</g> +<!-- 26 --> +<g id="node27" class="node"><title>26</title> +<path fill="none" stroke="#5682d8" stroke-width="2" d="M1523.5,-329C1523.5,-329 1422.5,-329 1422.5,-329 1416.5,-329 1410.5,-323 1410.5,-317 1410.5,-317 1410.5,-300 1410.5,-300 1410.5,-294 1416.5,-288 1422.5,-288 1422.5,-288 1523.5,-288 1523.5,-288 1529.5,-288 1535.5,-294 1535.5,-300 1535.5,-300 1535.5,-317 1535.5,-317 1535.5,-323 1529.5,-329 1523.5,-329"/> +<text text-anchor="middle" x="1473" y="-317" font-family="sans" font-size="10.00">create_index_star</text> +<text text-anchor="middle" x="1473" y="-306" font-family="sans" font-size="10.00">index_size: 75</text> +<text text-anchor="middle" x="1473" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> +</g> +<!-- 26->16 --> +<g id="edge38" class="edge"><title>26->16</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1430.24,-287.88C1408.99,-278.127 1383.26,-266.323 1361.51,-256.343"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1362.7,-253.035 1352.15,-252.046 1359.78,-259.397 1362.7,-253.035"/> +</g> +<!-- 26->19 --> +<g id="edge41" class="edge"><title>26->19</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1516.03,-287.88C1537.51,-278.083 1563.54,-266.217 1585.49,-256.209"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1586.97,-259.379 1594.62,-252.046 1584.07,-253.01 1586.97,-259.379"/> +</g> +<!-- 27->21 --> +<g id="edge45" class="edge"><title>27->21</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1005.36,-215.697C1000.87,-207.644 995.441,-197.894 990.476,-188.982"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="993.454,-187.137 985.53,-180.104 987.339,-190.544 993.454,-187.137"/> +</g> +<!-- 27->22 --> +<g id="edge47" class="edge"><title>27->22</title> +<path fill="none" stroke="grey" stroke-width="2" 
d="M1039.46,-215.876C1052.3,-206.893 1068.2,-195.763 1082.09,-186.034"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1084.26,-188.792 1090.44,-180.19 1080.24,-183.057 1084.26,-188.792"/> </g> </g> </svg> diff --git a/images/rule_graph.svg b/images/rule_graph.svg index 5c845f59f06aa16328fa5144f3f22a427f401cc7..a2a0d009dcc914acf858cb6594e35f11f4f963cf 100644 --- a/images/rule_graph.svg +++ b/images/rule_graph.svg @@ -4,240 +4,310 @@ <!-- Generated by graphviz version 2.38.0 (20140413.2041) --> <!-- Title: snakemake_dag Pages: 1 --> -<svg width="940pt" height="404pt" - viewBox="0.00 0.00 939.50 404.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<svg width="1071pt" height="404pt" + viewBox="0.00 0.00 1070.98 404.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 400)"> <title>snakemake_dag</title> -<polygon fill="white" stroke="none" points="-4,4 -4,-400 935.5,-400 935.5,4 -4,4"/> +<polygon fill="white" stroke="none" points="-4,4 -4,-400 1066.98,-400 1066.98,4 -4,4"/> <!-- 0 --> <g id="node1" class="node"><title>0</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M296.5,-36C296.5,-36 266.5,-36 266.5,-36 260.5,-36 254.5,-30 254.5,-24 254.5,-24 254.5,-12 254.5,-12 254.5,-6 260.5,-0 266.5,-0 266.5,-0 296.5,-0 296.5,-0 302.5,-0 308.5,-6 308.5,-12 308.5,-12 308.5,-24 308.5,-24 308.5,-30 302.5,-36 296.5,-36"/> -<text text-anchor="middle" x="281.5" y="-15.5" font-family="sans" font-size="10.00">finish</text> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M304,-36C304,-36 274,-36 274,-36 268,-36 262,-30 262,-24 262,-24 262,-12 262,-12 262,-6 268,-0 274,-0 274,-0 304,-0 304,-0 310,-0 316,-6 316,-12 316,-12 316,-24 316,-24 316,-30 310,-36 304,-36"/> +<text text-anchor="middle" x="289" y="-15.5" font-family="sans" font-size="10.00">finish</text> </g> <!-- 1 --> <g id="node2" 
class="node"><title>1</title> -<path fill="none" stroke="#a7d856" stroke-width="2" d="M131,-108C131,-108 96,-108 96,-108 90,-108 84,-102 84,-96 84,-96 84,-84 84,-84 84,-78 90,-72 96,-72 96,-72 131,-72 131,-72 137,-72 143,-78 143,-84 143,-84 143,-96 143,-96 143,-102 137,-108 131,-108"/> -<text text-anchor="middle" x="113.5" y="-87.5" font-family="sans" font-size="10.00">pe_fastqc</text> +<path fill="none" stroke="#56d892" stroke-width="2" d="M85.5,-108C85.5,-108 50.5,-108 50.5,-108 44.5,-108 38.5,-102 38.5,-96 38.5,-96 38.5,-84 38.5,-84 38.5,-78 44.5,-72 50.5,-72 50.5,-72 85.5,-72 85.5,-72 91.5,-72 97.5,-78 97.5,-84 97.5,-84 97.5,-96 97.5,-96 97.5,-102 91.5,-108 85.5,-108"/> +<text text-anchor="middle" x="68" y="-87.5" font-family="sans" font-size="10.00">pe_fastqc</text> </g> <!-- 1->0 --> -<g id="edge5" class="edge"><title>1->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M143.282,-75.9405C146.391,-74.6005 149.507,-73.2669 152.5,-72 183.541,-58.86 218.97,-44.3377 244.835,-33.8224"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="246.302,-37.0044 254.25,-29.9997 243.668,-30.5186 246.302,-37.0044"/> +<g id="edge8" class="edge"><title>1->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M97.6525,-75.6092C100.786,-74.3338 103.944,-73.1059 107,-72 156.36,-54.1377 214.865,-37.9591 251.957,-28.3004"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="253.018,-31.6413 261.826,-25.753 251.268,-24.8635 253.018,-31.6413"/> </g> <!-- 2 --> <g id="node3" class="node"><title>2</title> -<path fill="none" stroke="#56c1d8" stroke-width="2" d="M203.5,-108C203.5,-108 173.5,-108 173.5,-108 167.5,-108 161.5,-102 161.5,-96 161.5,-96 161.5,-84 161.5,-84 161.5,-78 167.5,-72 173.5,-72 173.5,-72 203.5,-72 203.5,-72 209.5,-72 215.5,-78 215.5,-84 215.5,-84 215.5,-96 215.5,-96 215.5,-102 209.5,-108 203.5,-108"/> -<text text-anchor="middle" x="188.5" y="-87.5" font-family="sans" font-size="10.00">fastqc</text> +<path fill="none" 
stroke="#d8b456" stroke-width="2" d="M158,-108C158,-108 128,-108 128,-108 122,-108 116,-102 116,-96 116,-96 116,-84 116,-84 116,-78 122,-72 128,-72 128,-72 158,-72 158,-72 164,-72 170,-78 170,-84 170,-84 170,-96 170,-96 170,-102 164,-108 158,-108"/> +<text text-anchor="middle" x="143" y="-87.5" font-family="sans" font-size="10.00">fastqc</text> </g> <!-- 2->0 --> -<g id="edge6" class="edge"><title>2->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M211.489,-71.6966C223.314,-62.796 237.892,-51.8232 250.679,-42.1989"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="252.891,-44.9145 258.776,-36.1043 248.681,-39.3217 252.891,-44.9145"/> +<g id="edge7" class="edge"><title>2->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M170.119,-75.9976C193.328,-64.8701 226.942,-48.7539 252.509,-36.4956"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="254.026,-39.6497 261.53,-32.1703 251,-33.3377 254.026,-39.6497"/> </g> <!-- 3 --> <g id="node4" class="node"><title>3</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M115,-252C115,-252 12,-252 12,-252 6,-252 -7.10543e-15,-246 -7.10543e-15,-240 -7.10543e-15,-240 -7.10543e-15,-228 -7.10543e-15,-228 -7.10543e-15,-222 6,-216 12,-216 12,-216 115,-216 115,-216 121,-216 127,-222 127,-228 127,-228 127,-240 127,-240 127,-246 121,-252 115,-252"/> -<text text-anchor="middle" x="63.5" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> +<path fill="none" stroke="#56d8a2" stroke-width="2" d="M322,-252C322,-252 180,-252 180,-252 174,-252 168,-246 168,-240 168,-240 168,-228 168,-228 168,-222 174,-216 180,-216 180,-216 322,-216 322,-216 328,-216 334,-222 334,-228 334,-228 334,-240 334,-240 334,-246 328,-252 322,-252"/> +<text text-anchor="middle" x="251" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> </g> <!-- 3->0 --> -<g id="edge7" class="edge"><title>3->0</title> -<path fill="none" stroke="grey" 
stroke-width="2" d="M90.8692,-215.973C124.885,-193.92 183.454,-152.942 224.5,-108 241.966,-88.8761 257.5,-63.7838 267.923,-45.1186"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="271.035,-46.723 272.753,-36.2684 264.89,-43.3697 271.035,-46.723"/> +<g id="edge1" class="edge"><title>3->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M250.655,-215.826C250.553,-197.82 251.126,-168.755 255,-144 260.41,-109.434 271.8,-70.7187 279.905,-45.6614"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="283.24,-46.7217 283.043,-36.1287 276.591,-44.5326 283.24,-46.7217"/> </g> <!-- 4 --> <g id="node5" class="node"><title>4</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M244,-252C244,-252 157,-252 157,-252 151,-252 145,-246 145,-240 145,-240 145,-228 145,-228 145,-222 151,-216 157,-216 157,-216 244,-216 244,-216 250,-216 256,-222 256,-228 256,-228 256,-240 256,-240 256,-246 250,-252 244,-252"/> -<text text-anchor="middle" x="200.5" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text> +<path fill="none" stroke="#8fd856" stroke-width="2" d="M138,-252C138,-252 12,-252 12,-252 6,-252 1.42109e-14,-246 1.42109e-14,-240 1.42109e-14,-240 1.42109e-14,-228 1.42109e-14,-228 1.42109e-14,-222 6,-216 12,-216 12,-216 138,-216 138,-216 144,-216 150,-222 150,-228 150,-228 150,-240 150,-240 150,-246 144,-252 138,-252"/> +<text text-anchor="middle" x="75" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> </g> <!-- 4->0 --> -<g id="edge3" class="edge"><title>4->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M225.562,-215.933C237.162,-206.679 250.005,-194.227 257.5,-180 279.985,-137.319 283.155,-79.9722 282.771,-46.4954"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="286.261,-46.0283 282.527,-36.1132 279.263,-46.1926 286.261,-46.0283"/> +<g id="edge6" class="edge"><title>4->0</title> +<path fill="none" stroke="grey" stroke-width="2" 
d="M92.1508,-215.849C130.029,-177.971 220.923,-87.0767 264.668,-43.3318"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="267.171,-45.7784 271.768,-36.2325 262.222,-40.8287 267.171,-45.7784"/> </g> <!-- 5 --> <g id="node6" class="node"><title>5</title> -<path fill="none" stroke="#d6d856" stroke-width="2" d="M428.5,-252C428.5,-252 286.5,-252 286.5,-252 280.5,-252 274.5,-246 274.5,-240 274.5,-240 274.5,-228 274.5,-228 274.5,-222 280.5,-216 286.5,-216 286.5,-216 428.5,-216 428.5,-216 434.5,-216 440.5,-222 440.5,-228 440.5,-228 440.5,-240 440.5,-240 440.5,-246 434.5,-252 428.5,-252"/> -<text text-anchor="middle" x="357.5" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> +<path fill="none" stroke="#a7d856" stroke-width="2" d="M876,-108C876,-108 788,-108 788,-108 782,-108 776,-102 776,-96 776,-96 776,-84 776,-84 776,-78 782,-72 788,-72 788,-72 876,-72 876,-72 882,-72 888,-78 888,-84 888,-84 888,-96 888,-96 888,-102 882,-108 876,-108"/> +<text text-anchor="middle" x="832" y="-87.5" font-family="sans" font-size="10.00">calculate_TIN_scores</text> </g> <!-- 5->0 --> -<g id="edge4" class="edge"><title>5->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M351.409,-215.849C338.207,-178.675 306.871,-90.4388 291.025,-45.8204"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="294.265,-44.4845 287.62,-36.2325 287.668,-46.8272 294.265,-44.4845"/> +<g id="edge5" class="edge"><title>5->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M775.935,-81.7724C664.916,-67.4607 419.77,-35.8579 326.247,-23.8017"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="326.492,-20.3043 316.126,-22.4969 325.597,-27.2469 326.492,-20.3043"/> </g> <!-- 6 --> <g id="node7" class="node"><title>6</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M596.5,-252C596.5,-252 470.5,-252 470.5,-252 464.5,-252 458.5,-246 458.5,-240 458.5,-240 458.5,-228 458.5,-228 458.5,-222 464.5,-216 470.5,-216 
470.5,-216 596.5,-216 596.5,-216 602.5,-216 608.5,-222 608.5,-228 608.5,-228 608.5,-240 608.5,-240 608.5,-246 602.5,-252 596.5,-252"/> -<text text-anchor="middle" x="533.5" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> +<path fill="none" stroke="#56d8b9" stroke-width="2" d="M562,-180C562,-180 450,-180 450,-180 444,-180 438,-174 438,-168 438,-168 438,-156 438,-156 438,-150 444,-144 450,-144 450,-144 562,-144 562,-144 568,-144 574,-150 574,-156 574,-156 574,-168 574,-168 574,-174 568,-180 562,-180"/> +<text text-anchor="middle" x="506" y="-159.5" font-family="sans" font-size="10.00">salmon_quantmerge_genes</text> </g> <!-- 6->0 --> <g id="edge2" class="edge"><title>6->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M466.704,-215.888C430.604,-205.599 390.594,-192.18 376.5,-180 335.744,-144.78 350.057,-117.673 321.5,-72 315.633,-62.6164 308.541,-52.8174 301.968,-44.2341"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="304.545,-41.8464 295.633,-36.1164 299.026,-46.1532 304.545,-41.8464"/> +<path fill="none" stroke="grey" stroke-width="2" d="M479.825,-143.871C440.805,-118.338 367.323,-70.253 323.813,-41.7808"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="325.552,-38.7362 315.268,-36.1893 321.719,-44.5936 325.552,-38.7362"/> </g> <!-- 7 --> <g id="node8" class="node"><title>7</title> -<path fill="none" stroke="#d86e56" stroke-width="2" d="M617.5,-108C617.5,-108 529.5,-108 529.5,-108 523.5,-108 517.5,-102 517.5,-96 517.5,-96 517.5,-84 517.5,-84 517.5,-78 523.5,-72 529.5,-72 529.5,-72 617.5,-72 617.5,-72 623.5,-72 629.5,-78 629.5,-84 629.5,-84 629.5,-96 629.5,-96 629.5,-102 623.5,-108 617.5,-108"/> -<text text-anchor="middle" x="573.5" y="-87.5" font-family="sans" font-size="10.00">calculate_TIN_scores</text> +<path fill="none" stroke="#56d87b" stroke-width="2" d="M407.5,-180C407.5,-180 276.5,-180 276.5,-180 270.5,-180 264.5,-174 264.5,-168 264.5,-168 264.5,-156 264.5,-156 264.5,-150 
270.5,-144 276.5,-144 276.5,-144 407.5,-144 407.5,-144 413.5,-144 419.5,-150 419.5,-156 419.5,-156 419.5,-168 419.5,-168 419.5,-174 413.5,-180 407.5,-180"/> +<text text-anchor="middle" x="342" y="-159.5" font-family="sans" font-size="10.00">salmon_quantmerge_transcripts</text> </g> <!-- 7->0 --> -<g id="edge1" class="edge"><title>7->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M517.199,-75.5033C458.593,-61.4537 368.502,-39.8566 318.378,-27.8405"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="319.151,-24.4268 308.61,-25.4991 317.519,-31.2339 319.151,-24.4268"/> +<g id="edge4" class="edge"><title>7->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M335.607,-143.871C326.495,-119.457 309.687,-74.4258 298.941,-45.6351"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="302.192,-44.3341 295.416,-36.1893 295.633,-46.7819 302.192,-44.3341"/> </g> <!-- 8 --> <g id="node9" class="node"><title>8</title> -<path fill="none" stroke="#5673d8" stroke-width="2" d="M360.5,-324C360.5,-324 248.5,-324 248.5,-324 242.5,-324 236.5,-318 236.5,-312 236.5,-312 236.5,-300 236.5,-300 236.5,-294 242.5,-288 248.5,-288 248.5,-288 360.5,-288 360.5,-288 366.5,-288 372.5,-294 372.5,-300 372.5,-300 372.5,-312 372.5,-312 372.5,-318 366.5,-324 360.5,-324"/> -<text text-anchor="middle" x="304.5" y="-303.5" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> +<path fill="none" stroke="#d85656" stroke-width="2" d="M675.5,-108C675.5,-108 644.5,-108 644.5,-108 638.5,-108 632.5,-102 632.5,-96 632.5,-96 632.5,-84 632.5,-84 632.5,-78 638.5,-72 644.5,-72 644.5,-72 675.5,-72 675.5,-72 681.5,-72 687.5,-78 687.5,-84 687.5,-84 687.5,-96 687.5,-96 687.5,-102 681.5,-108 675.5,-108"/> +<text text-anchor="middle" x="660" y="-87.5" font-family="sans" font-size="10.00">star_rpm</text> </g> -<!-- 8->3 --> -<g id="edge9" class="edge"><title>8->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M245.853,-287.966C211.594,-278.015 
168.184,-265.406 132.314,-254.987"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="132.989,-251.539 122.41,-252.111 131.037,-258.261 132.989,-251.539"/> -</g> -<!-- 8->5 --> -<g id="edge13" class="edge"><title>8->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M317.601,-287.697C323.891,-279.389 331.547,-269.277 338.465,-260.141"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="341.304,-262.19 344.55,-252.104 335.723,-257.964 341.304,-262.19"/> -</g> -<!-- 17 --> -<g id="node18" class="node"><title>17</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M728,-252C728,-252 639,-252 639,-252 633,-252 627,-246 627,-240 627,-240 627,-228 627,-228 627,-222 633,-216 639,-216 639,-216 728,-216 728,-216 734,-216 740,-222 740,-228 740,-228 740,-240 740,-240 740,-246 734,-252 728,-252"/> -<text text-anchor="middle" x="683.5" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> -</g> -<!-- 8->17 --> -<g id="edge24" class="edge"><title>8->17</title> -<path fill="none" stroke="grey" stroke-width="2" d="M372.886,-289.657C375.793,-289.082 378.674,-288.527 381.5,-288 482.647,-269.141 511.799,-272.997 616.656,-252.052"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="617.597,-255.433 626.703,-250.017 616.207,-248.572 617.597,-255.433"/> +<!-- 8->0 --> +<g id="edge3" class="edge"><title>8->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M632.418,-83.7958C566.659,-71.3886 400.61,-40.0585 326.141,-26.0077"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="326.736,-22.5583 316.261,-24.1435 325.438,-29.437 326.736,-22.5583"/> </g> <!-- 9 --> <g id="node10" class="node"><title>9</title> -<path fill="none" stroke="#56d8a2" stroke-width="2" d="M156,-324C156,-324 71,-324 71,-324 65,-324 59,-318 59,-312 59,-312 59,-300 59,-300 59,-294 65,-288 71,-288 71,-288 156,-288 156,-288 162,-288 168,-294 168,-300 168,-300 168,-312 168,-312 168,-318 162,-324 156,-324"/> -<text 
text-anchor="middle" x="113.5" y="-303.5" font-family="sans" font-size="10.00">create_index_salmon</text> +<path fill="none" stroke="#56b1d8" stroke-width="2" d="M649,-324C649,-324 537,-324 537,-324 531,-324 525,-318 525,-312 525,-312 525,-300 525,-300 525,-294 531,-288 537,-288 537,-288 649,-288 649,-288 655,-288 661,-294 661,-300 661,-300 661,-312 661,-312 661,-318 655,-324 649,-324"/> +<text text-anchor="middle" x="593" y="-303.5" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> </g> <!-- 9->3 --> -<g id="edge8" class="edge"><title>9->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M101.14,-287.697C95.2672,-279.474 88.1309,-269.483 81.6576,-260.421"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="84.3779,-258.207 75.7174,-252.104 78.6817,-262.276 84.3779,-258.207"/> +<g id="edge9" class="edge"><title>9->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M524.947,-290.684C520.566,-289.772 516.222,-288.872 512,-288 456.398,-276.52 394.088,-263.859 344.297,-253.792"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="344.686,-250.3 334.191,-251.749 343.299,-257.161 344.686,-250.3"/> +</g> +<!-- 12 --> +<g id="node13" class="node"><title>12</title> +<path fill="none" stroke="#56d863" stroke-width="2" d="M870.5,-252C870.5,-252 781.5,-252 781.5,-252 775.5,-252 769.5,-246 769.5,-240 769.5,-240 769.5,-228 769.5,-228 769.5,-222 775.5,-216 781.5,-216 781.5,-216 870.5,-216 870.5,-216 876.5,-216 882.5,-222 882.5,-228 882.5,-228 882.5,-240 882.5,-240 882.5,-246 876.5,-252 870.5,-252"/> +<text text-anchor="middle" x="826" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> +</g> +<!-- 9->12 --> +<g id="edge27" class="edge"><title>9->12</title> +<path fill="none" stroke="grey" stroke-width="2" d="M649.7,-287.966C682.945,-277.978 725.103,-265.312 759.857,-254.871"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="760.88,-258.219 769.45,-251.989 758.865,-251.515 
760.88,-258.219"/> </g> -<!-- 9->4 --> -<g id="edge11" class="edge"><title>9->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M135.006,-287.697C145.963,-278.881 159.446,-268.032 171.325,-258.474"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="173.645,-261.1 179.242,-252.104 169.256,-255.646 173.645,-261.1"/> +<!-- 16 --> +<g id="node17" class="node"><title>16</title> +<path fill="none" stroke="#566bd8" stroke-width="2" d="M596.5,-252C596.5,-252 493.5,-252 493.5,-252 487.5,-252 481.5,-246 481.5,-240 481.5,-240 481.5,-228 481.5,-228 481.5,-222 487.5,-216 493.5,-216 493.5,-216 596.5,-216 596.5,-216 602.5,-216 608.5,-222 608.5,-228 608.5,-228 608.5,-240 608.5,-240 608.5,-246 602.5,-252 596.5,-252"/> +<text text-anchor="middle" x="545" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> +</g> +<!-- 9->16 --> +<g id="edge33" class="edge"><title>9->16</title> +<path fill="none" stroke="grey" stroke-width="2" d="M581.135,-287.697C575.497,-279.474 568.646,-269.483 562.431,-260.421"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="565.271,-258.372 556.729,-252.104 559.497,-262.331 565.271,-258.372"/> </g> <!-- 10 --> <g id="node11" class="node"><title>10</title> -<path fill="none" stroke="#56d8d8" stroke-width="2" d="M624.5,-324C624.5,-324 528.5,-324 528.5,-324 522.5,-324 516.5,-318 516.5,-312 516.5,-312 516.5,-300 516.5,-300 516.5,-294 522.5,-288 528.5,-288 528.5,-288 624.5,-288 624.5,-288 630.5,-288 636.5,-294 636.5,-300 636.5,-300 636.5,-312 636.5,-312 636.5,-318 630.5,-324 624.5,-324"/> -<text text-anchor="middle" x="576.5" y="-303.5" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> +<path fill="none" stroke="#d88556" stroke-width="2" d="M226,-324C226,-324 142,-324 142,-324 136,-324 130,-318 130,-312 130,-312 130,-300 130,-300 130,-294 136,-288 142,-288 142,-288 226,-288 226,-288 232,-288 238,-294 238,-300 238,-300 238,-312 238,-312 238,-318 232,-324 226,-324"/> +<text 
text-anchor="middle" x="184" y="-303.5" font-family="sans" font-size="10.00">create_index_kallisto</text> +</g> +<!-- 10->3 --> +<g id="edge10" class="edge"><title>10->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M200.562,-287.697C208.675,-279.22 218.588,-268.864 227.471,-259.583"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="230.243,-261.749 234.629,-252.104 225.186,-256.908 230.243,-261.749"/> </g> <!-- 10->4 --> -<g id="edge10" class="edge"><title>10->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M516.363,-289.831C513.372,-289.187 510.405,-288.573 507.5,-288 403.939,-267.586 373.787,-273.413 266.048,-251.975"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="266.572,-248.51 256.077,-249.958 265.184,-255.371 266.572,-248.51"/> +<g id="edge11" class="edge"><title>10->4</title> +<path fill="none" stroke="grey" stroke-width="2" d="M157.336,-287.876C143.216,-278.808 125.689,-267.552 110.439,-257.759"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="112.073,-254.648 101.767,-252.19 108.29,-260.538 112.073,-254.648"/> </g> -<!-- 10->6 --> -<g id="edge14" class="edge"><title>10->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M565.871,-287.697C560.872,-279.559 554.809,-269.689 549.288,-260.701"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="552.223,-258.793 544.007,-252.104 546.259,-262.457 552.223,-258.793"/> +<!-- 11 --> +<g id="node12" class="node"><title>11</title> +<path fill="none" stroke="#5682d8" stroke-width="2" d="M364,-324C364,-324 268,-324 268,-324 262,-324 256,-318 256,-312 256,-312 256,-300 256,-300 256,-294 262,-288 268,-288 268,-288 364,-288 364,-288 370,-288 376,-294 376,-300 376,-300 376,-312 376,-312 376,-318 370,-324 364,-324"/> +<text text-anchor="middle" x="316" y="-303.5" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> </g> -<!-- 18 --> -<g id="node19" class="node"><title>18</title> -<path fill="none" stroke="#70d856" 
stroke-width="2" d="M843,-252C843,-252 770,-252 770,-252 764,-252 758,-246 758,-240 758,-240 758,-228 758,-228 758,-222 764,-216 770,-216 770,-216 843,-216 843,-216 849,-216 855,-222 855,-228 855,-228 855,-240 855,-240 855,-246 849,-252 843,-252"/> -<text text-anchor="middle" x="806.5" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> +<!-- 11->4 --> +<g id="edge12" class="edge"><title>11->4</title> +<path fill="none" stroke="grey" stroke-width="2" d="M257.353,-287.966C223.094,-278.015 179.684,-265.406 143.814,-254.987"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="144.489,-251.539 133.91,-252.111 142.537,-258.261 144.489,-251.539"/> </g> -<!-- 10->18 --> -<g id="edge25" class="edge"><title>10->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M632.47,-287.966C667.475,-277.312 712.49,-263.612 747.968,-252.814"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="749.158,-256.111 757.705,-249.851 747.119,-249.414 749.158,-256.111"/> +<!-- 15 --> +<g id="node16" class="node"><title>15</title> +<path fill="none" stroke="#d86e56" stroke-width="2" d="M739.5,-252C739.5,-252 666.5,-252 666.5,-252 660.5,-252 654.5,-246 654.5,-240 654.5,-240 654.5,-228 654.5,-228 654.5,-222 660.5,-216 666.5,-216 666.5,-216 739.5,-216 739.5,-216 745.5,-216 751.5,-222 751.5,-228 751.5,-228 751.5,-240 751.5,-240 751.5,-246 745.5,-252 739.5,-252"/> +<text text-anchor="middle" x="703" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> </g> -<!-- 11 --> -<g id="node12" class="node"><title>11</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M486.5,-324C486.5,-324 402.5,-324 402.5,-324 396.5,-324 390.5,-318 390.5,-312 390.5,-312 390.5,-300 390.5,-300 390.5,-294 396.5,-288 402.5,-288 402.5,-288 486.5,-288 486.5,-288 492.5,-288 498.5,-294 498.5,-300 498.5,-300 498.5,-312 498.5,-312 498.5,-318 492.5,-324 486.5,-324"/> -<text text-anchor="middle" x="444.5" y="-303.5" font-family="sans" 
font-size="10.00">create_index_kallisto</text> +<!-- 11->15 --> +<g id="edge32" class="edge"><title>11->15</title> +<path fill="none" stroke="grey" stroke-width="2" d="M376.141,-289.854C379.131,-289.204 382.097,-288.582 385,-288 487.311,-267.499 514.24,-270.121 617,-252 625.916,-250.428 635.331,-248.668 644.533,-246.894"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="645.255,-250.319 654.401,-244.97 643.916,-243.448 645.255,-250.319"/> </g> -<!-- 11->5 --> -<g id="edge12" class="edge"><title>11->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M422.994,-287.697C412.037,-278.881 398.554,-268.032 386.675,-258.474"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="388.744,-255.646 378.758,-252.104 384.355,-261.1 388.744,-255.646"/> +<!-- 17 --> +<g id="node18" class="node"><title>17</title> +<path fill="none" stroke="#56d8d0" stroke-width="2" d="M451.5,-252C451.5,-252 364.5,-252 364.5,-252 358.5,-252 352.5,-246 352.5,-240 352.5,-240 352.5,-228 352.5,-228 352.5,-222 358.5,-216 364.5,-216 364.5,-216 451.5,-216 451.5,-216 457.5,-216 463.5,-222 463.5,-228 463.5,-228 463.5,-240 463.5,-240 463.5,-246 457.5,-252 451.5,-252"/> +<text text-anchor="middle" x="408" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text> </g> -<!-- 11->6 --> -<g id="edge15" class="edge"><title>11->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M466.5,-287.697C477.709,-278.881 491.502,-268.032 503.654,-258.474"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="506.057,-261.038 511.753,-252.104 501.729,-255.535 506.057,-261.038"/> +<!-- 11->17 --> +<g id="edge36" class="edge"><title>11->17</title> +<path fill="none" stroke="grey" stroke-width="2" d="M338.742,-287.697C350.44,-278.796 364.861,-267.823 377.51,-258.199"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="379.681,-260.945 385.52,-252.104 375.442,-255.374 379.681,-260.945"/> </g> -<!-- 12 --> -<g id="node13" 
class="node"><title>12</title> -<path fill="none" stroke="#d8a456" stroke-width="2" d="M563.5,-180C563.5,-180 397.5,-180 397.5,-180 391.5,-180 385.5,-174 385.5,-168 385.5,-168 385.5,-156 385.5,-156 385.5,-150 391.5,-144 397.5,-144 397.5,-144 563.5,-144 563.5,-144 569.5,-144 575.5,-150 575.5,-156 575.5,-156 575.5,-168 575.5,-168 575.5,-174 569.5,-180 563.5,-180"/> -<text text-anchor="middle" x="480.5" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> +<!-- 12->5 --> +<g id="edge13" class="edge"><title>12->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M882.701,-227.711C942.874,-220.787 1032.12,-206.314 1053,-180 1097.93,-123.369 978.505,-102.581 898.196,-95.0901"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="898.392,-91.5938 888.123,-94.2002 897.776,-98.5667 898.392,-91.5938"/> </g> -<!-- 12->7 --> -<g id="edge17" class="edge"><title>12->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M503.489,-143.697C515.314,-134.796 529.892,-123.823 542.679,-114.199"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="544.891,-116.915 550.776,-108.104 540.681,-111.322 544.891,-116.915"/> +<!-- 12->8 --> +<g id="edge21" class="edge"><title>12->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M828.376,-215.846C830.096,-196.225 829.723,-164.213 813,-144 784.703,-109.797 733.1,-97.6237 697.71,-93.3162"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="698.055,-89.8333 687.741,-92.257 697.316,-96.7942 698.055,-89.8333"/> </g> <!-- 13 --> <g id="node14" class="node"><title>13</title> -<path fill="none" stroke="#56a2d8" stroke-width="2" d="M727,-180C727,-180 606,-180 606,-180 600,-180 594,-174 594,-168 594,-168 594,-156 594,-156 594,-150 600,-144 606,-144 606,-144 727,-144 727,-144 733,-144 739,-150 739,-156 739,-156 739,-168 739,-168 739,-174 733,-180 727,-180"/> -<text text-anchor="middle" x="666.5" y="-159.5" font-family="sans" 
font-size="10.00">extract_transcripts_as_bed12</text> +<path fill="none" stroke="#569ad8" stroke-width="2" d="M792,-180C792,-180 642,-180 642,-180 636,-180 630,-174 630,-168 630,-168 630,-156 630,-156 630,-150 636,-144 642,-144 642,-144 792,-144 792,-144 798,-144 804,-150 804,-156 804,-156 804,-168 804,-168 804,-174 798,-180 792,-180"/> +<text text-anchor="middle" x="717" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> +</g> +<!-- 12->13 --> +<g id="edge29" class="edge"><title>12->13</title> +<path fill="none" stroke="grey" stroke-width="2" d="M799.336,-215.876C785.216,-206.808 767.689,-195.552 752.439,-185.759"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="754.073,-182.648 743.767,-180.19 750.29,-188.538 754.073,-182.648"/> +</g> +<!-- 13->5 --> +<g id="edge15" class="edge"><title>13->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M745.132,-143.876C760.168,-134.724 778.867,-123.342 795.06,-113.485"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="797.038,-116.379 803.76,-108.19 793.398,-110.4 797.038,-116.379"/> </g> -<!-- 13->7 --> -<g id="edge18" class="edge"><title>13->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M643.511,-143.697C631.686,-134.796 617.108,-123.823 604.321,-114.199"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="606.319,-111.322 596.224,-108.104 602.109,-116.915 606.319,-111.322"/> +<!-- 13->8 --> +<g id="edge23" class="edge"><title>13->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M702.91,-143.697C696.077,-135.305 687.743,-125.07 680.244,-115.861"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="682.956,-113.649 673.928,-108.104 677.528,-118.069 682.956,-113.649"/> </g> <!-- 14 --> <g id="node15" class="node"><title>14</title> -<path fill="none" stroke="#88d856" stroke-width="2" d="M919.5,-180C919.5,-180 769.5,-180 769.5,-180 763.5,-180 757.5,-174 757.5,-168 757.5,-168 757.5,-156 757.5,-156 
757.5,-150 763.5,-144 769.5,-144 769.5,-144 919.5,-144 919.5,-144 925.5,-144 931.5,-150 931.5,-156 931.5,-156 931.5,-168 931.5,-168 931.5,-174 925.5,-180 919.5,-180"/> -<text text-anchor="middle" x="844.5" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> +<path fill="none" stroke="#d89c56" stroke-width="2" d="M1031.5,-180C1031.5,-180 910.5,-180 910.5,-180 904.5,-180 898.5,-174 898.5,-168 898.5,-168 898.5,-156 898.5,-156 898.5,-150 904.5,-144 910.5,-144 910.5,-144 1031.5,-144 1031.5,-144 1037.5,-144 1043.5,-150 1043.5,-156 1043.5,-156 1043.5,-168 1043.5,-168 1043.5,-174 1037.5,-180 1031.5,-180"/> +<text text-anchor="middle" x="971" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text> </g> -<!-- 14->7 --> -<g id="edge16" class="edge"><title>14->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M778.552,-143.966C736.344,-133.063 681.783,-118.97 639.554,-108.062"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="640.206,-104.615 629.648,-105.503 638.455,-111.393 640.206,-104.615"/> +<!-- 14->5 --> +<g id="edge16" class="edge"><title>14->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M936.997,-143.876C918.402,-134.512 895.173,-122.814 875.294,-112.803"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="876.64,-109.562 866.134,-108.19 873.491,-115.814 876.64,-109.562"/> </g> -<!-- 15 --> -<g id="node16" class="node"><title>15</title> -<path fill="none" stroke="#56d88a" stroke-width="2" d="M367.5,-396C367.5,-396 241.5,-396 241.5,-396 235.5,-396 229.5,-390 229.5,-384 229.5,-384 229.5,-372 229.5,-372 229.5,-366 235.5,-360 241.5,-360 241.5,-360 367.5,-360 367.5,-360 373.5,-360 379.5,-366 379.5,-372 379.5,-372 379.5,-384 379.5,-384 379.5,-390 373.5,-396 367.5,-396"/> -<text text-anchor="middle" x="304.5" y="-375.5" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> +<!-- 15->5 --> +<g id="edge14" class="edge"><title>15->5</title> 
+<path fill="none" stroke="grey" stroke-width="2" d="M748.156,-215.971C774.497,-205.165 804.224,-191.167 813,-180 826.692,-162.577 831.084,-137.531 832.257,-118.444"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="835.761,-118.387 832.619,-108.269 828.766,-118.138 835.761,-118.387"/> </g> <!-- 15->8 --> -<g id="edge19" class="edge"><title>15->8</title> -<path fill="none" stroke="grey" stroke-width="2" d="M304.5,-359.697C304.5,-351.983 304.5,-342.712 304.5,-334.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="308,-334.104 304.5,-324.104 301,-334.104 308,-334.104"/> -</g> -<!-- 16 --> -<g id="node17" class="node"><title>16</title> -<path fill="none" stroke="#bed856" stroke-width="2" d="M631.5,-396C631.5,-396 521.5,-396 521.5,-396 515.5,-396 509.5,-390 509.5,-384 509.5,-384 509.5,-372 509.5,-372 509.5,-366 515.5,-360 521.5,-360 521.5,-360 631.5,-360 631.5,-360 637.5,-360 643.5,-366 643.5,-372 643.5,-372 643.5,-384 643.5,-384 643.5,-390 637.5,-396 631.5,-396"/> -<text text-anchor="middle" x="576.5" y="-375.5" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> -</g> -<!-- 16->10 --> -<g id="edge20" class="edge"><title>16->10</title> -<path fill="none" stroke="grey" stroke-width="2" d="M576.5,-359.697C576.5,-351.983 576.5,-342.712 576.5,-334.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="580,-334.104 576.5,-324.104 573,-334.104 580,-334.104"/> +<g id="edge22" class="edge"><title>15->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M660.382,-215.989C645.126,-207.58 629.591,-195.781 621,-180 609.599,-159.056 622.679,-134.107 636.68,-115.961"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="639.458,-118.093 643.094,-108.141 634.046,-113.654 639.458,-118.093"/> +</g> +<!-- 15->13 --> +<g id="edge30" class="edge"><title>15->13</title> +<path fill="none" stroke="grey" stroke-width="2" d="M706.461,-215.697C708.003,-207.983 709.858,-198.712 711.578,-190.112"/> +<polygon 
fill="grey" stroke="grey" stroke-width="2" points="715.05,-190.597 713.579,-180.104 708.186,-189.224 715.05,-190.597"/> +</g> +<!-- 16->6 --> +<g id="edge17" class="edge"><title>16->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M535.36,-215.697C530.873,-207.644 525.441,-197.894 520.476,-188.982"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="523.454,-187.137 515.53,-180.104 517.339,-190.544 523.454,-187.137"/> +</g> +<!-- 16->7 --> +<g id="edge19" class="edge"><title>16->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M495.6,-215.966C467.113,-206.142 431.114,-193.729 401.129,-183.389"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="402.216,-180.062 391.621,-180.111 399.934,-186.68 402.216,-180.062"/> +</g> +<!-- 17->6 --> +<g id="edge18" class="edge"><title>17->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M431.973,-215.876C444.55,-206.893 460.132,-195.763 473.752,-186.034"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="475.831,-188.85 481.934,-180.19 471.763,-183.154 475.831,-188.85"/> +</g> +<!-- 17->7 --> +<g id="edge20" class="edge"><title>17->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M391.685,-215.697C383.693,-207.22 373.928,-196.864 365.178,-187.583"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="367.534,-184.979 358.127,-180.104 362.441,-189.781 367.534,-184.979"/> </g> -<!-- 17->12 --> -<g id="edge21" class="edge"><title>17->12</title> -<path fill="none" stroke="grey" stroke-width="2" d="M634.1,-215.966C605.613,-206.142 569.614,-193.729 539.629,-183.389"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="540.716,-180.062 530.121,-180.111 538.434,-186.68 540.716,-180.062"/> +<!-- 18 --> +<g id="node19" class="node"><title>18</title> +<path fill="none" stroke="#d6d856" stroke-width="2" d="M658,-396C658,-396 532,-396 532,-396 526,-396 520,-390 520,-384 520,-384 520,-372 520,-372 520,-366 526,-360 532,-360 532,-360 
658,-360 658,-360 664,-360 670,-366 670,-372 670,-372 670,-384 670,-384 670,-390 664,-396 658,-396"/> +<text text-anchor="middle" x="595" y="-375.5" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> </g> -<!-- 18->14 --> -<g id="edge22" class="edge"><title>18->14</title> -<path fill="none" stroke="grey" stroke-width="2" d="M815.893,-215.697C820.265,-207.644 825.557,-197.894 830.395,-188.982"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="833.52,-190.563 835.215,-180.104 827.368,-187.223 833.52,-190.563"/> +<!-- 18->9 --> +<g id="edge24" class="edge"><title>18->9</title> +<path fill="none" stroke="grey" stroke-width="2" d="M594.506,-359.697C594.285,-351.983 594.02,-342.712 593.775,-334.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="597.273,-334 593.489,-324.104 590.276,-334.2 597.273,-334"/> </g> <!-- 19 --> <g id="node20" class="node"><title>19</title> -<path fill="none" stroke="#56d873" stroke-width="2" d="M779.5,-324C779.5,-324 709.5,-324 709.5,-324 703.5,-324 697.5,-318 697.5,-312 697.5,-312 697.5,-300 697.5,-300 697.5,-294 703.5,-288 709.5,-288 709.5,-288 779.5,-288 779.5,-288 785.5,-288 791.5,-294 791.5,-300 791.5,-300 791.5,-312 791.5,-312 791.5,-318 785.5,-324 779.5,-324"/> -<text text-anchor="middle" x="744.5" y="-303.5" font-family="sans" font-size="10.00">create_index_star</text> -</g> -<!-- 19->17 --> -<g id="edge23" class="edge"><title>19->17</title> -<path fill="none" stroke="grey" stroke-width="2" d="M729.421,-287.697C722.108,-279.305 713.19,-269.07 705.165,-259.861"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="707.614,-257.344 698.405,-252.104 702.336,-261.943 707.614,-257.344"/> -</g> -<!-- 19->18 --> -<g id="edge26" class="edge"><title>19->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M759.826,-287.697C767.259,-279.305 776.323,-269.07 784.48,-259.861"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="787.34,-261.911 791.35,-252.104 
782.1,-257.27 787.34,-261.911"/> +<path fill="none" stroke="#d8c356" stroke-width="2" d="M490.5,-396C490.5,-396 403.5,-396 403.5,-396 397.5,-396 391.5,-390 391.5,-384 391.5,-384 391.5,-372 391.5,-372 391.5,-366 397.5,-360 403.5,-360 403.5,-360 490.5,-360 490.5,-360 496.5,-360 502.5,-366 502.5,-372 502.5,-372 502.5,-384 502.5,-384 502.5,-390 496.5,-396 490.5,-396"/> +<text text-anchor="middle" x="447" y="-375.5" font-family="sans" font-size="10.00">extract_transcriptome</text> +</g> +<!-- 19->10 --> +<g id="edge25" class="edge"><title>19->10</title> +<path fill="none" stroke="grey" stroke-width="2" d="M391.375,-362.331C353.51,-352.33 301.991,-338.689 248.035,-324.272"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="248.857,-320.869 238.292,-321.667 247.049,-327.631 248.857,-320.869"/> +</g> +<!-- 22 --> +<g id="node23" class="node"><title>22</title> +<path fill="none" stroke="#61d856" stroke-width="2" d="M491.5,-324C491.5,-324 406.5,-324 406.5,-324 400.5,-324 394.5,-318 394.5,-312 394.5,-312 394.5,-300 394.5,-300 394.5,-294 400.5,-288 406.5,-288 406.5,-288 491.5,-288 491.5,-288 497.5,-288 503.5,-294 503.5,-300 503.5,-300 503.5,-312 503.5,-312 503.5,-318 497.5,-324 491.5,-324"/> +<text text-anchor="middle" x="449" y="-303.5" font-family="sans" font-size="10.00">create_index_salmon</text> +</g> +<!-- 19->22 --> +<g id="edge37" class="edge"><title>19->22</title> +<path fill="none" stroke="grey" stroke-width="2" d="M447.494,-359.697C447.715,-351.983 447.98,-342.712 448.225,-334.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="451.724,-334.2 448.511,-324.104 444.727,-334 451.724,-334.2"/> +</g> +<!-- 20 --> +<g id="node21" class="node"><title>20</title> +<path fill="none" stroke="#bed856" stroke-width="2" d="M361,-396C361,-396 251,-396 251,-396 245,-396 239,-390 239,-384 239,-384 239,-372 239,-372 239,-366 245,-360 251,-360 251,-360 361,-360 361,-360 367,-360 373,-366 373,-372 373,-372 373,-384 373,-384 373,-390 367,-396 361,-396"/> 
+<text text-anchor="middle" x="306" y="-375.5" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> +</g> +<!-- 20->11 --> +<g id="edge26" class="edge"><title>20->11</title> +<path fill="none" stroke="grey" stroke-width="2" d="M308.472,-359.697C309.574,-351.983 310.898,-342.712 312.127,-334.112"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="315.607,-334.499 313.557,-324.104 308.677,-333.509 315.607,-334.499"/> +</g> +<!-- 21 --> +<g id="node22" class="node"><title>21</title> +<path fill="none" stroke="#78d856" stroke-width="2" d="M819,-324C819,-324 749,-324 749,-324 743,-324 737,-318 737,-312 737,-312 737,-300 737,-300 737,-294 743,-288 749,-288 749,-288 819,-288 819,-288 825,-288 831,-294 831,-300 831,-300 831,-312 831,-312 831,-318 825,-324 819,-324"/> +<text text-anchor="middle" x="784" y="-303.5" font-family="sans" font-size="10.00">create_index_star</text> +</g> +<!-- 21->12 --> +<g id="edge28" class="edge"><title>21->12</title> +<path fill="none" stroke="grey" stroke-width="2" d="M794.382,-287.697C799.265,-279.559 805.187,-269.689 810.579,-260.701"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="813.594,-262.48 815.737,-252.104 807.591,-258.879 813.594,-262.48"/> +</g> +<!-- 21->15 --> +<g id="edge31" class="edge"><title>21->15</title> +<path fill="none" stroke="grey" stroke-width="2" d="M763.978,-287.697C753.874,-278.965 741.464,-268.24 730.482,-258.75"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="732.647,-255.995 722.792,-252.104 728.07,-261.291 732.647,-255.995"/> +</g> +<!-- 22->16 --> +<g id="edge34" class="edge"><title>22->16</title> +<path fill="none" stroke="grey" stroke-width="2" d="M472.73,-287.697C484.937,-278.796 499.985,-267.823 513.184,-258.199"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="515.525,-260.824 521.543,-252.104 511.4,-255.168 515.525,-260.824"/> +</g> +<!-- 22->17 --> +<g id="edge35" class="edge"><title>22->17</title> +<path fill="none" stroke="grey" 
stroke-width="2" d="M438.865,-287.697C434.148,-279.644 428.438,-269.894 423.218,-260.982"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="426.092,-258.964 418.018,-252.104 420.052,-262.502 426.092,-258.964"/> </g> </g> </svg> diff --git a/install/environment.dev.yml b/install/environment.dev.yml index 54f012ec84b04ff7a9546e778c38e4bddf44e9fa..ea44dfbcfb7f02409682d231014e528c2845cb83 100644 --- a/install/environment.dev.yml +++ b/install/environment.dev.yml @@ -8,6 +8,7 @@ dependencies: - unzip=6.0 - pip=20.0.2 - pip: + - pandas==1.0.1 - biopython==1.76 - labkey==1.2.0 diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index 86ecf78426204b25cf557faac2906a2c7c05d0d3..defefe3d57d86fd7dc2c96f0bb2002b1031127b7 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -1,14 +1,11 @@ #!/usr/bin/env python3 -## ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # Author : Katsantoni Maria, Christina Herrmann # Company: Mihaela Zavolan, Biozentrum, Basel -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- # This script is part of the Zavolan lab quantification pipeline, which is used -# for analysing RNA-seq data. The table is provided by labkey and is a csv file. -# If the user provides their own table the table should contain the following +# for analysing RNA-seq data. The table is provided by labkey as a csv file. 
+# If the user provides their own table the table should contain the following # columns: # ----------------------------------------------------------------------------- @@ -24,117 +21,95 @@ from Bio import SeqIO from io import StringIO from csv import writer from pathlib import Path -# for convenience, load QueryFilter explicitly (avoids long lines in filter definitions) +# (avoids long lines in filter definitions) from labkey.query import QueryFilter -# ---------------------------------------------------------------------------------------------------------------------- -def main(): - """ Preprocess sample folder and create config file for snakemake""" - - __doc__ = "Preprocess of the table and create config file." - - parser = ArgumentParser( - description=__doc__, - formatter_class=RawTextHelpFormatter) - parser.add_argument( - "genomes_path", - help="Path containing the FASTA and GTF files for all organisms", - metavar="GENOMES PATH" - ) - - parser.add_argument( - "--input-table", - type=str, - default=None, - help=( - "Input table in LabKey format containing the sample information;" - "\nexactly one of '--input-table' and '--remote' is required." - ), - metavar="FILE", - ) - - parser.add_argument( - "--remote", - action="store_true", - help=( - "Fetch LabKey table via API; exactly one of '--input-table' and" - "\n'--remote' is required." - ), - ) - - parser.add_argument( - "--project-name", - help=( - "Name of LabKey project containing table '--table-name'; required" - "\nif '--remote' is specified." 
- ), - metavar="STR", - ) - parser.add_argument( - "--table-name", - help="Name of LabKey table; required if '--remote' is specified.", - metavar="STR", - ) - parser.add_argument( - "--input-dict", - help=( - "Input dictionary containing the feature name conversion from \n" - "LabKey to Snakemake; default: '%(default)s'" - ), - default=os.path.join( - os.path.dirname(__file__), - 'labkey_to_snakemake.dict.tsv' - ), - metavar="FILE" - ) - - parser.add_argument( - "--samples-table", - help="Output table compatible to snakemake; default: '%(default)s'", - default='samples.tsv', - metavar="FILE" - ) - - parser.add_argument( - "--multimappers", - type=int, - default=100, - help="Number of allowed multimappers", - metavar='INT', - ) - - parser.add_argument( - "--soft-clip", - choices=['EndToEnd','Local'], - default='EndToEnd', - help="Soft-clipping option for STAR", - ) - - parser.add_argument( - "--pass-mode", - choices=['None','Basic'], - default='None', - help="2-pass mode option for STAR", - ) - - parser.add_argument( - "--libtype", - default='A', - help="Library type for salmon", - metavar="STR", - ) - - parser.add_argument( - "--config-file", - help="Configuration file to be used by Snakemake", - ) +def main(): + """ Preprocess sample folder and create config file for snakemake""" + __doc__ = "Preprocess of labkey table and create " + \ + "config file and sample table." 
+ + parser = ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) + + parser.add_argument("genomes_path", + help="Path containing the FASTA and GTF " + + " files for all organisms", + metavar="GENOMES PATH") + + parser.add_argument("--input-table", + type=str, + default=None, + help="Input table in LabKey format " + + "containing the sample information;" + + "\nexactly one '--input-table' and " + + "'--remote' is required.", + metavar="FILE") + + parser.add_argument("--remote", + action="store_true", + help="Fetch LabKey table via API; exactly one of " + + "'--input-table' and" + + "\n'--remote' is required.") + + parser.add_argument("--project-name", + help="Name of LabKey project containing table " + + " '--table-name'; required" + + "\nif '--remote' is specified.", + metavar="STR") + + parser.add_argument("--table-name", + help="Name of LabKey table; required if '--remote'" + + " is specified.", + metavar="STR") + + parser.add_argument("--input-dict", + help="Input dictionary containing the feature name " + + "conversion from LabKey to Snakemake;" + + "default: '%(default)s'", + default=os.path.join( + os.path.dirname(__file__), + 'labkey_to_snakemake.dict.tsv'), + metavar="FILE") + + parser.add_argument("--samples-table", + help="Output table compatible to snakemake;" + + "default: '%(default)s'", + default='samples.tsv', + metavar="FILE") + + parser.add_argument("--trim_polya", + type=int, + choices=[True, False], + default=True, + help="Trim poly-As option") + + parser.add_argument("--multimappers", + type=int, + default=100, + help="Number of allowed multimappers", + metavar='INT') + + parser.add_argument("--soft-clip", + choices=['EndToEnd', 'Local'], + default='EndToEnd', + help="Soft-clipping option for STAR") + + parser.add_argument("--pass-mode", + choices=['None', 'Basic'], + default='None', + help="2-pass mode option for STAR") + + parser.add_argument("--libtype", + default='A', + help="Library type for salmon", + metavar="STR") 
+ + parser.add_argument("--config-file", + help="Configuration file to be used by Snakemake") - # __________________________________________________________________________________________________________________ - # ------------------------------------------------------------------------------------------------------------------ - # get the arguments - # ------------------------------------------------------------------------------------------------------------------ try: options = parser.parse_args() except(Exception): @@ -146,27 +121,34 @@ def main(): if options.remote and options.input_table: parser.print_help() - print("\n[ERROR] Options '--input-table' and '--remote' are mutually exclusive.") + print( + "\n[ERROR] Options '--input-table' and ", + "'--remote' are mutually exclusive.") sys.exit(1) if not options.remote and not options.input_table: parser.print_help() - print("\n[ERROR] At least one of '--input-table' and '--remote' is required.") + print("\n[ERROR] At least one of '--input-table' ", + "and '--remote' is required.") sys.exit(1) if options.remote and not options.project_name: parser.print_help() - print("\n[ERROR] If option '--remote' is specified, option '--project-name' is required.") + print( + "\n[ERROR] If option '--remote' is specified, ", + "option '--project-name' is required.") sys.exit(1) if options.remote and not options.table_name: parser.print_help() - print("\n[ERROR] If option '--remote' is specified, option '--table-name' is required.") + print( + "\n[ERROR] If option '--remote' is specified, ", + "option '--table-name' is required.") sys.exit(1) sys.stdout.write('Reading input file...\n') - if options.remote == True: + if options.remote is True: input_table = api_fetch_labkey_table( project_name=options.project_name, query_name=options.table_name) @@ -191,8 +173,11 @@ def main(): input_dict.set_index('snakemake', inplace=True, drop=True) sys.stdout.write('Create snakemake table...\n') snakemake_table = pd.DataFrame() + for index, 
row in input_table.iterrows(): - snakemake_table.loc[index, 'sample'] = row[input_dict.loc['replicate_name', 'labkey']] + "_" + row[input_dict.loc['condition', 'labkey']] + snakemake_table.loc[index, 'sample'] = row[ + input_dict.loc['replicate_name', 'labkey']] + "_" + row[ + input_dict.loc['condition', 'labkey']] if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': snakemake_table.loc[index, 'seqmode'] = 'paired_end' elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE': @@ -203,32 +188,16 @@ def main(): row[input_dict.loc['fq1', 'labkey']]) snakemake_table.loc[index, 'fq1'] = fq1 - - with gzip.open(fq1, "rt") as handle: - for record in SeqIO.parse(handle, "fastq"): - read_length = len(record.seq) - break - + read_length = get_read_length(fq1) snakemake_table.loc[index, 'index_size'] = read_length - if read_length <= 50: - snakemake_table.loc[index, 'kmer'] = 21 - elif read_length > 50: - snakemake_table.loc[index, 'kmer'] = 31 - - - if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - snakemake_table.loc[index, 'fq2'] = os.path.join( - row[input_dict.loc['fastq_path', 'labkey']], - row[input_dict.loc['fq2', 'labkey']]) - - snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']] - snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']] - - if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']] - snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']] - - organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower() + snakemake_table.loc[index, 'kmer'] = infer_kmer_length(read_length) + snakemake_table.loc[index, 'fq1_3p'] = row[ + input_dict.loc['fq1_3p', 'labkey']] + snakemake_table.loc[index, 'fq1_5p'] = row[ + input_dict.loc['fq1_5p', 'labkey']] + + organism = row[input_dict.loc['organism', 'labkey']].replace( + ' ', '_').lower() snakemake_table.loc[index, 'organism'] = organism 
snakemake_table.loc[index, 'gtf'] = os.path.join( @@ -251,39 +220,35 @@ def main(): organism, 'transcriptome.fa') - snakemake_table.loc[index, 'sd'] = row[input_dict.loc['sd', 'labkey']] - snakemake_table.loc[index, 'mean'] = row[input_dict.loc['mean', 'labkey']] + snakemake_table.loc[index, 'sd'] = row[ + input_dict.loc['sd', 'labkey']] + snakemake_table.loc[index, 'mean'] = row[ + input_dict.loc['mean', 'labkey']] snakemake_table.loc[index, 'multimappers'] = options.multimappers snakemake_table.loc[index, 'soft_clip'] = options.soft_clip snakemake_table.loc[index, 'pass_mode'] = options.pass_mode snakemake_table.loc[index, 'libtype'] = options.libtype - - if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--fr' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--rf' - else: - snakemake_table.loc[index, 'kallisto_directionality'] = '' - - if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'fq1_polya'] = 'TTTTTTTTTTTTTTTTT' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'RANDOM': - snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA' - else: - pass + if options.trim_polya is True: + snakemake_table.loc[index, 'fq1_polya'] = trim_polya( + row[input_dict.loc['mate1_direction', 'labkey']]) + snakemake_table.loc[index, 'kallisto_directionality'] = \ + get_kallisto_directionality( + row[input_dict.loc['mate1_direction', 'labkey']]) if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 
'fq2_polya'] = 'TTTTTTTTTTTTTTTTT' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - else: - pass + fq2 = os.path.join( + row[input_dict.loc['fastq_path', 'labkey']], + row[input_dict.loc['fq2', 'labkey']]) + snakemake_table.loc[index, 'fq2'] = fq2 + + snakemake_table.loc[index, 'fq2_3p'] = row[ + input_dict.loc['fq2_3p', 'labkey']] + snakemake_table.loc[index, 'fq2_5p'] = row[ + input_dict.loc['fq2_5p', 'labkey']] + if options.trim_polya is True: + snakemake_table.loc[index, 'fq2_polya'] = trim_polya( + row[input_dict.loc['mate2_direction', 'labkey']]) snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table = snakemake_table.astype( @@ -301,11 +266,10 @@ def main(): header=True, index=False) - # Read file and infer read size for sjdbovwerhang with open(options.config_file, 'w') as config_file: config_file.write('''--- - samples: "'''+ options.samples_table + '''" + samples: "''' + options.samples_table + '''" output_dir: "results/" log_dir: "logs/" kallisto_indexes: "results/kallisto_indexes/" @@ -319,19 +283,54 @@ def main(): sys.stdout.write('Create config file finished successfully...\n') return + def api_fetch_labkey_table(project_name=None, query_name=None): - group_path = os.path.join( '/Zavolan Group', project_name) - server_context = labkey.utils.create_server_context('labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True) + group_path = os.path.join('/Zavolan Group', project_name) + server_context = labkey.utils.create_server_context( + 'labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True) schema_name = "lists" results = labkey.query.select_rows(server_context, schema_name, query_name) input_table = pd.DataFrame(results["rows"]) return input_table -# _____________________________________________________________________________ -# ----------------------------------------------------------------------------- -# Call the Main function and 
catch Keyboard interrups -# ----------------------------------------------------------------------------- +def get_read_length(filename): + with gzip.open(filename, "rt") as handle: + for record in SeqIO.parse(handle, "fastq"): + read_length = len(record.seq) + break + return read_length + + +def infer_kmer_length(read_length): + if read_length <= 50: + kmer = 21 + elif read_length > 50: + kmer = 31 + return kmer + + +def get_kallisto_directionality(directionality): + if directionality == 'SENSE': + final_direction = '--fr' + elif directionality == 'ANTISENSE': + final_direction = '--rf' + else: + final_direction = '' + return final_direction + + +def trim_polya(sense): + if sense == 'SENSE': + polya = 'AAAAAAAAAAAAAAAAA' + elif sense == 'ANTISENSE': + polya = 'TTTTTTTTTTTTTTTTT' + elif sense == 'RANDOM': + polya = 'AAAAAAAAAAAAAAAAA' + else: + polya = 'XXXXXXXXXXXXXXXXX' + return polya + if __name__ == '__main__': try: @@ -339,5 +338,3 @@ if __name__ == '__main__': except KeyboardInterrupt: sys.stderr.write("User interrupt!" 
+ os.linesep) sys.exit(0) - - diff --git a/tests/test_integration_workflow/expected_output.files b/tests/test_integration_workflow/expected_output.files index 0273b8fa1c99a20c1b1759b6ba703e5a6bbb3078..490153d374d390bcdc670c095e32acabe11c29f0 100644 --- a/tests/test_integration_workflow/expected_output.files +++ b/tests/test_integration_workflow/expected_output.files @@ -1,12 +1,7 @@ results/kallisto_indexes/homo_sapiens/kallisto.idx -results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv -results/salmon_indexes/homo_sapiens/31/salmon.idx/hash.bin -results/salmon_indexes/homo_sapiens/31/salmon.idx/header.json -results/salmon_indexes/homo_sapiens/31/salmon.idx/refInfo.json -results/salmon_indexes/homo_sapiens/31/salmon.idx/rsd.bin -results/salmon_indexes/homo_sapiens/31/salmon.idx/sa.bin -results/salmon_indexes/homo_sapiens/31/salmon.idx/txpInfo.bin results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json +results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv +results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt @@ -78,21 +73,9 @@ results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kal results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/pseudoalignments.bam results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.kallisto.pseudo.sam results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/lib_format_counts.json -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/quant.genes.sf -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/quant.sf 
results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/ambig_info.tsv results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/expected_bias results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias_3p results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/unmapped_names.txt -results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg -results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg -results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg -results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg -results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_Signal.Unique.str2.out.bg -results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1.ALFA_feature_counts.tsv - 
+results/transcriptome/homo_sapiens/transcriptome.fa diff --git a/tests/test_integration_workflow/expected_output.md5 b/tests/test_integration_workflow/expected_output.md5 index 99a384222c8b634367ed49aa4957ca044a12e241..f4909963da52d740b87c4c5b0689bb0c5a56fc91 100644 --- a/tests/test_integration_workflow/expected_output.md5 +++ b/tests/test_integration_workflow/expected_output.md5 @@ -1,35 +1,30 @@ cbaebdb67aee4784b64aff7fec9fda42 results/kallisto_indexes/homo_sapiens/kallisto.idx -15f17d12eb9b908605bd6dbb1e9ea5c5 results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv -5dd9a2314563aa72fc0498ff173c578a results/salmon_indexes/homo_sapiens/31/salmon.idx/hash.bin -45588b6e8acf09e507f2aa5883e411d5 results/salmon_indexes/homo_sapiens/31/salmon.idx/header.json -c13cbadc90309dcede958d26c1f7c747 results/salmon_indexes/homo_sapiens/31/salmon.idx/refInfo.json -5dec8cab99b7cccd6a341cf85eb5b978 results/salmon_indexes/homo_sapiens/31/salmon.idx/rsd.bin -2a5dddf1ff309c1287f0b88f1631c0f3 results/salmon_indexes/homo_sapiens/31/salmon.idx/sa.bin -cd72748424a7d75dc8b2bdc342eb04b5 results/salmon_indexes/homo_sapiens/31/salmon.idx/txpInfo.bin -3d2de5424aae670242a9ca02c0f4c48c results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json +0ac1afd9a4f380afd70be75b21814c64 results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json +51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv +4c1ab7841bbd1a1e8e3b15e7750ecc38 results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json dee7cdc194d5d0617552b7a3b5ad8dfb results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt 8e2e96e2d6b7f29940ad5de40662b7cb results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt d0826904b8afa45352906ad9591f2bfb results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt 8d3291e6bcdbe9902fbd7c887494173f results/star_indexes/homo_sapiens/75/STAR_index/chrStart.txt 83ea3c15ab782b5c55bfaefda8e7aad8 
results/star_indexes/homo_sapiens/75/STAR_index/exonGeTrInfo.tab bad9d837f9a988694cc7080ee6d2997a results/star_indexes/homo_sapiens/75/STAR_index/exonInfo.tab -97e90abd8df5b91bd0b0f94dbeb66fbd results/star_indexes/homo_sapiens/75/STAR_index/geneInfo.tab +0c0b013fb8cbb8f3cb7a7bf92f3b1544 results/star_indexes/homo_sapiens/75/STAR_index/geneInfo.tab 00dda17b3c3983873d1474e9a758d6e6 results/star_indexes/homo_sapiens/75/STAR_index/Genome c0d91c3af633d9439bfd0160d11efe4d results/star_indexes/homo_sapiens/75/STAR_index/SA 27884e419e42a7c8b3b2f49543de0260 results/star_indexes/homo_sapiens/75/STAR_index/SAindex bae93882f9148a6c55816b733c32a3a2 results/star_indexes/homo_sapiens/75/STAR_index/sjdbInfo.txt -ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.fromGTF.out.tab +875030141343fca11f0b5aa1a37e1b66 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.fromGTF.out.tab ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.out.tab -59c4082523c0279740fa0452871dea4b results/star_indexes/homo_sapiens/75/STAR_index/transcriptInfo.tab +65e794aa5096551254af18a678d02264 results/star_indexes/homo_sapiens/75/STAR_index/transcriptInfo.tab 500dd49da40b16799aba62aa5cf239ba results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_adapters_mate2.fastq 500dd49da40b16799aba62aa5cf239ba results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate1.fastq e90e31db1ce51d930645eb74ff70d21b results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.remove_polya_mate2.fastq d41d8cd98f00b204e9800998ecf8427e 
results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired_SJ.out.tab -69042457ef7ec3f401153e8503408ad4 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt +6c5d2ffd046e24384a7557aa9be0fdfd results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt c0df759ceab72ea4b1a560f991fe6497 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/fastqc.fo a7530faae728593900da23fca4bea97a results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/summary.txt -fe6c1c9343d91f5413c8319ccd72b5d5 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/adapter_content.png +310130cbb8bbb6517f37ea0ff6586d43 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/adapter_content.png 42741852cc110a151580bb3bb5180fc0 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/duplication_levels.png 8b34217d5fd931966d9007a658570e67 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_base_n_content.png 848396c145d2157f34bbf86757f51abe results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_base_quality.png @@ -38,10 +33,10 @@ e4c1a39967ec9547a2e4c71c97982ee0 results/paired_end/synthetic_10_reads_paired_s 69b70e3f561b749bf10b186dd2480a8a results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_sequence_quality.png b28aac49f537b8cba364b6422458ad28 
results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_tile_quality.png 5b950b5dfe3c7407e9aac153db330a38 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate1_fastqc/synthetic.mate_1_fastqc/Images/sequence_length_distribution.png -9273c891e47c90fed47554fe8d1e706c results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/fastqc_data.txt +81ffdfa5e3d8b1e8984f15359b6306f7 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/fastqc_data.txt 5d406428979b59abf760b9be8b1877e2 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/fastqc.fo 706b6812d0313b6858e80a4e6aff453e results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/summary.txt -fe6c1c9343d91f5413c8319ccd72b5d5 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/adapter_content.png +310130cbb8bbb6517f37ea0ff6586d43 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/adapter_content.png 42741852cc110a151580bb3bb5180fc0 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/duplication_levels.png 8b34217d5fd931966d9007a658570e67 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_base_n_content.png 848396c145d2157f34bbf86757f51abe results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/mate2_fastqc/synthetic.mate_2_fastqc/Images/per_base_quality.png @@ -53,8 +48,8 @@ b28aac49f537b8cba364b6422458ad28 results/paired_end/synthetic_10_reads_paired_s 5e07e870d516a91647808bd84068d829 
results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv 6180a904511292b0f173794ae98af991 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam d41d8cd98f00b204e9800998ecf8427e results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.kallisto.pseudo.sam -2987d73b246aacce29f64110e36fdeea results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/lib_format_counts.json -8b03c43b8241d50bce5ac642564779e1 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/aux_info/ambig_info.tsv +c77480e0235761f2d7f80dbceb2e2806 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/lib_format_counts.json +989d6ee63b728fced9ec0249735ab83d results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/aux_info/ambig_info.tsv 3407f87245d0003e0ffbfdf6d8c04f20 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/aux_info/expected_bias 92bcd0592d22a6a58d0360fc76103e56 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/aux_info/observed_bias 92bcd0592d22a6a58d0360fc76103e56 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/aux_info/observed_bias_3p @@ -62,10 +57,10 @@ d41d8cd98f00b204e9800998ecf8427e results/paired_end/synthetic_10_reads_paired_s 12ac6d56ed50ab74ce16a4d618612847 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.remove_adapters_mate1.fastq 12ac6d56ed50ab74ce16a4d618612847 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.remove_polya_mate1.fastq d41d8cd98f00b204e9800998ecf8427e 
results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_SJ.out.tab -69042457ef7ec3f401153e8503408ad4 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt +6c5d2ffd046e24384a7557aa9be0fdfd results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/fastqc_data.txt c0df759ceab72ea4b1a560f991fe6497 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/fastqc.fo a7530faae728593900da23fca4bea97a results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/summary.txt -fe6c1c9343d91f5413c8319ccd72b5d5 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/Images/adapter_content.png +310130cbb8bbb6517f37ea0ff6586d43 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/Images/adapter_content.png 42741852cc110a151580bb3bb5180fc0 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/Images/duplication_levels.png 8b34217d5fd931966d9007a658570e67 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_base_n_content.png 848396c145d2157f34bbf86757f51abe results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/mate1_fastqc/synthetic.mate_1_fastqc/Images/per_base_quality.png @@ -77,22 +72,18 @@ b28aac49f537b8cba364b6422458ad28 results/single_end/synthetic_10_reads_mate_1_s 50a9b89a9f1da2c438cb0041b64faa0e results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/abundance.tsv 3a727fbf59b74a85e1738b0eb3404a73 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/pseudoalignments.bam 
d41d8cd98f00b204e9800998ecf8427e results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.kallisto.pseudo.sam -efe791920c3109d3bfd3dfd3dd6f1cbd results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/lib_format_counts.json -644256e10c1cfff4fd67b5e7be60742a results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/quant.genes.sf -50ef8217ce367740e40dd040d6907573 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/quant.sf -8b03c43b8241d50bce5ac642564779e1 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/ambig_info.tsv +e72f5d798c99272f8c0166dc77247db1 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/lib_format_counts.json +989d6ee63b728fced9ec0249735ab83d results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/ambig_info.tsv 3407f87245d0003e0ffbfdf6d8c04f20 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/expected_bias 92bcd0592d22a6a58d0360fc76103e56 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias 92bcd0592d22a6a58d0360fc76103e56 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/observed_bias_3p d41d8cd98f00b204e9800998ecf8427e results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/aux_info/unmapped_names.txt -0139e75ddbfe6eb081c2c2d9b9108ab4 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg -c266d31e0a2ad84975cb9de335891e64 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg 
-0139e75ddbfe6eb081c2c2d9b9108ab4 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg -c266d31e0a2ad84975cb9de335891e64 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg -c1254a0bae19ac3ffc39f73099ffcf2b results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv -ea91b4f85622561158bff2f7c9c312b3 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg -ede14ac41c10067838f375106fce4852 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg -ea91b4f85622561158bff2f7c9c312b3 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg -ede14ac41c10067838f375106fce4852 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str2.out.bg -a9fdb9b135132dda339b85346525c9c5 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv - +0139e75ddbfe6eb081c2c2d9b9108ab4 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str1.out.bg +c266d31e0a2ad84975cb9de335891e64 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.UniqueMultiple.str2.out.bg +0139e75ddbfe6eb081c2c2d9b9108ab4 
results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str1.out.bg +c266d31e0a2ad84975cb9de335891e64 results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/STAR_coverage/synthetic_10_reads_paired_synthetic_10_reads_paired_Signal.Unique.str2.out.bg +ea91b4f85622561158bff2f7c9c312b3 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str1.out.bg +ede14ac41c10067838f375106fce4852 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.UniqueMultiple.str2.out.bg +ea91b4f85622561158bff2f7c9c312b3 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str1.out.bg +ede14ac41c10067838f375106fce4852 results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/STAR_coverage/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Signal.Unique.str2.out.bg +3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa diff --git a/tests/test_integration_workflow/test.local.sh b/tests/test_integration_workflow/test.local.sh index 43b5fb6945f4662fbc51ef6139765a61ff514b02..3a853b7b8c409f707a8828b16175b0e5b6bc22ad 100755 --- a/tests/test_integration_workflow/test.local.sh +++ b/tests/test_integration_workflow/test.local.sh @@ -3,6 +3,7 @@ # Tear down test environment cleanup () { rc=$? 
+ rm -rf .fontconfig/ rm -rf .java/ rm -rf .snakemake/ rm -rf logs/ diff --git a/tests/test_integration_workflow/test.slurm.sh b/tests/test_integration_workflow/test.slurm.sh index 00aacd131ccff4c3cc1896a2db91a852c57ea841..f67878a809a4b71dc56ce2c40fc28153c0947faf 100755 --- a/tests/test_integration_workflow/test.slurm.sh +++ b/tests/test_integration_workflow/test.slurm.sh @@ -3,6 +3,7 @@ # Tear down test environment cleanup () { rc=$? + rm -rf .fontconfig/ rm -rf .java/ rm -rf .snakemake/ rm -rf logs/ diff --git a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 index 47819e5202968cbfed961633829dd1b3fbfe800a..c24f6019ddacafb5bd6334f1f6dcfcc50f74b689 100644 --- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 @@ -1,2 +1,2 @@ -b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -9aece9e4acb17143b5e8f627968e03a5 samples.tsv +ba5ae0649d1fb82d94f8d19481498ffd config.yaml +cb58e046242c2702038e6e21dbd0bdb4 samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 index 47819e5202968cbfed961633829dd1b3fbfe800a..c24f6019ddacafb5bd6334f1f6dcfcc50f74b689 100644 --- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 @@ -1,2 +1,2 @@ -b163e7b06bd9e0a71f2fd1fc4935fea9 config.yaml -9aece9e4acb17143b5e8f627968e03a5 samples.tsv +ba5ae0649d1fb82d94f8d19481498ffd config.yaml +cb58e046242c2702038e6e21dbd0bdb4 samples.tsv diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 146ec035580dc5f5c1058be964ce8ba4858b059a..027e34478265d911cc2f8f08e8f12819f31087a4 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -1,286 +1,349 @@ rule pe_fastqc: - '''A quality 
control tool for high throughput sequence data''' - - input: - reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"], - reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"] - output: - outdir1 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate1_fastqc")), - outdir2 = directory(os.path.join(config["output_dir"],"paired_end", "{sample}", "mate2_fastqc")) - threads: - 2 - singularity: - "docker://zavolab/fastqc:0.11.8" - log: - os.path.join(config["log_dir"],"paired_end", "{sample}", "fastqc.log") - shell: - "(mkdir -p {output.outdir1}; \ - mkdir -p {output.outdir2}; \ - fastqc --outdir {output.outdir1} {input.reads1} & \ - fastqc --outdir {output.outdir2} {input.reads2}) &> {log}" + ''' + A quality control tool for high throughput sequence data + ''' + input: + reads1 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1"], + reads2 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq2"] + + output: + outdir1 = directory(os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "mate1_fastqc")), + outdir2 = directory(os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "mate2_fastqc")) + + threads: 2 + + singularity: + "docker://zavolab/fastqc:0.11.9-slim" + + log: + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "fastqc.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "fastqc.stdout.log") + + shell: + "(mkdir -p {output.outdir1}; \ + mkdir -p {output.outdir2}; \ + fastqc --outdir {output.outdir1} {input.reads1}; \ + fastqc --outdir {output.outdir2} {input.reads2}) \ + 1> {log.stdout} 2> {log.stderr}" rule pe_remove_adapters_cutadapt: - '''Remove adapters''' - input: - reads1 = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"], - reads2 = lambda wildcards: samples_table.loc[wildcards.sample, "fq2"] - output: - reads1 = os.path.join( - config["output_dir"], - "paired_end", -
"{sample}", - "{sample}.remove_adapters_mate1.fastq.gz"), - - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_adapters_mate2.fastq.gz") - params: - adapter_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_3p'], - adapter_5_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_5p'], - adapter_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_3p'], - adapter_5_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_5p'] - singularity: - "docker://zavolab/cutadapt:1.16" - threads: 8 - log: - os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_adapters_cutadapt.log") - shell: - "(cutadapt \ - -e 0.1 \ - -j {threads} \ - --pair-filter=both \ - -m 10 \ - -n 3 \ - -a {params.adapter_3_mate1} \ - -g {params.adapter_5_mate1} \ - -A {params.adapter_3_mate2} \ - -G {params.adapter_5_mate2} \ - -o {output.reads1} \ - -p {output.reads2} \ - {input.reads1} \ - {input.reads2}) &> {log}" + ''' + Remove adapters + ''' + input: + reads1 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1"], + reads2 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq2"] + + output: + reads1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_adapters_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_adapters_mate2.fastq.gz") + + params: + adapter_3_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_3p'], + adapter_5_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_5p'], + adapter_3_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_3p'], + adapter_5_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_5p'] + + singularity: + "docker://zavolab/cutadapt:1.16-slim" + + threads: 8 + + log: + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", +
"remove_adapters_cutadapt.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "remove_adapters_cutadapt.stdout.log") + + shell: + "(cutadapt \ + -e 0.1 \ + -j {threads} \ + --pair-filter=both \ + -m 10 \ + -n 3 \ + -a {params.adapter_3_mate1} \ + -g {params.adapter_5_mate1} \ + -A {params.adapter_3_mate2} \ + -G {params.adapter_5_mate2} \ + -o {output.reads1} \ + -p {output.reads2} \ + {input.reads1} \ + {input.reads2}) \ + 1> {log.stdout} 2>{log.stderr}" rule pe_remove_polya_cutadapt: - '''Remove polyA tails''' - input: - reads1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_adapters_mate1.fastq.gz"), - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_adapters_mate2.fastq.gz") - output: - reads1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate2.fastq.gz") - params: - polya_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya'], - polya_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya'], - singularity: - "docker://zavolab/cutadapt:1.16" - threads: 8 - log: - os.path.join( config["log_dir"], "paired_end", "{sample}", "remove_polya_cutadapt.log") - shell: - '(cutadapt \ - --match-read-wildcards \ - -j {threads} \ - --pair-filter=both \ - -m 10 \ - -n 2 \ - -e 0.1 \ - -q 6 \ - -m 10 \ - -a {params.polya_3_mate1} \ - -A {params.polya_3_mate2} \ - -o {output.reads1} \ - -p {output.reads2} \ - {input.reads1} \ - {input.reads2}) &> {log}' + ''' + Remove polyA tails + ''' + input: + reads1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_adapters_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_adapters_mate2.fastq.gz")
+ + output: + reads1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate2.fastq.gz") + + params: + polya_3_mate1 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_polya'], + polya_3_mate2 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq2_polya'] + + singularity: + "docker://zavolab/cutadapt:1.16-slim" + + threads: 8 + + log: + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "remove_polya_cutadapt.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "remove_polya_cutadapt.stdout.log") + + shell: + "(cutadapt \ + --match-read-wildcards \ + -j {threads} \ + --pair-filter=both \ + -m 10 \ + -n 2 \ + -e 0.1 \ + -q 6 \ + -m 10 \ + -a {params.polya_3_mate1} \ + -A {params.polya_3_mate2} \ + -o {output.reads1} \ + -p {output.reads2} \ + {input.reads1} \ + {input.reads2};) \ + 1> {log.stdout} 2>{log.stderr}" rule pe_map_genome_star: - '''Map to genome using STAR''' - input: - index = lambda wildcards: - os.path.join( - config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), - "STAR_index", - "chrNameLength.txt"), - reads1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate2.fastq.gz") - output: - bam = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam"), - logfile = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_Log.final.out") - params: - sample_id = "{sample}", - index = lambda wildcards: - os.path.join( - config["star_indexes"], -
str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), - "STAR_index"), - outFileNamePrefix = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_"), - multimappers = lambda wildcards: - str(samples_table.loc[wildcards.sample, "multimappers"]), - soft_clip = lambda wildcards: - samples_table.loc[wildcards.sample, "soft_clip"], - pass_mode = lambda wildcards: - samples_table.loc[wildcards.sample, "pass_mode"] - - singularity: - "docker://zavolab/star:2.6.0a" - - threads: 12 - - log: - os.path.join( config["log_dir"], "paired_end", "{sample}", "map_genome_star.log") - - shell: - "(STAR \ - --runMode alignReads \ - --twopassMode {params.pass_mode} \ - --runThreadN {threads} \ - --genomeDir {params.index} \ - --readFilesIn {input.reads1} {input.reads2} \ - --readFilesCommand zcat \ - --outSAMunmapped None \ - --outFilterMultimapNmax {params.multimappers} \ - --outFilterMultimapScoreRange 1 \ - --outFileNamePrefix {params.outFileNamePrefix} \ - --outSAMattributes All \ - --outStd BAM_SortedByCoordinate \ - --outSAMtype BAM SortedByCoordinate \ - --outFilterMismatchNoverLmax 0.04 \ - --outFilterScoreMinOverLread 0.3 \ - --outFilterMatchNminOverLread 0.3 \ - --outFilterType BySJout \ - --outReadsUnmapped None \ - --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \ - --alignEndsType {params.soft_clip} > {output.bam};) &> {log}" - - -rule pe_index_genomic_alignment_samtools: - '''Index the genomic alignment''' + ''' + Map to genome using STAR + ''' input: - bam = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam"), + index = lambda wildcards: + os.path.join( + config["star_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "index_size"]), + "STAR_index", + "chrNameLength.txt"), + reads1 = os.path.join( + config["output_dir"], + 
"paired_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate2.fastq.gz") + output: - bai = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam.bai"), + bam = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam"), + logfile = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "map_genome", + "{sample}_Log.final.out") + + params: + sample_id = "{sample}", + index = lambda wildcards: + os.path.join( + config["star_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "index_size"]), + "STAR_index"), + outFileNamePrefix = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "map_genome", + "{sample}_"), + multimappers = lambda wildcards: + str(samples_table.loc[wildcards.sample, "multimappers"]), + soft_clip = lambda wildcards: + samples_table.loc[wildcards.sample, "soft_clip"], + pass_mode = lambda wildcards: + samples_table.loc[wildcards.sample, "pass_mode"] + singularity: - "docker://zavolab/samtools:1.8" + "docker://zavolab/star:2.7.3a-slim" + + threads: 12 + log: - os.path.join( config["log_dir"], "paired_end", "{sample}", "index_genomic_alignment_samtools.log") + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "map_genome_star.stderr.log") shell: - "(samtools index {input.bam} {output.bai};) &> {log}" + "(STAR \ + --runMode alignReads \ + --twopassMode {params.pass_mode} \ + --runThreadN {threads} \ + --genomeDir {params.index} \ + --readFilesIn {input.reads1} {input.reads2} \ + --readFilesCommand zcat \ + --outSAMunmapped None \ + --outFilterMultimapNmax {params.multimappers} \ + --outFilterMultimapScoreRange 1 \ + --outFileNamePrefix {params.outFileNamePrefix} \ + --outSAMattributes 
All \ + --outStd BAM_SortedByCoordinate \ + --outSAMtype BAM SortedByCoordinate \ + --outFilterMismatchNoverLmax 0.04 \ + --outFilterScoreMinOverLread 0.3 \ + --outFilterMatchNminOverLread 0.3 \ + --outFilterType BySJout \ + --outReadsUnmapped None \ + --outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \ + --alignEndsType {params.soft_clip} > {output.bam};) \ + 2> {log.stderr}" rule pe_quantification_salmon: - '''Quantification at transcript and gene level using Salmon''' - input: - reads1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate2.fastq.gz"), - gtf = lambda wildcards: - samples_table.loc[wildcards.sample, 'gtf_filtered'], - index = lambda wildcards: - os.path.join( - config["salmon_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "kmer"]), - "salmon.idx") - output: - gn_estimates = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "salmon_quant", - "quant.genes.sf"), - tr_estimates = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "salmon_quant", - "quant.sf") - params: - output_dir = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "salmon_quant"), - libType = lambda wildcards: - samples_table.loc[wildcards.sample, 'libtype'] - log: - os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_salmon.log") - threads: 6 - singularity: - "docker://zavolab/salmon:0.11.0" - shell: - "(salmon quant \ + ''' + Quantification at transcript and gene level using Salmon + ''' + input: + reads1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate2.fastq.gz"), + gtf = lambda 
wildcards: + samples_table.loc[wildcards.sample, 'gtf_filtered'], + index = lambda wildcards: + os.path.join( + config["salmon_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "kmer"]), + "salmon.idx") + + output: + gn_estimates = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "salmon_quant", + "quant.genes.sf"), + tr_estimates = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "salmon_quant", + "quant.sf") + + params: + output_dir = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "salmon_quant"), + libType = lambda wildcards: + samples_table.loc[wildcards.sample, 'libtype'] + + log: + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "genome_quantification_salmon.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "genome_quantification_salmon.stdout.log"), + + threads: 6 + + singularity: + "docker://zavolab/salmon:1.1.0-slim" + + shell: + "(salmon quant \ --libType {params.libType} \ --seqBias \ --validateMappings \ @@ -290,182 +353,128 @@ rule pe_quantification_salmon: --geneMap {input.gtf} \ -1 {input.reads1} \ -2 {input.reads2} \ - -o {params.output_dir}) &> {log}" + -o {params.output_dir}) 1> {log.stdout} 2> {log.stderr}" rule pe_genome_quantification_kallisto: - '''Quantification at transcript and gene level using Kallisto''' - input: - reads1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - reads2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "{sample}.remove_polya_mate2.fastq.gz"), - index = lambda wildcards: - os.path.join( - config["kallisto_indexes"], - samples_table.loc[wildcards.sample, 'organism'], - "kallisto.idx") - output: - pseudoalignment = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "quant_kallisto", - "{sample}.kallisto.pseudo.sam") - 
params: - output_dir = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "quant_kallisto"), - directionality = lambda wildcards: - samples_table.loc[wildcards.sample, "kallisto_directionality"] - singularity: - "docker://zavolab/kallisto:0.46.1" - threads: 8 - log: - os.path.join(config["log_dir"], "paired_end", "{sample}", "genome_quantification_kallisto.log") - shell: - "(kallisto quant \ - -i {input.index} \ - -o {params.output_dir} \ - --pseudobam \ - {params.directionality} \ - {input.reads1} {input.reads2} > {output.pseudoalignment}) &> {log}" - -rule star_rpm_paired_end: - ''' Create stranded bedgraph coverage with STARs RPM normalisation ''' - input: - bam = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam") - output: - str1 = (os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.Unique.str1.out.bg"), - os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str1.out.bg")), - str2 = (os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.Unique.str2.out.bg"), - os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str2.out.bg")) - params: - out_dir = directory(os.path.join(config["output_dir"], - "paired_end", - "{sample}", - "ALFA")), - prefix = os.path.join(config["output_dir"], - "paired_end", - "{sample}", - "ALFA", "{sample}_"), - stranded = "Stranded" - singularity: - "docker://zavolab/star:2.6.0a" - log: os.path.join(config["log_dir"], "paired_end", "{sample}", "star_rpm_paired_end.log") - threads: 4 - shell: - """ - (mkdir -p {params.out_dir}; \ - chmod -R 777 {params.out_dir}; \ - STAR \ - --runMode inputAlignmentsFromBAM \ - --runThreadN {threads} \ - --inputBAMfile {input.bam} \ - --outWigType "bedGraph" \ - --outWigStrand "{params.stranded}" \ - 
--outWigNorm "RPM" \ - --outFileNamePrefix {params.prefix}) &> {log} - """ + ''' + Quantification at transcript and gene level using Kallisto + ''' + input: + reads1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + reads2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "{sample}.remove_polya_mate2.fastq.gz"), + index = lambda wildcards: + os.path.join( + config["kallisto_indexes"], + samples_table.loc[wildcards.sample, 'organism'], + "kallisto.idx") -rule alfa_bg_paired_end: - ''' Run ALFA from stranded bedgraph files ''' - input: - str1 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str1.out.bg"), - str2 = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str2.out.bg"), - gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") - output: - biotypes = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "ALFA_plots.Biotypes.pdf"), - categories = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "ALFA_plots.Categories.pdf"), - table = os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA", - "{sample}.ALFA_feature_counts.tsv") - params: - out_dir = directory(os.path.join( - config["output_dir"], - "paired_end", - "{sample}", - "ALFA")), - orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], - in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), - rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], - in_file_str2 = lambda wildcards, input: 
os.path.basename(input.str2), - rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], - genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), - name = "{sample}" - singularity: - "docker://zavolab/alfa:1.1.1" - log: os.path.abspath(os.path.join(config["log_dir"], "paired_end", "{sample}", "alfa_bg_paired_end.log")) - shell: - """ - cd {params.out_dir}; \ - cp {params.in_file_str1} {params.rename_str1}; \ - cp {params.in_file_str2} {params.rename_str2}; \ - (alfa -g {params.genome_index} \ - --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ - -s {params.orientation}) &> {log}; \ - rm {params.rename_str1} {params.rename_str2} - """ + output: + pseudoalignment = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "quant_kallisto", + "{sample}.kallisto.pseudo.sam") + params: + output_dir = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "quant_kallisto"), + directionality = lambda wildcards: + samples_table.loc[wildcards.sample, "kallisto_directionality"] + singularity: + "docker://zavolab/kallisto:0.46.1-slim" + threads: 8 + log: + stderr = os.path.join( + config["log_dir"], + "paired_end", + "{sample}", + "genome_quantification_kallisto.stderr.log") + + shell: + "(kallisto quant \ + -i {input.index} \ + -o {params.output_dir} \ + --pseudobam \ + {params.directionality} \ + {input.reads1} {input.reads2} > {output.pseudoalignment}) \ + 2> {log.stderr}" +rule alfa_bg_paired_end: + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: 
os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "ALFA_plots.Categories.pdf"), + table = os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA", + "{sample}.ALFA_feature_counts.tsv") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "paired_end", + "{sample}", + "ALFA")), + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], + in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), + rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], + in_file_str2 = lambda wildcards, input: os.path.basename(input.str2), + rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], + genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), + name = "{sample}" + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["log_dir"], "paired_end", "{sample}", "alfa_bg_paired_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} + """ \ No newline at end of file diff --git a/workflow/rules/single_end.snakefile.smk 
b/workflow/rules/single_end.snakefile.smk index 7a9f072611bc2cb491726cf880a5583587594df7..07b6eaa98b9e6f89423f4de70eecc68734674bf2 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -1,392 +1,436 @@ import os + rule fastqc: - ''' A quality control tool for high throughput sequence data. ''' - input: - reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"], - output: - outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "mate1_fastqc")) - params: - seqmode= lambda wildcards: samples_table.loc[wildcards.sample, "seqmode"] - singularity: - "docker://zavolab/fastqc:0.11.8" - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "fastqc.log") - shell: - "(mkdir -p {output.outdir}; \ - fastqc \ - --outdir {output.outdir} \ - {input.reads}) &> {log}" + ''' + A quality control tool for high throughput sequence data. + ''' + input: + reads = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1"] + + output: + outdir = directory(os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "mate1_fastqc")) + + params: + seqmode = lambda wildcards: + samples_table.loc[wildcards.sample, "seqmode"] + + singularity: + "docker://zavolab/fastqc:0.11.9-slim" + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "fastqc.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "fastqc.stdout.log") + + shell: + "(mkdir -p {output.outdir}; \ + fastqc \ + --outdir {output.outdir} \ + {input.reads};) \ + 1> {log.stdout} 2> {log.stderr}" rule remove_adapters_cutadapt: - ''' Remove adapters ''' - input: - reads = lambda wildcards: samples_table.loc[wildcards.sample, "fq1"] - output: - reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz") - params: - adapters_3 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_3p'], - adapters_5 = 
lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_5p'] - - singularity: - "docker://zavolab/cutadapt:1.16" - threads: 8 - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "remove_adapters_cutadapt.log") - shell: - "(cutadapt \ - -e 0.1 \ - -O 1 \ - -j {threads} \ - -m 10 \ - -n 3 \ - -a {params.adapters_3} \ - -g {params.adapters_5} \ - -o {output.reads} \ - {input.reads}) &> {log}" + ''' + Remove adapters + ''' + input: + reads = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1"] + + output: + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_adapters_mate1.fastq.gz") + + params: + adapters_3 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_3p'], + adapters_5 = lambda wildcards: + samples_table.loc[wildcards.sample, 'fq1_5p'] + + singularity: + "docker://zavolab/cutadapt:1.16-slim" + + threads: 8 + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "remove_adapters_cutadapt.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "remove_adapters_cutadapt.stdout.log") + shell: + "(cutadapt \ + -e 0.1 \ + -O 1 \ + -j {threads} \ + -m 10 \ + -n 3 \ + -a {params.adapters_3} \ + -g {params.adapters_5} \ + -o {output.reads} \ + {input.reads};) \ + 1> {log.stdout} 2> {log.stderr}" rule remove_polya_cutadapt: - ''' Remove ployA tails''' - input: - reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters_mate1.fastq.gz") - output: - reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz") - params: - polya_3 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya"] - singularity: - "docker://zavolab/cutadapt:1.16" - threads: 8 - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "remove_polya_cutadapt.log") - shell: - "(cutadapt \ - --match-read-wildcards \ - -j {threads} \ - -n 2 \ - -e 0.1 \ 
- -O 1 \ - -q 6 \ - -m 10 \ - -a {params.polya_3} \ - -o {output.reads} \ - {input.reads}) &> {log}" + ''' + Remove polyA tails + ''' + input: + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_adapters_mate1.fastq.gz") + + output: + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz") + + params: + polya_3 = lambda wildcards: + samples_table.loc[wildcards.sample, "fq1_polya"] + + singularity: + "docker://zavolab/cutadapt:1.16-slim" + + threads: 8 + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "remove_polya_cutadapt.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "remove_polya_cutadapt.stdout.log") + + shell: + "(cutadapt \ + --match-read-wildcards \ + -j {threads} \ + -n 2 \ + -e 0.1 \ + -O 1 \ + -q 6 \ + -m 10 \ + -a {params.polya_3} \ + -o {output.reads} \ + {input.reads};) \ + 1> {log.stdout} 2> {log.stderr}" rule map_genome_star: - ''' Map to genome using STAR. 
''' - input: - index = lambda wildcards: - os.path.join( - config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), - "STAR_index","chrNameLength.txt"), - reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya_mate1.fastq.gz") - output: - bam = os.path.join(config["output_dir"], "single_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam"), - logfile = os.path.join(config["output_dir"], "single_end", - "{sample}", - "map_genome", - "{sample}_Log.final.out") - params: - sample_id = "{sample}", - index = lambda wildcards: - os.path.join( - config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), - "STAR_index"), - outFileNamePrefix = os.path.join( - config["output_dir"], - "single_end", - "{sample}", "map_genome", "{sample}_"), - multimappers = lambda wildcards: - samples_table.loc[wildcards.sample, "multimappers"], - soft_clip = lambda wildcards: - samples_table.loc[wildcards.sample, "soft_clip"], - pass_mode = lambda wildcards: - samples_table.loc[wildcards.sample, "pass_mode"], - singularity: - "docker://zavolab/star:2.6.0a" - threads: 12 - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "map_genome_star.log") - shell: - "(STAR \ - --runMode alignReads \ - -- twopassMode {params.pass_mode} \ - --runThreadN {threads} \ - --genomeDir {params.index} \ - --readFilesIn {input.reads} \ - --readFilesCommand zcat \ - --outSAMunmapped None \ - --outFilterMultimapNmax {params.multimappers} \ - --outFilterMultimapScoreRange 1 \ - --outFileNamePrefix {params.outFileNamePrefix} \ - --outSAMattributes All \ - --outStd BAM_SortedByCoordinate \ - --outSAMtype BAM SortedByCoordinate \ - --outFilterMismatchNoverLmax 0.04 \ - --outFilterScoreMinOverLread 0.3 \ - --outFilterMatchNminOverLread 0.3 \ - --outFilterType BySJout \ - --outReadsUnmapped 
None \ - --outSAMattrRGline ID:rcrunch SM:{params.sample_id} \ - --alignEndsType {params.soft_clip} > {output.bam};) &> {log}" - - -rule index_genomic_alignment_samtools: - '''Index genome bamfile using samtools.''' - input: - bam = os.path.join(config["output_dir"], - "single_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam") - output: - bai = os.path.join(config["output_dir"], - "single_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam.bai") - singularity: - "docker://zavolab/samtools:1.8" - threads: 1 - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "index_genomic_alignment_samtools.log") - shell: - "(samtools index {input.bam} {output.bai};) &> {log}" + ''' + Map to genome using STAR + ''' + input: + index = lambda wildcards: + os.path.join( + config["star_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "index_size"]), + "STAR_index", + "chrNameLength.txt"), + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz") + + output: + bam = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "map_genome", + "{sample}_Aligned.sortedByCoord.out.bam"), + logfile = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "map_genome", + "{sample}_Log.final.out") + + params: + sample_id = "{sample}", + index = lambda wildcards: + os.path.join( + config["star_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "index_size"]), + "STAR_index"), + outFileNamePrefix = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "map_genome", + "{sample}_"), + multimappers = lambda wildcards: + samples_table.loc[wildcards.sample, "multimappers"], + soft_clip = lambda wildcards: + samples_table.loc[wildcards.sample, "soft_clip"], + pass_mode = lambda wildcards: + 
samples_table.loc[wildcards.sample, "pass_mode"], + + singularity: + "docker://zavolab/star:2.7.3a-slim" + + threads: 12 + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "map_genome_star.stderr.log") + + shell: + "(STAR \ + --runMode alignReads \ + --twopassMode {params.pass_mode} \ + --runThreadN {threads} \ + --genomeDir {params.index} \ + --readFilesIn {input.reads} \ + --readFilesCommand zcat \ + --outSAMunmapped None \ + --outFilterMultimapNmax {params.multimappers} \ + --outFilterMultimapScoreRange 1 \ + --outFileNamePrefix {params.outFileNamePrefix} \ + --outSAMattributes All \ + --outStd BAM_SortedByCoordinate \ + --outSAMtype BAM SortedByCoordinate \ + --outFilterMismatchNoverLmax 0.04 \ + --outFilterScoreMinOverLread 0.3 \ + --outFilterMatchNminOverLread 0.3 \ + --outFilterType BySJout \ + --outReadsUnmapped None \ + --outSAMattrRGline ID:rcrunch SM:{params.sample_id} \ + --alignEndsType {params.soft_clip} > {output.bam};) \ + 2> {log.stderr}" rule quantification_salmon: - ''' Quantification at transcript and gene level using Salmon. 
''' - input: - reads = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - index = lambda wildcards: - os.path.join( - config["salmon_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "kmer"]), - "salmon.idx"), - gtf = lambda wildcards: samples_table.loc[wildcards.sample, "gtf_filtered"] - output: - gn_estimates = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "salmon_quant", - "quant.genes.sf"), - tr_estimates = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "salmon_quant", - "quant.sf") - params: - output_dir = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "salmon_quant"), - libType = lambda wildcards: - samples_table.loc[wildcards.sample, "libtype"] - log: - os.path.join(config["log_dir"], "single_end", "{sample}", "quantification_salmon.log") - threads: 12 - singularity: - "docker://zavolab/salmon:0.11.0" - shell: - "(salmon quant \ - --libType {params.libType} \ - --seqBias \ - --validateMappings \ - --threads {threads} \ - --writeUnmappedNames \ - --index {input.index} \ - --geneMap {input.gtf} \ - --unmatedReads {input.reads} \ - -o {params.output_dir}) &> {log}" + ''' + Quantification at transcript and gene level using Salmon + ''' + input: + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + index = lambda wildcards: + os.path.join( + config["salmon_indexes"], + str(samples_table.loc[wildcards.sample, "organism"]), + str(samples_table.loc[wildcards.sample, "kmer"]), + "salmon.idx"), + gtf = lambda wildcards: + samples_table.loc[wildcards.sample, "gtf_filtered"] + + output: + gn_estimates = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "salmon_quant", + "quant.genes.sf"), + tr_estimates = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "salmon_quant", + 
"quant.sf") + + params: + output_dir = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "salmon_quant"), + libType = lambda wildcards: + samples_table.loc[wildcards.sample, "libtype"] + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "quantification_salmon.stderr.log"), + stdout = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "quantification_salmon.stdout.log") + + threads: 12 + + singularity: + "docker://zavolab/salmon:1.1.0-slim" + + shell: + "(salmon quant \ + --libType {params.libType} \ + --seqBias \ + --validateMappings \ + --threads {threads} \ + --writeUnmappedNames \ + --index {input.index} \ + --geneMap {input.gtf} \ + --unmatedReads {input.reads} \ + -o {params.output_dir};) \ + 1> {log.stdout} 2> {log.stderr}" rule genome_quantification_kallisto: - ''' Quantification at transcript and gene level using Kallisto. ''' - input: - reads = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "{sample}.remove_polya_mate1.fastq.gz"), - index = lambda wildcards: - os.path.join( - config["kallisto_indexes"], - samples_table.loc[wildcards.sample, "organism"], - "kallisto.idx") - output: - pseudoalignment = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "quant_kallisto", - "{sample}.kallisto.pseudo.sam") - params: - output_dir = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "quant_kallisto"), - fraglen = lambda wildcards: samples_table.loc[wildcards.sample, 'mean'], - fragsd = lambda wildcards: samples_table.loc[wildcards.sample, 'sd'], - directionality = lambda wildcards: samples_table.loc[wildcards.sample, 'kallisto_directionality'] - threads: 8 - log: - os.path.join(config["log_dir"],"kallisto_align_{sample}.log") - singularity: - "docker://zavolab/kallisto:0.46.1" - shell: - "(kallisto quant \ - -i {input.index} \ - -o {params.output_dir} \ - --single \ - -l {params.fraglen} \ - -s {params.fragsd} \ - --pseudobam \ - 
{params.directionality} \ - {input.reads} > {output.pseudoalignment}) &> {log}" - - -rule star_rpm_single_end: - ''' Create stranded bedgraph coverage with STARs RPM normalisation ''' - input: - bam = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "map_genome", - "{sample}_Aligned.sortedByCoord.out.bam") - output: - str1 = (os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.Unique.str1.out.bg"), - os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str1.out.bg")), - str2 = (os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.Unique.str2.out.bg"), - os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str2.out.bg")) - params: - out_dir = directory(os.path.join(config["output_dir"], - "single_end", - "{sample}", - "ALFA")), - prefix = os.path.join(config["output_dir"], - "single_end", - "{sample}", - "ALFA", "{sample}_"), - stranded = "Stranded" - singularity: - "docker://zavolab/star:2.6.0a" - log: os.path.join(config["log_dir"], "single_end", "{sample}", "star_rpm_single_end.log") - threads: 4 - shell: - """ - (mkdir -p {params.out_dir}; \ - chmod -R 777 {params.out_dir}; \ - STAR \ - --runMode inputAlignmentsFromBAM \ - --runThreadN {threads} \ - --inputBAMfile {input.bam} \ - --outWigType "bedGraph" \ - --outWigStrand {params.stranded} \ - --outWigNorm "RPM" \ - --outFileNamePrefix {params.prefix}) &> {log} - """ + ''' + Quantification at transcript and gene level using Kallisto + ''' + input: + reads = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "{sample}.remove_polya_mate1.fastq.gz"), + index = lambda wildcards: + os.path.join( + config["kallisto_indexes"], + samples_table.loc[wildcards.sample, "organism"], + "kallisto.idx") + + output: + pseudoalignment = os.path.join( + config["output_dir"], + 
"single_end", + "{sample}", + "quant_kallisto", + "{sample}.kallisto.pseudo.sam") + + params: + output_dir = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "quant_kallisto"), + fraglen = lambda wildcards: + samples_table.loc[wildcards.sample, 'mean'], + fragsd = lambda wildcards: + samples_table.loc[wildcards.sample, 'sd'], + directionality = lambda wildcards: + samples_table.loc[wildcards.sample, 'kallisto_directionality'] + + threads: 8 + + log: + stderr = os.path.join( + config["log_dir"], + "single_end", + "{sample}", + "genome_quantification_kallisto.stderr.log") + + singularity: + "docker://zavolab/kallisto:0.46.1-slim" + + shell: + "(kallisto quant \ + -i {input.index} \ + -o {params.output_dir} \ + --single \ + -l {params.fraglen} \ + -s {params.fragsd} \ + --pseudobam \ + {params.directionality} \ + {input.reads} > {output.pseudoalignment};) \ + 2> {log.stderr}" rule alfa_bg_single_end: - ''' Run ALFA from stranded bedgraph files ''' - input: - str1 = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str1.out.bg"), - str2 = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}_Signal.UniqueMultiple.str2.out.bg"), - gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") - output: - biotypes = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "ALFA_plots.Biotypes.pdf"), - categories = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "ALFA_plots.Categories.pdf"), - table = os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA", - "{sample}.ALFA_feature_counts.tsv") - params: - out_dir = directory(os.path.join( - config["output_dir"], - "single_end", - "{sample}", - "ALFA")), - orientation = lambda 
wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], - in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), - rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], - in_file_str2 = lambda wildcards, input: os.path.basename(input.str2), - rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], - genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), - name = "{sample}" - singularity: - "docker://zavolab/alfa:1.1.1" - log: os.path.abspath(os.path.join(config["log_dir"], "single_end", "{sample}", "alfa_bg_single_end.log")) - shell: - """ - cd {params.out_dir}; \ - cp {params.in_file_str1} {params.rename_str1}; \ - cp {params.in_file_str2} {params.rename_str2}; \ - (alfa -g {params.genome_index} \ - --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ - -s {params.orientation}) &> {log}; \ - rm {params.rename_str1} {params.rename_str2} - """ \ No newline at end of file + ''' Run ALFA from stranded bedgraph files ''' + input: + str1 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str1.out.bg"), + str2 = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "{sample}_Signal.UniqueMultiple.str2.out.bg"), + gtf = lambda wildcards: os.path.join(config["alfa_indexes"], samples_table.loc[wildcards.sample, "organism"], + str(samples_table.loc[wildcards.sample, "index_size"]), "ALFA", "sorted_genes.stranded.ALFA_index") + output: + biotypes = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "ALFA_plots.Biotypes.pdf"), + categories = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + 
"ALFA_plots.Categories.pdf"), + table = os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA", + "{sample}.ALFA_feature_counts.tsv") + params: + out_dir = directory(os.path.join( + config["output_dir"], + "single_end", + "{sample}", + "ALFA")), + orientation = lambda wildcards: directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]], + in_file_str1 = lambda wildcards, input: os.path.basename(input.str1), + rename_str1 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][0], + in_file_str2 = lambda wildcards, input: os.path.basename(input.str2), + rename_str2 = lambda wildcards: wildcards.sample + "_" + rename_files[directionality[samples_table.loc[wildcards.sample, "kallisto_directionality"]]][1], + genome_index = lambda wildcards, input: os.path.abspath(os.path.join(os.path.dirname(input.gtf), "sorted_genes")), + name = "{sample}" + singularity: + "docker://zavolab/alfa:1.1.1" + log: os.path.abspath(os.path.join(config["log_dir"], "single_end", "{sample}", "alfa_bg_single_end.log")) + shell: + """ + cd {params.out_dir}; \ + cp {params.in_file_str1} {params.rename_str1}; \ + cp {params.in_file_str2} {params.rename_str2}; \ + (alfa -g {params.genome_index} \ + --bedgraph {params.rename_str1} {params.rename_str2} {params.name} \ + -s {params.orientation}) &> {log}; \ + rm {params.rename_str1} {params.rename_str2} + """ \ No newline at end of file