From c4e20a2159325fb7343d5356a8b405cd683f6220 Mon Sep 17 00:00:00 2001 From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch> Date: Fri, 21 Feb 2020 15:35:08 +0100 Subject: [PATCH] handle polyA processing in input preparation script - fixes some functions in `labkey_to_snakemake.py` - add optional argument for trimming polyA tails; they are trimmed as follows: - if mate is sense, oligo-A is added to sample table for `cutadapt` rule to trim - if mate is antisense, oligo-T is added to sample table for `cutadapt` rule to trim - if option is set to `--trim_polya`, oligo-X stretch is added to sample table and `cutadapt` will not trim --- images/dag_test_workflow.svg | 373 ++++++++++-------- scripts/labkey_to_snakemake.py | 355 +++++++++-------- .../expected_output.md5 | 2 +- .../expected_output.md5 | 2 +- 4 files changed, 389 insertions(+), 343 deletions(-) diff --git a/images/dag_test_workflow.svg b/images/dag_test_workflow.svg index 6b992f3..9f9e7ad 100644 --- a/images/dag_test_workflow.svg +++ b/images/dag_test_workflow.svg @@ -1,269 +1,318 @@ <?xml version="1.0" encoding="UTF-8" standalone="no"?> <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> -<!-- Generated by graphviz version 2.38.0 (20140413.2041) +<!-- Generated by graphviz version 2.42.3 (20191010.1750) --> <!-- Title: snakemake_dag Pages: 1 --> -<svg width="1338pt" height="409pt" - viewBox="0.00 0.00 1338.00 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<svg width="1465pt" height="409pt" + viewBox="0.00 0.00 1464.50 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 405)"> <title>snakemake_dag</title> -<polygon fill="white" stroke="none" points="-4,4 -4,-405 1334,-405 1334,4 -4,4"/> +<polygon fill="white" stroke="transparent" points="-4,4 -4,-405 1460.5,-405 1460.5,4 -4,4"/> <!-- 0 --> -<g id="node1" class="node"><title>0</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M714,-36C714,-36 684,-36 684,-36 678,-36 672,-30 672,-24 672,-24 672,-12 672,-12 672,-6 678,-0 684,-0 684,-0 714,-0 714,-0 720,-0 726,-6 726,-12 726,-12 726,-24 726,-24 726,-30 720,-36 714,-36"/> -<text text-anchor="middle" x="699" y="-15.5" font-family="sans" font-size="10.00">finish</text> +<g id="node1" class="node"> +<title>0</title> +<path fill="none" stroke="#56d873" stroke-width="2" d="M780.5,-36C780.5,-36 750.5,-36 750.5,-36 744.5,-36 738.5,-30 738.5,-24 738.5,-24 738.5,-12 738.5,-12 738.5,-6 744.5,0 750.5,0 750.5,0 780.5,0 780.5,0 786.5,0 792.5,-6 792.5,-12 792.5,-12 792.5,-24 792.5,-24 792.5,-30 786.5,-36 780.5,-36"/> +<text text-anchor="middle" x="765.5" y="-15.5" font-family="sans" font-size="10.00">finish</text> </g> <!-- 1 --> -<g id="node2" class="node"><title>1</title> -<path fill="none" stroke="#d8a456" stroke-width="2" d="M280,-108C280,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 280,-72 280,-72 286,-72 292,-78 292,-84 292,-84 292,-96 292,-96 292,-102 286,-108 280,-108"/> -<text text-anchor="middle" x="146" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text> -<text text-anchor="middle" x="146" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> +<g id="node2" class="node"> +<title>1</title> +<path fill="none" stroke="#5673d8" stroke-width="2" d="M313,-108C313,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 313,-72 313,-72 319,-72 325,-78 325,-84 325,-84 325,-96 325,-96 325,-102 319,-108 313,-108"/> +<text text-anchor="middle" x="162.5" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text> +<text text-anchor="middle" x="162.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> </g> <!-- 1->0 --> -<g id="edge1" class="edge"><title>1->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M280.572,-71.9656C406.068,-56.08 585.083,-33.4199 661.788,-23.7104"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="662.435,-27.1565 671.916,-22.4284 661.555,-20.212 662.435,-27.1565"/> +<g id="edge1" class="edge"> +<title>1->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M309.24,-71.97C447.86,-55.87 646.37,-32.83 728.13,-23.34"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="728.89,-26.77 738.42,-22.14 728.09,-19.82 728.89,-26.77"/> </g> <!-- 2 --> -<g id="node3" class="node"><title>2</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M602,-108C602,-108 322,-108 322,-108 316,-108 310,-102 310,-96 310,-96 310,-84 310,-84 310,-78 316,-72 322,-72 322,-72 602,-72 602,-72 608,-72 614,-78 614,-84 614,-84 614,-96 614,-96 614,-102 608,-108 602,-108"/> -<text text-anchor="middle" x="462" y="-93" font-family="sans" font-size="10.00">fastqc</text> -<text text-anchor="middle" x="462" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> +<g id="node3" class="node"> +<title>2</title> +<path fill="none" stroke="#d86e56" stroke-width="2" d="M668,-108C668,-108 355,-108 355,-108 349,-108 343,-102 343,-96 343,-96 343,-84 343,-84 343,-78 349,-72 355,-72 355,-72 668,-72 668,-72 674,-72 680,-78 680,-84 680,-84 680,-96 680,-96 680,-102 674,-108 668,-108"/> +<text text-anchor="middle" x="511.5" y="-93" font-family="sans" font-size="10.00">fastqc</text> +<text text-anchor="middle" x="511.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> </g> <!-- 2->0 --> -<g id="edge2" class="edge"><title>2->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M519.674,-71.9656C564.073,-58.8519 624.103,-41.1216 662.193,-29.8712"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="663.253,-33.2076 671.852,-27.0183 661.27,-26.4943 663.253,-33.2076"/> +<g id="edge2" class="edge"> +<title>2->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M573.31,-71.97C621.91,-58.57 687.98,-40.36 728.63,-29.16"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="729.68,-32.5 738.39,-26.47 727.82,-25.75 729.68,-32.5"/> </g> <!-- 3 --> -<g id="node4" class="node"><title>3</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M513.5,-252C513.5,-252 410.5,-252 410.5,-252 404.5,-252 398.5,-246 398.5,-240 398.5,-240 398.5,-228 398.5,-228 398.5,-222 404.5,-216 410.5,-216 410.5,-216 513.5,-216 513.5,-216 519.5,-216 525.5,-222 525.5,-228 525.5,-228 525.5,-240 525.5,-240 525.5,-246 519.5,-252 513.5,-252"/> -<text text-anchor="middle" x="462" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> +<g id="node4" class="node"> +<title>3</title> +<path fill="none" stroke="#d8bc56" stroke-width="2" d="M556.5,-252C556.5,-252 438.5,-252 438.5,-252 432.5,-252 426.5,-246 426.5,-240 426.5,-240 426.5,-228 426.5,-228 426.5,-222 432.5,-216 438.5,-216 438.5,-216 556.5,-216 556.5,-216 562.5,-216 568.5,-222 568.5,-228 568.5,-228 568.5,-240 568.5,-240 568.5,-246 562.5,-252 556.5,-252"/> +<text text-anchor="middle" x="497.5" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> </g> <!-- 3->0 --> -<g id="edge3" class="edge"><title>3->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M487.264,-215.826C520.088,-192.982 578.459,-150.513 623,-108 643.903,-88.0485 665.037,-62.797 679.694,-44.2918"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="682.537,-46.3381 685.948,-36.3074 677.026,-42.0215 682.537,-46.3381"/> +<g id="edge3" class="edge"> +<title>3->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M529.12,-215.95C568.61,-193.87 637.19,-152.87 688.5,-108 710.51,-88.76 731.97,-63.22 746.62,-44.44"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="749.53,-46.39 752.85,-36.33 743.98,-42.13 749.53,-46.39"/> </g> <!-- 4 --> -<g id="node5" class="node"><title>4</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M642.5,-252C642.5,-252 555.5,-252 555.5,-252 549.5,-252 543.5,-246 543.5,-240 543.5,-240 543.5,-228 543.5,-228 543.5,-222 549.5,-216 555.5,-216 555.5,-216 642.5,-216 642.5,-216 648.5,-216 654.5,-222 654.5,-228 654.5,-228 654.5,-240 654.5,-240 654.5,-246 648.5,-252 642.5,-252"/> -<text text-anchor="middle" x="599" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text> +<g id="node5" class="node"> +<title>4</title> +<path fill="none" stroke="#d8a456" stroke-width="2" d="M700,-252C700,-252 599,-252 599,-252 593,-252 587,-246 587,-240 587,-240 587,-228 587,-228 587,-222 593,-216 599,-216 599,-216 700,-216 700,-216 706,-216 712,-222 712,-228 712,-228 712,-240 712,-240 712,-246 706,-252 700,-252"/> +<text text-anchor="middle" x="649.5" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text> </g> <!-- 4->0 --> -<g id="edge4" class="edge"><title>4->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M623.094,-215.785C634.597,-206.411 647.644,-193.897 656,-180 681.56,-137.492 692.17,-79.7097 696.391,-46.1773"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="699.874,-46.5286 697.548,-36.1923 692.92,-45.7231 699.874,-46.5286"/> +<g id="edge4" class="edge"> +<title>4->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M682.26,-215.99C696.54,-207.05 712.3,-194.85 722.5,-180 750.73,-138.9 760.44,-80.17 763.77,-46.17"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="767.27,-46.32 764.64,-36.06 760.3,-45.72 767.27,-46.32"/> </g> <!-- 5 --> -<g id="node6" class="node"><title>5</title> -<path fill="none" stroke="#88d856" stroke-width="2" d="M827,-252C827,-252 685,-252 685,-252 679,-252 673,-246 673,-240 673,-240 673,-228 673,-228 673,-222 679,-216 685,-216 685,-216 827,-216 827,-216 833,-216 839,-222 839,-228 839,-228 839,-240 839,-240 839,-246 833,-252 827,-252"/> -<text text-anchor="middle" x="756" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> +<g id="node6" class="node"> +<title>5</title> +<path fill="none" stroke="#d85656" stroke-width="2" d="M905,-252C905,-252 742,-252 742,-252 736,-252 730,-246 730,-240 730,-240 730,-228 730,-228 730,-222 736,-216 742,-216 742,-216 905,-216 905,-216 911,-216 917,-222 917,-228 917,-228 917,-240 917,-240 917,-246 911,-252 905,-252"/> +<text text-anchor="middle" x="823.5" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> </g> <!-- 5->0 --> -<g id="edge5" class="edge"><title>5->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M751.432,-215.849C741.551,-178.753 718.127,-90.8101 706.219,-46.1027"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="709.546,-44.9947 703.59,-36.2325 702.782,-46.7964 709.546,-44.9947"/> +<g id="edge5" class="edge"> +<title>5->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M818.57,-215.83C815.61,-205.47 811.8,-191.99 808.5,-180 795.61,-133.13 781.12,-78.44 772.68,-46.38"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="775.99,-45.2 770.06,-36.41 769.22,-46.98 775.99,-45.2"/> </g> <!-- 6 --> -<g id="node7" class="node"><title>6</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M995,-252C995,-252 869,-252 869,-252 863,-252 857,-246 857,-240 857,-240 857,-228 857,-228 857,-222 863,-216 869,-216 869,-216 995,-216 995,-216 1001,-216 1007,-222 1007,-228 1007,-228 1007,-240 1007,-240 1007,-246 1001,-252 995,-252"/> -<text text-anchor="middle" x="932" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> +<g id="node7" class="node"> +<title>6</title> +<path fill="none" stroke="#56d8d8" stroke-width="2" d="M1093.5,-252C1093.5,-252 947.5,-252 947.5,-252 941.5,-252 935.5,-246 935.5,-240 935.5,-240 935.5,-228 935.5,-228 935.5,-222 941.5,-216 947.5,-216 947.5,-216 1093.5,-216 1093.5,-216 1099.5,-216 1105.5,-222 1105.5,-228 1105.5,-228 1105.5,-240 1105.5,-240 1105.5,-246 1099.5,-252 1093.5,-252"/> +<text text-anchor="middle" x="1020.5" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text> </g> <!-- 6->0 --> -<g id="edge6" class="edge"><title>6->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M863.1,-215.882C827.305,-205.814 788.309,-192.618 775,-180 737.506,-144.455 755.302,-118.111 732,-72 727.356,-62.8095 721.614,-53.1415 716.229,-44.6159"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="719.17,-42.7179 710.807,-36.2128 713.288,-46.5131 719.17,-42.7179"/> +<g id="edge6" class="edge"> +<title>6->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M938.16,-215.95C898.46,-206.3 856.28,-193.49 841.5,-180 803.34,-145.17 821.8,-118.11 798.5,-72 793.86,-62.81 788.11,-53.14 782.73,-44.62"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="785.67,-42.72 777.31,-36.21 779.79,-46.51 785.67,-42.72"/> </g> <!-- 7 --> -<g id="node8" class="node"><title>7</title> -<path fill="none" stroke="#5673d8" stroke-width="2" d="M923,-108C923,-108 835,-108 835,-108 829,-108 823,-102 823,-96 823,-96 823,-84 823,-84 823,-78 829,-72 835,-72 835,-72 923,-72 923,-72 929,-72 935,-78 935,-84 935,-84 935,-96 935,-96 935,-102 929,-108 923,-108"/> -<text text-anchor="middle" x="879" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> -<text text-anchor="middle" x="879" y="-82" font-family="sans" font-size="10.00">seqmode: paired_end</text> +<g id="node8" class="node"> +<title>7</title> +<path fill="none" stroke="#56d8c1" stroke-width="2" d="M1006.5,-108C1006.5,-108 908.5,-108 908.5,-108 902.5,-108 896.5,-102 896.5,-96 896.5,-96 896.5,-84 896.5,-84 896.5,-78 902.5,-72 908.5,-72 908.5,-72 1006.5,-72 1006.5,-72 1012.5,-72 1018.5,-78 1018.5,-84 1018.5,-84 1018.5,-96 1018.5,-96 1018.5,-102 1012.5,-108 1006.5,-108"/> +<text text-anchor="middle" x="957.5" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> +<text text-anchor="middle" x="957.5" y="-82" font-family="sans" font-size="10.00">seqmode: paired_end</text> </g> <!-- 7->0 --> -<g id="edge7" class="edge"><title>7->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M834.967,-71.8761C804.523,-60.0365 764.485,-44.4665 735.753,-33.293"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="736.685,-29.8999 726.096,-29.5374 734.148,-36.424 736.685,-29.8999"/> +<g id="edge7" class="edge"> +<title>7->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M910.53,-71.88C877.17,-59.71 833.02,-43.62 802.23,-32.39"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="803.32,-29.06 792.73,-28.93 800.93,-35.64 803.32,-29.06"/> </g> <!-- 8 --> -<g id="node9" class="node"><title>8</title> -<path fill="none" stroke="#5673d8" stroke-width="2" d="M1109,-108C1109,-108 1021,-108 1021,-108 1015,-108 1009,-102 1009,-96 1009,-96 1009,-84 1009,-84 1009,-78 1015,-72 1021,-72 1021,-72 1109,-72 1109,-72 1115,-72 1121,-78 1121,-84 1121,-84 1121,-96 1121,-96 1121,-102 1115,-108 1109,-108"/> -<text text-anchor="middle" x="1065" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> -<text text-anchor="middle" x="1065" y="-82" font-family="sans" font-size="10.00">seqmode: single_end</text> +<g id="node9" class="node"> +<title>8</title> +<path fill="none" stroke="#56d8c1" stroke-width="2" d="M1210.5,-108C1210.5,-108 1114.5,-108 1114.5,-108 1108.5,-108 1102.5,-102 1102.5,-96 1102.5,-96 1102.5,-84 1102.5,-84 1102.5,-78 1108.5,-72 1114.5,-72 1114.5,-72 1210.5,-72 1210.5,-72 1216.5,-72 1222.5,-78 1222.5,-84 1222.5,-84 1222.5,-96 1222.5,-96 1222.5,-102 1216.5,-108 1210.5,-108"/> +<text text-anchor="middle" x="1162.5" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text> +<text text-anchor="middle" x="1162.5" y="-82" font-family="sans" font-size="10.00">seqmode: single_end</text> </g> <!-- 8->0 --> -<g id="edge8" class="edge"><title>8->0</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1008.61,-78.2159C933.016,-63.7572 800.875,-38.4843 736.452,-26.1629"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="736.823,-22.6705 726.343,-24.2296 735.508,-29.5459 736.823,-22.6705"/> +<g id="edge8" class="edge"> +<title>8->0</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1102.2,-78.37C1019.06,-63.71 871.68,-37.72 802.86,-25.59"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="803.35,-22.12 792.9,-23.83 802.14,-29.01 803.35,-22.12"/> </g> <!-- 9 --> -<g id="node10" class="node"><title>9</title> -<path fill="none" stroke="#bed856" stroke-width="2" d="M726,-326.5C726,-326.5 614,-326.5 614,-326.5 608,-326.5 602,-320.5 602,-314.5 602,-314.5 602,-302.5 602,-302.5 602,-296.5 608,-290.5 614,-290.5 614,-290.5 726,-290.5 726,-290.5 732,-290.5 738,-296.5 738,-302.5 738,-302.5 738,-314.5 738,-314.5 738,-320.5 732,-326.5 726,-326.5"/> -<text text-anchor="middle" x="670" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> +<g id="node10" class="node"> +<title>9</title> +<path fill="none" stroke="#56d8a2" stroke-width="2" d="M755.5,-326.5C755.5,-326.5 627.5,-326.5 627.5,-326.5 621.5,-326.5 615.5,-320.5 615.5,-314.5 615.5,-314.5 615.5,-302.5 615.5,-302.5 615.5,-296.5 621.5,-290.5 627.5,-290.5 627.5,-290.5 755.5,-290.5 755.5,-290.5 761.5,-290.5 767.5,-296.5 767.5,-302.5 767.5,-302.5 767.5,-314.5 767.5,-314.5 767.5,-320.5 761.5,-326.5 755.5,-326.5"/> +<text text-anchor="middle" x="691.5" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> </g> <!-- 9->3 --> -<g id="edge9" class="edge"><title>9->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M620.97,-290.41C590.929,-279.939 552.3,-266.475 520.644,-255.441"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="521.655,-252.087 511.061,-252.1 519.351,-258.697 521.655,-252.087"/> +<g id="edge9" class="edge"> +<title>9->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M645.52,-290.32C617.74,-279.94 582.15,-266.63 552.83,-255.68"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="553.74,-252.28 543.15,-252.06 551.29,-258.84 553.74,-252.28"/> </g> <!-- 9->5 --> -<g id="edge13" class="edge"><title>9->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M690.381,-290.319C701.627,-280.838 715.758,-268.925 728.024,-258.585"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="730.376,-261.18 735.765,-252.058 725.864,-255.828 730.376,-261.18"/> +<g id="edge13" class="edge"> +<title>9->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M722.78,-290.32C740.95,-280.34 764.02,-267.67 783.49,-256.97"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="785.36,-259.94 792.44,-252.06 781.99,-253.8 785.36,-259.94"/> </g> <!-- 18 --> -<g id="node19" class="node"><title>18</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M1126.5,-252C1126.5,-252 1037.5,-252 1037.5,-252 1031.5,-252 1025.5,-246 1025.5,-240 1025.5,-240 1025.5,-228 1025.5,-228 1025.5,-222 1031.5,-216 1037.5,-216 1037.5,-216 1126.5,-216 1126.5,-216 1132.5,-216 1138.5,-222 1138.5,-228 1138.5,-228 1138.5,-240 1138.5,-240 1138.5,-246 1132.5,-252 1126.5,-252"/> -<text text-anchor="middle" x="1082" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> +<g id="node19" class="node"> +<title>18</title> +<path fill="none" stroke="#88d856" stroke-width="2" d="M1237,-252C1237,-252 1136,-252 1136,-252 1130,-252 1124,-246 1124,-240 1124,-240 1124,-228 1124,-228 1124,-222 1130,-216 1136,-216 1136,-216 1237,-216 1237,-216 1243,-216 1249,-222 1249,-228 1249,-228 1249,-240 1249,-240 1249,-246 1243,-252 1237,-252"/> +<text text-anchor="middle" x="1186.5" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> </g> <!-- 9->18 --> -<g id="edge26" class="edge"><title>9->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M738.35,-290.717C742.959,-289.749 747.541,-288.832 752,-288 865.226,-266.87 898.018,-274.364 1015.13,-252.03"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1016.06,-255.415 1025.21,-250.075 1014.73,-248.543 1016.06,-255.415"/> +<g id="edge26" class="edge"> +<title>9->18</title> +<path fill="none" stroke="grey" stroke-width="2" d="M767.61,-291.23C773.99,-290.06 780.35,-288.97 786.5,-288 928.12,-265.74 968.19,-276.39 1113.57,-252.03"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1114.49,-255.42 1123.76,-250.29 1113.32,-248.52 1114.49,-255.42"/> </g> <!-- 10 --> -<g id="node11" class="node"><title>10</title> -<path fill="none" stroke="#56d8a2" stroke-width="2" d="M541.5,-329C541.5,-329 440.5,-329 440.5,-329 434.5,-329 428.5,-323 428.5,-317 428.5,-317 428.5,-300 428.5,-300 428.5,-294 434.5,-288 440.5,-288 440.5,-288 541.5,-288 541.5,-288 547.5,-288 553.5,-294 553.5,-300 553.5,-300 553.5,-317 553.5,-317 553.5,-323 547.5,-329 541.5,-329"/> -<text text-anchor="middle" x="491" y="-317" font-family="sans" font-size="10.00">create_index_salmon</text> -<text text-anchor="middle" x="491" y="-306" font-family="sans" font-size="10.00">kmer: 31</text> -<text text-anchor="middle" x="491" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> +<g id="node11" class="node"> +<title>10</title> +<path fill="none" stroke="#56d88a" stroke-width="2" d="M565,-329C565,-329 450,-329 450,-329 444,-329 438,-323 438,-317 438,-317 438,-300 438,-300 438,-294 444,-288 450,-288 450,-288 565,-288 565,-288 571,-288 577,-294 577,-300 577,-300 577,-317 577,-317 577,-323 571,-329 565,-329"/> +<text text-anchor="middle" x="507.5" y="-317" font-family="sans" font-size="10.00">create_index_salmon</text> +<text text-anchor="middle" x="507.5" y="-306" font-family="sans" font-size="10.00">kmer: 31</text> +<text text-anchor="middle" x="507.5" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> </g> <!-- 10->3 --> -<g id="edge10" class="edge"><title>10->3</title> -<path fill="none" stroke="grey" stroke-width="2" d="M483.076,-287.689C479.859,-279.647 476.094,-270.236 472.644,-261.61"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="475.891,-260.304 468.928,-252.319 469.392,-262.904 475.891,-260.304"/> +<g id="edge10" class="edge"> +<title>10->3</title> +<path fill="none" stroke="grey" stroke-width="2" d="M504.77,-287.69C503.69,-279.91 502.44,-270.84 501.29,-262.45"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="504.72,-261.75 499.89,-252.32 497.79,-262.7 504.72,-261.75"/> </g> <!-- 10->4 --> -<g id="edge12" class="edge"><title>10->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M520.226,-287.88C533.987,-278.643 550.489,-267.565 564.825,-257.941"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="567.076,-260.646 573.428,-252.166 563.174,-254.834 567.076,-260.646"/> +<g id="edge12" class="edge"> +<title>10->4</title> +<path fill="none" stroke="grey" stroke-width="2" d="M545.93,-287.88C564.79,-278.25 587.57,-266.62 606.95,-256.72"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="608.56,-259.83 615.88,-252.17 605.38,-253.6 608.56,-259.83"/> </g> <!-- 11 --> -<g id="node12" class="node"><title>11</title> -<path fill="none" stroke="#d86e56" stroke-width="2" d="M1015,-326.5C1015,-326.5 919,-326.5 919,-326.5 913,-326.5 907,-320.5 907,-314.5 907,-314.5 907,-302.5 907,-302.5 907,-296.5 913,-290.5 919,-290.5 919,-290.5 1015,-290.5 1015,-290.5 1021,-290.5 1027,-296.5 1027,-302.5 1027,-302.5 1027,-314.5 1027,-314.5 1027,-320.5 1021,-326.5 1015,-326.5"/> -<text text-anchor="middle" x="967" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> +<g id="node12" class="node"> +<title>11</title> +<path fill="none" stroke="#bed856" stroke-width="2" d="M1080.5,-326.5C1080.5,-326.5 970.5,-326.5 970.5,-326.5 964.5,-326.5 958.5,-320.5 958.5,-314.5 958.5,-314.5 958.5,-302.5 958.5,-302.5 958.5,-296.5 964.5,-290.5 970.5,-290.5 970.5,-290.5 1080.5,-290.5 1080.5,-290.5 1086.5,-290.5 1092.5,-296.5 1092.5,-302.5 1092.5,-302.5 1092.5,-314.5 1092.5,-314.5 1092.5,-320.5 1086.5,-326.5 1080.5,-326.5"/> +<text text-anchor="middle" x="1025.5" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text> </g> <!-- 11->4 --> -<g id="edge11" class="edge"><title>11->4</title> -<path fill="none" stroke="grey" stroke-width="2" d="M906.62,-290.63C902.698,-289.695 898.8,-288.808 895,-288 796.444,-267.04 767.467,-272.788 664.451,-251.962"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="665.082,-248.518 654.582,-249.932 663.672,-255.375 665.082,-248.518"/> +<g id="edge11" class="edge"> +<title>11->4</title> +<path fill="none" stroke="grey" stroke-width="2" d="M958.46,-291.13C953.41,-290.03 948.38,-288.97 943.5,-288 848.22,-269.01 820.7,-271.51 722.24,-252.27"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="722.65,-248.78 712.16,-250.27 721.29,-255.65 722.65,-248.78"/> </g> <!-- 11->6 --> -<g id="edge15" class="edge"><title>11->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M958.705,-290.319C954.521,-281.651 949.355,-270.949 944.685,-261.276"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="947.734,-259.542 940.235,-252.058 941.431,-262.585 947.734,-259.542"/> +<g id="edge15" class="edge"> +<title>11->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1024.32,-290.32C1023.74,-281.92 1023.03,-271.62 1022.37,-262.19"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1025.86,-261.79 1021.68,-252.06 1018.87,-262.28 1025.86,-261.79"/> </g> <!-- 19 --> -<g id="node20" class="node"><title>19</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M1241.5,-252C1241.5,-252 1168.5,-252 1168.5,-252 1162.5,-252 1156.5,-246 1156.5,-240 1156.5,-240 1156.5,-228 1156.5,-228 1156.5,-222 1162.5,-216 1168.5,-216 1168.5,-216 1241.5,-216 1241.5,-216 1247.5,-216 1253.5,-222 1253.5,-228 1253.5,-228 1253.5,-240 1253.5,-240 1253.5,-246 1247.5,-252 1241.5,-252"/> -<text text-anchor="middle" x="1205" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> +<g id="node20" class="node"> +<title>19</title> +<path fill="none" stroke="#59d856" stroke-width="2" d="M1362,-252C1362,-252 1279,-252 1279,-252 1273,-252 1267,-246 1267,-240 1267,-240 1267,-228 1267,-228 1267,-222 1273,-216 1279,-216 1279,-216 1362,-216 1362,-216 1368,-216 1374,-222 1374,-228 1374,-228 1374,-240 1374,-240 1374,-246 1368,-252 1362,-252"/> +<text text-anchor="middle" x="1320.5" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text> </g> <!-- 11->19 --> -<g id="edge28" class="edge"><title>11->19</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1023.1,-290.41C1060.19,-279.112 1108.72,-264.329 1146.32,-252.874"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1147.58,-256.15 1156.13,-249.888 1145.54,-249.454 1147.58,-256.15"/> +<g id="edge28" class="edge"> +<title>11->19</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1092.69,-292.13C1136.95,-281.86 1196.4,-267.78 1257.02,-252.34"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1257.94,-255.72 1266.76,-249.85 1256.21,-248.94 1257.94,-255.72"/> </g> <!-- 12 --> -<g id="node13" class="node"><title>12</title> -<path fill="none" stroke="#56d8d8" stroke-width="2" d="M873.5,-326.5C873.5,-326.5 772.5,-326.5 772.5,-326.5 766.5,-326.5 760.5,-320.5 760.5,-314.5 760.5,-314.5 760.5,-302.5 760.5,-302.5 760.5,-296.5 766.5,-290.5 772.5,-290.5 772.5,-290.5 873.5,-290.5 873.5,-290.5 879.5,-290.5 885.5,-296.5 885.5,-302.5 885.5,-302.5 885.5,-314.5 885.5,-314.5 885.5,-320.5 879.5,-326.5 873.5,-326.5"/> -<text text-anchor="middle" x="823" y="-311.5" font-family="sans" font-size="10.00">create_index_kallisto</text> -<text text-anchor="middle" x="823" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> +<g id="node13" class="node"> +<title>12</title> +<path fill="none" stroke="#d6d856" stroke-width="2" d="M923,-326.5C923,-326.5 808,-326.5 808,-326.5 802,-326.5 796,-320.5 796,-314.5 796,-314.5 796,-302.5 796,-302.5 796,-296.5 802,-290.5 808,-290.5 808,-290.5 923,-290.5 923,-290.5 929,-290.5 935,-296.5 935,-302.5 935,-302.5 935,-314.5 935,-314.5 935,-320.5 929,-326.5 923,-326.5"/> +<text text-anchor="middle" x="865.5" y="-311.5" font-family="sans" font-size="10.00">create_index_kallisto</text> +<text text-anchor="middle" x="865.5" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> </g> <!-- 12->5 --> -<g id="edge14" class="edge"><title>12->5</title> -<path fill="none" stroke="grey" stroke-width="2" d="M807.122,-290.319C798.611,-281.109 787.979,-269.604 778.618,-259.475"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="781.122,-257.027 771.764,-252.058 775.981,-261.778 781.122,-257.027"/> +<g id="edge14" class="edge"> +<title>12->5</title> +<path fill="none" stroke="grey" stroke-width="2" d="M855.55,-290.32C850.47,-281.56 844.2,-270.73 838.55,-260.97"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="841.42,-258.96 833.38,-252.06 835.37,-262.47 841.42,-258.96"/> </g> <!-- 12->6 --> -<g id="edge16" class="edge"><title>12->6</title> -<path fill="none" stroke="grey" stroke-width="2" d="M848.831,-290.319C863.492,-280.567 882.022,-268.242 897.868,-257.702"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="899.966,-260.511 906.354,-252.058 896.089,-254.682 899.966,-260.511"/> +<g id="edge16" class="edge"> +<title>12->6</title> +<path fill="none" stroke="grey" stroke-width="2" d="M902.23,-290.32C923.95,-280.16 951.64,-267.21 974.76,-256.39"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="976.46,-259.47 984.03,-252.06 973.49,-253.12 976.46,-259.47"/> </g> <!-- 13 --> -<g id="node14" class="node"><title>13</title> -<path fill="none" stroke="#a7d856" stroke-width="2" d="M962,-180C962,-180 796,-180 796,-180 790,-180 784,-174 784,-168 784,-168 784,-156 784,-156 784,-150 790,-144 796,-144 796,-144 962,-144 962,-144 968,-144 974,-150 974,-156 974,-156 974,-168 974,-168 974,-174 968,-180 962,-180"/> -<text text-anchor="middle" x="879" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> +<g id="node14" class="node"> +<title>13</title> +<path fill="none" stroke="#70d856" stroke-width="2" d="M1052,-180C1052,-180 863,-180 863,-180 857,-180 851,-174 851,-168 851,-168 851,-156 851,-156 851,-150 857,-144 863,-144 863,-144 1052,-144 1052,-144 1058,-144 1064,-150 1064,-156 1064,-156 1064,-168 1064,-168 1064,-174 1058,-180 1052,-180"/> +<text text-anchor="middle" x="957.5" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> </g> <!-- 13->7 --> -<g id="edge17" class="edge"><title>13->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M879,-143.697C879,-135.983 879,-126.712 879,-118.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="882.5,-118.104 879,-108.104 875.5,-118.104 882.5,-118.104"/> +<g id="edge17" class="edge"> +<title>13->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M957.5,-143.7C957.5,-135.98 957.5,-126.71 957.5,-118.11"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="961,-118.1 957.5,-108.1 954,-118.1 961,-118.1"/> </g> <!-- 14 --> -<g id="node15" class="node"><title>14</title> -<path fill="none" stroke="#56d873" stroke-width="2" d="M1125.5,-180C1125.5,-180 1004.5,-180 1004.5,-180 998.5,-180 992.5,-174 992.5,-168 992.5,-168 992.5,-156 992.5,-156 992.5,-150 998.5,-144 1004.5,-144 1004.5,-144 1125.5,-144 1125.5,-144 1131.5,-144 1137.5,-150 1137.5,-156 1137.5,-156 1137.5,-168 1137.5,-168 1137.5,-174 1131.5,-180 1125.5,-180"/> -<text text-anchor="middle" x="1065" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text> +<g id="node15" class="node"> +<title>14</title> +<path fill="none" stroke="#56c1d8" stroke-width="2" d="M1230.5,-180C1230.5,-180 1094.5,-180 1094.5,-180 1088.5,-180 1082.5,-174 1082.5,-168 1082.5,-168 1082.5,-156 1082.5,-156 1082.5,-150 1088.5,-144 1094.5,-144 1094.5,-144 1230.5,-144 1230.5,-144 1236.5,-144 1242.5,-150 1242.5,-156 1242.5,-156 1242.5,-168 1242.5,-168 1242.5,-174 1236.5,-180 1230.5,-180"/> +<text text-anchor="middle" x="1162.5" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text> </g> <!-- 14->7 --> -<g id="edge18" class="edge"><title>14->7</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1019.5,-143.876C993.62,-134.137 961.033,-121.872 933.759,-111.608"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="934.945,-108.315 924.353,-108.068 932.479,-114.866 934.945,-108.315"/> +<g id="edge18" class="edge"> +<title>14->7</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1112.61,-143.97C1083.85,-134.14 1047.49,-121.73 1017.21,-111.39"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1018.2,-108.03 1007.61,-108.11 1015.94,-114.65 1018.2,-108.03"/> </g> <!-- 14->8 --> -<g id="edge20" class="edge"><title>14->8</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1065,-143.697C1065,-135.983 1065,-126.712 1065,-118.112"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1068.5,-118.104 1065,-108.104 1061.5,-118.104 1068.5,-118.104"/> +<g id="edge20" class="edge"> +<title>14->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1162.5,-143.7C1162.5,-135.98 1162.5,-126.71 1162.5,-118.11"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1166,-118.1 1162.5,-108.1 1159,-118.1 1166,-118.1"/> </g> <!-- 15 --> -<g id="node16" class="node"><title>15</title> -<path fill="none" stroke="#56c1d8" stroke-width="2" d="M1318,-180C1318,-180 1168,-180 1168,-180 1162,-180 1156,-174 1156,-168 1156,-168 1156,-156 1156,-156 1156,-150 1162,-144 1168,-144 1168,-144 1318,-144 1318,-144 1324,-144 1330,-150 1330,-156 1330,-156 1330,-168 1330,-168 1330,-174 1324,-180 1318,-180"/> -<text text-anchor="middle" x="1243" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> +<g id="node16" class="node"> +<title>15</title> +<path fill="none" stroke="#56a2d8" stroke-width="2" d="M1444.5,-180C1444.5,-180 1272.5,-180 1272.5,-180 1266.5,-180 1260.5,-174 1260.5,-168 1260.5,-168 1260.5,-156 1260.5,-156 1260.5,-150 1266.5,-144 1272.5,-144 1272.5,-144 1444.5,-144 1444.5,-144 1450.5,-144 1456.5,-150 1456.5,-156 1456.5,-156 1456.5,-168 1456.5,-168 1456.5,-174 1450.5,-180 1444.5,-180"/> +<text text-anchor="middle" x="1358.5" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> </g> <!-- 15->8 --> -<g id="edge19" class="edge"><title>15->8</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1199.46,-143.876C1174.8,-134.179 1143.78,-121.98 1117.75,-111.743"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1118.99,-108.471 1108.4,-108.068 1116.43,-114.985 1118.99,-108.471"/> +<g id="edge19" class="edge"> +<title>15->8</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1310.8,-143.97C1283.42,-134.19 1248.84,-121.84 1219.97,-111.52"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1221,-108.18 1210.41,-108.11 1218.65,-114.77 1221,-108.18"/> </g> <!-- 16 --> -<g id="node17" class="node"><title>16</title> -<path fill="none" stroke="#56a2d8" stroke-width="2" d="M791,-401C791,-401 523,-401 523,-401 517,-401 511,-395 511,-389 511,-389 511,-377 511,-377 511,-371 517,-365 523,-365 523,-365 791,-365 791,-365 797,-365 803,-371 803,-377 803,-377 803,-389 803,-389 803,-395 797,-401 791,-401"/> -<text text-anchor="middle" x="657" y="-386" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> -<text text-anchor="middle" x="657" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> +<g id="node17" class="node"> +<title>16</title> +<path fill="none" stroke="#568ad8" stroke-width="2" d="M835,-401C835,-401 534,-401 534,-401 528,-401 522,-395 522,-389 522,-389 522,-377 522,-377 522,-371 528,-365 534,-365 534,-365 835,-365 835,-365 841,-365 847,-371 847,-377 847,-377 847,-389 847,-389 847,-395 841,-401 835,-401"/> +<text text-anchor="middle" x="684.5" y="-386" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> +<text text-anchor="middle" x="684.5" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text> </g> <!-- 16->9 --> -<g id="edge21" class="edge"><title>16->9</title> -<path fill="none" stroke="grey" stroke-width="2" d="M660.081,-364.819C661.586,-356.422 663.434,-346.116 665.125,-336.686"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="668.621,-337.019 666.941,-326.558 661.731,-335.783 668.621,-337.019"/> +<g id="edge21" class="edge"> +<title>16->9</title> +<path fill="none" stroke="grey" stroke-width="2" d="M686.16,-364.82C686.97,-356.42 687.96,-346.12 688.88,-336.69"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="692.38,-336.85 689.85,-326.56 685.41,-336.18 692.38,-336.85"/> </g> <!-- 17 --> -<g id="node18" class="node"><title>17</title> -<path fill="none" stroke="#56d88a" stroke-width="2" d="M1113,-401C1113,-401 833,-401 833,-401 827,-401 821,-395 821,-389 821,-389 821,-377 821,-377 821,-371 827,-365 833,-365 833,-365 1113,-365 1113,-365 1119,-365 1125,-371 1125,-377 1125,-377 1125,-389 1125,-389 1125,-395 1119,-401 1113,-401"/> -<text text-anchor="middle" x="973" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> -<text text-anchor="middle" x="973" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> +<g id="node18" class="node"> +<title>17</title> +<path fill="none" stroke="#a7d856" stroke-width="2" d="M1190,-401C1190,-401 877,-401 877,-401 871,-401 865,-395 865,-389 865,-389 865,-377 865,-377 865,-371 871,-365 877,-365 877,-365 1190,-365 1190,-365 1196,-365 1202,-371 1202,-377 1202,-377 1202,-389 1202,-389 1202,-395 1196,-401 1190,-401"/> +<text text-anchor="middle" x="1033.5" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> +<text text-anchor="middle" x="1033.5" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text> </g> <!-- 17->11 --> -<g id="edge22" class="edge"><title>17->11</title> -<path fill="none" stroke="grey" stroke-width="2" d="M971.578,-364.819C970.883,-356.422 970.03,-346.116 969.25,-336.686"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="972.725,-336.235 968.412,-326.558 965.748,-336.813 972.725,-336.235"/> +<g id="edge22" class="edge"> +<title>17->11</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1031.6,-364.82C1030.68,-356.42 1029.54,-346.12 1028.5,-336.69"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1031.96,-336.11 1027.38,-326.56 1025,-336.88 1031.96,-336.11"/> </g> <!-- 18->13 --> -<g id="edge23" class="edge"><title>18->13</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1032.6,-215.966C1004.11,-206.142 968.114,-193.729 938.129,-183.389"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="939.216,-180.062 928.621,-180.111 936.934,-186.68 939.216,-180.062"/> +<g id="edge23" class="edge"> +<title>18->13</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1130.77,-215.97C1098.36,-206.06 1057.32,-193.51 1023.32,-183.12"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1024.06,-179.69 1013.48,-180.11 1022.02,-186.38 1024.06,-179.69"/> </g> <!-- 19->15 --> -<g id="edge24" class="edge"><title>19->15</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1214.39,-215.697C1218.76,-207.644 1224.06,-197.894 1228.9,-188.982"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1232.02,-190.563 1233.71,-180.104 1225.87,-187.223 1232.02,-190.563"/> +<g id="edge24" class="edge"> +<title>19->15</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1329.89,-215.7C1334.26,-207.64 1339.56,-197.89 1344.4,-188.98"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1347.52,-190.56 1349.21,-180.1 1341.37,-187.22 1347.52,-190.56"/> </g> <!-- 20 --> -<g id="node21" class="node"><title>20</title> -<path fill="none" stroke="#d6d856" stroke-width="2" d="M1193.5,-329C1193.5,-329 1092.5,-329 1092.5,-329 1086.5,-329 1080.5,-323 1080.5,-317 1080.5,-317 1080.5,-300 1080.5,-300 1080.5,-294 1086.5,-288 1092.5,-288 1092.5,-288 1193.5,-288 1193.5,-288 1199.5,-288 1205.5,-294 1205.5,-300 1205.5,-300 1205.5,-317 1205.5,-317 1205.5,-323 1199.5,-329 1193.5,-329"/> -<text text-anchor="middle" x="1143" y="-317" font-family="sans" font-size="10.00">create_index_star</text> -<text text-anchor="middle" x="1143" y="-306" font-family="sans" font-size="10.00">index_size: 75</text> -<text text-anchor="middle" x="1143" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> +<g id="node21" class="node"> +<title>20</title> +<path fill="none" stroke="#d88d56" stroke-width="2" d="M1311,-329C1311,-329 1196,-329 1196,-329 1190,-329 1184,-323 1184,-317 1184,-317 1184,-300 1184,-300 1184,-294 1190,-288 1196,-288 1196,-288 1311,-288 1311,-288 1317,-288 1323,-294 1323,-300 1323,-300 1323,-317 1323,-317 1323,-323 1317,-329 1311,-329"/> +<text text-anchor="middle" x="1253.5" y="-317" font-family="sans" font-size="10.00">create_index_star</text> +<text text-anchor="middle" x="1253.5" y="-306" font-family="sans" font-size="10.00">index_size: 75</text> +<text text-anchor="middle" x="1253.5" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text> </g> <!-- 20->18 --> -<g id="edge25" class="edge"><title>20->18</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1126.33,-287.689C1119.2,-279.215 1110.79,-269.22 1103.22,-260.226"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1105.69,-257.718 1096.57,-252.319 1100.33,-262.225 1105.69,-257.718"/> +<g id="edge25" class="edge"> +<title>20->18</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1235.19,-287.69C1227.2,-279.04 1217.75,-268.81 1209.3,-259.68"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1211.86,-257.29 1202.51,-252.32 1206.72,-262.04 1211.86,-257.29"/> </g> <!-- 20->19 --> -<g id="edge27" class="edge"><title>20->19</title> -<path fill="none" stroke="grey" stroke-width="2" d="M1159.94,-287.689C1167.26,-279.128 1175.91,-269.016 1183.66,-259.951"/> -<polygon fill="grey" stroke="grey" stroke-width="2" points="1186.35,-262.194 1190.19,-252.319 1181.03,-257.645 1186.35,-262.194"/> +<g id="edge27" class="edge"> +<title>20->19</title> +<path fill="none" stroke="grey" stroke-width="2" d="M1271.81,-287.69C1279.8,-279.04 1289.25,-268.81 1297.7,-259.68"/> +<polygon fill="grey" stroke="grey" stroke-width="2" points="1300.28,-262.04 1304.49,-252.32 1295.14,-257.29 1300.28,-262.04"/> </g> </g> </svg> diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index b6b9569..4456e00 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -1,14 +1,11 @@ #!/usr/bin/env python3 -## ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # Author : Katsantoni Maria, Christina Herrmann # Company: Mihaela Zavolan, Biozentrum, Basel -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- # This script is part of the Zavolan lab quantification pipeline, which is used -# for analysing RNA-seq data. The table is provided by labkey and is a csv file. -# If the user provides their own table the table should contain the following +# for analysing RNA-seq data. The table is provided by labkey as a csv file. +# If the user provides their own table the table should contain the following # columns: # ----------------------------------------------------------------------------- @@ -24,117 +21,95 @@ from Bio import SeqIO from io import StringIO from csv import writer from pathlib import Path -# for convenience, load QueryFilter explicitly (avoids long lines in filter definitions) +# (avoids long lines in filter definitions) from labkey.query import QueryFilter -# ---------------------------------------------------------------------------------------------------------------------- -def main(): - """ Preprocess sample folder and create config file for snakemake""" - - __doc__ = "Preprocess of the table and create config file." - - parser = ArgumentParser( - description=__doc__, - formatter_class=RawTextHelpFormatter) - parser.add_argument( - "genomes_path", - help="Path containing the FASTA and GTF files for all organisms", - metavar="GENOMES PATH" - ) - - parser.add_argument( - "--input-table", - type=str, - default=None, - help=( - "Input table in LabKey format containing the sample information;" - "\nexactly one of '--input-table' and '--remote' is required." - ), - metavar="FILE", - ) - - parser.add_argument( - "--remote", - action="store_true", - help=( - "Fetch LabKey table via API; exactly one of '--input-table' and" - "\n'--remote' is required." - ), - ) - - parser.add_argument( - "--project-name", - help=( - "Name of LabKey project containing table '--table-name'; required" - "\nif '--remote' is specified." - ), - metavar="STR", - ) - parser.add_argument( - "--table-name", - help="Name of LabKey table; required if '--remote' is specified.", - metavar="STR", - ) - parser.add_argument( - "--input-dict", - help=( - "Input dictionary containing the feature name conversion from \n" - "LabKey to Snakemake; default: '%(default)s'" - ), - default=os.path.join( - os.path.dirname(__file__), - 'labkey_to_snakemake.dict.tsv' - ), - metavar="FILE" - ) - - parser.add_argument( - "--samples-table", - help="Output table compatible to snakemake; default: '%(default)s'", - default='samples.tsv', - metavar="FILE" - ) - - parser.add_argument( - "--multimappers", - type=int, - default=100, - help="Number of allowed multimappers", - metavar='INT', - ) - - parser.add_argument( - "--soft-clip", - choices=['EndToEnd','Local'], - default='EndToEnd', - help="Soft-clipping option for STAR", - ) - - parser.add_argument( - "--pass-mode", - choices=['None','Basic'], - default='None', - help="2-pass mode option for STAR", - ) - - parser.add_argument( - "--libtype", - default='A', - help="Library type for salmon", - metavar="STR", - ) - - parser.add_argument( - "--config-file", - help="Configuration file to be used by Snakemake", - ) +def main(): + """ Preprocess sample folder and create config file for snakemake""" + __doc__ = "Preprocess of labkey table and create " + \ + "config file and sample table." + + parser = ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) + + parser.add_argument("genomes_path", + help="Path containing the FASTA and GTF " + + " files for all organisms", + metavar="GENOMES PATH") + + parser.add_argument("--input-table", + type=str, + default=None, + help="Input table in LabKey format " + + "containing the sample information;" + + "\nexactly one '--input-table' and " + + "'--remote' is required.", + metavar="FILE") + + parser.add_argument("--remote", + action="store_true", + help="Fetch LabKey table via API; exactly one of " + + "'--input-table' and" + + "\n'--remote' is required.") + + parser.add_argument("--project-name", + help="Name of LabKey project containing table " + + " '--table-name'; required" + + "\nif '--remote' is specified.", + metavar="STR") + + parser.add_argument("--table-name", + help="Name of LabKey table; required if '--remote'" + + " is specified.", + metavar="STR") + + parser.add_argument("--input-dict", + help="Input dictionary containing the feature name " + + "conversion from LabKey to Snakemake;" + + "default: '%(default)s'", + default=os.path.join( + os.path.dirname(__file__), + 'labkey_to_snakemake.dict.tsv'), + metavar="FILE") + + parser.add_argument("--samples-table", + help="Output table compatible to snakemake;" + + "default: '%(default)s'", + default='samples.tsv', + metavar="FILE") + + parser.add_argument("--trim_polya", + type=int, + choices=[True, False], + default=True, + help="Trim poly-As option") + + parser.add_argument("--multimappers", + type=int, + default=100, + help="Number of allowed multimappers", + metavar='INT') + + parser.add_argument("--soft-clip", + choices=['EndToEnd', 'Local'], + default='EndToEnd', + help="Soft-clipping option for STAR") + + parser.add_argument("--pass-mode", + choices=['None', 'Basic'], + default='None', + help="2-pass mode option for STAR") + + parser.add_argument("--libtype", + default='A', + help="Library type for salmon", + metavar="STR") + + parser.add_argument("--config-file", + help="Configuration file to be used by Snakemake") - # __________________________________________________________________________________________________________________ - # ------------------------------------------------------------------------------------------------------------------ - # get the arguments - # ------------------------------------------------------------------------------------------------------------------ try: options = parser.parse_args() except(Exception): @@ -146,27 +121,34 @@ def main(): if options.remote and options.input_table: parser.print_help() - print("\n[ERROR] Options '--input-table' and '--remote' are mutually exclusive.") + print( + "\n[ERROR] Options '--input-table' and ", + "'--remote' are mutually exclusive.") sys.exit(1) if not options.remote and not options.input_table: parser.print_help() - print("\n[ERROR] At least one of '--input-table' and '--remote' is required.") + print("\n[ERROR] At least one of '--input-table' ", + "and '--remote' is required.") sys.exit(1) if options.remote and not options.project_name: parser.print_help() - print("\n[ERROR] If option '--remote' is specified, option '--project-name' is required.") + print( + "\n[ERROR] If option '--remote' is specified, ", + "option '--project-name' is required.") sys.exit(1) if options.remote and not options.table_name: parser.print_help() - print("\n[ERROR] If option '--remote' is specified, option '--table-name' is required.") + print( + "\n[ERROR] If option '--remote' is specified, ", + "option '--table-name' is required.") sys.exit(1) sys.stdout.write('Reading input file...\n') - if options.remote == True: + if options.remote is True: input_table = api_fetch_labkey_table( project_name=options.project_name, query_name=options.table_name) @@ -191,8 +173,11 @@ def main(): input_dict.set_index('snakemake', inplace=True, drop=True) sys.stdout.write('Create snakemake table...\n') snakemake_table = pd.DataFrame() + for index, row in input_table.iterrows(): - snakemake_table.loc[index, 'sample'] = row[input_dict.loc['replicate_name', 'labkey']] + "_" + row[input_dict.loc['condition', 'labkey']] + snakemake_table.loc[index, 'sample'] = row[ + input_dict.loc['replicate_name', 'labkey']] + "_" + row[ + input_dict.loc['condition', 'labkey']] if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': snakemake_table.loc[index, 'seqmode'] = 'paired_end' elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE': @@ -203,32 +188,16 @@ def main(): row[input_dict.loc['fq1', 'labkey']]) snakemake_table.loc[index, 'fq1'] = fq1 - - with gzip.open(fq1, "rt") as handle: - for record in SeqIO.parse(handle, "fastq"): - read_length = len(record.seq) - break - + read_length = get_read_length(fq1) snakemake_table.loc[index, 'index_size'] = read_length - if read_length <= 50: - snakemake_table.loc[index, 'kmer'] = 21 - elif read_length > 50: - snakemake_table.loc[index, 'kmer'] = 31 - - - if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - snakemake_table.loc[index, 'fq2'] = os.path.join( - row[input_dict.loc['fastq_path', 'labkey']], - row[input_dict.loc['fq2', 'labkey']]) - - snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']] - snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']] - - if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']] - snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']] - - organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower() + snakemake_table.loc[index, 'kmer'] = infer_kmer_length(read_length) + snakemake_table.loc[index, 'fq1_3p'] = row[ + input_dict.loc['fq1_3p', 'labkey']] + snakemake_table.loc[index, 'fq1_5p'] = row[ + input_dict.loc['fq1_5p', 'labkey']] + + organism = row[input_dict.loc['organism', 'labkey']].replace( + ' ', '_').lower() snakemake_table.loc[index, 'organism'] = organism snakemake_table.loc[index, 'gtf'] = os.path.join( @@ -251,39 +220,35 @@ def main(): organism, 'transcriptome.fa') - snakemake_table.loc[index, 'sd'] = row[input_dict.loc['sd', 'labkey']] - snakemake_table.loc[index, 'mean'] = row[input_dict.loc['mean', 'labkey']] + snakemake_table.loc[index, 'sd'] = row[ + input_dict.loc['sd', 'labkey']] + snakemake_table.loc[index, 'mean'] = row[ + input_dict.loc['mean', 'labkey']] snakemake_table.loc[index, 'multimappers'] = options.multimappers snakemake_table.loc[index, 'soft_clip'] = options.soft_clip snakemake_table.loc[index, 'pass_mode'] = options.pass_mode snakemake_table.loc[index, 'libtype'] = options.libtype - - if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--fr' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--rf' - else: - snakemake_table.loc[index, 'kallisto_directionality'] = '' - - if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'fq1_polya'] = 'TTTTTTTTTTTTTTTTT' - elif row[input_dict.loc['mate1_direction', 'labkey']] == 'RANDOM': - snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA' - else: - pass + if options.trim_polya is True: + snakemake_table.loc[index, 'fq1_polya'] = trim_polya( + row[input_dict.loc['mate1_direction', 'labkey']]) + snakemake_table.loc[index, 'kallisto_directionality'] = \ + get_kallisto_directionality( + row[input_dict.loc['mate1_direction', 'labkey']]) if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': - if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - else: - pass + fq2 = os.path.join( + row[input_dict.loc['fastq_path', 'labkey']], + row[input_dict.loc['fq2', 'labkey']]) + snakemake_table.loc[index, 'fq2'] = fq2 + + snakemake_table.loc[index, 'fq2_3p'] = row[ + input_dict.loc['fq2_3p', 'labkey']] + snakemake_table.loc[index, 'fq2_5p'] = row[ + input_dict.loc['fq2_5p', 'labkey']] + if options.trim_polya is True: + snakemake_table.loc[index, 'fq2_polya'] = trim_polya( + row[input_dict.loc['mate2_direction', 'labkey']]) snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table = snakemake_table.astype( @@ -301,11 +266,10 @@ def main(): header=True, index=False) - # Read file and infer read size for sjdbovwerhang with open(options.config_file, 'w') as config_file: config_file.write('''--- - samples: "'''+ options.samples_table + '''" + samples: "''' + options.samples_table + '''" output_dir: "results/" log_dir: "logs/" kallisto_indexes: "results/kallisto_indexes/" @@ -318,19 +282,54 @@ def main(): sys.stdout.write('Create config file finished successfully...\n') return + def api_fetch_labkey_table(project_name=None, query_name=None): - group_path = os.path.join( '/Zavolan Group', project_name) - server_context = labkey.utils.create_server_context('labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True) + group_path = os.path.join('/Zavolan Group', project_name) + server_context = labkey.utils.create_server_context( + 'labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True) schema_name = "lists" results = labkey.query.select_rows(server_context, schema_name, query_name) input_table = pd.DataFrame(results["rows"]) return input_table -# _____________________________________________________________________________ -# ----------------------------------------------------------------------------- -# Call the Main function and catch Keyboard interrups -# ----------------------------------------------------------------------------- +def get_read_length(filename): + with gzip.open(filename, "rt") as handle: + for record in SeqIO.parse(handle, "fastq"): + read_length = len(record.seq) + break + return read_length + + +def infer_kmer_length(read_length): + if read_length <= 50: + kmer = 21 + elif read_length > 50: + kmer = 31 + return kmer + + +def get_kallisto_directionality(directionality): + if directionality == 'SENSE': + final_direction = '--fr' + elif directionality == 'ANTISENSE': + final_direction = '--rf' + else: + final_direction = '' + return final_direction + + +def trim_polya(sense): + if sense == 'SENSE': + polya = 'AAAAAAAAAAAAAAAAA' + elif sense == 'ANTISENSE': + polya = 'TTTTTTTTTTTTTTTTT' + elif sense == 'RANDOM': + polya = 'AAAAAAAAAAAAAAAAA' + else: + polya = 'XXXXXXXXXXXXXXXXX' + return polya + if __name__ == '__main__': try: @@ -338,5 +337,3 @@ if __name__ == '__main__': except KeyboardInterrupt: sys.stderr.write("User interrupt!" + os.linesep) sys.exit(0) - - diff --git a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 index 90bb3be..7708397 100644 --- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 @@ -1,2 +1,2 @@ ba5ae0649d1fb82d94f8d19481498ffd config.yaml -9aece9e4acb17143b5e8f627968e03a5 samples.tsv +cb58e046242c2702038e6e21dbd0bdb4 samples.tsv \ No newline at end of file diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 index 90bb3be..c24f601 100644 --- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 @@ -1,2 +1,2 @@ ba5ae0649d1fb82d94f8d19481498ffd config.yaml -9aece9e4acb17143b5e8f627968e03a5 samples.tsv +cb58e046242c2702038e6e21dbd0bdb4 samples.tsv -- GitLab