From c4e20a2159325fb7343d5356a8b405cd683f6220 Mon Sep 17 00:00:00 2001
From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch>
Date: Fri, 21 Feb 2020 15:35:08 +0100
Subject: [PATCH] handle polyA processing in input preparation script

- fix some functions in `labkey_to_snakemake.py`
- add optional argument for trimming polyA tails; the tails are handled as follows:
  - if the mate is sense, an oligo-A stretch is added to the sample table for the `cutadapt` rule to trim
  - if the mate is antisense, an oligo-T stretch is added to the sample table for the `cutadapt` rule to trim
  - if the option is set to `--trim_polya`, an oligo-X stretch is added to the sample table and `cutadapt` will not trim
---
 images/dag_test_workflow.svg                  | 373 ++++++++++--------
 scripts/labkey_to_snakemake.py                | 355 +++++++++--------
 .../expected_output.md5                       |   2 +-
 .../expected_output.md5                       |   2 +-
 4 files changed, 389 insertions(+), 343 deletions(-)

diff --git a/images/dag_test_workflow.svg b/images/dag_test_workflow.svg
index 6b992f3..9f9e7ad 100644
--- a/images/dag_test_workflow.svg
+++ b/images/dag_test_workflow.svg
@@ -1,269 +1,318 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<!-- Generated by graphviz version 2.38.0 (20140413.2041)
+<!-- Generated by graphviz version 2.42.3 (20191010.1750)
  -->
 <!-- Title: snakemake_dag Pages: 1 -->
-<svg width="1338pt" height="409pt"
- viewBox="0.00 0.00 1338.00 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="1465pt" height="409pt"
+ viewBox="0.00 0.00 1464.50 409.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 405)">
 <title>snakemake_dag</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-405 1334,-405 1334,4 -4,4"/>
+<polygon fill="white" stroke="transparent" points="-4,4 -4,-405 1460.5,-405 1460.5,4 -4,4"/>
 <!-- 0 -->
-<g id="node1" class="node"><title>0</title>
-<path fill="none" stroke="#56d8c1" stroke-width="2" d="M714,-36C714,-36 684,-36 684,-36 678,-36 672,-30 672,-24 672,-24 672,-12 672,-12 672,-6 678,-0 684,-0 684,-0 714,-0 714,-0 720,-0 726,-6 726,-12 726,-12 726,-24 726,-24 726,-30 720,-36 714,-36"/>
-<text text-anchor="middle" x="699" y="-15.5" font-family="sans" font-size="10.00">finish</text>
+<g id="node1" class="node">
+<title>0</title>
+<path fill="none" stroke="#56d873" stroke-width="2" d="M780.5,-36C780.5,-36 750.5,-36 750.5,-36 744.5,-36 738.5,-30 738.5,-24 738.5,-24 738.5,-12 738.5,-12 738.5,-6 744.5,0 750.5,0 750.5,0 780.5,0 780.5,0 786.5,0 792.5,-6 792.5,-12 792.5,-12 792.5,-24 792.5,-24 792.5,-30 786.5,-36 780.5,-36"/>
+<text text-anchor="middle" x="765.5" y="-15.5" font-family="sans" font-size="10.00">finish</text>
 </g>
 <!-- 1 -->
-<g id="node2" class="node"><title>1</title>
-<path fill="none" stroke="#d8a456" stroke-width="2" d="M280,-108C280,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 280,-72 280,-72 286,-72 292,-78 292,-84 292,-84 292,-96 292,-96 292,-102 286,-108 280,-108"/>
-<text text-anchor="middle" x="146" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text>
-<text text-anchor="middle" x="146" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text>
+<g id="node2" class="node">
+<title>1</title>
+<path fill="none" stroke="#5673d8" stroke-width="2" d="M313,-108C313,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 313,-72 313,-72 319,-72 325,-78 325,-84 325,-84 325,-96 325,-96 325,-102 319,-108 313,-108"/>
+<text text-anchor="middle" x="162.5" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text>
+<text text-anchor="middle" x="162.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text>
 </g>
 <!-- 1&#45;&gt;0 -->
-<g id="edge1" class="edge"><title>1&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M280.572,-71.9656C406.068,-56.08 585.083,-33.4199 661.788,-23.7104"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="662.435,-27.1565 671.916,-22.4284 661.555,-20.212 662.435,-27.1565"/>
+<g id="edge1" class="edge">
+<title>1&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M309.24,-71.97C447.86,-55.87 646.37,-32.83 728.13,-23.34"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="728.89,-26.77 738.42,-22.14 728.09,-19.82 728.89,-26.77"/>
 </g>
 <!-- 2 -->
-<g id="node3" class="node"><title>2</title>
-<path fill="none" stroke="#59d856" stroke-width="2" d="M602,-108C602,-108 322,-108 322,-108 316,-108 310,-102 310,-96 310,-96 310,-84 310,-84 310,-78 316,-72 322,-72 322,-72 602,-72 602,-72 608,-72 614,-78 614,-84 614,-84 614,-96 614,-96 614,-102 608,-108 602,-108"/>
-<text text-anchor="middle" x="462" y="-93" font-family="sans" font-size="10.00">fastqc</text>
-<text text-anchor="middle" x="462" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text>
+<g id="node3" class="node">
+<title>2</title>
+<path fill="none" stroke="#d86e56" stroke-width="2" d="M668,-108C668,-108 355,-108 355,-108 349,-108 343,-102 343,-96 343,-96 343,-84 343,-84 343,-78 349,-72 355,-72 355,-72 668,-72 668,-72 674,-72 680,-78 680,-84 680,-84 680,-96 680,-96 680,-102 674,-108 668,-108"/>
+<text text-anchor="middle" x="511.5" y="-93" font-family="sans" font-size="10.00">fastqc</text>
+<text text-anchor="middle" x="511.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text>
 </g>
 <!-- 2&#45;&gt;0 -->
-<g id="edge2" class="edge"><title>2&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M519.674,-71.9656C564.073,-58.8519 624.103,-41.1216 662.193,-29.8712"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="663.253,-33.2076 671.852,-27.0183 661.27,-26.4943 663.253,-33.2076"/>
+<g id="edge2" class="edge">
+<title>2&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M573.31,-71.97C621.91,-58.57 687.98,-40.36 728.63,-29.16"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="729.68,-32.5 738.39,-26.47 727.82,-25.75 729.68,-32.5"/>
 </g>
 <!-- 3 -->
-<g id="node4" class="node"><title>3</title>
-<path fill="none" stroke="#d85656" stroke-width="2" d="M513.5,-252C513.5,-252 410.5,-252 410.5,-252 404.5,-252 398.5,-246 398.5,-240 398.5,-240 398.5,-228 398.5,-228 398.5,-222 404.5,-216 410.5,-216 410.5,-216 513.5,-216 513.5,-216 519.5,-216 525.5,-222 525.5,-228 525.5,-228 525.5,-240 525.5,-240 525.5,-246 519.5,-252 513.5,-252"/>
-<text text-anchor="middle" x="462" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text>
+<g id="node4" class="node">
+<title>3</title>
+<path fill="none" stroke="#d8bc56" stroke-width="2" d="M556.5,-252C556.5,-252 438.5,-252 438.5,-252 432.5,-252 426.5,-246 426.5,-240 426.5,-240 426.5,-228 426.5,-228 426.5,-222 432.5,-216 438.5,-216 438.5,-216 556.5,-216 556.5,-216 562.5,-216 568.5,-222 568.5,-228 568.5,-228 568.5,-240 568.5,-240 568.5,-246 562.5,-252 556.5,-252"/>
+<text text-anchor="middle" x="497.5" y="-231.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text>
 </g>
 <!-- 3&#45;&gt;0 -->
-<g id="edge3" class="edge"><title>3&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M487.264,-215.826C520.088,-192.982 578.459,-150.513 623,-108 643.903,-88.0485 665.037,-62.797 679.694,-44.2918"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="682.537,-46.3381 685.948,-36.3074 677.026,-42.0215 682.537,-46.3381"/>
+<g id="edge3" class="edge">
+<title>3&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M529.12,-215.95C568.61,-193.87 637.19,-152.87 688.5,-108 710.51,-88.76 731.97,-63.22 746.62,-44.44"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="749.53,-46.39 752.85,-36.33 743.98,-42.13 749.53,-46.39"/>
 </g>
 <!-- 4 -->
-<g id="node5" class="node"><title>4</title>
-<path fill="none" stroke="#d8bc56" stroke-width="2" d="M642.5,-252C642.5,-252 555.5,-252 555.5,-252 549.5,-252 543.5,-246 543.5,-240 543.5,-240 543.5,-228 543.5,-228 543.5,-222 549.5,-216 555.5,-216 555.5,-216 642.5,-216 642.5,-216 648.5,-216 654.5,-222 654.5,-228 654.5,-228 654.5,-240 654.5,-240 654.5,-246 648.5,-252 642.5,-252"/>
-<text text-anchor="middle" x="599" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text>
+<g id="node5" class="node">
+<title>4</title>
+<path fill="none" stroke="#d8a456" stroke-width="2" d="M700,-252C700,-252 599,-252 599,-252 593,-252 587,-246 587,-240 587,-240 587,-228 587,-228 587,-222 593,-216 599,-216 599,-216 700,-216 700,-216 706,-216 712,-222 712,-228 712,-228 712,-240 712,-240 712,-246 706,-252 700,-252"/>
+<text text-anchor="middle" x="649.5" y="-231.5" font-family="sans" font-size="10.00">quantification_salmon</text>
 </g>
 <!-- 4&#45;&gt;0 -->
-<g id="edge4" class="edge"><title>4&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M623.094,-215.785C634.597,-206.411 647.644,-193.897 656,-180 681.56,-137.492 692.17,-79.7097 696.391,-46.1773"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="699.874,-46.5286 697.548,-36.1923 692.92,-45.7231 699.874,-46.5286"/>
+<g id="edge4" class="edge">
+<title>4&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M682.26,-215.99C696.54,-207.05 712.3,-194.85 722.5,-180 750.73,-138.9 760.44,-80.17 763.77,-46.17"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="767.27,-46.32 764.64,-36.06 760.3,-45.72 767.27,-46.32"/>
 </g>
 <!-- 5 -->
-<g id="node6" class="node"><title>5</title>
-<path fill="none" stroke="#88d856" stroke-width="2" d="M827,-252C827,-252 685,-252 685,-252 679,-252 673,-246 673,-240 673,-240 673,-228 673,-228 673,-222 679,-216 685,-216 685,-216 827,-216 827,-216 833,-216 839,-222 839,-228 839,-228 839,-240 839,-240 839,-246 833,-252 827,-252"/>
-<text text-anchor="middle" x="756" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text>
+<g id="node6" class="node">
+<title>5</title>
+<path fill="none" stroke="#d85656" stroke-width="2" d="M905,-252C905,-252 742,-252 742,-252 736,-252 730,-246 730,-240 730,-240 730,-228 730,-228 730,-222 736,-216 742,-216 742,-216 905,-216 905,-216 911,-216 917,-222 917,-228 917,-228 917,-240 917,-240 917,-246 911,-252 905,-252"/>
+<text text-anchor="middle" x="823.5" y="-231.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text>
 </g>
 <!-- 5&#45;&gt;0 -->
-<g id="edge5" class="edge"><title>5&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M751.432,-215.849C741.551,-178.753 718.127,-90.8101 706.219,-46.1027"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="709.546,-44.9947 703.59,-36.2325 702.782,-46.7964 709.546,-44.9947"/>
+<g id="edge5" class="edge">
+<title>5&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M818.57,-215.83C815.61,-205.47 811.8,-191.99 808.5,-180 795.61,-133.13 781.12,-78.44 772.68,-46.38"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="775.99,-45.2 770.06,-36.41 769.22,-46.98 775.99,-45.2"/>
 </g>
 <!-- 6 -->
-<g id="node7" class="node"><title>6</title>
-<path fill="none" stroke="#568ad8" stroke-width="2" d="M995,-252C995,-252 869,-252 869,-252 863,-252 857,-246 857,-240 857,-240 857,-228 857,-228 857,-222 863,-216 869,-216 869,-216 995,-216 995,-216 1001,-216 1007,-222 1007,-228 1007,-228 1007,-240 1007,-240 1007,-246 1001,-252 995,-252"/>
-<text text-anchor="middle" x="932" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text>
+<g id="node7" class="node">
+<title>6</title>
+<path fill="none" stroke="#56d8d8" stroke-width="2" d="M1093.5,-252C1093.5,-252 947.5,-252 947.5,-252 941.5,-252 935.5,-246 935.5,-240 935.5,-240 935.5,-228 935.5,-228 935.5,-222 941.5,-216 947.5,-216 947.5,-216 1093.5,-216 1093.5,-216 1099.5,-216 1105.5,-222 1105.5,-228 1105.5,-228 1105.5,-240 1105.5,-240 1105.5,-246 1099.5,-252 1093.5,-252"/>
+<text text-anchor="middle" x="1020.5" y="-231.5" font-family="sans" font-size="10.00">genome_quantification_kallisto</text>
 </g>
 <!-- 6&#45;&gt;0 -->
-<g id="edge6" class="edge"><title>6&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M863.1,-215.882C827.305,-205.814 788.309,-192.618 775,-180 737.506,-144.455 755.302,-118.111 732,-72 727.356,-62.8095 721.614,-53.1415 716.229,-44.6159"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="719.17,-42.7179 710.807,-36.2128 713.288,-46.5131 719.17,-42.7179"/>
+<g id="edge6" class="edge">
+<title>6&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M938.16,-215.95C898.46,-206.3 856.28,-193.49 841.5,-180 803.34,-145.17 821.8,-118.11 798.5,-72 793.86,-62.81 788.11,-53.14 782.73,-44.62"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="785.67,-42.72 777.31,-36.21 779.79,-46.51 785.67,-42.72"/>
 </g>
 <!-- 7 -->
-<g id="node8" class="node"><title>7</title>
-<path fill="none" stroke="#5673d8" stroke-width="2" d="M923,-108C923,-108 835,-108 835,-108 829,-108 823,-102 823,-96 823,-96 823,-84 823,-84 823,-78 829,-72 835,-72 835,-72 923,-72 923,-72 929,-72 935,-78 935,-84 935,-84 935,-96 935,-96 935,-102 929,-108 923,-108"/>
-<text text-anchor="middle" x="879" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text>
-<text text-anchor="middle" x="879" y="-82" font-family="sans" font-size="10.00">seqmode: paired_end</text>
+<g id="node8" class="node">
+<title>7</title>
+<path fill="none" stroke="#56d8c1" stroke-width="2" d="M1006.5,-108C1006.5,-108 908.5,-108 908.5,-108 902.5,-108 896.5,-102 896.5,-96 896.5,-96 896.5,-84 896.5,-84 896.5,-78 902.5,-72 908.5,-72 908.5,-72 1006.5,-72 1006.5,-72 1012.5,-72 1018.5,-78 1018.5,-84 1018.5,-84 1018.5,-96 1018.5,-96 1018.5,-102 1012.5,-108 1006.5,-108"/>
+<text text-anchor="middle" x="957.5" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text>
+<text text-anchor="middle" x="957.5" y="-82" font-family="sans" font-size="10.00">seqmode: paired_end</text>
 </g>
 <!-- 7&#45;&gt;0 -->
-<g id="edge7" class="edge"><title>7&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M834.967,-71.8761C804.523,-60.0365 764.485,-44.4665 735.753,-33.293"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="736.685,-29.8999 726.096,-29.5374 734.148,-36.424 736.685,-29.8999"/>
+<g id="edge7" class="edge">
+<title>7&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M910.53,-71.88C877.17,-59.71 833.02,-43.62 802.23,-32.39"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="803.32,-29.06 792.73,-28.93 800.93,-35.64 803.32,-29.06"/>
 </g>
 <!-- 8 -->
-<g id="node9" class="node"><title>8</title>
-<path fill="none" stroke="#5673d8" stroke-width="2" d="M1109,-108C1109,-108 1021,-108 1021,-108 1015,-108 1009,-102 1009,-96 1009,-96 1009,-84 1009,-84 1009,-78 1015,-72 1021,-72 1021,-72 1109,-72 1109,-72 1115,-72 1121,-78 1121,-84 1121,-84 1121,-96 1121,-96 1121,-102 1115,-108 1109,-108"/>
-<text text-anchor="middle" x="1065" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text>
-<text text-anchor="middle" x="1065" y="-82" font-family="sans" font-size="10.00">seqmode: single_end</text>
+<g id="node9" class="node">
+<title>8</title>
+<path fill="none" stroke="#56d8c1" stroke-width="2" d="M1210.5,-108C1210.5,-108 1114.5,-108 1114.5,-108 1108.5,-108 1102.5,-102 1102.5,-96 1102.5,-96 1102.5,-84 1102.5,-84 1102.5,-78 1108.5,-72 1114.5,-72 1114.5,-72 1210.5,-72 1210.5,-72 1216.5,-72 1222.5,-78 1222.5,-84 1222.5,-84 1222.5,-96 1222.5,-96 1222.5,-102 1216.5,-108 1210.5,-108"/>
+<text text-anchor="middle" x="1162.5" y="-93" font-family="sans" font-size="10.00">calculate_TIN_scores</text>
+<text text-anchor="middle" x="1162.5" y="-82" font-family="sans" font-size="10.00">seqmode: single_end</text>
 </g>
 <!-- 8&#45;&gt;0 -->
-<g id="edge8" class="edge"><title>8&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1008.61,-78.2159C933.016,-63.7572 800.875,-38.4843 736.452,-26.1629"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="736.823,-22.6705 726.343,-24.2296 735.508,-29.5459 736.823,-22.6705"/>
+<g id="edge8" class="edge">
+<title>8&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1102.2,-78.37C1019.06,-63.71 871.68,-37.72 802.86,-25.59"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="803.35,-22.12 792.9,-23.83 802.14,-29.01 803.35,-22.12"/>
 </g>
 <!-- 9 -->
-<g id="node10" class="node"><title>9</title>
-<path fill="none" stroke="#bed856" stroke-width="2" d="M726,-326.5C726,-326.5 614,-326.5 614,-326.5 608,-326.5 602,-320.5 602,-314.5 602,-314.5 602,-302.5 602,-302.5 602,-296.5 608,-290.5 614,-290.5 614,-290.5 726,-290.5 726,-290.5 732,-290.5 738,-296.5 738,-302.5 738,-302.5 738,-314.5 738,-314.5 738,-320.5 732,-326.5 726,-326.5"/>
-<text text-anchor="middle" x="670" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text>
+<g id="node10" class="node">
+<title>9</title>
+<path fill="none" stroke="#56d8a2" stroke-width="2" d="M755.5,-326.5C755.5,-326.5 627.5,-326.5 627.5,-326.5 621.5,-326.5 615.5,-320.5 615.5,-314.5 615.5,-314.5 615.5,-302.5 615.5,-302.5 615.5,-296.5 621.5,-290.5 627.5,-290.5 627.5,-290.5 755.5,-290.5 755.5,-290.5 761.5,-290.5 767.5,-296.5 767.5,-302.5 767.5,-302.5 767.5,-314.5 767.5,-314.5 767.5,-320.5 761.5,-326.5 755.5,-326.5"/>
+<text text-anchor="middle" x="691.5" y="-306" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text>
 </g>
 <!-- 9&#45;&gt;3 -->
-<g id="edge9" class="edge"><title>9&#45;&gt;3</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M620.97,-290.41C590.929,-279.939 552.3,-266.475 520.644,-255.441"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="521.655,-252.087 511.061,-252.1 519.351,-258.697 521.655,-252.087"/>
+<g id="edge9" class="edge">
+<title>9&#45;&gt;3</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M645.52,-290.32C617.74,-279.94 582.15,-266.63 552.83,-255.68"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="553.74,-252.28 543.15,-252.06 551.29,-258.84 553.74,-252.28"/>
 </g>
 <!-- 9&#45;&gt;5 -->
-<g id="edge13" class="edge"><title>9&#45;&gt;5</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M690.381,-290.319C701.627,-280.838 715.758,-268.925 728.024,-258.585"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="730.376,-261.18 735.765,-252.058 725.864,-255.828 730.376,-261.18"/>
+<g id="edge13" class="edge">
+<title>9&#45;&gt;5</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M722.78,-290.32C740.95,-280.34 764.02,-267.67 783.49,-256.97"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="785.36,-259.94 792.44,-252.06 781.99,-253.8 785.36,-259.94"/>
 </g>
 <!-- 18 -->
-<g id="node19" class="node"><title>18</title>
-<path fill="none" stroke="#70d856" stroke-width="2" d="M1126.5,-252C1126.5,-252 1037.5,-252 1037.5,-252 1031.5,-252 1025.5,-246 1025.5,-240 1025.5,-240 1025.5,-228 1025.5,-228 1025.5,-222 1031.5,-216 1037.5,-216 1037.5,-216 1126.5,-216 1126.5,-216 1132.5,-216 1138.5,-222 1138.5,-228 1138.5,-228 1138.5,-240 1138.5,-240 1138.5,-246 1132.5,-252 1126.5,-252"/>
-<text text-anchor="middle" x="1082" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text>
+<g id="node19" class="node">
+<title>18</title>
+<path fill="none" stroke="#88d856" stroke-width="2" d="M1237,-252C1237,-252 1136,-252 1136,-252 1130,-252 1124,-246 1124,-240 1124,-240 1124,-228 1124,-228 1124,-222 1130,-216 1136,-216 1136,-216 1237,-216 1237,-216 1243,-216 1249,-222 1249,-228 1249,-228 1249,-240 1249,-240 1249,-246 1243,-252 1237,-252"/>
+<text text-anchor="middle" x="1186.5" y="-231.5" font-family="sans" font-size="10.00">pe_map_genome_star</text>
 </g>
 <!-- 9&#45;&gt;18 -->
-<g id="edge26" class="edge"><title>9&#45;&gt;18</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M738.35,-290.717C742.959,-289.749 747.541,-288.832 752,-288 865.226,-266.87 898.018,-274.364 1015.13,-252.03"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1016.06,-255.415 1025.21,-250.075 1014.73,-248.543 1016.06,-255.415"/>
+<g id="edge26" class="edge">
+<title>9&#45;&gt;18</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M767.61,-291.23C773.99,-290.06 780.35,-288.97 786.5,-288 928.12,-265.74 968.19,-276.39 1113.57,-252.03"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1114.49,-255.42 1123.76,-250.29 1113.32,-248.52 1114.49,-255.42"/>
 </g>
 <!-- 10 -->
-<g id="node11" class="node"><title>10</title>
-<path fill="none" stroke="#56d8a2" stroke-width="2" d="M541.5,-329C541.5,-329 440.5,-329 440.5,-329 434.5,-329 428.5,-323 428.5,-317 428.5,-317 428.5,-300 428.5,-300 428.5,-294 434.5,-288 440.5,-288 440.5,-288 541.5,-288 541.5,-288 547.5,-288 553.5,-294 553.5,-300 553.5,-300 553.5,-317 553.5,-317 553.5,-323 547.5,-329 541.5,-329"/>
-<text text-anchor="middle" x="491" y="-317" font-family="sans" font-size="10.00">create_index_salmon</text>
-<text text-anchor="middle" x="491" y="-306" font-family="sans" font-size="10.00">kmer: 31</text>
-<text text-anchor="middle" x="491" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
+<g id="node11" class="node">
+<title>10</title>
+<path fill="none" stroke="#56d88a" stroke-width="2" d="M565,-329C565,-329 450,-329 450,-329 444,-329 438,-323 438,-317 438,-317 438,-300 438,-300 438,-294 444,-288 450,-288 450,-288 565,-288 565,-288 571,-288 577,-294 577,-300 577,-300 577,-317 577,-317 577,-323 571,-329 565,-329"/>
+<text text-anchor="middle" x="507.5" y="-317" font-family="sans" font-size="10.00">create_index_salmon</text>
+<text text-anchor="middle" x="507.5" y="-306" font-family="sans" font-size="10.00">kmer: 31</text>
+<text text-anchor="middle" x="507.5" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
 </g>
 <!-- 10&#45;&gt;3 -->
-<g id="edge10" class="edge"><title>10&#45;&gt;3</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M483.076,-287.689C479.859,-279.647 476.094,-270.236 472.644,-261.61"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="475.891,-260.304 468.928,-252.319 469.392,-262.904 475.891,-260.304"/>
+<g id="edge10" class="edge">
+<title>10&#45;&gt;3</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M504.77,-287.69C503.69,-279.91 502.44,-270.84 501.29,-262.45"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="504.72,-261.75 499.89,-252.32 497.79,-262.7 504.72,-261.75"/>
 </g>
 <!-- 10&#45;&gt;4 -->
-<g id="edge12" class="edge"><title>10&#45;&gt;4</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M520.226,-287.88C533.987,-278.643 550.489,-267.565 564.825,-257.941"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="567.076,-260.646 573.428,-252.166 563.174,-254.834 567.076,-260.646"/>
+<g id="edge12" class="edge">
+<title>10&#45;&gt;4</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M545.93,-287.88C564.79,-278.25 587.57,-266.62 606.95,-256.72"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="608.56,-259.83 615.88,-252.17 605.38,-253.6 608.56,-259.83"/>
 </g>
 <!-- 11 -->
-<g id="node12" class="node"><title>11</title>
-<path fill="none" stroke="#d86e56" stroke-width="2" d="M1015,-326.5C1015,-326.5 919,-326.5 919,-326.5 913,-326.5 907,-320.5 907,-314.5 907,-314.5 907,-302.5 907,-302.5 907,-296.5 913,-290.5 919,-290.5 919,-290.5 1015,-290.5 1015,-290.5 1021,-290.5 1027,-296.5 1027,-302.5 1027,-302.5 1027,-314.5 1027,-314.5 1027,-320.5 1021,-326.5 1015,-326.5"/>
-<text text-anchor="middle" x="967" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text>
+<g id="node12" class="node">
+<title>11</title>
+<path fill="none" stroke="#bed856" stroke-width="2" d="M1080.5,-326.5C1080.5,-326.5 970.5,-326.5 970.5,-326.5 964.5,-326.5 958.5,-320.5 958.5,-314.5 958.5,-314.5 958.5,-302.5 958.5,-302.5 958.5,-296.5 964.5,-290.5 970.5,-290.5 970.5,-290.5 1080.5,-290.5 1080.5,-290.5 1086.5,-290.5 1092.5,-296.5 1092.5,-302.5 1092.5,-302.5 1092.5,-314.5 1092.5,-314.5 1092.5,-320.5 1086.5,-326.5 1080.5,-326.5"/>
+<text text-anchor="middle" x="1025.5" y="-306" font-family="sans" font-size="10.00">remove_polya_cutadapt</text>
 </g>
 <!-- 11&#45;&gt;4 -->
-<g id="edge11" class="edge"><title>11&#45;&gt;4</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M906.62,-290.63C902.698,-289.695 898.8,-288.808 895,-288 796.444,-267.04 767.467,-272.788 664.451,-251.962"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="665.082,-248.518 654.582,-249.932 663.672,-255.375 665.082,-248.518"/>
+<g id="edge11" class="edge">
+<title>11&#45;&gt;4</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M958.46,-291.13C953.41,-290.03 948.38,-288.97 943.5,-288 848.22,-269.01 820.7,-271.51 722.24,-252.27"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="722.65,-248.78 712.16,-250.27 721.29,-255.65 722.65,-248.78"/>
 </g>
 <!-- 11&#45;&gt;6 -->
-<g id="edge15" class="edge"><title>11&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M958.705,-290.319C954.521,-281.651 949.355,-270.949 944.685,-261.276"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="947.734,-259.542 940.235,-252.058 941.431,-262.585 947.734,-259.542"/>
+<g id="edge15" class="edge">
+<title>11&#45;&gt;6</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1024.32,-290.32C1023.74,-281.92 1023.03,-271.62 1022.37,-262.19"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1025.86,-261.79 1021.68,-252.06 1018.87,-262.28 1025.86,-261.79"/>
 </g>
 <!-- 19 -->
-<g id="node20" class="node"><title>19</title>
-<path fill="none" stroke="#d88d56" stroke-width="2" d="M1241.5,-252C1241.5,-252 1168.5,-252 1168.5,-252 1162.5,-252 1156.5,-246 1156.5,-240 1156.5,-240 1156.5,-228 1156.5,-228 1156.5,-222 1162.5,-216 1168.5,-216 1168.5,-216 1241.5,-216 1241.5,-216 1247.5,-216 1253.5,-222 1253.5,-228 1253.5,-228 1253.5,-240 1253.5,-240 1253.5,-246 1247.5,-252 1241.5,-252"/>
-<text text-anchor="middle" x="1205" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text>
+<g id="node20" class="node">
+<title>19</title>
+<path fill="none" stroke="#59d856" stroke-width="2" d="M1362,-252C1362,-252 1279,-252 1279,-252 1273,-252 1267,-246 1267,-240 1267,-240 1267,-228 1267,-228 1267,-222 1273,-216 1279,-216 1279,-216 1362,-216 1362,-216 1368,-216 1374,-222 1374,-228 1374,-228 1374,-240 1374,-240 1374,-246 1368,-252 1362,-252"/>
+<text text-anchor="middle" x="1320.5" y="-231.5" font-family="sans" font-size="10.00">map_genome_star</text>
 </g>
 <!-- 11&#45;&gt;19 -->
-<g id="edge28" class="edge"><title>11&#45;&gt;19</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1023.1,-290.41C1060.19,-279.112 1108.72,-264.329 1146.32,-252.874"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1147.58,-256.15 1156.13,-249.888 1145.54,-249.454 1147.58,-256.15"/>
+<g id="edge28" class="edge">
+<title>11&#45;&gt;19</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1092.69,-292.13C1136.95,-281.86 1196.4,-267.78 1257.02,-252.34"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1257.94,-255.72 1266.76,-249.85 1256.21,-248.94 1257.94,-255.72"/>
 </g>
 <!-- 12 -->
-<g id="node13" class="node"><title>12</title>
-<path fill="none" stroke="#56d8d8" stroke-width="2" d="M873.5,-326.5C873.5,-326.5 772.5,-326.5 772.5,-326.5 766.5,-326.5 760.5,-320.5 760.5,-314.5 760.5,-314.5 760.5,-302.5 760.5,-302.5 760.5,-296.5 766.5,-290.5 772.5,-290.5 772.5,-290.5 873.5,-290.5 873.5,-290.5 879.5,-290.5 885.5,-296.5 885.5,-302.5 885.5,-302.5 885.5,-314.5 885.5,-314.5 885.5,-320.5 879.5,-326.5 873.5,-326.5"/>
-<text text-anchor="middle" x="823" y="-311.5" font-family="sans" font-size="10.00">create_index_kallisto</text>
-<text text-anchor="middle" x="823" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
+<g id="node13" class="node">
+<title>12</title>
+<path fill="none" stroke="#d6d856" stroke-width="2" d="M923,-326.5C923,-326.5 808,-326.5 808,-326.5 802,-326.5 796,-320.5 796,-314.5 796,-314.5 796,-302.5 796,-302.5 796,-296.5 802,-290.5 808,-290.5 808,-290.5 923,-290.5 923,-290.5 929,-290.5 935,-296.5 935,-302.5 935,-302.5 935,-314.5 935,-314.5 935,-320.5 929,-326.5 923,-326.5"/>
+<text text-anchor="middle" x="865.5" y="-311.5" font-family="sans" font-size="10.00">create_index_kallisto</text>
+<text text-anchor="middle" x="865.5" y="-300.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
 </g>
 <!-- 12&#45;&gt;5 -->
-<g id="edge14" class="edge"><title>12&#45;&gt;5</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M807.122,-290.319C798.611,-281.109 787.979,-269.604 778.618,-259.475"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="781.122,-257.027 771.764,-252.058 775.981,-261.778 781.122,-257.027"/>
+<g id="edge14" class="edge">
+<title>12&#45;&gt;5</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M855.55,-290.32C850.47,-281.56 844.2,-270.73 838.55,-260.97"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="841.42,-258.96 833.38,-252.06 835.37,-262.47 841.42,-258.96"/>
 </g>
 <!-- 12&#45;&gt;6 -->
-<g id="edge16" class="edge"><title>12&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M848.831,-290.319C863.492,-280.567 882.022,-268.242 897.868,-257.702"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="899.966,-260.511 906.354,-252.058 896.089,-254.682 899.966,-260.511"/>
+<g id="edge16" class="edge">
+<title>12&#45;&gt;6</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M902.23,-290.32C923.95,-280.16 951.64,-267.21 974.76,-256.39"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="976.46,-259.47 984.03,-252.06 973.49,-253.12 976.46,-259.47"/>
 </g>
 <!-- 13 -->
-<g id="node14" class="node"><title>13</title>
-<path fill="none" stroke="#a7d856" stroke-width="2" d="M962,-180C962,-180 796,-180 796,-180 790,-180 784,-174 784,-168 784,-168 784,-156 784,-156 784,-150 790,-144 796,-144 796,-144 962,-144 962,-144 968,-144 974,-150 974,-156 974,-156 974,-168 974,-168 974,-174 968,-180 962,-180"/>
-<text text-anchor="middle" x="879" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text>
+<g id="node14" class="node">
+<title>13</title>
+<path fill="none" stroke="#70d856" stroke-width="2" d="M1052,-180C1052,-180 863,-180 863,-180 857,-180 851,-174 851,-168 851,-168 851,-156 851,-156 851,-150 857,-144 863,-144 863,-144 1052,-144 1052,-144 1058,-144 1064,-150 1064,-156 1064,-156 1064,-168 1064,-168 1064,-174 1058,-180 1052,-180"/>
+<text text-anchor="middle" x="957.5" y="-159.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text>
 </g>
 <!-- 13&#45;&gt;7 -->
-<g id="edge17" class="edge"><title>13&#45;&gt;7</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M879,-143.697C879,-135.983 879,-126.712 879,-118.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="882.5,-118.104 879,-108.104 875.5,-118.104 882.5,-118.104"/>
+<g id="edge17" class="edge">
+<title>13&#45;&gt;7</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M957.5,-143.7C957.5,-135.98 957.5,-126.71 957.5,-118.11"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="961,-118.1 957.5,-108.1 954,-118.1 961,-118.1"/>
 </g>
 <!-- 14 -->
-<g id="node15" class="node"><title>14</title>
-<path fill="none" stroke="#56d873" stroke-width="2" d="M1125.5,-180C1125.5,-180 1004.5,-180 1004.5,-180 998.5,-180 992.5,-174 992.5,-168 992.5,-168 992.5,-156 992.5,-156 992.5,-150 998.5,-144 1004.5,-144 1004.5,-144 1125.5,-144 1125.5,-144 1131.5,-144 1137.5,-150 1137.5,-156 1137.5,-156 1137.5,-168 1137.5,-168 1137.5,-174 1131.5,-180 1125.5,-180"/>
-<text text-anchor="middle" x="1065" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text>
+<g id="node15" class="node">
+<title>14</title>
+<path fill="none" stroke="#56c1d8" stroke-width="2" d="M1230.5,-180C1230.5,-180 1094.5,-180 1094.5,-180 1088.5,-180 1082.5,-174 1082.5,-168 1082.5,-168 1082.5,-156 1082.5,-156 1082.5,-150 1088.5,-144 1094.5,-144 1094.5,-144 1230.5,-144 1230.5,-144 1236.5,-144 1242.5,-150 1242.5,-156 1242.5,-156 1242.5,-168 1242.5,-168 1242.5,-174 1236.5,-180 1230.5,-180"/>
+<text text-anchor="middle" x="1162.5" y="-159.5" font-family="sans" font-size="10.00">extract_transcripts_as_bed12</text>
 </g>
 <!-- 14&#45;&gt;7 -->
-<g id="edge18" class="edge"><title>14&#45;&gt;7</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1019.5,-143.876C993.62,-134.137 961.033,-121.872 933.759,-111.608"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="934.945,-108.315 924.353,-108.068 932.479,-114.866 934.945,-108.315"/>
+<g id="edge18" class="edge">
+<title>14&#45;&gt;7</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1112.61,-143.97C1083.85,-134.14 1047.49,-121.73 1017.21,-111.39"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1018.2,-108.03 1007.61,-108.11 1015.94,-114.65 1018.2,-108.03"/>
 </g>
 <!-- 14&#45;&gt;8 -->
-<g id="edge20" class="edge"><title>14&#45;&gt;8</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1065,-143.697C1065,-135.983 1065,-126.712 1065,-118.112"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1068.5,-118.104 1065,-108.104 1061.5,-118.104 1068.5,-118.104"/>
+<g id="edge20" class="edge">
+<title>14&#45;&gt;8</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1162.5,-143.7C1162.5,-135.98 1162.5,-126.71 1162.5,-118.11"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1166,-118.1 1162.5,-108.1 1159,-118.1 1166,-118.1"/>
 </g>
 <!-- 15 -->
-<g id="node16" class="node"><title>15</title>
-<path fill="none" stroke="#56c1d8" stroke-width="2" d="M1318,-180C1318,-180 1168,-180 1168,-180 1162,-180 1156,-174 1156,-168 1156,-168 1156,-156 1156,-156 1156,-150 1162,-144 1168,-144 1168,-144 1318,-144 1318,-144 1324,-144 1330,-150 1330,-156 1330,-156 1330,-168 1330,-168 1330,-174 1324,-180 1318,-180"/>
-<text text-anchor="middle" x="1243" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text>
+<g id="node16" class="node">
+<title>15</title>
+<path fill="none" stroke="#56a2d8" stroke-width="2" d="M1444.5,-180C1444.5,-180 1272.5,-180 1272.5,-180 1266.5,-180 1260.5,-174 1260.5,-168 1260.5,-168 1260.5,-156 1260.5,-156 1260.5,-150 1266.5,-144 1272.5,-144 1272.5,-144 1444.5,-144 1444.5,-144 1450.5,-144 1456.5,-150 1456.5,-156 1456.5,-156 1456.5,-168 1456.5,-168 1456.5,-174 1450.5,-180 1444.5,-180"/>
+<text text-anchor="middle" x="1358.5" y="-159.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text>
 </g>
 <!-- 15&#45;&gt;8 -->
-<g id="edge19" class="edge"><title>15&#45;&gt;8</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1199.46,-143.876C1174.8,-134.179 1143.78,-121.98 1117.75,-111.743"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1118.99,-108.471 1108.4,-108.068 1116.43,-114.985 1118.99,-108.471"/>
+<g id="edge19" class="edge">
+<title>15&#45;&gt;8</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1310.8,-143.97C1283.42,-134.19 1248.84,-121.84 1219.97,-111.52"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1221,-108.18 1210.41,-108.11 1218.65,-114.77 1221,-108.18"/>
 </g>
 <!-- 16 -->
-<g id="node17" class="node"><title>16</title>
-<path fill="none" stroke="#56a2d8" stroke-width="2" d="M791,-401C791,-401 523,-401 523,-401 517,-401 511,-395 511,-389 511,-389 511,-377 511,-377 511,-371 517,-365 523,-365 523,-365 791,-365 791,-365 797,-365 803,-371 803,-377 803,-377 803,-389 803,-389 803,-395 797,-401 791,-401"/>
-<text text-anchor="middle" x="657" y="-386" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text>
-<text text-anchor="middle" x="657" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text>
+<g id="node17" class="node">
+<title>16</title>
+<path fill="none" stroke="#568ad8" stroke-width="2" d="M835,-401C835,-401 534,-401 534,-401 528,-401 522,-395 522,-389 522,-389 522,-377 522,-377 522,-371 528,-365 534,-365 534,-365 835,-365 835,-365 841,-365 847,-371 847,-377 847,-377 847,-389 847,-389 847,-395 841,-401 835,-401"/>
+<text text-anchor="middle" x="684.5" y="-386" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text>
+<text text-anchor="middle" x="684.5" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired_synthetic_10_reads_paired</text>
 </g>
 <!-- 16&#45;&gt;9 -->
-<g id="edge21" class="edge"><title>16&#45;&gt;9</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M660.081,-364.819C661.586,-356.422 663.434,-346.116 665.125,-336.686"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="668.621,-337.019 666.941,-326.558 661.731,-335.783 668.621,-337.019"/>
+<g id="edge21" class="edge">
+<title>16&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M686.16,-364.82C686.97,-356.42 687.96,-346.12 688.88,-336.69"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="692.38,-336.85 689.85,-326.56 685.41,-336.18 692.38,-336.85"/>
 </g>
 <!-- 17 -->
-<g id="node18" class="node"><title>17</title>
-<path fill="none" stroke="#56d88a" stroke-width="2" d="M1113,-401C1113,-401 833,-401 833,-401 827,-401 821,-395 821,-389 821,-389 821,-377 821,-377 821,-371 827,-365 833,-365 833,-365 1113,-365 1113,-365 1119,-365 1125,-371 1125,-377 1125,-377 1125,-389 1125,-389 1125,-395 1119,-401 1113,-401"/>
-<text text-anchor="middle" x="973" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text>
-<text text-anchor="middle" x="973" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text>
+<g id="node18" class="node">
+<title>17</title>
+<path fill="none" stroke="#a7d856" stroke-width="2" d="M1190,-401C1190,-401 877,-401 877,-401 871,-401 865,-395 865,-389 865,-389 865,-377 865,-377 865,-371 871,-365 877,-365 877,-365 1190,-365 1190,-365 1196,-365 1202,-371 1202,-377 1202,-377 1202,-389 1202,-389 1202,-395 1196,-401 1190,-401"/>
+<text text-anchor="middle" x="1033.5" y="-386" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text>
+<text text-anchor="middle" x="1033.5" y="-375" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1_synthetic_10_reads_mate_1</text>
 </g>
 <!-- 17&#45;&gt;11 -->
-<g id="edge22" class="edge"><title>17&#45;&gt;11</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M971.578,-364.819C970.883,-356.422 970.03,-346.116 969.25,-336.686"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="972.725,-336.235 968.412,-326.558 965.748,-336.813 972.725,-336.235"/>
+<g id="edge22" class="edge">
+<title>17&#45;&gt;11</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1031.6,-364.82C1030.68,-356.42 1029.54,-346.12 1028.5,-336.69"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1031.96,-336.11 1027.38,-326.56 1025,-336.88 1031.96,-336.11"/>
 </g>
 <!-- 18&#45;&gt;13 -->
-<g id="edge23" class="edge"><title>18&#45;&gt;13</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1032.6,-215.966C1004.11,-206.142 968.114,-193.729 938.129,-183.389"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="939.216,-180.062 928.621,-180.111 936.934,-186.68 939.216,-180.062"/>
+<g id="edge23" class="edge">
+<title>18&#45;&gt;13</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1130.77,-215.97C1098.36,-206.06 1057.32,-193.51 1023.32,-183.12"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1024.06,-179.69 1013.48,-180.11 1022.02,-186.38 1024.06,-179.69"/>
 </g>
 <!-- 19&#45;&gt;15 -->
-<g id="edge24" class="edge"><title>19&#45;&gt;15</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1214.39,-215.697C1218.76,-207.644 1224.06,-197.894 1228.9,-188.982"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1232.02,-190.563 1233.71,-180.104 1225.87,-187.223 1232.02,-190.563"/>
+<g id="edge24" class="edge">
+<title>19&#45;&gt;15</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1329.89,-215.7C1334.26,-207.64 1339.56,-197.89 1344.4,-188.98"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1347.52,-190.56 1349.21,-180.1 1341.37,-187.22 1347.52,-190.56"/>
 </g>
 <!-- 20 -->
-<g id="node21" class="node"><title>20</title>
-<path fill="none" stroke="#d6d856" stroke-width="2" d="M1193.5,-329C1193.5,-329 1092.5,-329 1092.5,-329 1086.5,-329 1080.5,-323 1080.5,-317 1080.5,-317 1080.5,-300 1080.5,-300 1080.5,-294 1086.5,-288 1092.5,-288 1092.5,-288 1193.5,-288 1193.5,-288 1199.5,-288 1205.5,-294 1205.5,-300 1205.5,-300 1205.5,-317 1205.5,-317 1205.5,-323 1199.5,-329 1193.5,-329"/>
-<text text-anchor="middle" x="1143" y="-317" font-family="sans" font-size="10.00">create_index_star</text>
-<text text-anchor="middle" x="1143" y="-306" font-family="sans" font-size="10.00">index_size: 75</text>
-<text text-anchor="middle" x="1143" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
+<g id="node21" class="node">
+<title>20</title>
+<path fill="none" stroke="#d88d56" stroke-width="2" d="M1311,-329C1311,-329 1196,-329 1196,-329 1190,-329 1184,-323 1184,-317 1184,-317 1184,-300 1184,-300 1184,-294 1190,-288 1196,-288 1196,-288 1311,-288 1311,-288 1317,-288 1323,-294 1323,-300 1323,-300 1323,-317 1323,-317 1323,-323 1317,-329 1311,-329"/>
+<text text-anchor="middle" x="1253.5" y="-317" font-family="sans" font-size="10.00">create_index_star</text>
+<text text-anchor="middle" x="1253.5" y="-306" font-family="sans" font-size="10.00">index_size: 75</text>
+<text text-anchor="middle" x="1253.5" y="-295" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
 </g>
 <!-- 20&#45;&gt;18 -->
-<g id="edge25" class="edge"><title>20&#45;&gt;18</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1126.33,-287.689C1119.2,-279.215 1110.79,-269.22 1103.22,-260.226"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1105.69,-257.718 1096.57,-252.319 1100.33,-262.225 1105.69,-257.718"/>
+<g id="edge25" class="edge">
+<title>20&#45;&gt;18</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1235.19,-287.69C1227.2,-279.04 1217.75,-268.81 1209.3,-259.68"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1211.86,-257.29 1202.51,-252.32 1206.72,-262.04 1211.86,-257.29"/>
 </g>
 <!-- 20&#45;&gt;19 -->
-<g id="edge27" class="edge"><title>20&#45;&gt;19</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M1159.94,-287.689C1167.26,-279.128 1175.91,-269.016 1183.66,-259.951"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="1186.35,-262.194 1190.19,-252.319 1181.03,-257.645 1186.35,-262.194"/>
+<g id="edge27" class="edge">
+<title>20&#45;&gt;19</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M1271.81,-287.69C1279.8,-279.04 1289.25,-268.81 1297.7,-259.68"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="1300.28,-262.04 1304.49,-252.32 1295.14,-257.29 1300.28,-262.04"/>
 </g>
 </g>
 </svg>
diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py
index b6b9569..4456e00 100755
--- a/scripts/labkey_to_snakemake.py
+++ b/scripts/labkey_to_snakemake.py
@@ -1,14 +1,11 @@
 #!/usr/bin/env python3
 
-## -----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
 # Author : Katsantoni Maria, Christina Herrmann
 # Company: Mihaela Zavolan, Biozentrum, Basel
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
 # This script is part of the Zavolan lab quantification pipeline, which is used
-# for analysing RNA-seq data. The table is provided by labkey and is a csv file.
-# If the user provides their own table the table should contain the following 
+# for analysing RNA-seq data. The table is provided by labkey as a csv file.
+# If the user provides their own table the table should contain the following
 # columns:
 # -----------------------------------------------------------------------------
 
@@ -24,117 +21,95 @@ from Bio import SeqIO
 from io import StringIO
 from csv import writer
 from pathlib import Path
-# for convenience, load QueryFilter explicitly (avoids long lines in filter definitions)
+# import QueryFilter explicitly to avoid long lines in filter definitions
 from labkey.query import QueryFilter
-# ----------------------------------------------------------------------------------------------------------------------
-def main():
-    """ Preprocess sample folder and create config file for snakemake"""
-
-    __doc__ = "Preprocess of the table and create config file."
-
-    parser = ArgumentParser(
-        description=__doc__,
-        formatter_class=RawTextHelpFormatter)
 
-    parser.add_argument(
-        "genomes_path",
-        help="Path containing the FASTA and GTF files for all organisms",
-        metavar="GENOMES PATH"
-    )
-
-    parser.add_argument(
-        "--input-table",
-        type=str,
-        default=None,
-        help=(
-            "Input table in LabKey format containing the sample information;"
-            "\nexactly one of '--input-table' and '--remote' is required."
-        ),
-        metavar="FILE",
-    )
-
-    parser.add_argument(
-        "--remote",
-        action="store_true",
-        help=(
-            "Fetch LabKey table via API; exactly one of '--input-table' and"
-            "\n'--remote' is required."
-        ),
-    )
-
-    parser.add_argument(
-        "--project-name",
-        help=(
-            "Name of LabKey project containing table '--table-name'; required"
-            "\nif '--remote' is specified."
-        ),
-        metavar="STR",
-    )
 
-    parser.add_argument(
-        "--table-name",
-        help="Name of LabKey table; required if '--remote' is specified.",
-        metavar="STR",
-    )
-    parser.add_argument(
-        "--input-dict",
-        help=(
-            "Input dictionary containing the feature name conversion from \n"
-            "LabKey to Snakemake; default: '%(default)s'"
-        ),
-        default=os.path.join(
-            os.path.dirname(__file__),
-            'labkey_to_snakemake.dict.tsv'
-        ),
-        metavar="FILE"
-    )
-
-    parser.add_argument(
-        "--samples-table",
-        help="Output table compatible to snakemake; default: '%(default)s'",
-        default='samples.tsv',
-        metavar="FILE"
-    )
-
-    parser.add_argument(
-        "--multimappers",
-        type=int,
-        default=100,
-        help="Number of allowed multimappers",
-        metavar='INT',
-    )
-
-    parser.add_argument(
-        "--soft-clip",
-        choices=['EndToEnd','Local'],
-        default='EndToEnd',
-        help="Soft-clipping option for STAR",
-    )
-
-    parser.add_argument(
-        "--pass-mode",
-        choices=['None','Basic'],
-        default='None',
-        help="2-pass mode option for STAR",
-    )
-
-    parser.add_argument(
-        "--libtype",
-        default='A',
-        help="Library type for salmon",
-        metavar="STR",
-    )
-
-    parser.add_argument(
-        "--config-file",
-        help="Configuration file to be used by Snakemake",
-    )
+def main():
+    """ Preprocess sample folder and create config file for snakemake"""
 
+    __doc__ = "Preprocess of labkey table and create " + \
+              "config file and sample table."
+
+    parser = ArgumentParser(description=__doc__,
+                            formatter_class=RawTextHelpFormatter)
+
+    parser.add_argument("genomes_path",
+                        help="Path containing the FASTA and GTF " +
+                        " files for all organisms",
+                        metavar="GENOMES PATH")
+
+    parser.add_argument("--input-table",
+                        type=str,
+                        default=None,
+                        help="Input table in LabKey format " +
+                        "containing the sample information;" +
+                        "\nexactly one '--input-table' and " +
+                        "'--remote' is required.",
+                        metavar="FILE")
+
+    parser.add_argument("--remote",
+                        action="store_true",
+                        help="Fetch LabKey table via API; exactly one of " +
+                        "'--input-table' and" +
+                        "\n'--remote' is required.")
+
+    parser.add_argument("--project-name",
+                        help="Name of LabKey project containing table " +
+                        " '--table-name'; required" +
+                        "\nif '--remote' is specified.",
+                        metavar="STR")
+
+    parser.add_argument("--table-name",
+                        help="Name of LabKey table; required if '--remote'" +
+                        " is specified.",
+                        metavar="STR")
+
+    parser.add_argument("--input-dict",
+                        help="Input dictionary containing the feature name " +
+                        "conversion from LabKey to Snakemake;" +
+                        "default: '%(default)s'",
+                        default=os.path.join(
+                            os.path.dirname(__file__),
+                            'labkey_to_snakemake.dict.tsv'),
+                        metavar="FILE")
+
+    parser.add_argument("--samples-table",
+                        help="Output table compatible to snakemake;" +
+                        "default: '%(default)s'",
+                        default='samples.tsv',
+                        metavar="FILE")
+
+    parser.add_argument("--trim_polya",
+                        type=int,
+                        choices=[True, False],
+                        default=True,
+                        help="Trim poly-As option")
+
+    parser.add_argument("--multimappers",
+                        type=int,
+                        default=100,
+                        help="Number of allowed multimappers",
+                        metavar='INT')
+
+    parser.add_argument("--soft-clip",
+                        choices=['EndToEnd', 'Local'],
+                        default='EndToEnd',
+                        help="Soft-clipping option for STAR")
+
+    parser.add_argument("--pass-mode",
+                        choices=['None', 'Basic'],
+                        default='None',
+                        help="2-pass mode option for STAR")
+
+    parser.add_argument("--libtype",
+                        default='A',
+                        help="Library type for salmon",
+                        metavar="STR")
+
+    parser.add_argument("--config-file",
+                        help="Configuration file to be used by Snakemake")
 
-    # __________________________________________________________________________________________________________________
-    # ------------------------------------------------------------------------------------------------------------------
-    # get the arguments
-    # ------------------------------------------------------------------------------------------------------------------
     try:
         options = parser.parse_args()
     except(Exception):
@@ -146,27 +121,34 @@ def main():
 
     if options.remote and options.input_table:
         parser.print_help()
-        print("\n[ERROR] Options '--input-table' and '--remote' are mutually exclusive.")
+        print(
+            "\n[ERROR] Options '--input-table' and ",
+            "'--remote' are mutually exclusive.")
         sys.exit(1)
 
     if not options.remote and not options.input_table:
         parser.print_help()
-        print("\n[ERROR] At least one of '--input-table' and '--remote' is required.")
+        print("\n[ERROR] At least one of '--input-table' ",
+              "and '--remote' is required.")
         sys.exit(1)
 
     if options.remote and not options.project_name:
         parser.print_help()
-        print("\n[ERROR] If option '--remote' is specified, option '--project-name' is required.")
+        print(
+            "\n[ERROR] If option '--remote' is specified, ",
+            "option '--project-name' is required.")
         sys.exit(1)
 
     if options.remote and not options.table_name:
         parser.print_help()
-        print("\n[ERROR] If option '--remote' is specified, option '--table-name' is required.")
+        print(
+            "\n[ERROR] If option '--remote' is specified, ",
+            "option '--table-name' is required.")
         sys.exit(1)
 
     sys.stdout.write('Reading input file...\n')
 
-    if options.remote == True:
+    if options.remote is True:
         input_table = api_fetch_labkey_table(
             project_name=options.project_name,
             query_name=options.table_name)
@@ -191,8 +173,11 @@ def main():
     input_dict.set_index('snakemake', inplace=True, drop=True)
     sys.stdout.write('Create snakemake table...\n')
     snakemake_table = pd.DataFrame()
+
     for index, row in input_table.iterrows():
-        snakemake_table.loc[index, 'sample'] = row[input_dict.loc['replicate_name', 'labkey']] + "_" + row[input_dict.loc['condition', 'labkey']]
+        snakemake_table.loc[index, 'sample'] = row[
+            input_dict.loc['replicate_name', 'labkey']] + "_" + row[
+            input_dict.loc['condition', 'labkey']]
         if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
             snakemake_table.loc[index, 'seqmode'] = 'paired_end'
         elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE':
@@ -203,32 +188,16 @@ def main():
             row[input_dict.loc['fq1', 'labkey']])
 
         snakemake_table.loc[index, 'fq1'] = fq1
-
-        with gzip.open(fq1, "rt") as handle:
-            for record in SeqIO.parse(handle, "fastq"):
-                read_length = len(record.seq)
-                break
-
+        read_length = get_read_length(fq1)
         snakemake_table.loc[index, 'index_size'] = read_length
-        if read_length <= 50:
-            snakemake_table.loc[index, 'kmer'] = 21
-        elif read_length > 50:
-            snakemake_table.loc[index, 'kmer'] = 31
-
-
-        if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
-            snakemake_table.loc[index, 'fq2'] = os.path.join(
-                row[input_dict.loc['fastq_path', 'labkey']],
-                row[input_dict.loc['fq2', 'labkey']])
-
-        snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']]
-        snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']]
-
-        if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
-            snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']]
-            snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']]
-
-        organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower()
+        snakemake_table.loc[index, 'kmer'] = infer_kmer_length(read_length)
+        snakemake_table.loc[index, 'fq1_3p'] = row[
+            input_dict.loc['fq1_3p', 'labkey']]
+        snakemake_table.loc[index, 'fq1_5p'] = row[
+            input_dict.loc['fq1_5p', 'labkey']]
+
+        organism = row[input_dict.loc['organism', 'labkey']].replace(
+            ' ', '_').lower()
         snakemake_table.loc[index, 'organism'] = organism
 
         snakemake_table.loc[index, 'gtf'] = os.path.join(
@@ -251,39 +220,35 @@ def main():
             organism,
             'transcriptome.fa')
 
-        snakemake_table.loc[index, 'sd'] = row[input_dict.loc['sd', 'labkey']]
-        snakemake_table.loc[index, 'mean'] = row[input_dict.loc['mean', 'labkey']]
+        snakemake_table.loc[index, 'sd'] = row[
+            input_dict.loc['sd', 'labkey']]
+        snakemake_table.loc[index, 'mean'] = row[
+            input_dict.loc['mean', 'labkey']]
         snakemake_table.loc[index, 'multimappers'] = options.multimappers
         snakemake_table.loc[index, 'soft_clip'] = options.soft_clip
         snakemake_table.loc[index, 'pass_mode'] = options.pass_mode
         snakemake_table.loc[index, 'libtype'] = options.libtype
-
-        if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE':
-            snakemake_table.loc[index, 'kallisto_directionality'] = '--fr'
-        elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE':
-            snakemake_table.loc[index, 'kallisto_directionality'] = '--rf'
-        else:
-            snakemake_table.loc[index, 'kallisto_directionality'] = ''
-
-        if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE':
-            snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA'
-        elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE':
-            snakemake_table.loc[index, 'fq1_polya'] = 'TTTTTTTTTTTTTTTTT'
-        elif row[input_dict.loc['mate1_direction', 'labkey']] == 'RANDOM':
-            snakemake_table.loc[index, 'fq1_polya'] = 'AAAAAAAAAAAAAAAAA'
-        else:
-            pass
+        if options.trim_polya is True:
+            snakemake_table.loc[index, 'fq1_polya'] = trim_polya(
+                row[input_dict.loc['mate1_direction', 'labkey']])
+        snakemake_table.loc[index, 'kallisto_directionality'] = \
+            get_kallisto_directionality(
+                row[input_dict.loc['mate1_direction', 'labkey']])
 
         if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
-            if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE':
-                snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA'
-            elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE':
-                snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT'
-            elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM':
-                snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA'
-            else:
-                pass
+            fq2 = os.path.join(
+                row[input_dict.loc['fastq_path', 'labkey']],
+                row[input_dict.loc['fq2', 'labkey']])
+            snakemake_table.loc[index, 'fq2'] = fq2
+
+            snakemake_table.loc[index, 'fq2_3p'] = row[
+                input_dict.loc['fq2_3p', 'labkey']]
+            snakemake_table.loc[index, 'fq2_5p'] = row[
+                input_dict.loc['fq2_5p', 'labkey']]
 
+            if options.trim_polya is True:
+                snakemake_table.loc[index, 'fq2_polya'] = trim_polya(
+                    row[input_dict.loc['mate2_direction', 'labkey']])
 
     snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True)
     snakemake_table = snakemake_table.astype(
@@ -301,11 +266,10 @@ def main():
         header=True,
         index=False)
 
-
     # Read file and infer read size for sjdbovwerhang
     with open(options.config_file, 'w') as config_file:
         config_file.write('''---
-  samples: "'''+ options.samples_table + '''"
+  samples: "''' + options.samples_table + '''"
   output_dir: "results/"
   log_dir: "logs/"
   kallisto_indexes: "results/kallisto_indexes/"
@@ -318,19 +282,54 @@ def main():
     sys.stdout.write('Create config file finished successfully...\n')
     return
 
+
 def api_fetch_labkey_table(project_name=None, query_name=None):
-    group_path = os.path.join( '/Zavolan Group', project_name)
-    server_context = labkey.utils.create_server_context('labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True)
+    group_path = os.path.join('/Zavolan Group', project_name)
+    server_context = labkey.utils.create_server_context(
+        'labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True)
     schema_name = "lists"
     results = labkey.query.select_rows(server_context, schema_name, query_name)
     input_table = pd.DataFrame(results["rows"])
     return input_table
 
 
-# _____________________________________________________________________________
-# -----------------------------------------------------------------------------
-# Call the Main function and catch Keyboard interrups
-# -----------------------------------------------------------------------------
+def get_read_length(filename):
+    """Return length of first read in a gzipped FASTQ (ValueError if empty)."""
+    with gzip.open(filename, "rt") as handle:
+        for record in SeqIO.parse(handle, "fastq"):
+            return len(record.seq)
+    raise ValueError("No reads found in '{}'".format(filename))
+
+
+def infer_kmer_length(read_length):
+    """Return the k-mer length to use for reads of length `read_length`."""
+    # Reads of at most 50 nt get k=21; anything longer gets k=31.
+    if read_length <= 50:
+        return 21
+    return 31
+
+
+def get_kallisto_directionality(directionality):
+    """Return the 'kallisto_directionality' value for a mate direction."""
+    # 'SENSE' -> '--fr', 'ANTISENSE' -> '--rf'; anything else -> ''.
+    if directionality == 'SENSE':
+        return '--fr'
+    if directionality == 'ANTISENSE':
+        return '--rf'
+    return ''
+
+
+def trim_polya(sense):
+    """Return the 17-nt stretch to trim for mate directionality `sense`.
+
+    'SENSE'/'RANDOM' -> oligo-A, 'ANTISENSE' -> oligo-T, else oligo-X.
+    """
+    if sense in ('SENSE', 'RANDOM'):
+        return 'AAAAAAAAAAAAAAAAA'
+    if sense == 'ANTISENSE':
+        return 'TTTTTTTTTTTTTTTTT'
+    return 'XXXXXXXXXXXXXXXXX'
+
 
 if __name__ == '__main__':
     try:
@@ -338,5 +337,3 @@ if __name__ == '__main__':
     except KeyboardInterrupt:
         sys.stderr.write("User interrupt!" + os.linesep)
         sys.exit(0)
-
-
diff --git a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
index 90bb3be..7708397 100644
--- a/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
+++ b/tests/test_scripts_labkey_to_snakemake_api/expected_output.md5
@@ -1,2 +1,2 @@
 ba5ae0649d1fb82d94f8d19481498ffd  config.yaml
-9aece9e4acb17143b5e8f627968e03a5  samples.tsv
+cb58e046242c2702038e6e21dbd0bdb4  samples.tsv
\ No newline at end of file
diff --git a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
index 90bb3be..c24f601 100644
--- a/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
+++ b/tests/test_scripts_labkey_to_snakemake_table/expected_output.md5
@@ -1,2 +1,2 @@
 ba5ae0649d1fb82d94f8d19481498ffd  config.yaml
-9aece9e4acb17143b5e8f627968e03a5  samples.tsv
+cb58e046242c2702038e6e21dbd0bdb4  samples.tsv
-- 
GitLab