Skip to content
Snippets Groups Projects
Commit e7ac9662 authored by Alex Kanitz's avatar Alex Kanitz
Browse files

Merge branch 'merge_api_input_parse' into 'master'

LabKey-like input to Snakemake input

See merge request zavolan_group/pipelines/rnaseqpipeline!20
parents c5c4a29e 979e6cdd
No related branches found
No related tags found
1 merge request !20 LabKey-like input to Snakemake input
Pipeline #10227 passed
Showing
with 402 additions and 56 deletions
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
<!-- 1 --> <!-- 1 -->
<g id="node2" class="node"> <g id="node2" class="node">
<title>1</title> <title>1</title>
<path fill="none" stroke="#d85656" stroke-width="2" d="M177,-108C177,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 177,-72 177,-72 183,-72 189,-78 189,-84 189,-84 189,-96 189,-96 189,-102 183,-108 177,-108"/> <path fill="none" stroke="#c6d856" stroke-width="2" d="M177,-108C177,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 177,-72 177,-72 183,-72 189,-78 189,-84 189,-84 189,-96 189,-96 189,-102 183,-108 177,-108"/>
<text text-anchor="middle" x="94.5" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text> <text text-anchor="middle" x="94.5" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text>
<text text-anchor="middle" x="94.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text> <text text-anchor="middle" x="94.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text>
</g> </g>
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
<!-- 2 --> <!-- 2 -->
<g id="node3" class="node"> <g id="node3" class="node">
<title>2</title> <title>2</title>
<path fill="none" stroke="#c6d856" stroke-width="2" d="M390,-108C390,-108 219,-108 219,-108 213,-108 207,-102 207,-96 207,-96 207,-84 207,-84 207,-78 213,-72 219,-72 219,-72 390,-72 390,-72 396,-72 402,-78 402,-84 402,-84 402,-96 402,-96 402,-102 396,-108 390,-108"/> <path fill="none" stroke="#56d8c9" stroke-width="2" d="M390,-108C390,-108 219,-108 219,-108 213,-108 207,-102 207,-96 207,-96 207,-84 207,-84 207,-78 213,-72 219,-72 219,-72 390,-72 390,-72 396,-72 402,-78 402,-84 402,-84 402,-96 402,-96 402,-102 396,-108 390,-108"/>
<text text-anchor="middle" x="304.5" y="-93" font-family="sans" font-size="10.00">fastqc</text> <text text-anchor="middle" x="304.5" y="-93" font-family="sans" font-size="10.00">fastqc</text>
<text text-anchor="middle" x="304.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text> <text text-anchor="middle" x="304.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text>
</g> </g>
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
<!-- 3 --> <!-- 3 -->
<g id="node4" class="node"> <g id="node4" class="node">
<title>3</title> <title>3</title>
<path fill="none" stroke="#56d892" stroke-width="2" d="M621,-108C621,-108 432,-108 432,-108 426,-108 420,-102 420,-96 420,-96 420,-84 420,-84 420,-78 426,-72 432,-72 432,-72 621,-72 621,-72 627,-72 633,-78 633,-84 633,-84 633,-96 633,-96 633,-102 627,-108 621,-108"/> <path fill="none" stroke="#afd856" stroke-width="2" d="M621,-108C621,-108 432,-108 432,-108 426,-108 420,-102 420,-96 420,-96 420,-84 420,-84 420,-78 426,-72 432,-72 432,-72 621,-72 621,-72 627,-72 633,-78 633,-84 633,-84 633,-96 633,-96 633,-102 627,-108 621,-108"/>
<text text-anchor="middle" x="526.5" y="-87.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> <text text-anchor="middle" x="526.5" y="-87.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text>
</g> </g>
<!-- 3&#45;&gt;0 --> <!-- 3&#45;&gt;0 -->
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
<!-- 4 --> <!-- 4 -->
<g id="node5" class="node"> <g id="node5" class="node">
<title>4</title> <title>4</title>
<path fill="none" stroke="#56d8a9" stroke-width="2" d="M834.5,-108C834.5,-108 662.5,-108 662.5,-108 656.5,-108 650.5,-102 650.5,-96 650.5,-96 650.5,-84 650.5,-84 650.5,-78 656.5,-72 662.5,-72 662.5,-72 834.5,-72 834.5,-72 840.5,-72 846.5,-78 846.5,-84 846.5,-84 846.5,-96 846.5,-96 846.5,-102 840.5,-108 834.5,-108"/> <path fill="none" stroke="#8fd856" stroke-width="2" d="M834.5,-108C834.5,-108 662.5,-108 662.5,-108 656.5,-108 650.5,-102 650.5,-96 650.5,-96 650.5,-84 650.5,-84 650.5,-78 656.5,-72 662.5,-72 662.5,-72 834.5,-72 834.5,-72 840.5,-72 846.5,-78 846.5,-84 846.5,-84 846.5,-96 846.5,-96 846.5,-102 840.5,-108 834.5,-108"/>
<text text-anchor="middle" x="748.5" y="-87.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> <text text-anchor="middle" x="748.5" y="-87.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text>
</g> </g>
<!-- 4&#45;&gt;0 --> <!-- 4&#45;&gt;0 -->
...@@ -68,7 +68,7 @@ ...@@ -68,7 +68,7 @@
<!-- 5 --> <!-- 5 -->
<g id="node6" class="node"> <g id="node6" class="node">
<title>5</title> <title>5</title>
<path fill="none" stroke="#70d856" stroke-width="2" d="M851.5,-180C851.5,-180 733.5,-180 733.5,-180 727.5,-180 721.5,-174 721.5,-168 721.5,-168 721.5,-156 721.5,-156 721.5,-150 727.5,-144 733.5,-144 733.5,-144 851.5,-144 851.5,-144 857.5,-144 863.5,-150 863.5,-156 863.5,-156 863.5,-168 863.5,-168 863.5,-174 857.5,-180 851.5,-180"/> <path fill="none" stroke="#5692d8" stroke-width="2" d="M851.5,-180C851.5,-180 733.5,-180 733.5,-180 727.5,-180 721.5,-174 721.5,-168 721.5,-168 721.5,-156 721.5,-156 721.5,-150 727.5,-144 733.5,-144 733.5,-144 851.5,-144 851.5,-144 857.5,-144 863.5,-150 863.5,-156 863.5,-156 863.5,-168 863.5,-168 863.5,-174 857.5,-180 851.5,-180"/>
<text text-anchor="middle" x="792.5" y="-159.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> <text text-anchor="middle" x="792.5" y="-159.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text>
</g> </g>
<!-- 5&#45;&gt;0 --> <!-- 5&#45;&gt;0 -->
...@@ -80,7 +80,7 @@ ...@@ -80,7 +80,7 @@
<!-- 6 --> <!-- 6 -->
<g id="node7" class="node"> <g id="node7" class="node">
<title>6</title> <title>6</title>
<path fill="none" stroke="#5692d8" stroke-width="2" d="M995,-180C995,-180 894,-180 894,-180 888,-180 882,-174 882,-168 882,-168 882,-156 882,-156 882,-150 888,-144 894,-144 894,-144 995,-144 995,-144 1001,-144 1007,-150 1007,-156 1007,-156 1007,-168 1007,-168 1007,-174 1001,-180 995,-180"/> <path fill="none" stroke="#d88d56" stroke-width="2" d="M995,-180C995,-180 894,-180 894,-180 888,-180 882,-174 882,-168 882,-168 882,-156 882,-156 882,-150 888,-144 894,-144 894,-144 995,-144 995,-144 1001,-144 1007,-150 1007,-156 1007,-156 1007,-168 1007,-168 1007,-174 1001,-180 995,-180"/>
<text text-anchor="middle" x="944.5" y="-159.5" font-family="sans" font-size="10.00">quantification_salmon</text> <text text-anchor="middle" x="944.5" y="-159.5" font-family="sans" font-size="10.00">quantification_salmon</text>
</g> </g>
<!-- 6&#45;&gt;0 --> <!-- 6&#45;&gt;0 -->
...@@ -92,7 +92,7 @@ ...@@ -92,7 +92,7 @@
<!-- 7 --> <!-- 7 -->
<g id="node8" class="node"> <g id="node8" class="node">
<title>7</title> <title>7</title>
<path fill="none" stroke="#8fd856" stroke-width="2" d="M1200,-180C1200,-180 1037,-180 1037,-180 1031,-180 1025,-174 1025,-168 1025,-168 1025,-156 1025,-156 1025,-150 1031,-144 1037,-144 1037,-144 1200,-144 1200,-144 1206,-144 1212,-150 1212,-156 1212,-156 1212,-168 1212,-168 1212,-174 1206,-180 1200,-180"/> <path fill="none" stroke="#d8cb56" stroke-width="2" d="M1200,-180C1200,-180 1037,-180 1037,-180 1031,-180 1025,-174 1025,-168 1025,-168 1025,-156 1025,-156 1025,-150 1031,-144 1037,-144 1037,-144 1200,-144 1200,-144 1206,-144 1212,-150 1212,-156 1212,-156 1212,-168 1212,-168 1212,-174 1206,-180 1200,-180"/>
<text text-anchor="middle" x="1118.5" y="-159.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> <text text-anchor="middle" x="1118.5" y="-159.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text>
</g> </g>
<!-- 7&#45;&gt;0 --> <!-- 7&#45;&gt;0 -->
...@@ -116,7 +116,7 @@ ...@@ -116,7 +116,7 @@
<!-- 9 --> <!-- 9 -->
<g id="node10" class="node"> <g id="node10" class="node">
<title>9</title> <title>9</title>
<path fill="none" stroke="#56c9d8" stroke-width="2" d="M566,-180C566,-180 465,-180 465,-180 459,-180 453,-174 453,-168 453,-168 453,-156 453,-156 453,-150 459,-144 465,-144 465,-144 566,-144 566,-144 572,-144 578,-150 578,-156 578,-156 578,-168 578,-168 578,-174 572,-180 566,-180"/> <path fill="none" stroke="#59d856" stroke-width="2" d="M566,-180C566,-180 465,-180 465,-180 459,-180 453,-174 453,-168 453,-168 453,-156 453,-156 453,-150 459,-144 465,-144 465,-144 566,-144 566,-144 572,-144 578,-150 578,-156 578,-156 578,-168 578,-168 578,-174 572,-180 566,-180"/>
<text text-anchor="middle" x="515.5" y="-159.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> <text text-anchor="middle" x="515.5" y="-159.5" font-family="sans" font-size="10.00">pe_map_genome_star</text>
</g> </g>
<!-- 9&#45;&gt;3 --> <!-- 9&#45;&gt;3 -->
...@@ -128,7 +128,7 @@ ...@@ -128,7 +128,7 @@
<!-- 10 --> <!-- 10 -->
<g id="node11" class="node"> <g id="node11" class="node">
<title>10</title> <title>10</title>
<path fill="none" stroke="#afd856" stroke-width="2" d="M691,-180C691,-180 608,-180 608,-180 602,-180 596,-174 596,-168 596,-168 596,-156 596,-156 596,-150 602,-144 608,-144 608,-144 691,-144 691,-144 697,-144 703,-150 703,-156 703,-156 703,-168 703,-168 703,-174 697,-180 691,-180"/> <path fill="none" stroke="#d85656" stroke-width="2" d="M691,-180C691,-180 608,-180 608,-180 602,-180 596,-174 596,-168 596,-168 596,-156 596,-156 596,-150 602,-144 608,-144 608,-144 691,-144 691,-144 697,-144 703,-150 703,-156 703,-156 703,-168 703,-168 703,-174 697,-180 691,-180"/>
<text text-anchor="middle" x="649.5" y="-159.5" font-family="sans" font-size="10.00">map_genome_star</text> <text text-anchor="middle" x="649.5" y="-159.5" font-family="sans" font-size="10.00">map_genome_star</text>
</g> </g>
<!-- 10&#45;&gt;4 --> <!-- 10&#45;&gt;4 -->
...@@ -140,7 +140,7 @@ ...@@ -140,7 +140,7 @@
<!-- 11 --> <!-- 11 -->
<g id="node12" class="node"> <g id="node12" class="node">
<title>11</title> <title>11</title>
<path fill="none" stroke="#d88d56" stroke-width="2" d="M818.5,-254.5C818.5,-254.5 690.5,-254.5 690.5,-254.5 684.5,-254.5 678.5,-248.5 678.5,-242.5 678.5,-242.5 678.5,-230.5 678.5,-230.5 678.5,-224.5 684.5,-218.5 690.5,-218.5 690.5,-218.5 818.5,-218.5 818.5,-218.5 824.5,-218.5 830.5,-224.5 830.5,-230.5 830.5,-230.5 830.5,-242.5 830.5,-242.5 830.5,-248.5 824.5,-254.5 818.5,-254.5"/> <path fill="none" stroke="#56a9d8" stroke-width="2" d="M818.5,-254.5C818.5,-254.5 690.5,-254.5 690.5,-254.5 684.5,-254.5 678.5,-248.5 678.5,-242.5 678.5,-242.5 678.5,-230.5 678.5,-230.5 678.5,-224.5 684.5,-218.5 690.5,-218.5 690.5,-218.5 818.5,-218.5 818.5,-218.5 824.5,-218.5 830.5,-224.5 830.5,-230.5 830.5,-230.5 830.5,-242.5 830.5,-242.5 830.5,-248.5 824.5,-254.5 818.5,-254.5"/>
<text text-anchor="middle" x="754.5" y="-234" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> <text text-anchor="middle" x="754.5" y="-234" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text>
</g> </g>
<!-- 11&#45;&gt;5 --> <!-- 11&#45;&gt;5 -->
...@@ -164,7 +164,7 @@ ...@@ -164,7 +164,7 @@
<!-- 12 --> <!-- 12 -->
<g id="node13" class="node"> <g id="node13" class="node">
<title>12</title> <title>12</title>
<path fill="none" stroke="#d8cb56" stroke-width="2" d="M976,-257C976,-257 861,-257 861,-257 855,-257 849,-251 849,-245 849,-245 849,-228 849,-228 849,-222 855,-216 861,-216 861,-216 976,-216 976,-216 982,-216 988,-222 988,-228 988,-228 988,-245 988,-245 988,-251 982,-257 976,-257"/> <path fill="none" stroke="#d8ac56" stroke-width="2" d="M976,-257C976,-257 861,-257 861,-257 855,-257 849,-251 849,-245 849,-245 849,-228 849,-228 849,-222 855,-216 861,-216 861,-216 976,-216 976,-216 982,-216 988,-222 988,-228 988,-228 988,-245 988,-245 988,-251 982,-257 976,-257"/>
<text text-anchor="middle" x="918.5" y="-245" font-family="sans" font-size="10.00">create_index_salmon</text> <text text-anchor="middle" x="918.5" y="-245" font-family="sans" font-size="10.00">create_index_salmon</text>
<text text-anchor="middle" x="918.5" y="-234" font-family="sans" font-size="10.00">kmer: 31</text> <text text-anchor="middle" x="918.5" y="-234" font-family="sans" font-size="10.00">kmer: 31</text>
<text text-anchor="middle" x="918.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text> <text text-anchor="middle" x="918.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
...@@ -208,7 +208,7 @@ ...@@ -208,7 +208,7 @@
<!-- 14 --> <!-- 14 -->
<g id="node15" class="node"> <g id="node15" class="node">
<title>14</title> <title>14</title>
<path fill="none" stroke="#56a9d8" stroke-width="2" d="M1329,-254.5C1329,-254.5 1214,-254.5 1214,-254.5 1208,-254.5 1202,-248.5 1202,-242.5 1202,-242.5 1202,-230.5 1202,-230.5 1202,-224.5 1208,-218.5 1214,-218.5 1214,-218.5 1329,-218.5 1329,-218.5 1335,-218.5 1341,-224.5 1341,-230.5 1341,-230.5 1341,-242.5 1341,-242.5 1341,-248.5 1335,-254.5 1329,-254.5"/> <path fill="none" stroke="#56d892" stroke-width="2" d="M1329,-254.5C1329,-254.5 1214,-254.5 1214,-254.5 1208,-254.5 1202,-248.5 1202,-242.5 1202,-242.5 1202,-230.5 1202,-230.5 1202,-224.5 1208,-218.5 1214,-218.5 1214,-218.5 1329,-218.5 1329,-218.5 1335,-218.5 1341,-224.5 1341,-230.5 1341,-230.5 1341,-242.5 1341,-242.5 1341,-248.5 1335,-254.5 1329,-254.5"/>
<text text-anchor="middle" x="1271.5" y="-239.5" font-family="sans" font-size="10.00">create_index_kallisto</text> <text text-anchor="middle" x="1271.5" y="-239.5" font-family="sans" font-size="10.00">create_index_kallisto</text>
<text text-anchor="middle" x="1271.5" y="-228.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> <text text-anchor="middle" x="1271.5" y="-228.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
</g> </g>
...@@ -227,7 +227,7 @@ ...@@ -227,7 +227,7 @@
<!-- 15 --> <!-- 15 -->
<g id="node16" class="node"> <g id="node16" class="node">
<title>15</title> <title>15</title>
<path fill="none" stroke="#59d856" stroke-width="2" d="M623,-257C623,-257 508,-257 508,-257 502,-257 496,-251 496,-245 496,-245 496,-228 496,-228 496,-222 502,-216 508,-216 508,-216 623,-216 623,-216 629,-216 635,-222 635,-228 635,-228 635,-245 635,-245 635,-251 629,-257 623,-257"/> <path fill="none" stroke="#70d856" stroke-width="2" d="M623,-257C623,-257 508,-257 508,-257 502,-257 496,-251 496,-245 496,-245 496,-228 496,-228 496,-222 502,-216 508,-216 508,-216 623,-216 623,-216 629,-216 635,-222 635,-228 635,-228 635,-245 635,-245 635,-251 629,-257 623,-257"/>
<text text-anchor="middle" x="565.5" y="-245" font-family="sans" font-size="10.00">create_index_star</text> <text text-anchor="middle" x="565.5" y="-245" font-family="sans" font-size="10.00">create_index_star</text>
<text text-anchor="middle" x="565.5" y="-234" font-family="sans" font-size="10.00">index_size: 76</text> <text text-anchor="middle" x="565.5" y="-234" font-family="sans" font-size="10.00">index_size: 76</text>
<text text-anchor="middle" x="565.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text> <text text-anchor="middle" x="565.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text>
...@@ -247,7 +247,7 @@ ...@@ -247,7 +247,7 @@
<!-- 16 --> <!-- 16 -->
<g id="node17" class="node"> <g id="node17" class="node">
<title>16</title> <title>16</title>
<path fill="none" stroke="#56d8c9" stroke-width="2" d="M837,-329C837,-329 672,-329 672,-329 666,-329 660,-323 660,-317 660,-317 660,-305 660,-305 660,-299 666,-293 672,-293 672,-293 837,-293 837,-293 843,-293 849,-299 849,-305 849,-305 849,-317 849,-317 849,-323 843,-329 837,-329"/> <path fill="none" stroke="#56d8a9" stroke-width="2" d="M837,-329C837,-329 672,-329 672,-329 666,-329 660,-323 660,-317 660,-317 660,-305 660,-305 660,-299 666,-293 672,-293 672,-293 837,-293 837,-293 843,-293 849,-299 849,-305 849,-305 849,-317 849,-317 849,-323 843,-329 837,-329"/>
<text text-anchor="middle" x="754.5" y="-314" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> <text text-anchor="middle" x="754.5" y="-314" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text>
<text text-anchor="middle" x="754.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text> <text text-anchor="middle" x="754.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text>
</g> </g>
...@@ -260,7 +260,7 @@ ...@@ -260,7 +260,7 @@
<!-- 17 --> <!-- 17 -->
<g id="node18" class="node"> <g id="node18" class="node">
<title>17</title> <title>17</title>
<path fill="none" stroke="#d8ac56" stroke-width="2" d="M1159,-329C1159,-329 988,-329 988,-329 982,-329 976,-323 976,-317 976,-317 976,-305 976,-305 976,-299 982,-293 988,-293 988,-293 1159,-293 1159,-293 1165,-293 1171,-299 1171,-305 1171,-305 1171,-317 1171,-317 1171,-323 1165,-329 1159,-329"/> <path fill="none" stroke="#56c9d8" stroke-width="2" d="M1159,-329C1159,-329 988,-329 988,-329 982,-329 976,-323 976,-317 976,-317 976,-305 976,-305 976,-299 982,-293 988,-293 988,-293 1159,-293 1159,-293 1165,-293 1171,-299 1171,-305 1171,-305 1171,-317 1171,-317 1171,-323 1165,-329 1159,-329"/>
<text text-anchor="middle" x="1073.5" y="-314" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> <text text-anchor="middle" x="1073.5" y="-314" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text>
<text text-anchor="middle" x="1073.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text> <text text-anchor="middle" x="1073.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text>
</g> </g>
......
...@@ -12,15 +12,18 @@ ...@@ -12,15 +12,18 @@
import sys import sys
import gzip import gzip
import labkey
from argparse import ArgumentParser, RawTextHelpFormatter from argparse import ArgumentParser, RawTextHelpFormatter
import os import os
import sys
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from Bio import SeqIO from Bio import SeqIO
from io import StringIO from io import StringIO
from csv import writer from csv import writer
from pathlib import Path from pathlib import Path
# for convenience, load QueryFilter explicitly (avoids long lines in filter definitions)
from labkey.query import QueryFilter
# ---------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------------------------
def main(): def main():
""" Preprocess sample folder and create config file for snakemake""" """ Preprocess sample folder and create config file for snakemake"""
...@@ -31,10 +34,16 @@ def main(): ...@@ -31,10 +34,16 @@ def main():
description=__doc__, description=__doc__,
formatter_class=RawTextHelpFormatter) formatter_class=RawTextHelpFormatter)
parser.add_argument(
"--samples_table",
dest="samples_table",
help="Output table compatible to snakemake",
required=True)
parser.add_argument( parser.add_argument(
"--input_table", "--input_table",
dest="input_table", dest="input_table",
help="input table containing the sample information", help="input table containing the sample information (labkey format)",
required=True, required=True,
metavar="FILE") metavar="FILE")
...@@ -42,10 +51,26 @@ def main(): ...@@ -42,10 +51,26 @@ def main():
"--input_dict", "--input_dict",
dest="input_dict", dest="input_dict",
help="input dictionary containing the feature name \ help="input dictionary containing the feature name \
conversion from labkey to snakemake allowed names", conversion from labkey to snakemake",
required=True, required=True,
metavar="FILE") metavar="FILE")
parser.add_argument(
"--remote",
help="Fetch labkey table via API",
action='store_true')
parser.add_argument(
"--project_name",
help="Name of labkey folder containing the labkey table (remote mode)",
required = False)
parser.add_argument(
"--query_name",
help="Name of labkey table (remote mode)",
required = False)
parser.add_argument( parser.add_argument(
"--genomes_path", "--genomes_path",
dest="genomes_path", dest="genomes_path",
...@@ -90,12 +115,6 @@ def main(): ...@@ -90,12 +115,6 @@ def main():
help="Configuration file to be used by Snakemake", help="Configuration file to be used by Snakemake",
required=False) required=False)
parser.add_argument(
"--samples_table",
dest="samples_table",
help="Table with samples",
required=True)
# __________________________________________________________________________________________________________________ # __________________________________________________________________________________________________________________
# ------------------------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------------
...@@ -111,13 +130,20 @@ def main(): ...@@ -111,13 +130,20 @@ def main():
sys.exit(1) sys.exit(1)
sys.stdout.write('Reading input file...\n') sys.stdout.write('Reading input file...\n')
input_table = pd.read_csv(
options.input_table, if options.remote == True:
header=0, input_table = api_fetch_labkey_table(
sep='\t', project_name=options.project_name,
index_col=None, query_name=options.query_name)
comment='#',
engine='python') else:
input_table = pd.read_csv(
options.input_table,
header=0,
sep='\t',
index_col=None,
comment='#',
engine='python')
input_dict = pd.read_csv( input_dict = pd.read_csv(
options.input_dict, options.input_dict,
...@@ -126,10 +152,12 @@ def main(): ...@@ -126,10 +152,12 @@ def main():
index_col=None, index_col=None,
comment='#', comment='#',
engine='python') engine='python')
input_dict.set_index('snakemake', inplace=True, drop=True) input_dict.set_index('snakemake', inplace=True, drop=True)
sys.stdout.write('Create snakemake table...\n') sys.stdout.write('Create snakemake table...\n')
snakemake_table = pd.DataFrame() snakemake_table = pd.DataFrame()
for index, row in input_table.iterrows(): for index, row in input_table.iterrows():
snakemake_table.loc[index, 'sample'] = row[input_dict.loc['replicate_name', 'labkey']] + row[input_dict.loc['condition', 'labkey']]
if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
snakemake_table.loc[index, 'seqmode'] = 'paired_end' snakemake_table.loc[index, 'seqmode'] = 'paired_end'
elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE': elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE':
...@@ -138,12 +166,14 @@ def main(): ...@@ -138,12 +166,14 @@ def main():
fq1 = os.path.join( fq1 = os.path.join(
row[input_dict.loc['fastq_path', 'labkey']], row[input_dict.loc['fastq_path', 'labkey']],
row[input_dict.loc['fq1', 'labkey']]) row[input_dict.loc['fq1', 'labkey']])
snakemake_table.loc[index, 'fq1'] = fq1 snakemake_table.loc[index, 'fq1'] = fq1
with gzip.open(fq1, "rt") as handle: with gzip.open(fq1, "rt") as handle:
for record in SeqIO.parse(handle, "fastq"): for record in SeqIO.parse(handle, "fastq"):
read_length = len(record.seq) read_length = len(record.seq)
break break
snakemake_table.loc[index, 'index_size'] = read_length snakemake_table.loc[index, 'index_size'] = read_length
if read_length <= 50: if read_length <= 50:
snakemake_table.loc[index, 'kmer'] = 21 snakemake_table.loc[index, 'kmer'] = 21
...@@ -151,29 +181,36 @@ def main(): ...@@ -151,29 +181,36 @@ def main():
snakemake_table.loc[index, 'kmer'] = 31 snakemake_table.loc[index, 'kmer'] = 31
snakemake_table.loc[index, 'fq2'] = os.path.join( if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
row[input_dict.loc['fastq_path', 'labkey']], snakemake_table.loc[index, 'fq2'] = os.path.join(
row[input_dict.loc['fq2', 'labkey']]) row[input_dict.loc['fastq_path', 'labkey']],
row[input_dict.loc['fq2', 'labkey']])
snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']] snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']]
snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']] snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']]
snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']]
snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']] if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']]
snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']]
organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower() organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower()
snakemake_table.loc[index, 'organism'] = organism snakemake_table.loc[index, 'organism'] = organism
snakemake_table.loc[index, 'gtf'] = os.path.join( snakemake_table.loc[index, 'gtf'] = os.path.join(
options.genomes_path, options.genomes_path,
organism, organism,
'annotation.gtf') 'annotation.gtf')
snakemake_table.loc[index, 'gtf_filtered'] = os.path.join( snakemake_table.loc[index, 'gtf_filtered'] = os.path.join(
options.genomes_path, options.genomes_path,
organism, organism,
'annotation.gtf') 'annotation.gtf')
snakemake_table.loc[index, 'genome'] = os.path.join( snakemake_table.loc[index, 'genome'] = os.path.join(
options.genomes_path, options.genomes_path,
organism, organism,
'genome.fa') 'genome.fa')
snakemake_table.loc[index, 'tr_fasta_filtered'] = os.path.join( snakemake_table.loc[index, 'tr_fasta_filtered'] = os.path.join(
options.genomes_path, options.genomes_path,
organism, organism,
...@@ -187,9 +224,9 @@ def main(): ...@@ -187,9 +224,9 @@ def main():
snakemake_table.loc[index, 'libtype'] = options.libtype snakemake_table.loc[index, 'libtype'] = options.libtype
if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE':
snakemake_table.loc[index, 'kallisto_directionality'] = '--fr-stranded' snakemake_table.loc[index, 'kallisto_directionality'] = '--fr'
elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE':
snakemake_table.loc[index, 'kallisto_directionality'] = '--rf-stranded' snakemake_table.loc[index, 'kallisto_directionality'] = '--rf'
else: else:
snakemake_table.loc[index, 'kallisto_directionality'] = '' snakemake_table.loc[index, 'kallisto_directionality'] = ''
...@@ -202,36 +239,49 @@ def main(): ...@@ -202,36 +239,49 @@ def main():
else: else:
pass pass
if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE': if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED':
snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE':
elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE': snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA'
snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT' elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE':
elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM': snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT'
snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM':
else: snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA'
pass else:
pass
snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True)
snakemake_table.to_csv( snakemake_table.to_csv(
options.samples_table, options.samples_table,
sep='\t', sep='\t',
header=True, header=True,
index=False) index=False)
# Read file and infer read size for sjdbovwerhang # Read file and infer read size for sjdbovwerhang
with open(options.config_file, 'w') as config_file: with open(options.config_file, 'w') as config_file:
config_file.write('''--- config_file.write('''---
output_dir: "results" output_dir: "results"
local_log: "local_log" local_log: "local_log"
star_indexes: "star_indexes" star_indexes: "results/star_indexes"
kallisto_indexes: "kallisto_indexes" kallisto_indexes: "results/kallisto_indexes"
samples: "'''+ options.samples_table + '''"
salmon_indexes: "results/salmon_indexes"
...''') ...''')
sys.stdout.write('Create snakemake table finished successfully...\n') sys.stdout.write('Create snakemake table finished successfully...\n')
sys.stdout.write('Create config file...\n') sys.stdout.write('Create config file...\n')
sys.stdout.write('Create config file finished successfully...\n') sys.stdout.write('Create config file finished successfully...\n')
return return
def api_fetch_labkey_table(project_name=None, query_name=None):
    """Fetch a LabKey list through the LabKey API as a pandas DataFrame.

    The list is looked up in the "lists" schema of the container
    '/Zavolan Group/<project_name>' on the server
    'labkey.scicore.unibas.ch' (context path 'labkey', SSL enabled).

    Args:
        project_name: Name of the project (container) below
            '/Zavolan Group' that holds the list.
        query_name: Name of the list (query) to select rows from.

    Returns:
        A pandas DataFrame built from the "rows" entry of the query result.

    Raises:
        TypeError: If project_name or query_name is not supplied.
    """
    # Imported locally so the block does not depend on a file-level import.
    import posixpath

    # Both arguments default to None only so they can be passed by keyword;
    # fail early with a clear message instead of deep inside the path join
    # or the remote request.
    if project_name is None or query_name is None:
        raise TypeError(
            'api_fetch_labkey_table() requires both project_name and '
            'query_name')

    # The container path is a server-side, slash-separated path, so it must
    # not be assembled with the host operating system's path separator.
    group_path = posixpath.join('/Zavolan Group', project_name)
    server_context = labkey.utils.create_server_context(
        'labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True)
    schema_name = "lists"
    results = labkey.query.select_rows(
        server_context, schema_name, query_name)
    input_table = pd.DataFrame(results["rows"])
    return input_table
# _____________________________________________________________________________ # _____________________________________________________________________________
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
......
#!genome-build GRCh38.p13
#!genome-version GRCh38
#!genome-date 2013-12
#!genome-build-accession NCBI:GCA_000001405.28
#!genebuild-last-updated 2019-08
1-10000-20000 havana gene 1870 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";
1-10000-20000 havana transcript 1870 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";
1-10000-20000 havana exon 1870 2228 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1";
1-10000-20000 havana exon 2614 2722 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1";
1-10000-20000 havana exon 3222 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002312635"; exon_version "1"; tag "basic"; transcript_support_level "1";
1-10000-20000 havana transcript 2011 3671 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 2011 2058 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 2180 2228 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 2614 2698 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 2976 3053 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001799933"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 3222 3375 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001746346"; exon_version "2"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 3454 3671 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "6"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001863096"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana gene 4405 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";
1-10000-20000 havana transcript 4405 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 8269 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "3"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003477500"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 7916 8062 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "4"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003565697"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 7607 7743 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003475637"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 7234 7369 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "6"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003502542"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 6859 7056 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "7"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003553898"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 6608 6766 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "8"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003621279"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 5797 5948 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "9"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00002030414"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 5006 5039 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "10"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001935574"; exon_version "1"; tag "basic"; transcript_support_level "NA";
1-10000-20000 havana exon 4405 4502 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "11"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001843071"; exon_version "1"; tag "basic"; transcript_support_level "NA";
>1-10000-20000
ntaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacc
ctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccaaccctaaccc
taaccctaaccctaaccctaaccctaacccctaaccctaaccctaaccctaaccctaacc
taaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccctaacc
ctaaccctaaaccctaaaccctaaccctaaccctaaccctaaccctaaccccaaccccaa
ccccaaccccaaccccaaccccaaccctaacccctaaccctaaccctaaccctaccctaa
ccctaaccctaaccctaaccctaaccctaacccctaacccctaaccctaaccctaaccct
aaccctaaccctaaccctaacccctaaccctaaccctaaccctaaccctcgcggtaccct
cagccggcccgcccgcccgggtctgacctgaggagaactgtgctccgccttcagagtacc
accgaaatctgtgcagaggacaacgcagctccgccctcgcggtgctctccgggtctgtgc
tgaggagaacgcaactccgccgttgcaaaggcgcgccgcgccggcgcaggcgcagagagg
cgcgccgcgccggcgcaggcgcagagaggcgcgccgcgccggcgcaggcgcagagaggcg
cgccgcgccggcgcaggcgcagagaggcgcgccgcgccggcgcaggcgcagagaggcgcg
ccgcgccggcgcaggcgcagacacatgctagcgcgtcggggtggaggcgtggcgcaggcg
cagagaggcgcgccgcgccggcgcaggcgcagagacacatgctaccgcgtccaggggtgg
aggcgtggcgcaggcgcagagaggcgcaccgcgccggcgcaggcgcagagacacatgcta
gcgcgtccaggggtggaggcgtggcgcaggcgcagagacgcaagcctacgggcgggggtt
gggggggcgtgtgttgcaggagcaaagtcgcacggcgccgggctggggcggggggagggt
ggcgccgtgcacgcgcagaaactcacgtcacggtggcgcggcgcagagacgggtagaacc
tcagtaatccgaaaagccgggatcgaccgccccttgcttgcagccgggcactacaggacc
cgcttgctcacggtgctgtgccagggcgccccctgctggcgactagggcaactgcagggc
tctcttgcttagagtggtggccagcgccccctgctggcgccggggcactgcagggccctc
ttgcttactgtatagtggtggcacgccgcctgctggcagctagggacattgcagggtcct
cttgctcaaggtgtagtggcagcacgcccacctgctggcagctggggacactgccgggcc
ctcttgctcCAACAGTACTGGCGGATTATAGGGAAACACCCGGAGCATATGCTGTTTGGT
CTCAGtagactcctaaatatgggattcctgggtttaaaagtaaaaaataaatatgtttaa
tttgtgaactgattaccatcagaattgtactgttctgtatcccaccagcaatgtctagga
atgcctgtttctccacaaagtgtttacttttggatttttgccagtctaacaggtgaAGcc
ctggagattcttattagtgatttgggctggggcctggccatgtgtatttttttaaatttc
cactgatgattttgctgcatggccggtgttgagaatgactgCGCAAATTTGCCGGATTTC
CTTTGCTGTTCCTGCATGTAGTTTAAACGAGATTGCCAGCACCGGGTATCATTCACCATT
TTTCTTTTCGTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTA
TTTGCTGTCTCTTAGCCCAGACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACA
GGGTCTTGATGCTGTGGTCTTCATCTGCAGGTGTCTGACTTCCAGCAACTGCTGGCCTGT
GCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTGTGGAGAGGAGCCATGCCTAGA
GTGGGATGGGCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACA
ACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAAC
CTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACGTGGCCGGCCCTCG
CTCCAGCAGCTGGACCCCTACCTGCCGTCTGCTGCCATCGGAGCCCAAAGCCGGGCTGTG
ACTGCTCAGACCAGCCGGCTGGAGGGAGGGGCTCAGCAGGTCTGGCTTTGGCCCTGGGAG
AGCAGGTGGAAGATCAGGCAGGCCATCGCTGCCACAGAACCCAGTGGATTGGCCTAGGTG
GGATCTCTGAGCTCAACAAGCCCTCTCTGGGTGGTAGGTGCAGAGACGGGAGGGGCAGAG
CCGCAGGCACAGCCAAGAGGGCTGAAGAAATGGTAGAACGGAGCAGCTGGTGATGTGTGG
GCCCACCGGCCCCAGGCTCCTGTCTCCCCCCAGGTGTGTGGTGATGCCAGGCATGCCCTT
CCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTGT
GAGTGTCCCCAGTGTTGCAGAGGTGAGAGGAGAGTAGACAGTGAGTGGGAGTGGCGTCGC
CCCTAGGGCTCTACGGGGCCGGCGTCTCCTGTCTCCTGGAGAGGCTTCGATGCCCCTCCA
CACCCTCTTGATCTTCCCTGTGATGTCATCTGGAGCCCTGCTGCTTGCGGTGGCCTATAA
AGCCTCCTAGTCTGGCTCCAAGGCCTGGCAGAGTCTTTCCCAGGGAAAGCTACAAGCAGC
AAACAGTCTGCATGGGTCATCCCCTTCACTCCCAGCTCAGAGCCCAGGCCAGGGGCCCCC
AAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAAGCC
TGGCTGCCTCCAGCTGGGTCGACAGACAGGGGCTGGAGAAGGGGAGAAGAGGAAAGTGAG
GTTGCCTGCCCTGTCTCCTACCTGAGGCTGAGGAAGGAGAAGGGGATGCACTGTTGGGGA
GGCAGCTGTAACTCAAAGCCTTAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCA
AAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGA
CACGCTGTTGGCCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCAT
TGCTGCTGTGTGGAAGTTCACTCCTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGA
GATCACATTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGA
CAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGG
TGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGT
GGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGG
GGAATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCC
TCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTCACTGGGC
CTCTGCAGGAGGCTGCCATTTGTCCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCC
ATCTGCTACTGCCCTTTCTATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTA
GTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCC
CCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACAGCCTTGC
CTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCC
TCTCAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAA
GGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTT
TTTTGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTC
CTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGGAC
CAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGAT
CCTTTATTAAAAGCACACTGTTGGTTTCTGCTCAGTTCTTTATTGATTGGTGTGCCGTTT
TCTCTGGAAGCCTCTTAAGAACACAGTGGCGCAGGCTGGGTGGAGCCGTCCCCCCATGGA
GCACAGGCAGACAGAAGTCCCCGCCCCAGCTGTGTGGCCTCAAGCCAGCCTTCCGCTCCT
TGAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAAGGAAGTAGGTCTGAG
CAGCTTGTCCTGGCTGTGTCCATGTCAGAGCAACGGCCCAAGTCTGGGTCTGGGGGGGAA
GGTGTCATGGAGCCCCCTACGATTCCCAGTCGTCCTCGTCCTCCTCTGCCTGTGGCTGCT
GCGGTGGCGGCAGAGGAGGGATGGAGTCTGACACGCGGGCAAAGGCTCCTCCGGGCCCCT
CACCAGCCCCAGGTCCTTTCCCAGAGATGCCTGGAGGGAAAAGGCTGAGTGAGGGTGGTT
GGTGGGAAACCCTGGTTCCCCCAGCCCCCGGAGACTTAAATACAGGAAGAAAAAGGCAGG
ACAGAATTACAAGGTGCTGGCCCAGGGCGGGCAGCGGCCCTGCCTCCTACCCTTGCGCCT
CATGACCAGCTTGTTGAAGAGATCCGACATCAAGTGCCCACCTTGGCTCGTGGCTCTCAC
TGCAACGGGAAAGCCACAGACTGGGGTGAAGAGTTCAGTCACATGCGACCGGTGACTCCC
TGTCCCCACCCCCATGACACTCCCCAGCCCTCCAAGGCCACTGTGTTTCCCAGTTAGCTC
AGAGCCTCAGTCGATCCCTGACCCAGCACCGGGCACTGATGAGACAGCGGCTGTTTGAGG
AGCCACCTCCCAGCCACCTCGGGGCCAGGGCCAGGGTGTGCAGCAccactgtacaatggg
gaaactggcccagagaggtgaggcagcttgcctggggtcacagagcaaggcaaaagcagc
gctgggtacaagctcaAAACCATAGTGCCCAGGGCACTGCCGCTGCAGGCGCAGGCATCG
CATCACACCAGTGTCTGCGTTCACAGCAGGCATCATCAGTAGCCTCCAGAGGCCTCAGGT
CCAGTCTCTAAAAATATCTCAGGAGGCTGCAGTGGCTGACCATTGCCTTGGACCGCTCTT
GGCAGTCGAAGAAGATTCTCCTGTCAGTTTGAGCTGGGTGAGCTTAGAGAGGAAAGCTCC
ACTATGGCTCCCAAACCAGGAAGGAGCCATAGCCCAGGCAGGAGGGCTGAGGACCTCTGG
TGGCGGCCCAGGGCTTCCAGCATGTGCCCTAGGGGAAGCAGGGGCCAGCTGGCAAGAGCA
GGGGGTGGGCAGAAAGCACCCGGTGGACTCAGGGCTGGAGGGGAGGAGGCGATCTTGCCC
AAGGCCCTCCGACTGCAAGCTCCAGGGCCCGCTCACCTTGCTCCTGCTCCTTCTGCTGCT
GCTTCTCCAGCTTTCGCTCCTTCATGCTGCGCAGCTTGGCCTTGCCGATGCCCCCAGCTT
GGCGGATGGACTCTAGCAGAGTGGCCAGCCACCGGAGGGGTCAACCACTTCCCTGGGAGC
TCCCTGGACTGGAGCCGGGAGGTGGGGAACAGGGCAAGGAGGAAAGGCTGCTCAGGCAGG
GCTGGGGAAGCTTACTGTGTCCAAGAGCCTGCTGGGAGGGAAGTCACCTCCCCTCAAACG
AGGAGCCCTGCGCTGGGGAGGCCGGACCTTTGGAGACTGTGTGTGGGGGCCTGGGCACTG
ACTTCTGCAACCACCTGAGCGCGGGCATCCTGTGTGCAGATACTCCCTGCTTCCTCTCTA
GCCCCCACCCTGCAGAGCTGGACCCCTGAGCTAGCCATGCTCTGACAGTCTCAGTTGCAC
ACACGAGCCAGCAGAGGGGTTTTGTGCCACTTCTGGATGCTAGGGTTACACTGGGAGACA
CAGCAGTGAAGCTGAAATGAAAAATGTGTTGCTGTAGTTTGTTATTAGACCCCTTCTTTC
CATTGGTTTAATTAGGAATGGGGAACCCAGAGCCTCACTTGTTCAGGCTCCCTCTGCCCT
AGAAGTGAGAAGTCCAGAGCTCTACAGTTTGAAAACCACTATTTTATGAACCAAGTAGAA
CAAGATATTTGAAATGGAAACTATTCAAAAAATTGAGAATTTCTGACCACTTAACAAACC
CACAGAAAATCCACCCGAGTGCACTGAGCACGCCAGAAATCAGGTGGCCTCAAAGAGCTG
CTCCCACCTGAAGGAGACGCGCTGCTGCTGCTGTCGTCCTGCCTGGCGCCTTGGCCTACA
GGGGCCGCGGTTGAGGGTGGGAGTGGGGGTGCACTGGCCAGCACCTCAGGAGCtgggggt
ggtggtgggggcggtgggggtggtgttagtACCCCATCTTGTAGGTCTGAAACACAAAGT
GTGGGGTGTCTAGGGAAGAAGGTGTGTGACCAGGGAGGTCCCCGGCCCAGCTCCCATCCC
AGAACCCAGCTCACCTACCTTGAGAGGCTCGGCTACCTCAGTGTGGAAGGTGGGCAGTTC
TGGAATGGTGCCAGGGGCAGAGGGGGCAATGCCGGGGCCCAGGTCGGCAATGTACATGAG
GTCGTTGGCAATGCCGGGCAGGTCAGGCAGGTAGGATGGAACATCAATCTCAGGCACCTG
GCCCAGGTCTGGCACATAGAAGTAGTTCTCTGGGACCTGCAAGATTAGGCAGGGACATGT
GAGAGGTGACAGGGACCTGCAGGGGCAGCCAACAAGACCTTGTGTGCACCTCCCATGGGT
GGAATAAGGGGCCCAACAGCCTTGACTGGAGAGGAGCTCTGGCAAGGCCCTGGGCCACTG
CACCTGTCTCCACCTCTGTCCCACCCCTCCCACCTGCTGTTCCAGCTGCTCTCTCTTGCT
GATGGACAAGGGGGCATCAAACAGCTTCTCCTCTGTCTCTGCCCCCAGCATCACATGGGT
CTTTGTTACAGCACCAGCCAGGGGGTCCAGGAAGACATACTTCTTCTACCTACAGAGGCG
ACATGGGGGTCAGGCAAGCTGACACCCGCTGTCCTGAGCCCATGTTCCTCTCCCACATCA
TCAGGGGCACAGCGTGCACTGTGGGGTCCCAGGCCTCCCGAGCCGAGCCACCCGTCACCC
CCTGGCTCCTGGCCTATGTGCTGTACCTGTGTCTGATGCCCTGGGTCCCCACTAAGCCAG
GCCGGGCCTCCCGCCCACACCCCTCGGCCCTGCCCTCTGGCCATACAGGTTCTCGGTGGT
GTTGAAGAGCAGCAAGGAGCTGACAGAGCTGATGTTGCTGGGAAGACCCCCAAGTCCCTC
TTCTGCATCGTCCTCGGGCTCCGGCTTGGTGCTCACGCACACAGGAAAGTCCTTCAGCTT
CTCCTGAGAGGGCCAGGATGGCCAAGGGATGGTGAATATTTGGTGCTGGGCCTAATCAGC
TGCCATCCCATCCCAGTCAGCCTCCTCTGGGGGACAGAACCCTATGGTGGCCCCGGCTCC
TCCCCAGTATCCAGTCCTCCTGGTGTGTGACAGGCTATATGCGCGGCCAGCAGACCTGCA
GGGCCCGCTCGTCCAGGGGGCGGTGCTTGCTCTGGATCCTGTGGCGGGGGCGTCTCTGCA
GGCCAGGGTCCTGGGCGCCCGTGAAGATGGAGCCATATTCCTGCAGGCGCCCTGGAGCAG
GGTACTTGGCACTGGAGAACACCTGTGGACACAGGGACAAGTCTGAGGGGGCCCCAAGAG
GCTCAGAGGGCTAGGATTGCTTGGCAGGAGAGGGTGGAGTTGGAAGCCTGGGCGAGAAGA
AAGCTCAAGGTACAGGTGGGCAGCAGGGCAGAGACTGGGCAGCCTCAGAGGCACGGGGAA
ATGGAGGGACTGCCCAGTAGCCTCAGGACACAGGGGTATGGGGACTACCTTGATGGCCTT
CTTGCTGCCCTTGATCTTCTCAATCTTGGCCTGGGCCAAGGAGACCTTCTCTCCAATGGC
CTGCACCTGGCTCCGGCTCTGCTCTACCTGCTGGGAGATCCTGCCATGGAGAAGATCACA
GAGGCTGGGCTGCTCCCCACCCTCTGCACACCTCCTGCTTCTAACAGCAGAGCTGCCAGG
CCAGGCCCTCAGGCAAGGGCTCTGAAGTCAGGGTCACCTACTTGCCAGGGCCGATCTTGG
TGCCATCCAGGGGGCCTCTACAAGGATAATCTGACCTGCAGGGTCGAGGAGTTGACGGTG
CTGAGTTCCCTGCACTCTCAGTAGGGACAGGCCCTATGCTGCCACCTGTACATGCTATCT
GAAGGACAGCCTCCAGGGCACACAGAGGATGGTATTTACACATGCACACATGGCTACTGA
TGGGGCAAGCACTTCACAACCCCTCATGATCACGTGCAGCAGACAATGTGGCCTCTGCAG
AGGGGGAACGGAGACCGGAGGCTGAGACTGGCAAGGCTGGACCTGAGTGTCGTCACCTAA
ATTCAGACGGGGAACTGCCCCTGCACATACTGAACGGCTCACTGAGCAAACCCCGAGTCC
CGACCACCGCCTCAGTGTGGTCTAGCTcctcacctgcttccatcctccctggtgcggggt
gggcccagtgatatcagctgcctgctgttccccagatgtgccaagtgcattcttgtgtgc
ttgcatctcatggaacgccatttccccagacatccctgtggctggctccTGATGCCCGAG
GCCCAAGTGTCTGATGCTTTAAGGCACATCACCCCACTCATGCTTTTCCATGTTCTTTGG
CCGCAGCAAGGCCGCTCTCACTGCAAAGTTAACTCTGATGCGTGTGTAACACAACATCCT
CCTCCCAGTCGCCCCTGTAGCTCCCCTACCTCCAAGAGCCCAGCCCTTGCCCACAGGGCC
ACACTCCACGTGCAGAGCAGCCTCAGCACTCACCGGGCACGAGCGAGCCTGTGTGGTGCG
CAGGGAtgagaaggcagaggcgcgactggggttcatgaggaagggcaggaggagggtgtg
ggatggtggaggggtttgagaaggcagaggcgcgactggggttcatgaggaaagggaggg
ggaggatgtgggatggtggaggggCTGCAGACTCTGGGCTAGGGAAAGCTGGGATGTCTC
TAAAGGTTGGAATGAATGGCCTAGAATCCGACCCAATAAGCCAAAGCCACTTCCACCAAC
GTTAGAAGGCCTTGGCCCCCAGAGAGCCAATTTCACAATCCAGAAGTCCCCGTGCCCTAA
AGGGTCTGCCCTGATTACTCCTGGCTCCTTGTGTGCAGGGGGCTCAGGCATGGCAGGGCT
GGGAGTACCAGCAGGCACTCAAGCGGCTTAAGTGTTCCATGACAGACTGGTATGAAGGTG
GCCACAATTCAGAAAGAAAAAAGAAGAGCACCATCTCCTTCCAGTGAGGAAGCGGGACCA
CCACCCAGCGTGTGCTCCATCTTTTCTGGCTGGGGAGAGGCCTTCATCTGCTGTAAAGGG
TCCTCCAGCACAAGCTGTCTTAATTGACCCTAGTTCCCAGGGCAGCCTCGTTCTGCCTTG
GGTGCTGACACGACCTTCGGTAGGTGCATAAGCTCTGCATTCGAGGTCCACAGGGGCAGT
GGGAGGGAACTGagactggggagggacaaaggctgctctgt
>ENST00000456328 gene=DDX11L1
GTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTATTTGCTGTCTCTTAGCCCA
GACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACAGGGTCTTGATGCTGTGGTCTTCATCTGCA
GGTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTG
TGGAGAGGAGCCATGCCTAGAGTGGGATGGGCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAAC
TTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAA
CCTAGGCCAGTGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACG
ACGGCCGACTTGGATCACACTCTTGTGAGTGTCCCCAGTGTTGCAGAGGCAGGGCCATCAGGCACCAAAG
GGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGGCC
TGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCACTC
CTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGAGATCACATTTCTCACTGCCTTTTGTCTGCCCAG
TTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCT
CTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGG
AGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGA
ATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCA
ATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATTTGT
CCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCCATCTGCTACTGCCCTTTCTATAATAACTAAAGT
TAGCTGCCCTGGACTATTCACCCCCTAGTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCT
GGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACA
GCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCTCT
CAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCT
CTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTTGTGGGAGACTATTCCTCCC
ATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGG
GAGCTTCTGCCCCCACTGCCTAGGGACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCAT
CCTGCACAGCTAGAGATCCTTTATTAAAAGCACACTGTTGGTTTCTG
>ENST00000450305 gene=DDX11L1
GTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGTTGGAGGAAAGATGAGTGAGAG
CATCAACTTCTCTCACAACCTAGGCCAGTGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCT
CCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAG
AAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAA
AGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGG
CCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAATTTCAC
CAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCC
GCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTG
TGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCG
AA
>ENST00000488147 gene=WASH7P
GTAGAGCAGAGCCGGAGCCAGGTGCAGGCCATTGGAGAGAAGGTCTCCTTGGCCCAGGCCAAGATTGAGA
AGATCAAGGGCAGCAAGAAGGCCATCAAGGTGTTCTCCAGTGCCAAGTACCCTGCTCCAGGGCGCCTGCA
GGAATATGGCTCCATCTTCACGGGCGCCCAGGACCCTGGCCTGCAGAGACGCCCCCGCCACAGGATCCAG
AGCAAGCACCGCCCCCTGGACGAGCGGGCCCTGCAGGAGAAGCTGAAGGACTTTCCTGTGTGCGTGAGCA
CCAAGCCGGAGCCCGAGGACGATGCAGAAGAGGGACTTGGGGGTCTTCCCAGCAACATCAGCTCTGTCAG
CTCCTTGCTGCTCTTCAACACCACCGAGAACCTGTAGAAGAAGTATGTCTTCCTGGACCCCCTGGCTGGT
GCTGTAACAAAGACCCATGTGATGCTGGGGGCAGAGACAGAGGAGAAGCTGTTTGATGCCCCCTTGTCCA
TCAGCAAGAGAGAGCAGCTGGAACAGCAGGTCCCAGAGAACTACTTCTATGTGCCAGACCTGGGCCAGGT
GCCTGAGATTGATGTTCCATCCTACCTGCCTGACCTGCCCGGCATTGCCAACGACCTCATGTACATTGCC
GACCTGGGCCCCGGCATTGCCCCCTCTGCCCCTGGCACCATTCCAGAACTGCCCACCTTCCACACTGAGG
TAGCCGAGCCTCTCAAGACCTACAAGATGGGGTactaacaccacccccaccgcccccaccaccaccccca
GCTCCTGAGGTGCTGGCCAGTGCACCCCCACTCCCACCCTCAACCGCGGCCCCTGTAGGCCAAGGCGCCA
GGCAGGACGACAGCAGCAGCAGCGCGTCTCCTTCAGTCCAGGGAGCTCCCAGGGAAGTGGTTGACCCCTC
CGGTGGCTGGCCACTCTGCTAGAGTCCATCCGCCAAGCTGGGGGCATCGGCAAGGCCAAGCTGCGCAGCA
TGAAGGAGCGAAAGCTGGAGAAGCAGCAGCAGAAGGAGCAGGAGCAAGTGAGAGCCACGAGCCAAGGTGG
GCACTTGATGTCGCTCCATGGGGGGACGGCTCCACCCAGCCTGCGCCACTGTGTTCTTAAGAGGCTTCCA
GAGAAAACGGCACACCAATCAATAAAGAACTGAGCAGAAA
File added
File added
File added
1-10000-20000 3397 3472 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1
1-10000-20000 3249 3324 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1
1-10000-20000 3735 3810 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1
1-10000-20000 2055 2130 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1
1-10000-20000 3567 3642 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1
1-10000-20000 7920 7995 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P
1-10000-20000 6665 6740 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P
1-10000-20000 6915 6990 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P
1-10000-20000 5841 5916 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P
1-10000-20000 7929 8004 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P
1-10000-20000 3422 3497 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1
1-10000-20000 3274 3349 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1
1-10000-20000 3760 3835 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1
1-10000-20000 2080 2155 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1
1-10000-20000 3592 3667 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1
1-10000-20000 7945 8020 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P
1-10000-20000 6690 6765 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P
1-10000-20000 6940 7015 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P
1-10000-20000 5866 5941 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P
1-10000-20000 7954 8029 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P
1-10000-20000 3397 3497 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1
1-10000-20000 3249 3349 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1
1-10000-20000 3735 3835 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1
1-10000-20000 2055 2155 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1
1-10000-20000 3567 3667 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1
1-10000-20000 7920 8020 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P
1-10000-20000 6665 6765 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P
1-10000-20000 6915 7015 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P
1-10000-20000 5841 5941 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P
1-10000-20000 7929 8029 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P
de940b0dd38a67a7433536a5b3aee0ac config.yaml 95fb0448dc6871cb415012d254260c5a config.yaml
d9c9ea4cd6108d39a2521dd87cd0c7e1 samples.tsv 4b51a822bcc83ffd744bf76f810162fc samples.tsv
File deleted
File deleted
File deleted
File deleted
Entry date Path to FASTQ file(s) Condition name Replicate name End type (PAIRED or SINGLE) Name of Mate1 FASTQ file Name of Mate2 FASTQ file Direction of Mate1 (SENSE, ANTISENSE or RANDOM) Direction of Mate2 (SENSE, ANTISENSE or RANDOM) 5' adapter of Mate1 3' adapter of Mate1 5' adapter of Mate2 3' adapter of Mate2 Fragment length mean Fragment length SD Quality control flag (PASSED or FAILED) Checksum of raw Mate1 FASTQ file Checksum of raw Mate2 FASTQ file Name of metadata file Name of quality control file for Mate1 Name of quality control file for Mate2 Organism Taxon ID Name of Strain / Isolate / Breed / Ecotype Strain / Isolate / Breed / Ecotype ID Biomaterial provider Source / tissue name Tissue code Additional tissue description Genotype short name Genotype description Disease short name Disease description Abbreviation for treatment Treatment description Gender Age Developmental stage Passage number Sample preparation date (YYYY-MM-DD) Prepared by Documentation Name of protocol file Sequencing date (YYYY-MM-DD) Sequencing instrument Library preparation kit Cycles Molecule Contaminant sequences Name of BioAnalyzer file Entry date Path to FASTQ file(s) Condition name Replicate name End type (PAIRED or SINGLE) Name of Mate1 FASTQ file Name of Mate2 FASTQ file Direction of Mate1 (SENSE, ANTISENSE or RANDOM) Direction of Mate2 (SENSE, ANTISENSE or RANDOM) 5' adapter of Mate1 3' adapter of Mate1 5' adapter of Mate2 3' adapter of Mate2 Fragment length mean Fragment length SD Quality control flag (PASSED or FAILED) Checksum of raw Mate1 FASTQ file Checksum of raw Mate2 FASTQ file Name of metadata file Name of quality control file for Mate1 Name of quality control file for Mate2 Organism Taxon ID Name of Strain / Isolate / Breed / Ecotype Strain / Isolate / Breed / Ecotype ID Biomaterial provider Source / tissue name Tissue code Additional tissue description Genotype short name Genotype description Disease short name Disease description Abbreviation for treatment 
Treatment description Gender Age Developmental stage Passage number Sample preparation date (YYYY-MM-DD) Prepared by Documentation Name of protocol file Sequencing date (YYYY-MM-DD) Sequencing instrument Library preparation kit Cycles Molecule Contaminant sequences Name of BioAnalyzer file
Fri Dec 20 00:00:00 CET 2019 . LN18C LN18C_rep1 PAIRED input_lib_1.mate_1.fastq.gz input_lib_1.mate_2.fastq.gz ANTISENSE SENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx Fri Dec 20 00:00:00 CET 2019 ../input_files/project1 synthetic_10_reads_paired synthetic_10_reads_paired PAIRED synthetic.mate_1.fastq.gz synthetic.mate_2.fastq.gz SENSE ANTISENSE AGATCGGAAGAGCACA AGATCGGAAGAGCGT 250 100 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx
Fri Dec 20 00:00:00 CET 2019 . LN18C LN18C_rep2 PAIRED input_lib_2.mate_2.fastq.gz input_lib_2.mate_2.fastq.gz ANTISENSE SENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx Fri Dec 20 00:00:00 CET 2019 ../input_files/project2 synthetic_10_reads_mate_1 synthetic_10_reads_mate_1 SINGLE synthetic.mate_1.fastq.gz SENSE AGATCGGAAGAGCACA 250 100 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# Tear down test environment # Tear down test environment
trap 'rm config.yaml samples.tsv && cd $user_dir' EXIT # quotes command is exected after script exits, regardless of exit status trap 'rm config.yaml samples.tsv && cd $user_dir' EXIT # quotes command is exected after script exits, regardless of exit status
#
# Set up test environment # Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status set -eo pipefail # ensures that script exits at first command that exits with non-zero status
set -u # ensures that script exits when unset variables are used set -u # ensures that script exits when unset variables are used
...@@ -17,6 +17,22 @@ python "../../scripts/labkey_to_snakemake.py" \ ...@@ -17,6 +17,22 @@ python "../../scripts/labkey_to_snakemake.py" \
--input_dict="../../scripts/input_dict_caption.tsv" \ --input_dict="../../scripts/input_dict_caption.tsv" \
--config_file="config.yaml" \ --config_file="config.yaml" \
--samples_table="samples.tsv" \ --samples_table="samples.tsv" \
--genomes_path="." --genomes_path="../input_files" \
--multimappers='10' \
# --remote \
# --project_name "TEST_LABKEY" \
# --query_name "RNA_Seq_data_template"
snakemake \
--snakefile="../../snakemake/Snakefile" \
--configfile="config.yaml" \
--dryrun \
# --rulegraph \
# --printshellcmds \
# | dot -Tpng > "rulegraph.png"
md5sum --check "expected_output.md5" md5sum --check "expected_output.md5"
# snakemake --rulegraph --configfile config.yaml | dot -Tpng > rulegraph.png
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment