diff --git a/images/dag_test_workflow.svg b/images/dag_test_workflow.svg index d33de514ab86935a2d9c2b5c7dc111373fe5ad4e..cd5d63a9a610f30f9303dc59fb2078a33c7d9e21 100644 --- a/images/dag_test_workflow.svg +++ b/images/dag_test_workflow.svg @@ -18,7 +18,7 @@ <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M177,-108C177,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 177,-72 177,-72 183,-72 189,-78 189,-84 189,-84 189,-96 189,-96 189,-102 183,-108 177,-108"/> +<path fill="none" stroke="#c6d856" stroke-width="2" d="M177,-108C177,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 177,-72 177,-72 183,-72 189,-78 189,-84 189,-84 189,-96 189,-96 189,-102 183,-108 177,-108"/> <text text-anchor="middle" x="94.5" y="-93" font-family="sans" font-size="10.00">pe_fastqc</text> <text text-anchor="middle" x="94.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text> </g> @@ -31,7 +31,7 @@ <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#c6d856" stroke-width="2" d="M390,-108C390,-108 219,-108 219,-108 213,-108 207,-102 207,-96 207,-96 207,-84 207,-84 207,-78 213,-72 219,-72 219,-72 390,-72 390,-72 396,-72 402,-78 402,-84 402,-84 402,-96 402,-96 402,-102 396,-108 390,-108"/> +<path fill="none" stroke="#56d8c9" stroke-width="2" d="M390,-108C390,-108 219,-108 219,-108 213,-108 207,-102 207,-96 207,-96 207,-84 207,-84 207,-78 213,-72 219,-72 219,-72 390,-72 390,-72 396,-72 402,-78 402,-84 402,-84 402,-96 402,-96 402,-102 396,-108 390,-108"/> <text text-anchor="middle" x="304.5" y="-93" font-family="sans" font-size="10.00">fastqc</text> <text text-anchor="middle" x="304.5" y="-82" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text> </g> @@ -44,7 +44,7 @@ <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#56d892" stroke-width="2" d="M621,-108C621,-108 432,-108 432,-108 426,-108 420,-102 420,-96 420,-96 420,-84 420,-84 420,-78 426,-72 432,-72 432,-72 621,-72 621,-72 627,-72 633,-78 633,-84 633,-84 633,-96 633,-96 633,-102 627,-108 621,-108"/> +<path fill="none" stroke="#afd856" stroke-width="2" d="M621,-108C621,-108 432,-108 432,-108 426,-108 420,-102 420,-96 420,-96 420,-84 420,-84 420,-78 426,-72 432,-72 432,-72 621,-72 621,-72 627,-72 633,-78 633,-84 633,-84 633,-96 633,-96 633,-102 627,-108 621,-108"/> <text text-anchor="middle" x="526.5" y="-87.5" font-family="sans" font-size="10.00">pe_index_genomic_alignment_samtools</text> </g> <!-- 3->0 --> @@ -56,7 +56,7 @@ <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#56d8a9" stroke-width="2" d="M834.5,-108C834.5,-108 662.5,-108 662.5,-108 656.5,-108 650.5,-102 650.5,-96 650.5,-96 650.5,-84 650.5,-84 650.5,-78 656.5,-72 662.5,-72 662.5,-72 834.5,-72 834.5,-72 840.5,-72 846.5,-78 846.5,-84 846.5,-84 846.5,-96 846.5,-96 846.5,-102 840.5,-108 834.5,-108"/> +<path fill="none" stroke="#8fd856" stroke-width="2" d="M834.5,-108C834.5,-108 662.5,-108 662.5,-108 656.5,-108 650.5,-102 650.5,-96 650.5,-96 650.5,-84 650.5,-84 650.5,-78 656.5,-72 662.5,-72 662.5,-72 834.5,-72 834.5,-72 840.5,-72 846.5,-78 846.5,-84 846.5,-84 846.5,-96 846.5,-96 846.5,-102 840.5,-108 834.5,-108"/> <text text-anchor="middle" x="748.5" y="-87.5" font-family="sans" font-size="10.00">index_genomic_alignment_samtools</text> </g> <!-- 4->0 --> @@ -68,7 +68,7 @@ <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M851.5,-180C851.5,-180 733.5,-180 733.5,-180 727.5,-180 721.5,-174 721.5,-168 721.5,-168 721.5,-156 721.5,-156 721.5,-150 727.5,-144 733.5,-144 733.5,-144 851.5,-144 851.5,-144 857.5,-144 863.5,-150 863.5,-156 863.5,-156 863.5,-168 863.5,-168 863.5,-174 857.5,-180 851.5,-180"/> +<path fill="none" stroke="#5692d8" stroke-width="2" d="M851.5,-180C851.5,-180 733.5,-180 733.5,-180 727.5,-180 721.5,-174 721.5,-168 721.5,-168 721.5,-156 721.5,-156 721.5,-150 727.5,-144 733.5,-144 733.5,-144 851.5,-144 851.5,-144 857.5,-144 863.5,-150 863.5,-156 863.5,-156 863.5,-168 863.5,-168 863.5,-174 857.5,-180 851.5,-180"/> <text text-anchor="middle" x="792.5" y="-159.5" font-family="sans" font-size="10.00">pe_quantification_salmon</text> </g> <!-- 5->0 --> @@ -80,7 +80,7 @@ <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#5692d8" stroke-width="2" d="M995,-180C995,-180 894,-180 894,-180 888,-180 882,-174 882,-168 882,-168 882,-156 882,-156 882,-150 888,-144 894,-144 894,-144 995,-144 995,-144 1001,-144 1007,-150 1007,-156 1007,-156 1007,-168 1007,-168 1007,-174 1001,-180 995,-180"/> +<path fill="none" stroke="#d88d56" stroke-width="2" d="M995,-180C995,-180 894,-180 894,-180 888,-180 882,-174 882,-168 882,-168 882,-156 882,-156 882,-150 888,-144 894,-144 894,-144 995,-144 995,-144 1001,-144 1007,-150 1007,-156 1007,-156 1007,-168 1007,-168 1007,-174 1001,-180 995,-180"/> <text text-anchor="middle" x="944.5" y="-159.5" font-family="sans" font-size="10.00">quantification_salmon</text> </g> <!-- 6->0 --> @@ -92,7 +92,7 @@ <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#8fd856" stroke-width="2" d="M1200,-180C1200,-180 1037,-180 1037,-180 1031,-180 1025,-174 1025,-168 1025,-168 1025,-156 1025,-156 1025,-150 1031,-144 1037,-144 1037,-144 1200,-144 1200,-144 1206,-144 1212,-150 1212,-156 1212,-156 1212,-168 1212,-168 1212,-174 1206,-180 1200,-180"/> +<path fill="none" stroke="#d8cb56" stroke-width="2" d="M1200,-180C1200,-180 1037,-180 1037,-180 1031,-180 1025,-174 1025,-168 1025,-168 1025,-156 1025,-156 1025,-150 1031,-144 1037,-144 1037,-144 1200,-144 1200,-144 1206,-144 1212,-150 1212,-156 1212,-156 1212,-168 1212,-168 1212,-174 1206,-180 1200,-180"/> <text text-anchor="middle" x="1118.5" y="-159.5" font-family="sans" font-size="10.00">pe_genome_quantification_kallisto</text> </g> <!-- 7->0 --> @@ -116,7 +116,7 @@ <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#56c9d8" stroke-width="2" d="M566,-180C566,-180 465,-180 465,-180 459,-180 453,-174 453,-168 453,-168 453,-156 453,-156 453,-150 459,-144 465,-144 465,-144 566,-144 566,-144 572,-144 578,-150 578,-156 578,-156 578,-168 578,-168 578,-174 572,-180 566,-180"/> +<path fill="none" stroke="#59d856" stroke-width="2" d="M566,-180C566,-180 465,-180 465,-180 459,-180 453,-174 453,-168 453,-168 453,-156 453,-156 453,-150 459,-144 465,-144 465,-144 566,-144 566,-144 572,-144 578,-150 578,-156 578,-156 578,-168 578,-168 578,-174 572,-180 566,-180"/> <text text-anchor="middle" x="515.5" y="-159.5" font-family="sans" font-size="10.00">pe_map_genome_star</text> </g> <!-- 9->3 --> @@ -128,7 +128,7 @@ <!-- 10 --> <g id="node11" class="node"> <title>10</title> -<path fill="none" stroke="#afd856" stroke-width="2" d="M691,-180C691,-180 608,-180 608,-180 602,-180 596,-174 596,-168 596,-168 596,-156 596,-156 596,-150 602,-144 608,-144 608,-144 691,-144 691,-144 697,-144 703,-150 703,-156 703,-156 703,-168 703,-168 703,-174 697,-180 691,-180"/> +<path fill="none" stroke="#d85656" stroke-width="2" d="M691,-180C691,-180 608,-180 608,-180 602,-180 596,-174 596,-168 596,-168 596,-156 596,-156 596,-150 602,-144 608,-144 608,-144 691,-144 691,-144 697,-144 703,-150 703,-156 703,-156 703,-168 703,-168 703,-174 697,-180 691,-180"/> <text text-anchor="middle" x="649.5" y="-159.5" font-family="sans" font-size="10.00">map_genome_star</text> </g> <!-- 10->4 --> @@ -140,7 +140,7 @@ <!-- 11 --> <g id="node12" class="node"> <title>11</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M818.5,-254.5C818.5,-254.5 690.5,-254.5 690.5,-254.5 684.5,-254.5 678.5,-248.5 678.5,-242.5 678.5,-242.5 678.5,-230.5 678.5,-230.5 678.5,-224.5 684.5,-218.5 690.5,-218.5 690.5,-218.5 818.5,-218.5 818.5,-218.5 824.5,-218.5 830.5,-224.5 830.5,-230.5 830.5,-230.5 830.5,-242.5 830.5,-242.5 830.5,-248.5 824.5,-254.5 818.5,-254.5"/> +<path fill="none" stroke="#56a9d8" stroke-width="2" d="M818.5,-254.5C818.5,-254.5 690.5,-254.5 690.5,-254.5 684.5,-254.5 678.5,-248.5 678.5,-242.5 678.5,-242.5 678.5,-230.5 678.5,-230.5 678.5,-224.5 684.5,-218.5 690.5,-218.5 690.5,-218.5 818.5,-218.5 818.5,-218.5 824.5,-218.5 830.5,-224.5 830.5,-230.5 830.5,-230.5 830.5,-242.5 830.5,-242.5 830.5,-248.5 824.5,-254.5 818.5,-254.5"/> <text text-anchor="middle" x="754.5" y="-234" font-family="sans" font-size="10.00">pe_remove_polya_cutadapt</text> </g> <!-- 11->5 --> @@ -164,7 +164,7 @@ <!-- 12 --> <g id="node13" class="node"> <title>12</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" d="M976,-257C976,-257 861,-257 861,-257 855,-257 849,-251 849,-245 849,-245 849,-228 849,-228 849,-222 855,-216 861,-216 861,-216 976,-216 976,-216 982,-216 988,-222 988,-228 988,-228 988,-245 988,-245 988,-251 982,-257 976,-257"/> +<path fill="none" stroke="#d8ac56" stroke-width="2" d="M976,-257C976,-257 861,-257 861,-257 855,-257 849,-251 849,-245 849,-245 849,-228 849,-228 849,-222 855,-216 861,-216 861,-216 976,-216 976,-216 982,-216 988,-222 988,-228 988,-228 988,-245 988,-245 988,-251 982,-257 976,-257"/> <text text-anchor="middle" x="918.5" y="-245" font-family="sans" font-size="10.00">create_index_salmon</text> <text text-anchor="middle" x="918.5" y="-234" font-family="sans" font-size="10.00">kmer: 31</text> <text text-anchor="middle" x="918.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text> @@ -208,7 +208,7 @@ <!-- 14 --> <g id="node15" class="node"> <title>14</title> -<path fill="none" stroke="#56a9d8" stroke-width="2" d="M1329,-254.5C1329,-254.5 1214,-254.5 1214,-254.5 1208,-254.5 1202,-248.5 1202,-242.5 1202,-242.5 1202,-230.5 1202,-230.5 1202,-224.5 1208,-218.5 1214,-218.5 1214,-218.5 1329,-218.5 1329,-218.5 1335,-218.5 1341,-224.5 1341,-230.5 1341,-230.5 1341,-242.5 1341,-242.5 1341,-248.5 1335,-254.5 1329,-254.5"/> +<path fill="none" stroke="#56d892" stroke-width="2" d="M1329,-254.5C1329,-254.5 1214,-254.5 1214,-254.5 1208,-254.5 1202,-248.5 1202,-242.5 1202,-242.5 1202,-230.5 1202,-230.5 1202,-224.5 1208,-218.5 1214,-218.5 1214,-218.5 1329,-218.5 1329,-218.5 1335,-218.5 1341,-224.5 1341,-230.5 1341,-230.5 1341,-242.5 1341,-242.5 1341,-248.5 1335,-254.5 1329,-254.5"/> <text text-anchor="middle" x="1271.5" y="-239.5" font-family="sans" font-size="10.00">create_index_kallisto</text> <text text-anchor="middle" x="1271.5" y="-228.5" font-family="sans" font-size="10.00">organism: homo_sapiens</text> </g> @@ -227,7 +227,7 @@ <!-- 15 --> <g id="node16" class="node"> <title>15</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M623,-257C623,-257 508,-257 508,-257 502,-257 496,-251 496,-245 496,-245 496,-228 496,-228 496,-222 502,-216 508,-216 508,-216 623,-216 623,-216 629,-216 635,-222 635,-228 635,-228 635,-245 635,-245 635,-251 629,-257 623,-257"/> +<path fill="none" stroke="#70d856" stroke-width="2" d="M623,-257C623,-257 508,-257 508,-257 502,-257 496,-251 496,-245 496,-245 496,-228 496,-228 496,-222 502,-216 508,-216 508,-216 623,-216 623,-216 629,-216 635,-222 635,-228 635,-228 635,-245 635,-245 635,-251 629,-257 623,-257"/> <text text-anchor="middle" x="565.5" y="-245" font-family="sans" font-size="10.00">create_index_star</text> <text text-anchor="middle" x="565.5" y="-234" font-family="sans" font-size="10.00">index_size: 76</text> <text text-anchor="middle" x="565.5" y="-223" font-family="sans" font-size="10.00">organism: homo_sapiens</text> @@ -247,7 +247,7 @@ <!-- 16 --> <g id="node17" class="node"> <title>16</title> -<path fill="none" stroke="#56d8c9" stroke-width="2" d="M837,-329C837,-329 672,-329 672,-329 666,-329 660,-323 660,-317 660,-317 660,-305 660,-305 660,-299 666,-293 672,-293 672,-293 837,-293 837,-293 843,-293 849,-299 849,-305 849,-305 849,-317 849,-317 849,-323 843,-329 837,-329"/> +<path fill="none" stroke="#56d8a9" stroke-width="2" d="M837,-329C837,-329 672,-329 672,-329 666,-329 660,-323 660,-317 660,-317 660,-305 660,-305 660,-299 666,-293 672,-293 672,-293 837,-293 837,-293 843,-293 849,-299 849,-305 849,-305 849,-317 849,-317 849,-323 843,-329 837,-329"/> <text text-anchor="middle" x="754.5" y="-314" font-family="sans" font-size="10.00">pe_remove_adapters_cutadapt</text> <text text-anchor="middle" x="754.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_paired</text> </g> @@ -260,7 +260,7 @@ <!-- 17 --> <g id="node18" class="node"> <title>17</title> -<path fill="none" stroke="#d8ac56" stroke-width="2" d="M1159,-329C1159,-329 988,-329 988,-329 982,-329 976,-323 976,-317 976,-317 976,-305 976,-305 976,-299 982,-293 988,-293 988,-293 1159,-293 1159,-293 1165,-293 1171,-299 1171,-305 1171,-305 1171,-317 1171,-317 1171,-323 1165,-329 1159,-329"/> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M1159,-329C1159,-329 988,-329 988,-329 982,-329 976,-323 976,-317 976,-317 976,-305 976,-305 976,-299 982,-293 988,-293 988,-293 1159,-293 1159,-293 1165,-293 1171,-299 1171,-305 1171,-305 1171,-317 1171,-317 1171,-323 1165,-329 1159,-329"/> <text text-anchor="middle" x="1073.5" y="-314" font-family="sans" font-size="10.00">remove_adapters_cutadapt</text> <text text-anchor="middle" x="1073.5" y="-303" font-family="sans" font-size="10.00">sample: synthetic_10_reads_mate_1</text> </g> diff --git a/scripts/labkey_to_snakemake.py b/scripts/labkey_to_snakemake.py index f8f752752f91e84ddfa66a110a3016069fe01ede..f0f0ebaffd93a772da63ee7bff982f430545acfa 100755 --- a/scripts/labkey_to_snakemake.py +++ b/scripts/labkey_to_snakemake.py @@ -12,15 +12,18 @@ import sys import gzip +import labkey from argparse import ArgumentParser, RawTextHelpFormatter import os +import sys import numpy as np import pandas as pd from Bio import SeqIO from io import StringIO from csv import writer from pathlib import Path - +# for convenience, load QueryFilter explicitly (avoids long lines in filter definitions) +from labkey.query import QueryFilter # ---------------------------------------------------------------------------------------------------------------------- def main(): """ Preprocess sample folder and create config file for snakemake""" @@ -31,10 +34,16 @@ def main(): description=__doc__, formatter_class=RawTextHelpFormatter) + parser.add_argument( + "--samples_table", + dest="samples_table", + help="Output table compatible to snakemake", + required=True) + parser.add_argument( "--input_table", dest="input_table", - help="input table containing the sample information", + help="input table containing the sample information (labkey format)", required=True, metavar="FILE") @@ -42,10 +51,26 @@ def main(): "--input_dict", dest="input_dict", help="input dictionary containing the feature name \ - conversion from labkey to snakemake allowed names", + conversion from labkey to snakemake", required=True, metavar="FILE") + parser.add_argument( + "--remote", + help="Fetch labkey table via API", + action='store_true') + + parser.add_argument( + "--project_name", + help="Name of labkey folder containing the labkey table (remote mode)", + required = False) + + parser.add_argument( + "--query_name", + help="Name of labkey table (remote mode)", + required = False) + + parser.add_argument( "--genomes_path", dest="genomes_path", @@ -90,12 +115,6 @@ def main(): help="Configuration file to be used by Snakemake", required=False) - parser.add_argument( - "--samples_table", - dest="samples_table", - help="Table with samples", - required=True) - # __________________________________________________________________________________________________________________ # ------------------------------------------------------------------------------------------------------------------ @@ -111,13 +130,20 @@ def main(): sys.exit(1) sys.stdout.write('Reading input file...\n') - input_table = pd.read_csv( - options.input_table, - header=0, - sep='\t', - index_col=None, - comment='#', - engine='python') + + if options.remote == True: + input_table = api_fetch_labkey_table( + project_name=options.project_name, + query_name=options.query_name) + + else: + input_table = pd.read_csv( + options.input_table, + header=0, + sep='\t', + index_col=None, + comment='#', + engine='python') input_dict = pd.read_csv( options.input_dict, @@ -126,10 +152,12 @@ def main(): index_col=None, comment='#', engine='python') + input_dict.set_index('snakemake', inplace=True, drop=True) sys.stdout.write('Create snakemake table...\n') snakemake_table = pd.DataFrame() for index, row in input_table.iterrows(): + snakemake_table.loc[index, 'sample'] = row[input_dict.loc['replicate_name', 'labkey']] + row[input_dict.loc['condition', 'labkey']] if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': snakemake_table.loc[index, 'seqmode'] = 'paired_end' elif row[input_dict.loc['seqmode', 'labkey']] == 'SINGLE': @@ -138,12 +166,14 @@ def main(): fq1 = os.path.join( row[input_dict.loc['fastq_path', 'labkey']], row[input_dict.loc['fq1', 'labkey']]) + snakemake_table.loc[index, 'fq1'] = fq1 with gzip.open(fq1, "rt") as handle: for record in SeqIO.parse(handle, "fastq"): read_length = len(record.seq) break + snakemake_table.loc[index, 'index_size'] = read_length if read_length <= 50: snakemake_table.loc[index, 'kmer'] = 21 @@ -151,29 +181,36 @@ def main(): snakemake_table.loc[index, 'kmer'] = 31 - snakemake_table.loc[index, 'fq2'] = os.path.join( - row[input_dict.loc['fastq_path', 'labkey']], - row[input_dict.loc['fq2', 'labkey']]) + if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': + snakemake_table.loc[index, 'fq2'] = os.path.join( + row[input_dict.loc['fastq_path', 'labkey']], + row[input_dict.loc['fq2', 'labkey']]) snakemake_table.loc[index, 'fq1_3p'] = row[input_dict.loc['fq1_3p', 'labkey']] snakemake_table.loc[index, 'fq1_5p'] = row[input_dict.loc['fq1_5p', 'labkey']] - snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']] - snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']] + + if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': + snakemake_table.loc[index, 'fq2_3p'] = row[input_dict.loc['fq2_3p', 'labkey']] + snakemake_table.loc[index, 'fq2_5p'] = row[input_dict.loc['fq2_5p', 'labkey']] organism = row[input_dict.loc['organism', 'labkey']].replace(' ', '_').lower() snakemake_table.loc[index, 'organism'] = organism + snakemake_table.loc[index, 'gtf'] = os.path.join( options.genomes_path, organism, 'annotation.gtf') + snakemake_table.loc[index, 'gtf_filtered'] = os.path.join( options.genomes_path, organism, 'annotation.gtf') + snakemake_table.loc[index, 'genome'] = os.path.join( options.genomes_path, organism, 'genome.fa') + snakemake_table.loc[index, 'tr_fasta_filtered'] = os.path.join( options.genomes_path, organism, @@ -187,9 +224,9 @@ def main(): snakemake_table.loc[index, 'libtype'] = options.libtype if row[input_dict.loc['mate1_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--fr-stranded' + snakemake_table.loc[index, 'kallisto_directionality'] = '--fr' elif row[input_dict.loc['mate1_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'kallisto_directionality'] = '--rf-stranded' + snakemake_table.loc[index, 'kallisto_directionality'] = '--rf' else: snakemake_table.loc[index, 'kallisto_directionality'] = '' @@ -202,36 +239,49 @@ def main(): else: pass - if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE': - snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT' - elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM': - snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' - else: - pass + if row[input_dict.loc['seqmode', 'labkey']] == 'PAIRED': + if row[input_dict.loc['mate2_direction', 'labkey']] == 'SENSE': + snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' + elif row[input_dict.loc['mate2_direction', 'labkey']] == 'ANTISENSE': + snakemake_table.loc[index, 'fq2_polya'] = 'TTTTTTTTTTTTTTTTT' + elif row[input_dict.loc['mate2_direction', 'labkey']] == 'RANDOM': + snakemake_table.loc[index, 'fq2_polya'] = 'AAAAAAAAAAAAAAAAA' + else: + pass + + snakemake_table.fillna('XXXXXXXXXXXXX', inplace=True) snakemake_table.to_csv( options.samples_table, sep='\t', header=True, index=False) + # Read file and infer read size for sjdbovwerhang with open(options.config_file, 'w') as config_file: config_file.write('''--- output_dir: "results" local_log: "local_log" - star_indexes: "star_indexes" - kallisto_indexes: "kallisto_indexes" + star_indexes: "results/star_indexes" + kallisto_indexes: "results/kallisto_indexes" + samples: "'''+ options.samples_table + '''" + salmon_indexes: "results/salmon_indexes" ...''') - sys.stdout.write('Create snakemake table finished successfully...\n') sys.stdout.write('Create config file...\n') sys.stdout.write('Create config file finished successfully...\n') return +def api_fetch_labkey_table(project_name=None, query_name=None): + group_path = os.path.join( '/Zavolan Group', project_name) + server_context = labkey.utils.create_server_context('labkey.scicore.unibas.ch', group_path, 'labkey', use_ssl=True) + schema_name = "lists" + results = labkey.query.select_rows(server_context, schema_name, query_name) + input_table = pd.DataFrame(results["rows"]) + return input_table + # _____________________________________________________________________________ # ----------------------------------------------------------------------------- diff --git a/tests/input_files/homo_sapiens/annotation.gtf b/tests/input_files/homo_sapiens/annotation.gtf new file mode 100644 index 0000000000000000000000000000000000000000..e41d542766ef72a2df834e9e634a62ab061b459c --- /dev/null +++ b/tests/input_files/homo_sapiens/annotation.gtf @@ -0,0 +1,28 @@ +#!genome-build GRCh38.p13 +#!genome-version GRCh38 +#!genome-date 2013-12 +#!genome-build-accession NCBI:GCA_000001405.28 +#!genebuild-last-updated 2019-08 +1-10000-20000 havana gene 1870 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; +1-10000-20000 havana transcript 1870 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1"; +1-10000-20000 havana exon 1870 2228 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1"; +1-10000-20000 havana exon 2614 2722 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1"; +1-10000-20000 havana exon 3222 4410 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002312635"; exon_version "1"; tag "basic"; transcript_support_level "1"; +1-10000-20000 havana transcript 2011 3671 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 2011 2058 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 2180 2228 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 2614 2698 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 2976 3053 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001799933"; exon_version "2"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 3222 3375 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001746346"; exon_version "2"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 3454 3671 . + . gene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "6"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001863096"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana gene 4405 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; +1-10000-20000 havana transcript 4405 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 8269 8367 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "3"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003477500"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 7916 8062 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "4"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003565697"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 7607 7743 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003475637"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 7234 7369 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "6"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003502542"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 6859 7056 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "7"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003553898"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 6608 6766 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "8"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00003621279"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 5797 5948 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "9"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00002030414"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 5006 5039 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "10"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001935574"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1-10000-20000 havana exon 4405 4502 . - . gene_id "ENSG00000227232"; gene_version "5"; transcript_id "ENST00000488147"; transcript_version "1"; exon_number "11"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene"; transcript_name "WASH7P-201"; transcript_source "havana"; transcript_biotype "unprocessed_pseudogene"; exon_id "ENSE00001843071"; exon_version "1"; tag "basic"; transcript_support_level "NA"; diff --git a/tests/input_files/homo_sapiens/genome.fa b/tests/input_files/homo_sapiens/genome.fa new file mode 100644 index 0000000000000000000000000000000000000000..7c6b040a306d1d8c23079ffad83d4cb0f0173959 --- /dev/null +++ b/tests/input_files/homo_sapiens/genome.fa @@ -0,0 +1,168 @@ +>1-10000-20000 +ntaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacc +ctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccaaccctaaccc +taaccctaaccctaaccctaaccctaacccctaaccctaaccctaaccctaaccctaacc +taaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccctaacc +ctaaccctaaaccctaaaccctaaccctaaccctaaccctaaccctaaccccaaccccaa +ccccaaccccaaccccaaccccaaccctaacccctaaccctaaccctaaccctaccctaa +ccctaaccctaaccctaaccctaaccctaacccctaacccctaaccctaaccctaaccct +aaccctaaccctaaccctaacccctaaccctaaccctaaccctaaccctcgcggtaccct +cagccggcccgcccgcccgggtctgacctgaggagaactgtgctccgccttcagagtacc +accgaaatctgtgcagaggacaacgcagctccgccctcgcggtgctctccgggtctgtgc +tgaggagaacgcaactccgccgttgcaaaggcgcgccgcgccggcgcaggcgcagagagg +cgcgccgcgccggcgcaggcgcagagaggcgcgccgcgccggcgcaggcgcagagaggcg +cgccgcgccggcgcaggcgcagagaggcgcgccgcgccggcgcaggcgcagagaggcgcg +ccgcgccggcgcaggcgcagacacatgctagcgcgtcggggtggaggcgtggcgcaggcg +cagagaggcgcgccgcgccggcgcaggcgcagagacacatgctaccgcgtccaggggtgg +aggcgtggcgcaggcgcagagaggcgcaccgcgccggcgcaggcgcagagacacatgcta +gcgcgtccaggggtggaggcgtggcgcaggcgcagagacgcaagcctacgggcgggggtt +gggggggcgtgtgttgcaggagcaaagtcgcacggcgccgggctggggcggggggagggt +ggcgccgtgcacgcgcagaaactcacgtcacggtggcgcggcgcagagacgggtagaacc +tcagtaatccgaaaagccgggatcgaccgccccttgcttgcagccgggcactacaggacc +cgcttgctcacggtgctgtgccagggcgccccctgctggcgactagggcaactgcagggc +tctcttgcttagagtggtggccagcgccccctgctggcgccggggcactgcagggccctc +ttgcttactgtatagtggtggcacgccgcctgctggcagctagggacattgcagggtcct +cttgctcaaggtgtagtggcagcacgcccacctgctggcagctggggacactgccgggcc +ctcttgctcCAACAGTACTGGCGGATTATAGGGAAACACCCGGAGCATATGCTGTTTGGT +CTCAGtagactcctaaatatgggattcctgggtttaaaagtaaaaaataaatatgtttaa +tttgtgaactgattaccatcagaattgtactgttctgtatcccaccagcaatgtctagga +atgcctgtttctccacaaagtgtttacttttggatttttgccagtctaacaggtgaAGcc +ctggagattcttattagtgatttgggctggggcctggccatgtgtatttttttaaatttc +cactgatgattttgctgcatggccggtgttgagaatgactgCGCAAATTTGCCGGATTTC +CTTTGCTGTTCCTGCATGTAGTTTAAACGAGATTGCCAGCACCGGGTATCATTCACCATT +TTTCTTTTCGTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTA +TTTGCTGTCTCTTAGCCCAGACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACA +GGGTCTTGATGCTGTGGTCTTCATCTGCAGGTGTCTGACTTCCAGCAACTGCTGGCCTGT +GCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTGTGGAGAGGAGCCATGCCTAGA +GTGGGATGGGCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACA +ACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAAC +CTAGGCCAGTAAGTAGTGCTTGTGCTCATCTCCTTGGCTGTGATACGTGGCCGGCCCTCG +CTCCAGCAGCTGGACCCCTACCTGCCGTCTGCTGCCATCGGAGCCCAAAGCCGGGCTGTG +ACTGCTCAGACCAGCCGGCTGGAGGGAGGGGCTCAGCAGGTCTGGCTTTGGCCCTGGGAG +AGCAGGTGGAAGATCAGGCAGGCCATCGCTGCCACAGAACCCAGTGGATTGGCCTAGGTG +GGATCTCTGAGCTCAACAAGCCCTCTCTGGGTGGTAGGTGCAGAGACGGGAGGGGCAGAG +CCGCAGGCACAGCCAAGAGGGCTGAAGAAATGGTAGAACGGAGCAGCTGGTGATGTGTGG +GCCCACCGGCCCCAGGCTCCTGTCTCCCCCCAGGTGTGTGGTGATGCCAGGCATGCCCTT +CCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTGT +GAGTGTCCCCAGTGTTGCAGAGGTGAGAGGAGAGTAGACAGTGAGTGGGAGTGGCGTCGC +CCCTAGGGCTCTACGGGGCCGGCGTCTCCTGTCTCCTGGAGAGGCTTCGATGCCCCTCCA +CACCCTCTTGATCTTCCCTGTGATGTCATCTGGAGCCCTGCTGCTTGCGGTGGCCTATAA +AGCCTCCTAGTCTGGCTCCAAGGCCTGGCAGAGTCTTTCCCAGGGAAAGCTACAAGCAGC +AAACAGTCTGCATGGGTCATCCCCTTCACTCCCAGCTCAGAGCCCAGGCCAGGGGCCCCC +AAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAAGCC +TGGCTGCCTCCAGCTGGGTCGACAGACAGGGGCTGGAGAAGGGGAGAAGAGGAAAGTGAG +GTTGCCTGCCCTGTCTCCTACCTGAGGCTGAGGAAGGAGAAGGGGATGCACTGTTGGGGA +GGCAGCTGTAACTCAAAGCCTTAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCA +AAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGA +CACGCTGTTGGCCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCAT +TGCTGCTGTGTGGAAGTTCACTCCTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGA +GATCACATTTCTCACTGCCTTTTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGA +CAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGG +TGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGT +GGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGG +GGAATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCC +TCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTCACTGGGC +CTCTGCAGGAGGCTGCCATTTGTCCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCC +ATCTGCTACTGCCCTTTCTATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTA +GTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCC +CCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACAGCCTTGC +CTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCC +TCTCAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAA +GGGCCATTAGGCTCTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTT +TTTTGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTC +CTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGGAC +CAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGAT +CCTTTATTAAAAGCACACTGTTGGTTTCTGCTCAGTTCTTTATTGATTGGTGTGCCGTTT +TCTCTGGAAGCCTCTTAAGAACACAGTGGCGCAGGCTGGGTGGAGCCGTCCCCCCATGGA +GCACAGGCAGACAGAAGTCCCCGCCCCAGCTGTGTGGCCTCAAGCCAGCCTTCCGCTCCT +TGAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAAGGAAGTAGGTCTGAG +CAGCTTGTCCTGGCTGTGTCCATGTCAGAGCAACGGCCCAAGTCTGGGTCTGGGGGGGAA +GGTGTCATGGAGCCCCCTACGATTCCCAGTCGTCCTCGTCCTCCTCTGCCTGTGGCTGCT +GCGGTGGCGGCAGAGGAGGGATGGAGTCTGACACGCGGGCAAAGGCTCCTCCGGGCCCCT +CACCAGCCCCAGGTCCTTTCCCAGAGATGCCTGGAGGGAAAAGGCTGAGTGAGGGTGGTT +GGTGGGAAACCCTGGTTCCCCCAGCCCCCGGAGACTTAAATACAGGAAGAAAAAGGCAGG +ACAGAATTACAAGGTGCTGGCCCAGGGCGGGCAGCGGCCCTGCCTCCTACCCTTGCGCCT +CATGACCAGCTTGTTGAAGAGATCCGACATCAAGTGCCCACCTTGGCTCGTGGCTCTCAC +TGCAACGGGAAAGCCACAGACTGGGGTGAAGAGTTCAGTCACATGCGACCGGTGACTCCC +TGTCCCCACCCCCATGACACTCCCCAGCCCTCCAAGGCCACTGTGTTTCCCAGTTAGCTC +AGAGCCTCAGTCGATCCCTGACCCAGCACCGGGCACTGATGAGACAGCGGCTGTTTGAGG +AGCCACCTCCCAGCCACCTCGGGGCCAGGGCCAGGGTGTGCAGCAccactgtacaatggg +gaaactggcccagagaggtgaggcagcttgcctggggtcacagagcaaggcaaaagcagc +gctgggtacaagctcaAAACCATAGTGCCCAGGGCACTGCCGCTGCAGGCGCAGGCATCG +CATCACACCAGTGTCTGCGTTCACAGCAGGCATCATCAGTAGCCTCCAGAGGCCTCAGGT +CCAGTCTCTAAAAATATCTCAGGAGGCTGCAGTGGCTGACCATTGCCTTGGACCGCTCTT +GGCAGTCGAAGAAGATTCTCCTGTCAGTTTGAGCTGGGTGAGCTTAGAGAGGAAAGCTCC +ACTATGGCTCCCAAACCAGGAAGGAGCCATAGCCCAGGCAGGAGGGCTGAGGACCTCTGG +TGGCGGCCCAGGGCTTCCAGCATGTGCCCTAGGGGAAGCAGGGGCCAGCTGGCAAGAGCA +GGGGGTGGGCAGAAAGCACCCGGTGGACTCAGGGCTGGAGGGGAGGAGGCGATCTTGCCC +AAGGCCCTCCGACTGCAAGCTCCAGGGCCCGCTCACCTTGCTCCTGCTCCTTCTGCTGCT +GCTTCTCCAGCTTTCGCTCCTTCATGCTGCGCAGCTTGGCCTTGCCGATGCCCCCAGCTT +GGCGGATGGACTCTAGCAGAGTGGCCAGCCACCGGAGGGGTCAACCACTTCCCTGGGAGC +TCCCTGGACTGGAGCCGGGAGGTGGGGAACAGGGCAAGGAGGAAAGGCTGCTCAGGCAGG +GCTGGGGAAGCTTACTGTGTCCAAGAGCCTGCTGGGAGGGAAGTCACCTCCCCTCAAACG +AGGAGCCCTGCGCTGGGGAGGCCGGACCTTTGGAGACTGTGTGTGGGGGCCTGGGCACTG +ACTTCTGCAACCACCTGAGCGCGGGCATCCTGTGTGCAGATACTCCCTGCTTCCTCTCTA +GCCCCCACCCTGCAGAGCTGGACCCCTGAGCTAGCCATGCTCTGACAGTCTCAGTTGCAC +ACACGAGCCAGCAGAGGGGTTTTGTGCCACTTCTGGATGCTAGGGTTACACTGGGAGACA +CAGCAGTGAAGCTGAAATGAAAAATGTGTTGCTGTAGTTTGTTATTAGACCCCTTCTTTC +CATTGGTTTAATTAGGAATGGGGAACCCAGAGCCTCACTTGTTCAGGCTCCCTCTGCCCT +AGAAGTGAGAAGTCCAGAGCTCTACAGTTTGAAAACCACTATTTTATGAACCAAGTAGAA +CAAGATATTTGAAATGGAAACTATTCAAAAAATTGAGAATTTCTGACCACTTAACAAACC +CACAGAAAATCCACCCGAGTGCACTGAGCACGCCAGAAATCAGGTGGCCTCAAAGAGCTG +CTCCCACCTGAAGGAGACGCGCTGCTGCTGCTGTCGTCCTGCCTGGCGCCTTGGCCTACA +GGGGCCGCGGTTGAGGGTGGGAGTGGGGGTGCACTGGCCAGCACCTCAGGAGCtgggggt +ggtggtgggggcggtgggggtggtgttagtACCCCATCTTGTAGGTCTGAAACACAAAGT +GTGGGGTGTCTAGGGAAGAAGGTGTGTGACCAGGGAGGTCCCCGGCCCAGCTCCCATCCC +AGAACCCAGCTCACCTACCTTGAGAGGCTCGGCTACCTCAGTGTGGAAGGTGGGCAGTTC +TGGAATGGTGCCAGGGGCAGAGGGGGCAATGCCGGGGCCCAGGTCGGCAATGTACATGAG +GTCGTTGGCAATGCCGGGCAGGTCAGGCAGGTAGGATGGAACATCAATCTCAGGCACCTG +GCCCAGGTCTGGCACATAGAAGTAGTTCTCTGGGACCTGCAAGATTAGGCAGGGACATGT +GAGAGGTGACAGGGACCTGCAGGGGCAGCCAACAAGACCTTGTGTGCACCTCCCATGGGT +GGAATAAGGGGCCCAACAGCCTTGACTGGAGAGGAGCTCTGGCAAGGCCCTGGGCCACTG +CACCTGTCTCCACCTCTGTCCCACCCCTCCCACCTGCTGTTCCAGCTGCTCTCTCTTGCT +GATGGACAAGGGGGCATCAAACAGCTTCTCCTCTGTCTCTGCCCCCAGCATCACATGGGT +CTTTGTTACAGCACCAGCCAGGGGGTCCAGGAAGACATACTTCTTCTACCTACAGAGGCG +ACATGGGGGTCAGGCAAGCTGACACCCGCTGTCCTGAGCCCATGTTCCTCTCCCACATCA +TCAGGGGCACAGCGTGCACTGTGGGGTCCCAGGCCTCCCGAGCCGAGCCACCCGTCACCC +CCTGGCTCCTGGCCTATGTGCTGTACCTGTGTCTGATGCCCTGGGTCCCCACTAAGCCAG +GCCGGGCCTCCCGCCCACACCCCTCGGCCCTGCCCTCTGGCCATACAGGTTCTCGGTGGT +GTTGAAGAGCAGCAAGGAGCTGACAGAGCTGATGTTGCTGGGAAGACCCCCAAGTCCCTC +TTCTGCATCGTCCTCGGGCTCCGGCTTGGTGCTCACGCACACAGGAAAGTCCTTCAGCTT +CTCCTGAGAGGGCCAGGATGGCCAAGGGATGGTGAATATTTGGTGCTGGGCCTAATCAGC +TGCCATCCCATCCCAGTCAGCCTCCTCTGGGGGACAGAACCCTATGGTGGCCCCGGCTCC +TCCCCAGTATCCAGTCCTCCTGGTGTGTGACAGGCTATATGCGCGGCCAGCAGACCTGCA +GGGCCCGCTCGTCCAGGGGGCGGTGCTTGCTCTGGATCCTGTGGCGGGGGCGTCTCTGCA +GGCCAGGGTCCTGGGCGCCCGTGAAGATGGAGCCATATTCCTGCAGGCGCCCTGGAGCAG +GGTACTTGGCACTGGAGAACACCTGTGGACACAGGGACAAGTCTGAGGGGGCCCCAAGAG +GCTCAGAGGGCTAGGATTGCTTGGCAGGAGAGGGTGGAGTTGGAAGCCTGGGCGAGAAGA +AAGCTCAAGGTACAGGTGGGCAGCAGGGCAGAGACTGGGCAGCCTCAGAGGCACGGGGAA +ATGGAGGGACTGCCCAGTAGCCTCAGGACACAGGGGTATGGGGACTACCTTGATGGCCTT +CTTGCTGCCCTTGATCTTCTCAATCTTGGCCTGGGCCAAGGAGACCTTCTCTCCAATGGC +CTGCACCTGGCTCCGGCTCTGCTCTACCTGCTGGGAGATCCTGCCATGGAGAAGATCACA +GAGGCTGGGCTGCTCCCCACCCTCTGCACACCTCCTGCTTCTAACAGCAGAGCTGCCAGG +CCAGGCCCTCAGGCAAGGGCTCTGAAGTCAGGGTCACCTACTTGCCAGGGCCGATCTTGG +TGCCATCCAGGGGGCCTCTACAAGGATAATCTGACCTGCAGGGTCGAGGAGTTGACGGTG +CTGAGTTCCCTGCACTCTCAGTAGGGACAGGCCCTATGCTGCCACCTGTACATGCTATCT +GAAGGACAGCCTCCAGGGCACACAGAGGATGGTATTTACACATGCACACATGGCTACTGA +TGGGGCAAGCACTTCACAACCCCTCATGATCACGTGCAGCAGACAATGTGGCCTCTGCAG +AGGGGGAACGGAGACCGGAGGCTGAGACTGGCAAGGCTGGACCTGAGTGTCGTCACCTAA +ATTCAGACGGGGAACTGCCCCTGCACATACTGAACGGCTCACTGAGCAAACCCCGAGTCC +CGACCACCGCCTCAGTGTGGTCTAGCTcctcacctgcttccatcctccctggtgcggggt +gggcccagtgatatcagctgcctgctgttccccagatgtgccaagtgcattcttgtgtgc +ttgcatctcatggaacgccatttccccagacatccctgtggctggctccTGATGCCCGAG +GCCCAAGTGTCTGATGCTTTAAGGCACATCACCCCACTCATGCTTTTCCATGTTCTTTGG +CCGCAGCAAGGCCGCTCTCACTGCAAAGTTAACTCTGATGCGTGTGTAACACAACATCCT +CCTCCCAGTCGCCCCTGTAGCTCCCCTACCTCCAAGAGCCCAGCCCTTGCCCACAGGGCC +ACACTCCACGTGCAGAGCAGCCTCAGCACTCACCGGGCACGAGCGAGCCTGTGTGGTGCG +CAGGGAtgagaaggcagaggcgcgactggggttcatgaggaagggcaggaggagggtgtg +ggatggtggaggggtttgagaaggcagaggcgcgactggggttcatgaggaaagggaggg +ggaggatgtgggatggtggaggggCTGCAGACTCTGGGCTAGGGAAAGCTGGGATGTCTC +TAAAGGTTGGAATGAATGGCCTAGAATCCGACCCAATAAGCCAAAGCCACTTCCACCAAC +GTTAGAAGGCCTTGGCCCCCAGAGAGCCAATTTCACAATCCAGAAGTCCCCGTGCCCTAA +AGGGTCTGCCCTGATTACTCCTGGCTCCTTGTGTGCAGGGGGCTCAGGCATGGCAGGGCT +GGGAGTACCAGCAGGCACTCAAGCGGCTTAAGTGTTCCATGACAGACTGGTATGAAGGTG +GCCACAATTCAGAAAGAAAAAAGAAGAGCACCATCTCCTTCCAGTGAGGAAGCGGGACCA +CCACCCAGCGTGTGCTCCATCTTTTCTGGCTGGGGAGAGGCCTTCATCTGCTGTAAAGGG +TCCTCCAGCACAAGCTGTCTTAATTGACCCTAGTTCCCAGGGCAGCCTCGTTCTGCCTTG +GGTGCTGACACGACCTTCGGTAGGTGCATAAGCTCTGCATTCGAGGTCCACAGGGGCAGT +GGGAGGGAACTGagactggggagggacaaaggctgctctgt diff --git a/tests/input_files/homo_sapiens/transcriptome.fa b/tests/input_files/homo_sapiens/transcriptome.fa new file mode 100644 index 0000000000000000000000000000000000000000..a55426c270a5458fd5d6c36b723efd8d4cb5b296 --- /dev/null +++ b/tests/input_files/homo_sapiens/transcriptome.fa @@ -0,0 +1,54 @@ +>ENST00000456328 gene=DDX11L1 +GTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTATTTGCTGTCTCTTAGCCCA +GACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACAGGGTCTTGATGCTGTGGTCTTCATCTGCA +GGTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTG +TGGAGAGGAGCCATGCCTAGAGTGGGATGGGCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAAC +TTAATACCACAACCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAA +CCTAGGCCAGTGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCTCCAGAGCTGCAGAAGACG +ACGGCCGACTTGGATCACACTCTTGTGAGTGTCCCCAGTGTTGCAGAGGCAGGGCCATCAGGCACCAAAG +GGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGGCC +TGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCACTC +CTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGAGATCACATTTCTCACTGCCTTTTGTCTGCCCAG +TTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCT +CTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGG +AGAGTGTGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGA +ATCCCGAAGAAATGGTGGGTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCA +ATCTGTCTTCCATCCTGCGTGGCCGAGGGCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATTTGT +CCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCCATCTGCTACTGCCCTTTCTATAATAACTAAAGT +TAGCTGCCCTGGACTATTCACCCCCTAGTCTCAATTTAAGAAGATCCCCATGGCCACAGGGCCCCTGCCT +GGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTAACCTGCCCCACA +GCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCTCT +CAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCT +CTCAGCATGACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTTGTGGGAGACTATTCCTCCC +ATCTGCAACAGCTGCCCCTGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGG +GAGCTTCTGCCCCCACTGCCTAGGGACCAACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCAT +CCTGCACAGCTAGAGATCCTTTATTAAAAGCACACTGTTGGTTTCTG +>ENST00000450305 gene=DDX11L1 +GTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGTTGGAGGAAAGATGAGTGAGAG +CATCAACTTCTCTCACAACCTAGGCCAGTGTGTGGTGATGCCAGGCATGCCCTTCCCCAGCATCAGGTCT +CCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAG +AAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAA +AGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACACGCTGTTGG +CCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAATTTCAC +CAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCC +GCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTG +TGGAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCG +AA +>ENST00000488147 gene=WASH7P +GTAGAGCAGAGCCGGAGCCAGGTGCAGGCCATTGGAGAGAAGGTCTCCTTGGCCCAGGCCAAGATTGAGA +AGATCAAGGGCAGCAAGAAGGCCATCAAGGTGTTCTCCAGTGCCAAGTACCCTGCTCCAGGGCGCCTGCA +GGAATATGGCTCCATCTTCACGGGCGCCCAGGACCCTGGCCTGCAGAGACGCCCCCGCCACAGGATCCAG +AGCAAGCACCGCCCCCTGGACGAGCGGGCCCTGCAGGAGAAGCTGAAGGACTTTCCTGTGTGCGTGAGCA +CCAAGCCGGAGCCCGAGGACGATGCAGAAGAGGGACTTGGGGGTCTTCCCAGCAACATCAGCTCTGTCAG +CTCCTTGCTGCTCTTCAACACCACCGAGAACCTGTAGAAGAAGTATGTCTTCCTGGACCCCCTGGCTGGT +GCTGTAACAAAGACCCATGTGATGCTGGGGGCAGAGACAGAGGAGAAGCTGTTTGATGCCCCCTTGTCCA +TCAGCAAGAGAGAGCAGCTGGAACAGCAGGTCCCAGAGAACTACTTCTATGTGCCAGACCTGGGCCAGGT +GCCTGAGATTGATGTTCCATCCTACCTGCCTGACCTGCCCGGCATTGCCAACGACCTCATGTACATTGCC +GACCTGGGCCCCGGCATTGCCCCCTCTGCCCCTGGCACCATTCCAGAACTGCCCACCTTCCACACTGAGG +TAGCCGAGCCTCTCAAGACCTACAAGATGGGGTactaacaccacccccaccgcccccaccaccaccccca +GCTCCTGAGGTGCTGGCCAGTGCACCCCCACTCCCACCCTCAACCGCGGCCCCTGTAGGCCAAGGCGCCA +GGCAGGACGACAGCAGCAGCAGCGCGTCTCCTTCAGTCCAGGGAGCTCCCAGGGAAGTGGTTGACCCCTC +CGGTGGCTGGCCACTCTGCTAGAGTCCATCCGCCAAGCTGGGGGCATCGGCAAGGCCAAGCTGCGCAGCA +TGAAGGAGCGAAAGCTGGAGAAGCAGCAGCAGAAGGAGCAGGAGCAAGTGAGAGCCACGAGCCAAGGTGG +GCACTTGATGTCGCTCCATGGGGGGACGGCTCCACCCAGCCTGCGCCACTGTGTTCTTAAGAGGCTTCCA +GAGAAAACGGCACACCAATCAATAAAGAACTGAGCAGAAA diff --git a/tests/input_files/project1/synthetic.mate_1.fastq.gz b/tests/input_files/project1/synthetic.mate_1.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eef226394bbb57b2a0692fde823c4dd252b7fee Binary files /dev/null and b/tests/input_files/project1/synthetic.mate_1.fastq.gz differ diff --git a/tests/input_files/project1/synthetic.mate_2.fastq.gz b/tests/input_files/project1/synthetic.mate_2.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..d48b1cc65d4614ee96a604da5f80f92c99e9bed7 Binary files /dev/null and b/tests/input_files/project1/synthetic.mate_2.fastq.gz differ diff --git a/tests/input_files/project2/synthetic.mate_1.fastq.gz b/tests/input_files/project2/synthetic.mate_1.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..6eef226394bbb57b2a0692fde823c4dd252b7fee Binary files /dev/null and b/tests/input_files/project2/synthetic.mate_1.fastq.gz differ diff --git a/tests/input_files/synthetic.mate_1.bed b/tests/input_files/synthetic.mate_1.bed new file mode 100644 index 0000000000000000000000000000000000000000..f838343e902d43817f8d2dabd7c82b7c62a5f755 --- /dev/null +++ b/tests/input_files/synthetic.mate_1.bed @@ -0,0 +1,10 @@ +1-10000-20000 3397 3472 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1 +1-10000-20000 3249 3324 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1 +1-10000-20000 3735 3810 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1 +1-10000-20000 2055 2130 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1 +1-10000-20000 3567 3642 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1 +1-10000-20000 7920 7995 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P +1-10000-20000 6665 6740 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P +1-10000-20000 6915 6990 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P +1-10000-20000 5841 5916 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P +1-10000-20000 7929 8004 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P diff --git a/tests/input_files/synthetic.mate_2.bed b/tests/input_files/synthetic.mate_2.bed new file mode 100644 index 0000000000000000000000000000000000000000..de4b7133d0fa866c071cdd8d31e522bee5e30bf4 --- /dev/null +++ b/tests/input_files/synthetic.mate_2.bed @@ -0,0 +1,10 @@ +1-10000-20000 3422 3497 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1 +1-10000-20000 3274 3349 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1 +1-10000-20000 3760 3835 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1 +1-10000-20000 2080 2155 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1 +1-10000-20000 3592 3667 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1 +1-10000-20000 7945 8020 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P +1-10000-20000 6690 6765 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P +1-10000-20000 6940 7015 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P +1-10000-20000 5866 5941 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P +1-10000-20000 7954 8029 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P diff --git a/tests/input_files/synthetic.paired.bed b/tests/input_files/synthetic.paired.bed new file mode 100644 index 0000000000000000000000000000000000000000..e372afbf8082916f55425f7cd333fac0da908d55 --- /dev/null +++ b/tests/input_files/synthetic.paired.bed @@ -0,0 +1,10 @@ +1-10000-20000 3397 3497 NS500318:863:HY2KYBGXC:1:11101:14671:1067 0 + DDX11L1 +1-10000-20000 3249 3349 NS500318:863:HY2KYBGXC:1:11101:24439:1068 0 + DDX11L1 +1-10000-20000 3735 3835 NS500318:863:HY2KYBGXC:1:11101:14965:1069 0 + DDX11L1 +1-10000-20000 2055 2155 NS500318:863:HY2KYBGXC:1:11101:21081:1069 0 + DDX11L1 +1-10000-20000 3567 3667 NS500318:863:HY2KYBGXC:1:11101:18197:1070 0 + DDX11L1 +1-10000-20000 7920 8020 NS500318:863:HY2KYBGXC:1:11101:4614:1071 0 - WASH7P +1-10000-20000 6665 6765 NS500318:863:HY2KYBGXC:1:11101:8467:1073 0 - WASH7P +1-10000-20000 6915 7015 NS500318:863:HY2KYBGXC:1:11101:18960:1076 0 - WASH7P +1-10000-20000 5841 5941 NS500318:863:HY2KYBGXC:1:11101:6851:1076 0 - WASH7P +1-10000-20000 7929 8029 NS500318:863:HY2KYBGXC:1:11101:14731:1078 0 - WASH7P diff --git a/tests/test_scripts_labkey_to_snakemake/expected_output.md5 b/tests/test_scripts_labkey_to_snakemake/expected_output.md5 index 622cddb24f18000cdf60d7d6a1547d2048a549b0..9f6d770457156565d39955f819c437ef641e7343 100644 --- a/tests/test_scripts_labkey_to_snakemake/expected_output.md5 +++ b/tests/test_scripts_labkey_to_snakemake/expected_output.md5 @@ -1,2 +1,2 @@ -de940b0dd38a67a7433536a5b3aee0ac config.yaml -d9c9ea4cd6108d39a2521dd87cd0c7e1 samples.tsv +95fb0448dc6871cb415012d254260c5a config.yaml +4b51a822bcc83ffd744bf76f810162fc samples.tsv diff --git a/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_1.fastq.gz b/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_1.fastq.gz deleted file mode 100644 index ddd5858c0318c865dd3c1e4a20ca03c07144daac..0000000000000000000000000000000000000000 Binary files a/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_1.fastq.gz and /dev/null differ diff --git a/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_2.fastq.gz b/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_2.fastq.gz deleted file mode 100644 index 0e97fa50b8d9f6457a2d73f6f0aa324a5949a1e0..0000000000000000000000000000000000000000 Binary files a/tests/test_scripts_labkey_to_snakemake/input_lib_1.mate_2.fastq.gz and /dev/null differ diff --git a/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_1.fastq.gz b/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_1.fastq.gz deleted file mode 100644 index af6dc8e311511b121c45266063b97f7882519334..0000000000000000000000000000000000000000 Binary files a/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_1.fastq.gz and /dev/null differ diff --git a/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_2.fastq.gz b/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_2.fastq.gz deleted file mode 100644 index dbc4e5096650863aeaa4bcea12b21a9c4b4d2509..0000000000000000000000000000000000000000 Binary files a/tests/test_scripts_labkey_to_snakemake/input_lib_2.mate_2.fastq.gz and /dev/null differ diff --git a/tests/test_scripts_labkey_to_snakemake/input_table.tsv b/tests/test_scripts_labkey_to_snakemake/input_table.tsv index 4b24cbfeb3085aab0c537fb239b082f132b3cbb5..f688762bb7620ded05eb5a99abe15b9a96c45048 100644 --- a/tests/test_scripts_labkey_to_snakemake/input_table.tsv +++ b/tests/test_scripts_labkey_to_snakemake/input_table.tsv @@ -1,3 +1,3 @@ -Entry date Path to FASTQ file(s) Condition name Replicate name End type (PAIRED or SINGLE) Name of Mate1 FASTQ file Name of Mate2 FASTQ file Direction of Mate1 (SENSE, ANTISENSE or RANDOM) Direction of Mate2 (SENSE, ANTISENSE or RANDOM) 5' adapter of Mate1 3' adapter of Mate1 5' adapter of Mate2 3' adapter of Mate2 Fragment length mean Fragment length SD Quality control flag (PASSED or FAILED) Checksum of raw Mate1 FASTQ file Checksum of raw Mate2 FASTQ file Name of metadata file Name of quality control file for Mate1 Name of quality control file for Mate2 Organism Taxon ID Name of Strain / Isolate / Breed / Ecotype Strain / Isolate / Breed / Ecotype ID Biomaterial provider Source / tissue name Tissue code Additional tissue description Genotype short name Genotype description Disease short name Disease description Abbreviation for treatment Treatment description Gender Age Developmental stage Passage number Sample preparation date (YYYY-MM-DD) Prepared by Documentation Name of protocol file Sequencing date (YYYY-MM-DD) Sequencing instrument Library preparation kit Cycles Molecule Contaminant sequences Name of BioAnalyzer file -Fri Dec 20 00:00:00 CET 2019 . LN18C LN18C_rep1 PAIRED input_lib_1.mate_1.fastq.gz input_lib_1.mate_2.fastq.gz ANTISENSE SENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx -Fri Dec 20 00:00:00 CET 2019 . LN18C LN18C_rep2 PAIRED input_lib_2.mate_2.fastq.gz input_lib_2.mate_2.fastq.gz ANTISENSE SENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx +Entry date Path to FASTQ file(s) Condition name Replicate name End type (PAIRED or SINGLE) Name of Mate1 FASTQ file Name of Mate2 FASTQ file Direction of Mate1 (SENSE, ANTISENSE or RANDOM) Direction of Mate2 (SENSE, ANTISENSE or RANDOM) 5' adapter of Mate1 3' adapter of Mate1 5' adapter of Mate2 3' adapter of Mate2 Fragment length mean Fragment length SD Quality control flag (PASSED or FAILED) Checksum of raw Mate1 FASTQ file Checksum of raw Mate2 FASTQ file Name of metadata file Name of quality control file for Mate1 Name of quality control file for Mate2 Organism Taxon ID Name of Strain / Isolate / Breed / Ecotype Strain / Isolate / Breed / Ecotype ID Biomaterial provider Source / tissue name Tissue code Additional tissue description Genotype short name Genotype description Disease short name Disease description Abbreviation for treatment Treatment description Gender Age Developmental stage Passage number Sample preparation date (YYYY-MM-DD) Prepared by Documentation Name of protocol file Sequencing date (YYYY-MM-DD) Sequencing instrument Library preparation kit Cycles Molecule Contaminant sequences Name of BioAnalyzer file +Fri Dec 20 00:00:00 CET 2019 ../input_files/project1 synthetic_10_reads_paired synthetic_10_reads_paired PAIRED synthetic.mate_1.fastq.gz synthetic.mate_2.fastq.gz SENSE ANTISENSE AGATCGGAAGAGCACA AGATCGGAAGAGCGT 250 100 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx +Fri Dec 20 00:00:00 CET 2019 ../input_files/project2 synthetic_10_reads_mate_1 synthetic_10_reads_mate_1 SINGLE synthetic.mate_1.fastq.gz SENSE AGATCGGAAGAGCACA 250 100 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx \ No newline at end of file diff --git a/tests/test_scripts_labkey_to_snakemake/test.sh b/tests/test_scripts_labkey_to_snakemake/test.sh index c168305bebc4ca9eb08697daef89ab0be94b658b..27846ccec8c49aacab4d006346a2247ec8a5eca3 100755 --- a/tests/test_scripts_labkey_to_snakemake/test.sh +++ b/tests/test_scripts_labkey_to_snakemake/test.sh @@ -2,7 +2,7 @@ # Tear down test environment trap 'rm config.yaml samples.tsv && cd $user_dir' EXIT # quotes command is exected after script exits, regardless of exit status - +# # Set up test environment set -eo pipefail # ensures that script exits at first command that exits with non-zero status set -u # ensures that script exits when unset variables are used @@ -17,6 +17,22 @@ python "../../scripts/labkey_to_snakemake.py" \ --input_dict="../../scripts/input_dict_caption.tsv" \ --config_file="config.yaml" \ --samples_table="samples.tsv" \ - --genomes_path="." + --genomes_path="../input_files" \ + --multimappers='10' \ + # --remote \ + # --project_name "TEST_LABKEY" \ + # --query_name "RNA_Seq_data_template" + + +snakemake \ + --snakefile="../../snakemake/Snakefile" \ + --configfile="config.yaml" \ + --dryrun \ + # --rulegraph \ + # --printshellcmds \ + # | dot -Tpng > "rulegraph.png" + md5sum --check "expected_output.md5" + # snakemake --rulegraph --configfile config.yaml | dot -Tpng > rulegraph.png +