From 05c167fdd3d48e092dd94a64a20551ebfe4a9c73 Mon Sep 17 00:00:00 2001 From: BIOPZ-Katsantoni Maria <maria.katsantoni@unibas.ch> Date: Mon, 15 Jun 2020 19:39:21 +0200 Subject: [PATCH] fix: Renamed samples_concat.tsv to samples.multiple_lanes.tsv. Renamed rows with split with the same name as the other test samples, so that I do not change the tests (md5 and sunch). Removed the one lane samples. Created config that uses this tsv file --- .gitlab-ci.yml | 1 + Snakefile | 189 ++++++++++++------ tests/input_files/config.mutliple_lanes.yml | 12 ++ .../synthetic_split_lane2.mate_2.fastq.gz | Bin 407 -> 414 bytes tests/input_files/samples.multiple_lanes.tsv | 5 + tests/input_files/samples_concat.tsv | 7 - .../expected_output.md5 | 108 ++++++++++ .../test.local.sh | 81 ++++++++ .../test.slurm.sh | 93 +++++++++ workflow/rules/paired_end.snakefile.smk | 93 +++++++-- workflow/rules/single_end.snakefile.smk | 94 +++++++-- 11 files changed, 573 insertions(+), 110 deletions(-) create mode 100644 tests/input_files/config.mutliple_lanes.yml create mode 100644 tests/input_files/samples.multiple_lanes.tsv delete mode 100644 tests/input_files/samples_concat.tsv create mode 100644 tests/test_integration_workflow_multiple_lanes/expected_output.md5 create mode 100755 tests/test_integration_workflow_multiple_lanes/test.local.sh create mode 100755 tests/test_integration_workflow_multiple_lanes/test.slurm.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fe34e07..44bd02a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,4 +19,5 @@ test: - bash tests/test_create_dag_image/test.sh - bash tests/test_create_rule_graph/test.sh - bash tests/test_integration_workflow/test.local.sh + - bash tests/test_integration_workflow_multiple_lanes/test.local.sh diff --git a/Snakefile b/Snakefile index 909ad8a..ae56321 100644 --- a/Snakefile +++ b/Snakefile @@ -13,6 +13,15 @@ samples_table = pd.read_csv( sep="\t", ) + +def get_sample(column_id, search_id=None, search_value=None): + if search_id: + 
if search_id == 'index': + return str(samples_table[column_id][samples_table.index == search_value][0]) + else: + return str(samples_table[column_id][samples_table[search_id] == search_value][0]) + else: + return str(samples_table[column_id][0]) # Global config localrules: start, finish, rename_star_rpm_for_alfa, prepare_multiqc_config @@ -45,7 +54,7 @@ rule finish: "bigWig", "{unique_type}", "{sample}_{unique_type}_{strand}.bw"), - sample=samples_table.index.values, + sample=pd.unique(samples_table.index.values), strand=["plus", "minus"], unique_type=["Unique", "UniqueMultiple"]), @@ -72,7 +81,10 @@ rule start: ''' input: reads = lambda wildcards: - samples_table.loc[wildcards.sample, wildcards.mate], + expand( + pd.Series( + samples_table.loc[wildcards.sample, wildcards.mate] + ).values) output: reads = os.path.join( @@ -98,7 +110,7 @@ rule start: "docker://bash:5.0.16" shell: - "(cp {input.reads} {output.reads}) \ + "(cat {input.reads} > {output.reads}) \ 1> {log.stdout} 2> {log.stderr} " @@ -152,13 +164,16 @@ rule create_index_star: """ input: genome = lambda wildcards: - samples_table['genome'] - [samples_table['organism'] == wildcards.organism] - [0], + get_sample( + 'genome', + search_id='organism', + search_value=wildcards.organism), + gtf = lambda wildcards: - samples_table['gtf'] - [samples_table['organism'] == wildcards.organism] - [0] + get_sample( + 'gtf', + search_id='organism', + search_value=wildcards.organism) output: chromosome_info = os.path.join( @@ -220,12 +235,15 @@ rule extract_transcriptome: """ input: genome = lambda wildcards: - samples_table['genome'][ - samples_table['organism'] == wildcards.organism][0], + get_sample( + 'genome', + search_id='organism', + search_value=wildcards.organism), gtf = lambda wildcards: - samples_table['gtf'][ - samples_table['organism'] == wildcards.organism][0] - + get_sample( + 'gtf', + search_id='organism', + search_value=wildcards.organism) output: transcriptome = os.path.join( config['output_dir'], @@ 
-263,9 +281,10 @@ rule concatenate_transcriptome_and_genome: "transcriptome.fa"), genome = lambda wildcards: - samples_table['genome'] - [samples_table['organism'] == wildcards.organism] - [0] + get_sample( + 'genome', + search_id='organism', + search_value=wildcards.organism) output: genome_transcriptome = os.path.join( @@ -301,8 +320,8 @@ rule create_index_salmon: chr_names = lambda wildcards: os.path.join( config['star_indexes'], - samples_table["organism"][0], - str(samples_table["index_size"][0]), + get_sample('organism'), + get_sample("index_size"), "STAR_index", "chrName.txt") @@ -386,7 +405,7 @@ rule extract_transcripts_as_bed12: """ input: gtf = lambda wildcards: - samples_table['gtf'][0] + get_sample('gtf') output: bed12 = os.path.join( @@ -469,7 +488,10 @@ rule calculate_TIN_scores: "map_genome", "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam"), sample=wildcards.sample, - seqmode=samples_table.loc[wildcards.sample, 'seqmode']), + seqmode=get_sample( + 'seqmode', + search_id='index', + search_value=wildcards.sample)), bai = lambda wildcards: expand( os.path.join( @@ -479,7 +501,10 @@ rule calculate_TIN_scores: "map_genome", "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai"), sample=wildcards.sample, - seqmode=samples_table.loc[wildcards.sample, 'seqmode']), + seqmode=get_sample( + 'seqmode', + search_id='index', + search_value=wildcards.sample)), transcripts_bed12 = os.path.join( config['output_dir'], "full_transcripts_protein_coding.bed") @@ -528,7 +553,7 @@ rule merge_TIN_scores: "{sample}", "TIN", "TIN_score.tsv"), - sample=samples_table.index.values), + sample=pd.unique(samples_table.index.values)), output: TIN_scores_merged = os.path.join( @@ -552,9 +577,10 @@ rule merge_TIN_scores: "TIN", "TIN_score.tsv"), zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)])) + sample=[i for i in pd.unique(samples_table.index.values)], + 
seqmode=[get_sample('seqmode', + search_id='index', + search_value=i) for i in pd.unique(samples_table.index.values)])) threads: 1 @@ -623,9 +649,12 @@ rule salmon_quantmerge_genes: "{sample}.salmon.{seqmode}", "quant.sf"), zip, - sample=samples_table.index.values, - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)]) + sample=pd.unique(samples_table.index.values), + seqmode=[get_sample( + 'seqmode', + search_id='index', + search_value=i) + for i in pd.unique(samples_table.index.values)]) output: salmon_out = os.path.join( @@ -642,12 +671,15 @@ rule salmon_quantmerge_genes: "{sample}", "{sample}.salmon.{seqmode}"), zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)]), + sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[get_sample( + 'seqmode', + search_id='index', + search_value=i) + for i in pd.unique(samples_table.index.values)]), sample_name_list = expand( "{sample}", - sample=list(samples_table.index.values)), + sample=pd.unique(samples_table.index.values)), salmon_merge_on = "{salmon_merge_on}" log: @@ -686,9 +718,12 @@ rule salmon_quantmerge_transcripts: "{sample}.salmon.{seqmode}", "quant.sf"), zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)]) + sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[get_sample( + 'seqmode', + search_id='index', + search_value=i) + for i in pd.unique(samples_table.index.values)]) output: salmon_out = os.path.join( @@ -705,13 +740,16 @@ rule salmon_quantmerge_transcripts: "{sample}", "{sample}.salmon.{seqmode}"), zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)]), + sample=[i for i in pd.unique(samples_table.index.values)], + seqmode=[get_sample( + 'seqmode', + 
search_id='index', + search_value=i) + for i in pd.unique(samples_table.index.values)]), sample_name_list = expand( "{sample}", - sample=list(samples_table.index.values)), + sample=pd.unique(samples_table.index.values)), salmon_merge_on = "{salmon_merge_on}" log: @@ -750,7 +788,10 @@ rule star_rpm: "map_genome", "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam"), sample=wildcards.sample, - seqmode=samples_table.loc[wildcards.sample, 'seqmode']), + seqmode=get_sample( + 'seqmode', + search_id='index', + search_value=wildcards.sample)), bai = lambda wildcards: expand( os.path.join( @@ -760,7 +801,10 @@ rule star_rpm: "map_genome", "{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai"), sample=wildcards.sample, - seqmode=samples_table.loc[wildcards.sample, 'seqmode']), + seqmode=get_sample( + 'seqmode', + search_id='index', + search_value=wildcards.sample)) output: str1 = os.path.join( @@ -840,8 +884,10 @@ rule rename_star_rpm_for_alfa: "{sample}_Signal.{unique}.{plus}.out.bg"), sample=wildcards.sample, unique=wildcards.unique, - plus=samples_table.loc[wildcards.sample, 'alfa_plus']), - + plus=get_sample( + 'alfa_plus', + search_id='index', + search_value=wildcards.sample)), minus = lambda wildcards: expand( os.path.join( @@ -852,7 +898,10 @@ rule rename_star_rpm_for_alfa: "{sample}_Signal.{unique}.{minus}.out.bg"), sample=wildcards.sample, unique=wildcards.unique, - minus=samples_table.loc[wildcards.sample, 'alfa_minus']) + minus=get_sample( + 'alfa_minus', + search_id='index', + search_value=wildcards.sample)) output: plus = os.path.join( @@ -872,7 +921,10 @@ rule rename_star_rpm_for_alfa: params: orientation = lambda wildcards: - samples_table.loc[wildcards.sample, "kallisto_directionality"] + get_sample( + 'kallisto_directionality', + search_id='index', + search_value=wildcards.sample), log: stderr = os.path.join( @@ -899,8 +951,11 @@ rule generate_alfa_index: ''' Generate ALFA index files from sorted GTF file ''' input: gtf = lambda wildcards: - 
samples_table["gtf"] - [samples_table["organism"] == wildcards.organism][0], + get_sample( + 'gtf', + search_id='organism', + search_value=wildcards.organism), + chr_len = os.path.join( config["star_indexes"], "{organism}", @@ -967,8 +1022,14 @@ rule alfa_qc: gtf = lambda wildcards: os.path.join( config["alfa_indexes"], - samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'index_size', + search_id='index', + search_value=wildcards.sample), "ALFA", "sorted_genes.stranded.ALFA_index") @@ -1003,8 +1064,10 @@ rule alfa_qc: minus = lambda wildcards, input: os.path.basename(input.minus), alfa_orientation = lambda wildcards: - [samples_table.loc[ - wildcards.sample, "alfa_directionality"]], + get_sample( + 'alfa_directionality', + search_id='index', + search_value=wildcards.sample), genome_index = lambda wildcards, input: os.path.abspath( os.path.join( @@ -1045,7 +1108,7 @@ rule alfa_qc_all_samples: "ALFA", "{unique}", "{sample}.ALFA_feature_counts.tsv"), - sample=samples_table.index.values, + sample=pd.unique(samples_table.index.values), unique=wildcards.unique) output: biotypes = os.path.join( @@ -1158,7 +1221,7 @@ rule multiqc_report: "{sample}", "fastqc", "{mate}"), - sample=samples_table.index.values, + sample=pd.unique(samples_table.index.values), mate="fq1"), fastqc_pe = expand( @@ -1168,7 +1231,7 @@ rule multiqc_report: "{sample}", "fastqc", "{mate}"), - sample=[i for i in list( + sample=[i for i in pd.unique( samples_table[samples_table['seqmode'] == 'pe'].index.values)], mate="fq2"), @@ -1180,9 +1243,9 @@ rule multiqc_report: "quant_kallisto", "{sample}.{seqmode}.kallisto.pseudo.sam"), zip, - sample=[i for i in list(samples_table.index.values)], - seqmode=[samples_table.loc[i, 'seqmode'] - for i in list(samples_table.index.values)]), + sample=[i for i in pd.unique(samples_table.index.values)], + 
seqmode=[get_sample('seqmode', search_id='index', search_value=i) + for i in pd.unique(samples_table.index.values)]), TIN_boxplot_PNG = os.path.join( config['output_dir'], @@ -1284,8 +1347,14 @@ rule prepare_bigWig: chr_sizes = lambda wildcards: os.path.join( config['star_indexes'], - samples_table.loc[wildcards.sample, "organism"], - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'index_size', + search_id='index', + search_value=wildcards.sample), "STAR_index", "chrNameLength.txt") diff --git a/tests/input_files/config.mutliple_lanes.yml b/tests/input_files/config.mutliple_lanes.yml new file mode 100644 index 0000000..8cfbb38 --- /dev/null +++ b/tests/input_files/config.mutliple_lanes.yml @@ -0,0 +1,12 @@ +--- + samples: "../input_files/samples.multiple_lanes.tsv" + output_dir: "results" + log_dir: "logs" + kallisto_indexes: "results/kallisto_indexes" + salmon_indexes: "results/salmon_indexes" + star_indexes: "results/star_indexes" + alfa_indexes: "results/alfa_indexes" + report_description: "No description provided by user" + report_logo: "../../images/logo.128px.png" + report_url: "https://zavolan.biozentrum.unibas.ch/" +... 
\ No newline at end of file diff --git a/tests/input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz b/tests/input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz index 693389493c4dc5a9a843b853783beb05ea0b37b5..98e06db62d4326246856ef51a54c24e592f03915 100644 GIT binary patch delta 344 zcmbQvJdasYzMF%?rsR1X19Nd@UP(r3NoI0<aY0UI$wXld+Yn*bW=H+YiPitbXVzIp zMxD5I@QT)qc(WIu_2=u!x9^kx6rv_|>h|@2e_w~%FMTqp_+7E2X20t9<A1j3PoG$- zQ$O?Xc2Um2l|_rDSZs9>2=!Rub}ef6W#(;}OO$W4MVrOmRPpdx?C#%}f56bn?fbq@ zcY7A;T>ZKKPK#a+iy42rcTHZ8`GR}f-mmfzTsd#%K{3gSE1wjj=f$mHm$_?rZuh*u zpBBBowI@ZwsQcKVgZ-P!_HNF79AOc|^|+!i@3P&x(!9yL+2xM$UYIE@Az)eKTi>ww z<%IW}C3hX`SZLjOELp~N@!y6Vqh*(k&u!(7tZCgC7uo)a{c)B|`13uL{Qr+AESmDl zYGS6`-Q&l4k1IY{7XR?y{|hSf*5n?@wYPrZCmeS*fh{ZdS*VY3$*X;g-`~ue;>W_k F000+osPF&) delta 337 zcmbQoJe^rszMF$1Q|w|K19Nd@UP(r3NoMjyNewF>VaH}i{mY59|HWt4?Tm~%aqHj} ztr_uVFFxzb|EfC?JTpV%{qw)afBNoU@?=u+yJAVre%0^C|7_7;KCw=x{>;DIT`em@ zECapfL@9M>se~wJuf6;F<>!L_T&b5F+a6By4L9Qyy8Pne<j2wlJ$)DVuCFm)tl|Cj zQ+%G2xEa&z_QR_4q#yIH*#366R<O$|W8)i2%>7YzDW@-Ue{EQ7eXR1j`2IaFcA4o* zw#B$4KYVaJa`(NP+X{7L^jZt|SZpuPln>v2@-8#~W44TEY-}!k@t5TvWLQ1<ozu3? 
zFzJQgqK^{p7c=Y?X7{}O(o_7_Ni05bP58Bie-7@DI)2qM|K4GH!w!w5yZlmi^}I`d xeC)A6!R7S@|LZe^{I8ik*fw9bWI5;hECc4PW}mf|Nm`ZOU)9aA%A19O0RZ&MqwN3y diff --git a/tests/input_files/samples.multiple_lanes.tsv b/tests/input_files/samples.multiple_lanes.tsv new file mode 100644 index 0000000..7968fb4 --- /dev/null +++ b/tests/input_files/samples.multiple_lanes.tsv @@ -0,0 +1,5 @@ +sample seqmode fq1 fq2 index_size kmer fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean kallisto_directionality alfa_directionality alfa_plus alfa_minus multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p fq2_polya_3p fq2_polya_5p +synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane1/synthetic_split_lane1.mate_1.fastq.gz ../input_files/pe_lane1/synthetic_split_lane1.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT +synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane2/synthetic_split_lane2.mate_1.fastq.gz ../input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane1/synthetic_split_lane1.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX 
+synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane2/synthetic_split_lane2.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX diff --git a/tests/input_files/samples_concat.tsv b/tests/input_files/samples_concat.tsv deleted file mode 100644 index 6740f70..0000000 --- a/tests/input_files/samples_concat.tsv +++ /dev/null @@ -1,7 +0,0 @@ -sample seqmode fq1 fq2 index_size kmer fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean kallisto_directionality alfa_directionality alfa_plus alfa_minus multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p fq2_polya_3p fq2_polya_5p -synthetic_split_10_reads_paired_synthetic_split_10_reads_paired pe ../input_files/pe_lane1/synthetic_split_lane1.mate_1.fastq.gz ../input_files/pe_lane1/synthetic_split_lane1.mate_2.fastq.gz 74 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT -synthetic_split_10_reads_paired_synthetic_split_10_reads_paired pe ../input_files/pe_lane2/synthetic_split_lane2.mate_1.fastq.gz ../input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz 74 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT -synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/project1/synthetic.mate_1.fastq.gz ../input_files/project1/synthetic.mate_2.fastq.gz 74 
31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/project2/synthetic.mate_1.fastq.gz XXXXXXXXXXXXXXX 74 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX -synthetic_split_10_reads_mate_1_synthetic_split_10_reads_mate_1 se ../input_files/se_lane1/synthetic_split_lane1.mate_1.fastq.gz XXXXXXXXXXXXXXX 74 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX -synthetic_split_10_reads_mate_1_synthetic_split_10_reads_mate_1 se ../input_files/se_lane2/synthetic_split_lane2.mate_1.fastq.gz XXXXXXXXXXXXXXX 74 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX diff --git a/tests/test_integration_workflow_multiple_lanes/expected_output.md5 b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 new file mode 100644 index 0000000..f8f074e --- /dev/null +++ b/tests/test_integration_workflow_multiple_lanes/expected_output.md5 @@ -0,0 +1,108 @@ +cbaebdb67aee4784b64aff7fec9fda42 results/kallisto_indexes/homo_sapiens/kallisto.idx +0ac1afd9a4f380afd70be75b21814c64 
results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json +51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv +7f8679a6e6622e1b611642b5735f357c results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json +dee7cdc194d5d0617552b7a3b5ad8dfb results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt +8e2e96e2d6b7f29940ad5de40662b7cb results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt +d0826904b8afa45352906ad9591f2bfb results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt +8d3291e6bcdbe9902fbd7c887494173f results/star_indexes/homo_sapiens/75/STAR_index/chrStart.txt +83ea3c15ab782b5c55bfaefda8e7aad8 results/star_indexes/homo_sapiens/75/STAR_index/exonGeTrInfo.tab +bad9d837f9a988694cc7080ee6d2997a results/star_indexes/homo_sapiens/75/STAR_index/exonInfo.tab +0c0b013fb8cbb8f3cb7a7bf92f3b1544 results/star_indexes/homo_sapiens/75/STAR_index/geneInfo.tab +00dda17b3c3983873d1474e9a758d6e6 results/star_indexes/homo_sapiens/75/STAR_index/Genome +c0d91c3af633d9439bfd0160d11efe4d results/star_indexes/homo_sapiens/75/STAR_index/SA +27884e419e42a7c8b3b2f49543de0260 results/star_indexes/homo_sapiens/75/STAR_index/SAindex +bae93882f9148a6c55816b733c32a3a2 results/star_indexes/homo_sapiens/75/STAR_index/sjdbInfo.txt +875030141343fca11f0b5aa1a37e1b66 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.fromGTF.out.tab +ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.out.tab +65e794aa5096551254af18a678d02264 results/star_indexes/homo_sapiens/75/STAR_index/transcriptInfo.tab +500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_adapters_mate1.fastq +500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate1.fastq 
+e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_adapters_mate2.fastq +1c0796d7e0bdab0e99780b2e11d80c19 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.remove_polya_mate2.fastq +d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.SJ.out.tab +9896744dd90ff3eef00c91fa1f721366 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc_data.txt +6946ba80af318b9c1052b264dc674a51 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/fastqc.fo +2603f3031242e97411a71571f6ad9e53 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/summary.txt +c39fc9108e6f6c0df45acc9391daad9c results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc_data.txt +82c37e4cb9c1e167383d589ccb5c80b4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc.fo +2029b1ecea0c5fb3c54238813cf02a26 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/summary.txt +310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/adapter_content.png +42741852cc110a151580bb3bb5180fc0 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/duplication_levels.png +8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_n_content.png +848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_quality.png +56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_sequence_content.png +e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_gc_content.png +69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_quality.png +b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_tile_quality.png +5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/sequence_length_distribution.png +310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/adapter_content.png +42741852cc110a151580bb3bb5180fc0 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/duplication_levels.png +8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_n_content.png +848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_quality.png +73a907996c12a3c39bea535588e65658 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_sequence_content.png +3a5ef8cfdbab5c8987941fdd46145ca4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_gc_content.png +69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png +b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png +5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png +2e77276535976efccb244627231624bf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/abundance.tsv +d013650f813b815a790c9e6a51c7559b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/pseudoalignments.bam +d41d8cd98f00b204e9800998ecf8427e 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.kallisto.pseudo.sam +981b59830d74d300bb5dd3e602e0d86f results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/lib_format_counts.json +989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/ambig_info.tsv +3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/expected_bias +92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias +92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/observed_bias_3p +d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/aux_info/unmapped_names.txt +500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_adapters_mate1.fastq +500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_polya_mate1.fastq +d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.SJ.out.tab +fdb8c6ddd39b606414b2785d6ec2da8a 
results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc_data.txt +3cb70940acdcca512207bd8613085538 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc.fo +fc276a1711cc35f7a9d5328bdbbab810 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/summary.txt +310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/adapter_content.png +42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/duplication_levels.png +8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_n_content.png +848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_quality.png +56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_sequence_content.png +e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_gc_content.png +69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png 
+b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png +5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png +50a9b89a9f1da2c438cb0041b64faa0e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/abundance.tsv +fd8242418230a4edb33350be2e4f1d78 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/pseudoalignments.bam +d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.kallisto.pseudo.sam +d6ae863b39ca6ec5d0f63c03036f9dda results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/lib_format_counts.json +989d6ee63b728fced9ec0249735ab83d results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/ambig_info.tsv +3407f87245d0003e0ffbfdf6d8c04f20 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/expected_bias +92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias +92bcd0592d22a6a58d0360fc76103e56 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/observed_bias_3p +d41d8cd98f00b204e9800998ecf8427e 
results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/aux_info/unmapped_names.txt +3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa +6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index +2caebc23faf78fdbbbdbb118d28bd6b5 results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.unstranded.ALFA_index +bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.UniqueMultiple.minus.bg +ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.UniqueMultiple.plus.bg +bcccf679a8c083d01527514c9f5680a0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.Unique.minus.bg +ea91b4f85622561158bff2f7c9c312b3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.Unique.plus.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired.UniqueMultiple.minus.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired.UniqueMultiple.plus.bg +90ae442ebf35015eab2dd4e804c2bafb results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired.Unique.minus.bg +16652c037090f3eed1123618a2e75107 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired.Unique.plus.bg 
+c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv +c1254a0bae19ac3ffc39f73099ffcf2b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/ALFA/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.ALFA_feature_counts.tsv +53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv +53fd53f884352d0493b2ca99cef5d76d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/ALFA/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired.ALFA_feature_counts.tsv +ed3428feeb7257b0a69ead76a417e339 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/bigWig/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_UniqueMultiple_minus.bw +2767ca6a648f3e37b7e3b05ce7845460 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/bigWig/UniqueMultiple/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_UniqueMultiple_plus.bw +ed3428feeb7257b0a69ead76a417e339 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/bigWig/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Unique_minus.bw +2767ca6a648f3e37b7e3b05ce7845460 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/bigWig/Unique/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Unique_plus.bw +69e2bf688165e9fb7c9c49a8763f5632 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_UniqueMultiple_minus.bw +ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/UniqueMultiple/synthetic_10_reads_paired_synthetic_10_reads_paired_UniqueMultiple_plus.bw +69e2bf688165e9fb7c9c49a8763f5632 
results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_minus.bw +ec5aab1b79e7880dfa590e5bc7db5232 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/bigWig/Unique/synthetic_10_reads_paired_synthetic_10_reads_paired_Unique_plus.bw +3e4db5fad83e162bcc19abbe81333a95 results/multiqc_summary/multiqc_data/multiqc_cutadapt.txt +0c6363588cf6ff74d49f27c164185918 results/multiqc_summary/multiqc_data/multiqc_star.txt +dd81441ca97912a62292d317af2c107c results/multiqc_summary/multiqc_data/multiqc_kallisto.txt +ba090b1b4a2473891de97493d3244956 results/multiqc_summary/multiqc_data/multiqc_fastqc.txt +0703b4cb7ec2abfab13ccd5f58c2d536 results/multiqc_summary/multiqc_data/multiqc_general_stats.txt diff --git a/tests/test_integration_workflow_multiple_lanes/test.local.sh b/tests/test_integration_workflow_multiple_lanes/test.local.sh new file mode 100755 index 0000000..018b47d --- /dev/null +++ b/tests/test_integration_workflow_multiple_lanes/test.local.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# Tear down test environment +cleanup () { + rc=$? 
+ rm -rf .cache/ + rm -rf .config/ + rm -rf .fontconfig/ + rm -rf .java/ + rm -rf .snakemake/ + rm -rf logs/ + rm -rf results/ + rm -rf snakemake_report.html + cd $user_dir + echo "Exit status: $rc" +} +trap cleanup EXIT + +# Set up test environment +set -eo pipefail # ensures that script exits at first command that exits with non-zero status +set -u # ensures that script exits when unset variables are used +set -x # facilitates debugging by printing out executed commands +user_dir=$PWD +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +cd $script_dir + +# Run tests +snakemake \ + --snakefile="../../Snakefile" \ + --configfile="../input_files/config.mutliple_lanes.yml" \ + --cores=4 \ + --printshellcmds \ + --rerun-incomplete \ + --use-singularity \ + --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --verbose + +# Create a Snakemake report after the workflow execution +snakemake \ + --snakefile="../../Snakefile" \ + --configfile="../input_files/config.mutliple_lanes.yml" \ + --report="snakemake_report.html" + +# Check md5 sum of some output files +find results/ -type f -name \*\.gz -exec gunzip '{}' \; +find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; +md5sum --check "expected_output.md5" + + + +# Check whether STAR produces expected alignments +# STAR alignments need to be fully within ground truth alignments for tests to pass; not checking +# vice versa because processing might cut off parts of reads (if testing STAR directly, add '-f 1' +# as additional option) +echo "Verifying STAR output" +result=$(bedtools intersect -F 1 -v -bed \ + -a ../input_files/synthetic.mate_1.bed \ + -b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \ + | wc -l) +if [ $result != "0" ]; then + echo "Alignments for mate 1 reads are not consistent with ground truth" + exit 1 +fi 
+result=$(bedtools intersect -F 1 -v -bed \ + -a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \ + -b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \ + | wc -l) +if [ $result != "0" ]; then + echo "Alignments for paired-end reads are not consistent with ground truth" + exit 1 +fi + +# Check whether Salmon assigns reads to expected genes +echo "Verifying Salmon output" +diff \ + <(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') +diff \ + <(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') + diff --git a/tests/test_integration_workflow_multiple_lanes/test.slurm.sh b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh new file mode 100755 index 0000000..f2dd459 --- /dev/null +++ b/tests/test_integration_workflow_multiple_lanes/test.slurm.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +# Tear down test environment +cleanup () { + rc=$? 
+ rm -rf .cache/ + rm -rf .config/ + rm -rf .fontconfig/ + rm -rf .java/ + rm -rf .snakemake/ + rm -rf logs/ + rm -rf results/ + rm -rf snakemake_report.html + cd $user_dir + echo "Exit status: $rc" +} +trap cleanup EXIT + +# Set up test environment +set -eo pipefail # ensures that script exits at first command that exits with non-zero status +set -u # ensures that script exits when unset variables are used +set -x # facilitates debugging by printing out executed commands +user_dir=$PWD +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +cd $script_dir + +# Run tests +snakemake \ + --snakefile="../../Snakefile" \ + --configfile="../input_files/config.mutliple_lanes.yml" \ + --cluster-config="../input_files/cluster.json" \ + --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ + --cores=256 \ + --printshellcmds \ + --rerun-incomplete \ + --use-singularity \ + --singularity-args="--bind ${PWD}/../input_files,${PWD}/../../images" \ + --verbose + +# Create a Snakemake report after the workflow execution +snakemake \ + --snakefile="../../Snakefile" \ + --configfile="../input_files/config.mutliple_lanes.yml" \ + --report="snakemake_report.html" + +# Check md5 sum of some output files +find results/ -type f -name \*\.gz -exec gunzip '{}' \; +find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; +md5sum --check "expected_output.md5" + +# Checksum file generated with +# find results/ \ +# -type f \ +# -name \*\.gz \ +# -exec gunzip '{}' \; +# find results/ \ +# -type f \ +# -name \*\.zip \ +# -exec sh -c 'unzip -o {} -d $(dirname {})' \; +# md5sum $(cat expected_output.files) > expected_output.md5 + +# Check whether STAR produces expected alignments +# STAR alignments need to be fully within ground truth alignments for tests to pass; not checking +# vice versa because processing might cut off parts of 
reads (if testing STAR directly, add '-f 1' +# as additional option) +echo "Verifying STAR output" +result=$(bedtools intersect -F 1 -v -bed \ + -a ../input_files/synthetic.mate_1.bed \ + -b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \ + | wc -l) +if [ $result != "0" ]; then + echo "Alignments for mate 1 reads are not consistent with ground truth" + exit 1 +fi +result=$(bedtools intersect -F 1 -v -bed \ + -a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \ + -b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \ + | wc -l) +if [ $result != "0" ]; then + echo "Alignments for paired-end reads are not consistent with ground truth" + exit 1 +fi + +# Check whether Salmon assigns reads to expected genes +echo "Verifying Salmon output" +diff \ + <(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') +diff \ + <(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \ + <(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}') + + diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index d66b120..25046d0 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -31,13 +31,13 @@ rule pe_remove_adapters_cutadapt: params: adapter_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 
'fq1_3p'], + get_sample('fq1_3p', search_id='index', search_value=wildcards.sample), adapter_5_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_5p'], + get_sample('fq1_5p', search_id='index', search_value=wildcards.sample), adapter_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_3p'], + get_sample('fq2_3p', search_id='index', search_value=wildcards.sample), adapter_5_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_5p'] + get_sample('fq2_5p', search_id='index', search_value=wildcards.sample) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -104,13 +104,25 @@ rule pe_remove_polya_cutadapt: params: polya_3_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya_3p'], + get_sample( + 'fq1_polya_3p', + search_id='index', + search_value=wildcards.sample), polya_5_mate1 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_polya_5p'], + get_sample( + 'fq1_polya_5p', + search_id='index', + search_value=wildcards.sample), polya_3_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya_3p'], + get_sample( + 'fq2_polya_3p', + search_id='index', + search_value=wildcards.sample), polya_5_mate2 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq2_polya_5p'] + get_sample( + 'fq2_polya_5p', + search_id='index', + search_value=wildcards.sample) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -156,8 +168,14 @@ rule pe_map_genome_star: index = lambda wildcards: os.path.join( config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'index_size', + search_id='index', + search_value=wildcards.sample), "STAR_index", "chrNameLength.txt"), reads1 = os.path.join( @@ -190,8 +208,14 @@ rule pe_map_genome_star: index = lambda wildcards: os.path.join( config["star_indexes"], - 
str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'index_size', + search_id='index', + search_value=wildcards.sample), "STAR_index"), outFileNamePrefix = os.path.join( config["output_dir"], @@ -200,11 +224,20 @@ rule pe_map_genome_star: "map_genome", "{sample}.pe."), multimappers = lambda wildcards: - str(samples_table.loc[wildcards.sample, "multimappers"]), + get_sample( + 'multimappers', + search_id='index', + search_value=wildcards.sample), soft_clip = lambda wildcards: - samples_table.loc[wildcards.sample, "soft_clip"], + get_sample( + 'soft_clip', + search_id='index', + search_value=wildcards.sample), pass_mode = lambda wildcards: - samples_table.loc[wildcards.sample, "pass_mode"] + get_sample( + 'pass_mode', + search_id='index', + search_value=wildcards.sample), singularity: "docker://zavolab/star:2.7.3a-slim" @@ -259,12 +292,21 @@ rule pe_quantification_salmon: "{sample}", "{sample}.pe.remove_polya_mate2.fastq.gz"), gtf = lambda wildcards: - samples_table.loc[wildcards.sample, 'gtf'], + get_sample( + 'gtf', + search_id='index', + search_value=wildcards.sample), index = lambda wildcards: os.path.join( config["salmon_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "kmer"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'kmer', + search_id='index', + search_value=wildcards.sample), "salmon.idx") output: @@ -288,7 +330,10 @@ rule pe_quantification_salmon: "{sample}", "{sample}.salmon.pe"), libType = lambda wildcards: - samples_table.loc[wildcards.sample, 'libtype'] + get_sample( + 'libtype', + search_id='index', + search_value=wildcards.sample) log: stderr = os.path.join( @@ -340,7 +385,10 @@ rule pe_genome_quantification_kallisto: index = lambda wildcards: os.path.join( 
config["kallisto_indexes"], - samples_table.loc[wildcards.sample, 'organism'], + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), "kallisto.idx") output: @@ -358,7 +406,10 @@ rule pe_genome_quantification_kallisto: "{sample}", "quant_kallisto"), directionality = lambda wildcards: - samples_table.loc[wildcards.sample, "kallisto_directionality"] + get_sample( + 'kallisto_directionality', + search_id='index', + search_value=wildcards.sample) singularity: "docker://zavolab/kallisto:0.46.1-slim" diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index 41e5062..071b9e5 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -19,9 +19,15 @@ rule remove_adapters_cutadapt: params: adapters_3 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_3p'], + get_sample( + 'fq1_3p', + search_id='index', + search_value=wildcards.sample), adapters_5 = lambda wildcards: - samples_table.loc[wildcards.sample, 'fq1_5p'] + get_sample( + 'fq1_5p', + search_id='index', + search_value=wildcards.sample) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -72,9 +78,15 @@ rule remove_polya_cutadapt: params: polya_3 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya_3p"], + get_sample( + 'fq1_polya_3p', + search_id='index', + search_value=wildcards.sample), polya_5 = lambda wildcards: - samples_table.loc[wildcards.sample, "fq1_polya_5p"] + get_sample( + 'fq1_polya_5p', + search_id='index', + search_value=wildcards.sample) singularity: "docker://zavolab/cutadapt:1.16-slim" @@ -115,8 +127,8 @@ rule map_genome_star: index = lambda wildcards: os.path.join( config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample('organism', search_id='index', search_value=wildcards.sample), + get_sample('index_size', search_id='index', search_value=wildcards.sample), 
"STAR_index", "chrNameLength.txt"), reads = os.path.join( @@ -144,8 +156,8 @@ rule map_genome_star: index = lambda wildcards: os.path.join( config["star_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "index_size"]), + get_sample('organism', search_id='index', search_value=wildcards.sample), + get_sample('index_size', search_id='index', search_value=wildcards.sample), "STAR_index"), outFileNamePrefix = os.path.join( config["output_dir"], @@ -154,11 +166,20 @@ rule map_genome_star: "map_genome", "{sample}.se."), multimappers = lambda wildcards: - samples_table.loc[wildcards.sample, "multimappers"], + get_sample( + 'multimappers', + search_id='index', + search_value=wildcards.sample), soft_clip = lambda wildcards: - samples_table.loc[wildcards.sample, "soft_clip"], + get_sample( + 'soft_clip', + search_id='index', + search_value=wildcards.sample), pass_mode = lambda wildcards: - samples_table.loc[wildcards.sample, "pass_mode"], + get_sample( + 'pass_mode', + search_id='index', + search_value=wildcards.sample) singularity: "docker://zavolab/star:2.7.3a-slim" @@ -210,11 +231,20 @@ rule quantification_salmon: index = lambda wildcards: os.path.join( config["salmon_indexes"], - str(samples_table.loc[wildcards.sample, "organism"]), - str(samples_table.loc[wildcards.sample, "kmer"]), + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), + get_sample( + 'kmer', + search_id='index', + search_value=wildcards.sample), "salmon.idx"), gtf = lambda wildcards: - samples_table.loc[wildcards.sample, "gtf"] + get_sample( + 'gtf', + search_id='index', + search_value=wildcards.sample) output: gn_estimates = os.path.join( @@ -237,12 +267,20 @@ rule quantification_salmon: "{sample}", "{sample}.salmon.se"), libType = lambda wildcards: - samples_table.loc[wildcards.sample, "libtype"], + get_sample( + 'libtype', + search_id='index', + search_value=wildcards.sample), fraglen = lambda wildcards: - 
samples_table.loc[wildcards.sample, 'mean'], + get_sample( + 'mean', + search_id='index', + search_value=wildcards.sample), fragsd = lambda wildcards: - samples_table.loc[wildcards.sample, 'sd'] - + get_sample( + 'sd', + search_id='index', + search_value=wildcards.sample) log: stderr = os.path.join( config["log_dir"], @@ -289,7 +327,10 @@ rule genome_quantification_kallisto: index = lambda wildcards: os.path.join( config["kallisto_indexes"], - samples_table.loc[wildcards.sample, "organism"], + get_sample( + 'organism', + search_id='index', + search_value=wildcards.sample), "kallisto.idx") output: @@ -307,11 +348,20 @@ rule genome_quantification_kallisto: "{sample}", "quant_kallisto"), fraglen = lambda wildcards: - samples_table.loc[wildcards.sample, 'mean'], + get_sample( + 'mean', + search_id='index', + search_value=wildcards.sample), fragsd = lambda wildcards: - samples_table.loc[wildcards.sample, 'sd'], + get_sample( + 'sd', + search_id='index', + search_value=wildcards.sample), directionality = lambda wildcards: - samples_table.loc[wildcards.sample, 'kallisto_directionality'] + get_sample( + 'kallisto_directionality', + search_id='index', + search_value=wildcards.sample) threads: 8 -- GitLab