-
- add script that prepares Snakemake input files 'samples.tsv' and 'config.yaml' from LabKey table - script either connects to API directly (with '--remote' and related options) or processes a tab-separated LabKey dump file - add tests for both use cases - common input files for tests now in 'tests/input_files' - update all other tests to account for new file locations - update documentation
- add script that prepares Snakemake input files 'samples.tsv' and 'config.yaml' from LabKey table - script either connects to API directly (with '--remote' and related options) or processes a tab-separated LabKey dump file - add tests for both use cases - common input files for tests now in 'tests/input_files' - update all other tests to account for new file locations - update documentation
test.slurm.sh 3.27 KiB
#!/bin/bash
# Integration test: runs the workflow via Slurm and verifies its outputs.
# All relative paths below are resolved against the directory containing this
# script; temporary outputs are removed again when the script exits.

# Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status
set -u # ensures that script exits when unset variables are used
set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Tear down test environment
# Removes the outputs created by the test run and returns to the directory the
# script was started from. Paths are anchored at the script directory so that
# the cleanup can never delete same-named directories somewhere else (e.g. in
# the caller's working directory if changing into the script directory fails).
cleanup() {
  rm -rf -- \
    "${script_dir:?}/logs" \
    "${script_dir:?}/results" \
    "${script_dir:?}/.snakemake" \
    "${script_dir:?}/.java" \
    "${script_dir:?}/local_log" \
    && cd "$user_dir"
}
# Registered only once 'script_dir' is known; the function is executed after
# the script exits, regardless of exit status
trap cleanup EXIT

cd "$script_dir"
mkdir -p logs/cluster_log
mkdir -p logs/local_log
# Run tests
# The options are collected in an array first so that each one sits on its own
# line without needing line continuations. The '{cluster.*}' placeholders are
# passed through literally for Snakemake to fill in (presumably from the
# cluster config file given below).
snakemake_opts=(
  --snakefile="../../Snakefile"
  --configfile="../input_files/config.yaml"
  --cluster-config="../input_files/cluster.json"
  --cluster="sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore"
  --cores=256
  --printshellcmds
  --rerun-incomplete
  --use-singularity
  # Bind the shared input directory into the containers
  --singularity-args="--bind ${PWD}/../input_files"
)
snakemake "${snakemake_opts[@]}"
# Check md5 sum of some output files
# Outputs are decompressed/unpacked in place first, since the checksum file was
# generated from the uncompressed contents (see below).
find results/ -type f -name '*.gz' -exec gunzip '{}' \;
# The archive path is handed to the inline shell as positional parameter $1
# instead of being pasted into the script text, so that paths containing
# spaces or shell metacharacters are handled safely.
find results/ -type f -name '*.zip' -exec sh -c 'unzip -o "$1" -d "$(dirname "$1")"' sh '{}' \;
md5sum --check "expected_output.md5"
# Checksum file generated with
# find results/ \
# -type f \
# -name '*.gz' \
# -exec gunzip '{}' \;
# find results/ \
# -type f \
# -name '*.zip' \
# -exec sh -c 'unzip -o "$1" -d "$(dirname "$1")"' sh '{}' \;
# md5sum $(cat expected_output.files) > expected_output.md5
# Check whether STAR produces expected alignments
# STAR alignments need to be fully within ground truth alignments for tests to pass; not checking
# vice versa because processing might cut off parts of reads (if testing STAR directly, add '-f 1'
# as additional option)

#######################################
# Abort the test run if the alignments are not consistent with ground truth.
# Counts the ground truth intervals that 'bedtools intersect -F 1 -v' reports,
# i.e. those for which no alignment lies entirely within the interval; any
# such interval is treated as a failure.
# Arguments:
#   $1 - label of the read set, used in the error message (e.g. "mate 1")
#   $2 - path to the BAM file with the alignments
#   $3.. - one or more ground truth BED files (concatenated before comparing)
# Outputs: error message on stdout if the check fails
# Returns: 0 if consistent; exits the script with status 1 otherwise
#######################################
assert_alignments_within_truth() {
  local label=$1
  local bam=$2
  shift 2
  local unmatched
  unmatched=$(bedtools intersect -F 1 -v -bed \
    -a <(cat -- "$@") \
    -b "$bam" \
    | wc -l)
  # Arithmetic comparison: robust against 'wc' implementations that pad the
  # count with whitespace
  if (( unmatched != 0 )); then
    echo "Alignments for ${label} reads are not consistent with ground truth"
    exit 1
  fi
}

echo "Verifying STAR output"
assert_alignments_within_truth "mate 1" \
  results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1_Aligned.sortedByCoord.out.bam \
  ../input_files/synthetic.mate_1.bed
assert_alignments_within_truth "paired-end" \
  results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired_Aligned.sortedByCoord.out.bam \
  ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed
# Check whether Salmon assigns reads to expected genes

#######################################
# Print "<name><TAB><value>" taken from columns 1 and 5 of a Salmon gene-level
# quantification file, without the header line, sorted by name.
# Arguments: $1 - path to a 'quant.genes.sf' file
# Outputs:   table on stdout
#######################################
observed_gene_counts() {
  cut -f1,5 -- "$1" | tail -n +2 | sort -k1,1
}

#######################################
# Print "<name><TAB><count>" for every distinct value in column 7 of a ground
# truth BED file, where <count> is the number of lines carrying that value.
# Sorted with the same key as the observed table so that the comparison does
# not depend on incidental ordering.
# Arguments: $1 - path to a ground truth BED file
# Outputs:   table on stdout
#######################################
expected_gene_counts() {
  # Explicit '%s' format: names are data and must never be interpreted as a
  # printf format string
  cut -f7 -- "$1" | sort | uniq -c | awk '{printf("%s\t%s\n", $2, $1)}' | sort -k1,1
}

echo "Verifying Salmon output"
diff \
  <(observed_gene_counts results/single_end/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/salmon_quant/quant.genes.sf) \
  <(expected_gene_counts ../input_files/synthetic.mate_1.bed)
# NOTE(review): the paired-end sample is compared against the mate 1 ground
# truth only, presumably because each read pair is counted once - confirm
diff \
  <(observed_gene_counts results/paired_end/synthetic_10_reads_paired_synthetic_10_reads_paired/salmon_quant/quant.genes.sf) \
  <(expected_gene_counts ../input_files/synthetic.mate_1.bed)