Skip to content
Snippets Groups Projects
Commit 5f1d8ecc authored by BIOPZ-Iborra de Toledo Paula's avatar BIOPZ-Iborra de Toledo Paula
Browse files

Updated test folder. Working local and slurm tests.

parent 525937b5
Branches
Tags
1 merge request!1WIP: Updated workflow
---
##############################################################################
### Necessary input files
### Necessary inputs
##############################################################################
genome: "/scicore/home/zavolan/devagy74/projects/filter-anno/GRCh38.genome.processed.fa"
organism: "homo_sapiens"
#genome: "/scicore/home/zavolan/devagy74/projects/filter-anno/GRCh38.genome.processed.fa"
genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz"
#gtf: "/scicore/home/zavolan/devagy74/projects/filter-anno/GRCh38_96.gene_annotations.filtered.gtf"
gtf_url: "ftp://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.chr.gtf.gz"
gtf_prefix_name: "GRCh38_96"
gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz"
prefix_name: "GRCh38.98_chrY"
##############################################################################
### Directories
##############################################################################
output_dir: "results"
scripts_dir: "../scripts"
local_log: "logs/local_log"
cluster_log: "logs/cluster_log"
local_log: "logs/local"
cluster_log: "logs/cluster"
...
results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa
results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx
results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa
results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai
results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf
results/homo_sapiens/GRCh38.98_chrY/exons.gtf
results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa
results/homo_sapiens/GRCh38.98_chrY/exons.bed
results/homo_sapiens/GRCh38.98_chrY/headerOfCollapsedFasta.sam
results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx
9161071fdb39a1376c11307fb724b122 results/transcriptome_index_segemehl.idx
f90eb515017dc586af7944cc971333c4 results/transcriptome_idtrim.fa
cf57bc82642990336aa0a30404c71761 results/exons.gtf
999c2944d153632f54a1f9456f84fbf6 results/transcriptome.fa
e24fdf58ea83750f0d1dfdf3fbde292e results/exons.bed
46d25b79e5005b4387bcfb15e5e7d97e results/headerOfCollapsedFasta.sam
f854fbbe370d3d942c3e56996ba40f42 results/genome_index_segemehl.idx
583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa
a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx
bf1e37165b908729327599801ff5147b results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa
f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai
0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf
6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/GRCh38.98_chrY/exons.gtf
5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa
51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/GRCh38.98_chrY/exons.bed
7027e0b45c8e46ef79f1cb234fa3b839 results/homo_sapiens/GRCh38.98_chrY/headerOfCollapsedFasta.sam
11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx
#!/bin/bash
# Tear down test environment
trap 'cd $user_dir' EXIT # quoted command is executed when the script exits, regardless of exit status
#######################################
# Tear-down handler, intended to be installed with `trap cleanup EXIT`.
# Globals:   user_dir (read) - directory to change back to
# Arguments: none
# Outputs:   prints "Exit status: <code>" to stdout
#######################################
cleanup () {
  # Must stay the first command: captures the status that was current when
  # the handler was entered.
  local rc=$?
  #rm -rf .snakemake
  #rm -rf logs/
  # user_dir may still be unset if the script exits early; skip the cd in
  # that case instead of failing on an unbound variable under `set -u`.
  if [[ -n "${user_dir:-}" ]]; then
    cd -- "$user_dir" || printf 'cleanup: cannot cd to %s\n' "$user_dir" >&2
  fi
  echo "Exit status: $rc"
}
trap cleanup EXIT
# Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status
......@@ -18,4 +25,5 @@ snakemake \
--dag \
--printshellcmds \
--dryrun \
| dot -Tsvg > "../images/workflow_dag.svg"
--verbose \
| dot -Tsvg > "../images/workflow_dag.svg"
\ No newline at end of file
#!/bin/bash
# Tear down test environment
#######################################
# Tear-down handler, installed below with `trap cleanup EXIT`.
# Globals:   user_dir (read) - directory the script was started from
# Arguments: none
# Outputs:   prints "Exit status: <code>" to stdout
#######################################
cleanup () {
  # Must stay the first command: captures the status that was current when
  # the handler was entered.
  local rc=$?
  #rm -rf .snakemake
  #rm -rf logs/
  # The trap is registered before user_dir is assigned, so the variable can
  # still be unset here; skip the cd instead of failing under `set -u`.
  if [[ -n "${user_dir:-}" ]]; then
    cd -- "$user_dir" || printf 'cleanup: cannot cd to %s\n' "$user_dir" >&2
  fi
  echo "Exit status: $rc"
}
trap cleanup EXIT
# Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status
set -u # ensures that script exits when unset variables are used
set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# Quoted so that a checkout path containing whitespace or glob characters
# is passed to cd as a single argument.
cd "$script_dir"
# Render the workflow rule graph: snakemake emits the graph description on
# stdout (dry run only), and dot converts it to SVG.
snakemake \
--snakefile="../snakemake/Snakefile" \
--configfile="config.yaml" \
--rulegraph \
--printshellcmds \
--dryrun \
--verbose \
| dot -Tsvg > "../images/rule_graph.svg"
\ No newline at end of file
#!/bin/bash
# Tear down test environment
trap 'rm -rf .snakemake/ && cd $user_dir' EXIT # quoted command is executed when the script exits, regardless of exit status
#######################################
# Tear-down handler, intended to be installed with `trap cleanup EXIT`.
# Removes the logs/ and results/ directories found in the current working
# directory, then changes back to the starting directory.
# Globals:   user_dir (read) - directory to change back to
# Arguments: none
# Outputs:   prints "Exit status: <code>" to stdout
#######################################
cleanup () {
  # Must stay the first command: captures the status that was current when
  # the handler was entered.
  local rc=$?
  #rm -rf .snakemake/
  rm -rf logs/
  rm -rf results/
  # user_dir may still be unset if the script exits early; skip the cd in
  # that case instead of failing on an unbound variable under `set -u`.
  if [[ -n "${user_dir:-}" ]]; then
    cd -- "$user_dir" || printf 'cleanup: cannot cd to %s\n' "$user_dir" >&2
  fi
  echo "Exit status: $rc"
}
trap cleanup EXIT
# Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status
......@@ -10,35 +18,27 @@ set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd $script_dir
mkdir -p logs/cluster_log
mkdir -p logs/local_log
# Run tests
snakemake \
-p \
--printshellcmds \
--snakefile="../snakemake/Snakefile" \
--use-singularity \
--singularity-args "--no-home --bind ${PWD},/scicore/home/zavolan/devagy74/projects" \
--cores 256 \
--local-cores 10 \
--cores=4 \
--rerun-incomplete \
--configfile config.yaml \
--jobscript ../jobscript.sh \
--cluster-config ../cluster.json \
--cluster "sbatch \
--cpus-per-task={cluster.threads} \
--mem={cluster.mem} \
--qos={cluster.queue} \
--time={cluster.time} \
-o {params.cluster_log} \
-p scicore \
--export=JOB_NAME={rule} \
--open-mode=append"
--verbose
# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
md5sum --check "test.md5"
md5sum --check "expected_output.md5"
# Checksum file generated with
# find results/ \
# -type f \
# -exec md5sum '{}' \; \
# > expected_output.md5
# -name \*\.gz \
# -exec gunzip '{}' \;
# > expected_output.files
# md5sum $(cat expected_output.files) > expected_output.md5
#!/bin/bash
# Tear down test environment
#######################################
# Tear-down handler, installed below with `trap cleanup EXIT`.
# Globals:   user_dir (read) - directory the script was started from
# Arguments: none
# Outputs:   prints "Exit status: <code>" to stdout
#######################################
cleanup () {
  # Must stay the first command: captures the status that was current when
  # the handler was entered.
  local rc=$?
  #rm -rf .snakemake/
  #rm -rf logs/
  #rm -rf results/
  # The trap is registered before user_dir is assigned, so the variable can
  # still be unset here; skip the cd instead of failing under `set -u`.
  if [[ -n "${user_dir:-}" ]]; then
    cd -- "$user_dir" || printf 'cleanup: cannot cd to %s\n' "$user_dir" >&2
  fi
  echo "Exit status: $rc"
}
trap cleanup EXIT
# Set up test environment
set -eo pipefail # ensures that script exits at first command that exits with non-zero status
set -u # ensures that script exits when unset variables are used
set -x # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# Quoted so that a checkout path containing whitespace or glob characters
# is passed to cd as a single argument.
cd "$script_dir"
# Create the log and result directories only after entering the script
# directory: every path used below (config file, results/, the checksum
# list) is relative to it, so the directories must not end up in whatever
# directory the caller happened to start the script from.
mkdir -p logs/cluster/homo_sapiens/GRCh38.98_chrY
mkdir -p logs/local/homo_sapiens/GRCh38.98_chrY
mkdir -p results/homo_sapiens/GRCh38.98_chrY
# Run tests
snakemake \
--snakefile="../snakemake/Snakefile" \
--configfile="config.yaml" \
--cluster-config="../cluster.json" \
--cores=256 \
--jobscript="../jobscript.sh" \
--printshellcmds \
--rerun-incomplete \
--use-singularity \
--singularity-args="--no-home --bind ${PWD},/scicore/home/zavolan/devagy74/projects" \
--cluster "sbatch \
--cpus-per-task={cluster.threads} \
--mem={cluster.mem} \
--qos={cluster.queue} \
--time={cluster.time} \
--export=JOB_NAME={rule} \
-o {params.cluster_log} \
-p scicore \
--open-mode=append" \
--verbose
# Check md5 sum of some output files
# Compressed outputs are unpacked in place first, so the checksums are
# computed on the uncompressed files.
find results/ -type f -name '*.gz' -exec gunzip '{}' \;
# The file name is handed to the inner shell as a positional parameter
# rather than spliced into the command text, so names containing spaces or
# shell metacharacters are treated as data.
find results/ -type f -name '*.zip' \
  -exec sh -c 'unzip -o "$1" -d "$(dirname "$1")"' sh '{}' \;
md5sum --check "expected_output.md5"
# Checksum file generated with
# find results/ \
# -type f \
# -name \*\.gz \
# -exec gunzip '{}' \;
# > expected_output.files
# md5sum $(cat expected_output.files) > expected_output.md5
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment