Skip to content
Snippets Groups Projects
Commit d291ae17 authored by Iris Mestres Pascual's avatar Iris Mestres Pascual
Browse files

chore: remove template RUNS/ directory

parent accf8ec8
Branches
No related tags found
1 merge request: !27 "chore: remove template RUNS/ directory"
Showing
with 0 additions and 646 deletions
{
"__default__" :
{
"queue": "30min",
"time": "00:05:00",
"threads": "1",
"mem": "4G"
},
"cutadapt":
{
"threads":"{resources.threads}"
},
"mapping_genome_segemehl":
{
"mem":"{resources.mem}G"
},
"mapping_transcriptome_segemehl":
{
"mem":"{resources.mem}G"
},
"mapping_genome_oligomap":
{
"mem":"{resources.mem}G"
},
"mapping_transcriptome_oligomap":
{
"mem":"{resources.mem}G"
},
"sort_transcriptome_oligomap":
{
"threads":"{resources.threads}"
},
"sort_genome_oligomap":
{
"threads":"{resources.threads}"
},
"remove_inferiors":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_transcriptome":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_genome":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"sort_alignment":
{
"mem":"{resources.mem}G",
"threads":"{resources.threads}"
}
}
sample sample_file adapter format
sample_lib path/to/sample_library_file XXXXXXXXXXXXXXXXXX library_format
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"cutadapt":
{
"threads":"{resources.threads}"
},
"mapping_genome_segemehl":
{
"queue": "1day",
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_transcriptome_segemehl":
{
"queue": "1day",
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_genome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_transcriptome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"sort_transcriptome_oligomap":
{
"threads":"{resources.threads}"
},
"sort_genome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}"
},
"oligomap_genome_toSAM":
{
"time": "{resources.time}-00:00:00",
"queue": "{resources.queue}day"
},
"remove_inferiors":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
scripts_dir: "../../../scripts"
output_dir: "results"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Resources: genome, transcriptome, genes, miRs
# All of these are produced by the "prepare" workflow
genome: "path/to/genome.processed.fa"
gtf: "path/to/gene_annotations.filtered.gtf"
transcriptome: "path/to/transcriptome_idtrim.fa"
transcriptome_index_segemehl: "path/to/transcriptome_index_segemehl.idx"
genome_index_segemehl: "path/to/genome_index_segemehl.idx"
exons: "path/to/exons.bed"
header_of_collapsed_fasta: "path/to/headerOfCollapsedFasta.sam"
# Tool parameters: quality filter
q_value: 10 # Q (Phred) score; minimum quality score to keep
p_value: 50 # minimum % of bases that must have Q quality
# Tool parameters: adapter removal
error_rate: 0.1 # fraction of allowed errors
minimum_length: 15 # discard processed reads shorter than the indicated length
overlap: 3 # minimum overlap length of adapter and read to trim the bases
max_n: 0 # discard reads containing more than the indicated number of N bases
# Tool parameters: mapping
max_length_reads: 30 # maximum length of processed reads to map with oligomap
nh: 100 # discard reads with more mappings than the indicated number
# Inputs information
input_dir: "path/to/input_directory"
sample: ["sample_1", "sample_2"] # put all sample names, separated by comma
#### PARAMETERS SPECIFIC TO INPUTS ####
sample_1: # one section per list item in "sample"; names have to match
adapter: "XXXXXXXXXXXXXXXXXXXX" # 3' adapter sequence to trim
format: "fa" # file format; currently supported: "fa"
...
results/small_input/formatted.fasta
results/small_input/header_sorted_catMappings.sam
results/small_input/fa/reads.fa
results/small_input/GenomeMappings.sam
results/small_input/segemehlTranscriptome_map.sam
results/small_input/noheader_GenomeMappings.sam
results/small_input/oligoGenome_map.fa
results/small_input/cutted.fasta
results/small_input/segemehlGenome_map.sam
results/small_input/oligoTranscriptome_converted.sam
results/small_input/nhfiltered_GenomeMappings.sam
results/small_input/oligoTranscriptome_map.fa
results/small_input/oligoTranscriptome_report.txt
results/small_input/TransToGen.sam
results/small_input/concatenated_header_catMappings.sam
results/small_input/oligoGenome_converted.sam
results/small_input/TranscriptomeMappings.sam
results/small_input/noheader_TranscriptomeMappings.sam
results/small_input/oligoTranscriptome_sorted.fa
results/small_input/collapsed.fasta
results/small_input/catMappings.sam
results/small_input/filtered_for_oligomap.fasta
results/small_input/oligoGenome_sorted.fa
results/small_input/nhfiltered_TranscriptomeMappings.sam
results/small_input/oligoGenome_report.txt
\ No newline at end of file
#!/bin/bash
# Integration test: run the "map" Snakemake workflow locally with Singularity,
# then generate an HTML report. Cleans up intermediate files on exit.

# Tear down environment
cleanup () {
    rc=$?
    # Remove intermediate files recorded by the workflow, if the list exists.
    # Read line by line so paths with spaces are handled safely.
    if [[ -f intermediate_files.txt ]]; then
        while IFS= read -r f; do
            rm -f -- "$f"
        done < intermediate_files.txt
    fi
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "map" Snakemake workflow on a Slurm cluster with
# Singularity, then generate an HTML report. Cleans up intermediates on exit.

# Tear down environment
cleanup () {
    rc=$?
    # Remove intermediate files recorded by the workflow, if the list exists.
    # Read line by line so paths with spaces are handled safely.
    if [[ -f intermediate_files.txt ]]; then
        while IFS= read -r f; do
            rm -f -- "$f"
        done < intermediate_files.txt
    fi
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster/{sample_1,sample_2}
mkdir -p logs/local/{sample_1,sample_2}
mkdir -p results/{sample_1,sample_2}

# Run workflow
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"generate_segemehl_index_transcriptome":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_genome":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
scripts_dir: "../../../scripts"
output_dir: "results"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Isomirs annotation file
# Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
# List of inputs
organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"]
# this string specifies a path, and the "/" is important for this
# "pre" specifies the assembly version
#### PARAMETERS SPECIFIC TO INPUTS ####
org/pre: # One section for each list item in "organism"; entry should match precisely what
# is in the "organism" section above, one entry per list item above, omitting the ""
# URLs to genome, gene & miRNA annotations
genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style
# e.g. "ftp://ftp.ensembl.org/pub/release-106/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz"
gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style
# e.g. "ftp://ftp.ensembl.org/pub/release-106/gtf/homo_sapiens/Homo_sapiens.GRCh38.106.chr.gtf.gz"
mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style
# e.g. "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3"
# Chromosome name mappings between UCSC <-> Ensembl
# Other organisms available at: https://github.com/dpryan79/ChromosomeMappings
map_chr_url: # FTP/HTTP URL to mapping table
# e.g. "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt"
# Chromosome name mapping parameters:
column: 1 # Column number from input file where to change chromosome name
delimiter: "TAB" # Delimiter of the input file
...
#!/bin/bash
# Integration test: run the "prepare" Snakemake workflow locally with
# Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "prepare" Snakemake workflow on a Slurm cluster
# with Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster/org/pre
mkdir -p logs/local/org/pre
mkdir -p results/org/pre

# Run workflow
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"sort_alignment":
{
"mem":"{resources.mem}G",
"threads":"{resources.threads}"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
output_dir: "results"
scripts_dir: "../../../scripts"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Types of miRNAs to quantify
# Remove miRNA types you are not interested in
mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"]
# Resources: miR annotations, chromosome name mappings
# All of these are produced by the "prepare" workflow
mirnas_anno: "path/to/mirna_filtered.bed"
isomirs_anno: "path/to/isomirs_annotation.bed"
# Inputs information
input_dir: "path/to/input_directory"
sample: ["sample_1", "sample_2"] # put all samples, separated by comma
...
#!/bin/bash
# Integration test: run the "quantify" Snakemake workflow locally with
# Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "quantify" Snakemake workflow on a Slurm cluster
# with Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster
mkdir -p logs/local
mkdir -p results

# Run workflow
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the full pipeline Snakefile locally with Singularity,
# then generate an HTML report.

# Tear down test environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run test
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --cores 4 \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../" \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the full pipeline Snakefile on a Slurm cluster with
# Singularity, then generate an HTML report.

# Tear down test environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml files
mkdir -p logs/cluster/{homo_sapiens/chrY,test_lib}
mkdir -p logs/local/{homo_sapiens/chrY,test_lib}
mkdir -p results/{homo_sapiens/chrY,test_lib}

# Run test
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --cores=256 \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --jobscript="../../jobscript.sh" \
    --jobs=20 \
    --use-singularity \
    --singularity-args="--bind ${PWD}/../" \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --report="snakemake_report.html"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment