Skip to content
Snippets Groups Projects
Commit d291ae17 authored by Iris Mestres Pascual's avatar Iris Mestres Pascual
Browse files

chore: remove template RUNS/ directory

parent accf8ec8
Branches
No related tags found
1 merge request: !27 "chore: remove template RUNS/ directory"
Showing
with 0 additions and 646 deletions
{
"__default__" :
{
"queue": "30min",
"time": "00:05:00",
"threads": "1",
"mem": "4G"
},
"cutadapt":
{
"threads":"{resources.threads}"
},
"mapping_genome_segemehl":
{
"mem":"{resources.mem}G"
},
"mapping_transcriptome_segemehl":
{
"mem":"{resources.mem}G"
},
"mapping_genome_oligomap":
{
"mem":"{resources.mem}G"
},
"mapping_transcriptome_oligomap":
{
"mem":"{resources.mem}G"
},
"sort_transcriptome_oligomap":
{
"threads":"{resources.threads}"
},
"sort_genome_oligomap":
{
"threads":"{resources.threads}"
},
"remove_inferiors":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_transcriptome":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_genome":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"sort_alignment":
{
"mem":"{resources.mem}G",
"threads":"{resources.threads}"
}
}
sample sample_file adapter format
sample_lib path/to/sample_library_file XXXXXXXXXXXXXXXXXX library_format
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"cutadapt":
{
"threads":"{resources.threads}"
},
"mapping_genome_segemehl":
{
"queue": "1day",
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_transcriptome_segemehl":
{
"queue": "1day",
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_genome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"mapping_transcriptome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"sort_transcriptome_oligomap":
{
"threads":"{resources.threads}"
},
"sort_genome_oligomap":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}"
},
"oligomap_genome_toSAM":
{
"time": "{resources.time}-00:00:00",
"queue": "{resources.queue}day"
},
"remove_inferiors":
{
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
scripts_dir: "../../../scripts"
output_dir: "results"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Resources: genome, transcriptome, genes, miRs
# All of these are produced by the "prepare" workflow
genome: "path/to/genome.processed.fa"
gtf: "path/to/gene_annotations.filtered.gtf"
transcriptome: "path/to/transcriptome_idtrim.fa"
transcriptome_index_segemehl: "path/to/transcriptome_index_segemehl.idx"
genome_index_segemehl: "path/to/genome_index_segemehl.idx"
exons: "path/to/exons.bed"
header_of_collapsed_fasta: "path/to/headerOfCollapsedFasta.sam"
# Tool parameters: quality filter
q_value: 10 # Q (Phred) score; minimum quality score to keep
p_value: 50 # minimum % of bases that must have Q quality
# Tool parameters: adapter removal
error_rate: 0.1 # fraction of allowed errors
minimum_length: 15 # discard processed reads shorter than the indicated length
overlap: 3 # minimum overlap length of adapter and read to trim the bases
max_n: 0 # discard reads containing more than the indicated number of N bases
# Tool parameters: mapping
max_length_reads: 30 # maximum length of processed reads to map with oligomap
nh: 100 # discard reads with more mappings than the indicated number
# Inputs information
input_dir: "path/to/input_directory"
sample: ["sample_1", "sample_2"] # put all sample names, separated by comma
#### PARAMETERS SPECIFIC TO INPUTS ####
sample_1: # one section per list item in "sample"; names have to match
adapter: "XXXXXXXXXXXXXXXXXXXX" # 3' adapter sequence to trim
format: "fa" # file format; currently supported: "fa"
...
results/small_input/formatted.fasta
results/small_input/header_sorted_catMappings.sam
results/small_input/fa/reads.fa
results/small_input/GenomeMappings.sam
results/small_input/segemehlTranscriptome_map.sam
results/small_input/noheader_GenomeMappings.sam
results/small_input/oligoGenome_map.fa
results/small_input/cutted.fasta
results/small_input/segemehlGenome_map.sam
results/small_input/oligoTranscriptome_converted.sam
results/small_input/nhfiltered_GenomeMappings.sam
results/small_input/oligoTranscriptome_map.fa
results/small_input/oligoTranscriptome_report.txt
results/small_input/TransToGen.sam
results/small_input/concatenated_header_catMappings.sam
results/small_input/oligoGenome_converted.sam
results/small_input/TranscriptomeMappings.sam
results/small_input/noheader_TranscriptomeMappings.sam
results/small_input/oligoTranscriptome_sorted.fa
results/small_input/collapsed.fasta
results/small_input/catMappings.sam
results/small_input/filtered_for_oligomap.fasta
results/small_input/oligoGenome_sorted.fa
results/small_input/nhfiltered_TranscriptomeMappings.sam
results/small_input/oligoGenome_report.txt
\ No newline at end of file
#!/bin/bash
# Integration test: run the "map" Snakemake workflow locally with Singularity,
# then generate an HTML report. Cleans up intermediate files on exit.

# Tear down environment
cleanup () {
    rc=$?
    # Remove intermediate files recorded by the workflow, if the list exists.
    # Read line by line so paths with spaces are handled safely.
    if [[ -f intermediate_files.txt ]]; then
        while IFS= read -r f; do
            rm -f -- "$f"
        done < intermediate_files.txt
    fi
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "map" Snakemake workflow on a Slurm cluster with
# Singularity, then generate an HTML report. Cleans up intermediates on exit.

# Tear down environment
cleanup () {
    rc=$?
    # Remove intermediate files recorded by the workflow, if the list exists.
    # Read line by line so paths with spaces are handled safely.
    if [[ -f intermediate_files.txt ]]; then
        while IFS= read -r f; do
            rm -f -- "$f"
        done < intermediate_files.txt
    fi
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster/{sample_1,sample_2}
mkdir -p logs/local/{sample_1,sample_2}
mkdir -p results/{sample_1,sample_2}

# Run workflow
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/map/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"generate_segemehl_index_transcriptome":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
},
"generate_segemehl_index_genome":
{
"time": "{resources.time}:00:00",
"threads":"{resources.threads}",
"mem":"{resources.mem}G"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
scripts_dir: "../../../scripts"
output_dir: "results"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Isomirs annotation file
# Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
# List of inputs
organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"]
# this string specifies a path, and the "/" is important for this
# "pre" specifies the assembly version
#### PARAMETERS SPECIFIC TO INPUTS ####
org/pre: # One section for each list item in "organism"; entry should match precisely what
# is in the "organism" section above, one entry per list item above, omitting the ""
# URLs to genome, gene & miRNA annotations
genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style
# e.g. "ftp://ftp.ensembl.org/pub/release-106/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz"
gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style
# e.g. "ftp://ftp.ensembl.org/pub/release-106/gtf/homo_sapiens/Homo_sapiens.GRCh38.106.chr.gtf.gz"
mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style
# e.g. "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3"
# Chromosome name mappings between UCSC <-> Ensembl
# Other organisms available at: https://github.com/dpryan79/ChromosomeMappings
map_chr_url: # FTP/HTTP URL to mapping table
# e.g. "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt"
# Chromosome name mapping parameters:
column: 1 # Column number from input file where to change chromosome name
delimiter: "TAB" # Delimiter of the input file
...
#!/bin/bash
# Integration test: run the "prepare" Snakemake workflow locally with
# Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "prepare" Snakemake workflow on a Slurm cluster
# with Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster/org/pre
mkdir -p logs/local/org/pre
mkdir -p results/org/pre

# Run workflow
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/prepare/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
{
"__default__" :
{
"queue": "6hours",
"time": "05:00:00",
"threads": "1",
"mem": "4G"
},
"sort_alignment":
{
"mem":"{resources.mem}G",
"threads":"{resources.threads}"
}
}
---
#### GLOBAL PARAMETERS ####
# Directories
# Usually there is no need to change these
output_dir: "results"
scripts_dir: "../../../scripts"
local_log: "logs/local"
cluster_log: "logs/cluster"
# Types of miRNAs to quantify
# Remove miRNA types you are not interested in
mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"]
# Resources: miR annotations, chromosome name mappings
# All of these are produced by the "prepare" workflow
mirnas_anno: "path/to/mirna_filtered.bed"
isomirs_anno: "path/to/isomirs_annotation.bed"
# Inputs information
input_dir: "path/to/input_directory"
sample: ["sample_1", "sample_2"] # put all samples, separated by comma
...
#!/bin/bash
# Integration test: run the "quantify" Snakemake workflow locally with
# Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run workflow
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../../../" \
    --cores=4 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the "quantify" Snakemake workflow on a Slurm cluster
# with Singularity, then generate an HTML report.

# Tear down environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml
mkdir -p logs/cluster
mkdir -p logs/local
mkdir -p results

# Run workflow
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --use-singularity \
    --singularity-args="--no-home --bind ${PWD}/../../../" \
    --jobscript="../../../jobscript.sh" \
    --jobs=20 \
    --cores=256 \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../../workflow/quantify/Snakefile" \
    --configfile="config.yaml" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the full pipeline Snakefile locally with Singularity,
# then generate an HTML report.

# Tear down test environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Run test
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --cores 4 \
    --use-singularity \
    --singularity-args "--bind ${PWD}/../" \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --report="snakemake_report.html"
#!/bin/bash
# Integration test: run the full pipeline Snakefile on a Slurm cluster with
# Singularity, then generate an HTML report.

# Tear down test environment
cleanup () {
    rc=$?
    cd "$user_dir"
    echo "Exit status: $rc"
}
trap cleanup EXIT

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir="$PWD"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
cd "$script_dir"

# Have to match directories indicated in config.yaml files
mkdir -p logs/cluster/{homo_sapiens/chrY,test_lib}
mkdir -p logs/local/{homo_sapiens/chrY,test_lib}
mkdir -p results/{homo_sapiens/chrY,test_lib}

# Run test
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --cores=256 \
    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
        --qos={cluster.queue} \
        --time={cluster.time} \
        --export=JOB_NAME={rule} \
        -o {params.cluster_log} \
        -p scicore \
        --open-mode=append" \
    --jobscript="../../jobscript.sh" \
    --jobs=20 \
    --use-singularity \
    --singularity-args="--bind ${PWD}/../" \
    --printshellcmds \
    --rerun-incomplete \
    --verbose

# Snakemake report
snakemake \
    --snakefile="../../workflow/Snakefile" \
    --report="snakemake_report.html"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment