From 0348cdc3f8152605f4a9e5eaed7c34b063869bf5 Mon Sep 17 00:00:00 2001 From: Alex Kanitz <alexander.kanitz@unibas.ch> Date: Fri, 8 Jul 2022 14:32:54 +0000 Subject: [PATCH] feat: merge quantification pipeline --- README.md | 87 +- RUNS/JOB/map/config.yaml | 13 +- RUNS/JOB/map/run_workflow_local.sh | 10 +- RUNS/JOB/map/run_workflow_slurm.sh | 27 +- RUNS/JOB/prepare/config.yaml | 31 +- RUNS/JOB/prepare/run_workflow_slurm.sh | 12 +- RUNS/JOB/quantify/cluster.json | 15 + RUNS/JOB/quantify/config.yaml | 23 + RUNS/JOB/quantify/run_workflow_local.sh | 34 + RUNS/JOB/quantify/run_workflow_slurm.sh | 50 + environment.root.yml | 6 +- environment.yml | 6 +- images/rule_graph_map.svg | 70 +- images/rule_graph_prepare.svg | 62 +- images/rule_graph_quantify.svg | 97 ++ images/workflow_dag_map.svg | 58 +- images/workflow_dag_prepare.svg | 76 +- images/workflow_dag_quantify.svg | 228 ++++ scripts/merge_tables.R | 134 +++ scripts/mirna_quantification.py | 164 +++ test/cluster_map.json | 67 -- test/cluster_prepare.json | 23 - test/config_map.yaml | 9 +- test/config_prepare.yaml | 14 +- test/config_quantify.yaml | 23 + test/expected_output.files | 47 - test/expected_output.md5 | 85 +- test/test_cleanup.sh | 3 + test/test_dag.sh | 10 + test/test_rule_graph.sh | 10 + test/test_workflow_local.sh | 20 +- test/test_workflow_slurm.sh | 35 +- workflow/map/Snakefile | 1450 ++++++++++++++--------- workflow/prepare/Snakefile | 668 ++++++----- workflow/quantify/Snakefile | 317 +++++ 35 files changed, 2743 insertions(+), 1241 deletions(-) create mode 100644 RUNS/JOB/quantify/cluster.json create mode 100644 RUNS/JOB/quantify/config.yaml create mode 100755 RUNS/JOB/quantify/run_workflow_local.sh create mode 100755 RUNS/JOB/quantify/run_workflow_slurm.sh create mode 100644 images/rule_graph_quantify.svg create mode 100644 images/workflow_dag_quantify.svg create mode 100755 scripts/merge_tables.R create mode 100755 scripts/mirna_quantification.py delete mode 100644 test/cluster_map.json delete mode 
100644 test/cluster_prepare.json create mode 100644 test/config_quantify.yaml delete mode 100644 test/expected_output.files create mode 100644 workflow/quantify/Snakefile diff --git a/README.md b/README.md index 89cce75..5c2e815 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,16 @@ workflow: #### _QUANTIFY_ -Coming soon... +The third and final workflow, **_QUANTIFY_**, quantifies miRNA expression by +intersecting the alignments from the **_MAP_** workflow with the annotations +generated in the **_PREPARE_** workflow. Intersections are computed with +[`bedtools`][bedtools] for one or more of mature miRNAs, primary transcripts +and isomiRs. Reads consistent with each miRNA are counted and tabulated. + +The scheme below is a visual representation of an example run of the +**_QUANTIFY_** workflow: + +> ![rule-graph-quantify][rule-graph-quantify] ### Running the workflows @@ -209,8 +218,7 @@ workflow. These contain notes on how to fill in each parameter. ```yaml --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS ##### # Directories # Usually there is no need to change these @@ -221,28 +229,26 @@ cluster_log: "logs/cluster" # Isomirs annotation file # Number of base pairs to add/substract from 5' (start) and 3' (end) coordinates. 
-bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts -bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts +bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts +bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts -# List of "organism/prefix" identifiers -organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] +# List of inputs +organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] -################### PARAMETERS SPECIFIC TO ORGANISM VERSION ################### - -org/pre: # One section for each list item in "organism"; names have to match precisely +#### PARAMETERS SPECIFIC TO INPUTS #### +org/pre: # One section for each list item in "organism"; names have to match precisely # URLs to genome, gene & miRNA annotations - genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style - gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style - mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style + genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style + gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style + mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style # Chromosome name mappings between UCSC <-> Ensembl - # Available at: https://github.com/dpryan79/ChromosomeMappings; e.g., `GRCh38_UCSC2ensembl.txt` - map_chr_url: # FTP/HTTP URL to mapping table + # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings + map_chr_url: # FTP/HTTP URL to mapping table # Chromosome name mapping parameters: - column: 1 # Column number from input file where to change chromosome name - delimiter: "TAB" # Delimiter of the input file - + column: 1 # Column number from input file where to 
change chromosome name + delimiter: "TAB" # Delimiter of the input file ... ``` @@ -258,8 +264,7 @@ org/pre: # One section for each list item in "organism"; names have to match pr ```yaml --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS #### # Directories # Usually there is no need to change these @@ -292,24 +297,49 @@ max_n: 0 # discard reads containing more than the indicated number of N bases max_length_reads: 30 # maximum length of processed reads to map with oligomap nh: 100 # discard reads with more mappings than the indicated number -# Sample information +# Inputs information input_dir: "path/to/input_directory" -sample: ["sample_1"] # put all samples, separated by comma & without file extension - # (e.g., "sample_1" instead of "sample_1.fa.gz") +sample: ["sample_1", "sample_2"] # put all sample names, separated by comma -######################## PARAMETERS SPECIFIC TO SAMPLE ######################## +#### PARAMETERS SPECIFIC TO INPUTS #### -sample_1: # One section for each list item in "sample"; names have to match precisely +sample_1: # one section per list item in "sample"; names have to match adapter: "XXXXXXXXXXXXXXXXXXXX" # 3' adapter sequence to trim format: "fa" # file format; currently supported: "fa" - ... ``` #### _QUANTIFY_ -Coming soon... 
+**File location:** `RUNS/JOB/quantify/config.yaml` + +```yaml +--- +#### GLOBAL PARAMETERS #### + +# Directories +# Usually there is no need to change these +output_dir: "results" +scripts_dir: "../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# Types of miRNAs to quantify +# Remove miRNA types you are not interested in +mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"] + +# Resources: miRNA and isomiR annotations +# All of these are produced by the "prepare" workflow +mirnas_anno: "path/to/mirna_filtered.bed" +isomirs_anno: "path/to/isomirs_annotation.bed" +# Inputs information +input_dir: "path/to/input_directory" +sample: ["sample_1", "sample_2"] # put all samples, separated by comma +... +``` + +[bedtools]: <https://github.com/arq5x/bedtools2> [conda]: <https://docs.conda.io/projects/conda/en/latest/index.html> [cluster execution]: <https://snakemake.readthedocs.io/en/stable/executing/cluster-cloud.html#cluster-execution> [ensembl]: <https://ensembl.org/> @@ -317,8 +347,9 @@ Coming soon... 
[miniconda-installation]: <https://docs.conda.io/en/latest/miniconda.html> [mirbase]: <https://mirbase.org/> [oligomap]: <https://bio.tools/oligomap> -[rule-graph-prepare]: images/rule_graph_prepare.svg [rule-graph-map]: images/rule_graph_map.svg +[rule-graph-prepare]: images/rule_graph_prepare.svg +[rule-graph-quantify]: images/rule_graph_quantify.svg [segemehl]: <https://www.bioinf.uni-leipzig.de/Software/segemehl/> [singularity]: <https://sylabs.io/singularity/> [slurm]: <https://slurm.schedmd.com/documentation.html> diff --git a/RUNS/JOB/map/config.yaml b/RUNS/JOB/map/config.yaml index bcd4f7b..74d451f 100644 --- a/RUNS/JOB/map/config.yaml +++ b/RUNS/JOB/map/config.yaml @@ -1,6 +1,5 @@ --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS #### # Directories # Usually there is no need to change these @@ -33,15 +32,13 @@ max_n: 0 # discard reads containing more than the indicated number of N bases max_length_reads: 30 # maximum length of processed reads to map with oligomap nh: 100 # discard reads with more mappings than the indicated number -# Sample information +# Inputs information input_dir: "path/to/input_directory" -sample: ["sample_1"] # put all samples, separated by comma & without file extension - # (e.g., "sample_1" instead of "sample_1.fa.gz") +sample: ["sample_1", "sample_2"] # put all sample names, separated by comma -######################## PARAMETERS SPECIFIC TO SAMPLE ######################## +#### PARAMETERS SPECIFIC TO INPUTS #### -sample_1: # One section for each list item in "sample"; names have to match precisely +sample_1: # one section per list item in "sample"; names have to match adapter: "XXXXXXXXXXXXXXXXXXXX" # 3' adapter sequence to trim format: "fa" # file format; currently supported: "fa" - ... 
diff --git a/RUNS/JOB/map/run_workflow_local.sh b/RUNS/JOB/map/run_workflow_local.sh index c31cd6b..4bef4a4 100755 --- a/RUNS/JOB/map/run_workflow_local.sh +++ b/RUNS/JOB/map/run_workflow_local.sh @@ -19,17 +19,17 @@ cd $script_dir # Run workflow snakemake \ - --printshellcmds \ - --snakefile="../snakemake/Snakefile" \ + --snakefile="../../../workflow/map/Snakefile" \ + --configfile="config.yaml" \ --use-singularity \ - --singularity-args "--bind ${PWD}/../" \ + --singularity-args "--bind ${PWD}/../../../" \ --cores=4 \ + --printshellcmds \ --rerun-incomplete \ - --configfile="config.yaml" \ --verbose # Snakemake report snakemake \ - --snakefile="../snakemake/Snakefile" \ + --snakefile="../../../workflow/map/Snakefile" \ --configfile="config.yaml" \ --report="snakemake_report.html" diff --git a/RUNS/JOB/map/run_workflow_slurm.sh b/RUNS/JOB/map/run_workflow_slurm.sh index fcc8b52..7f21a9e 100755 --- a/RUNS/JOB/map/run_workflow_slurm.sh +++ b/RUNS/JOB/map/run_workflow_slurm.sh @@ -13,24 +13,21 @@ trap cleanup EXIT set -eo pipefail # ensures that script exits at first command that exits with non-zero status set -u # ensures that script exits when unset variables are used set -x # facilitates debugging by printing out executed commands -mkdir -p logs/cluster -mkdir -p logs/local -mkdir -p results/ user_dir=$PWD script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir +# Have to match directories indicated in config.yaml +mkdir -p logs/cluster +mkdir -p logs/local +mkdir -p results + + # Run workflow snakemake \ - --snakefile="../snakemake/Snakefile" \ + --snakefile="../../../workflow/map/Snakefile" \ --configfile="config.yaml" \ - --cluster-config="../cluster.json" \ - --cores=256 \ - --jobscript="../jobscript.sh" \ - --printshellcmds \ - --rerun-incomplete \ - --use-singularity \ - --singularity-args="--no-home --bind ${PWD}/../" \ + --cluster-config="cluster.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ 
--mem={cluster.mem} \ @@ -40,10 +37,16 @@ snakemake \ -o {params.cluster_log} \ -p scicore \ --open-mode=append" \ + --use-singularity \ + --singularity-args="--no-home --bind ${PWD}/../../../" \ + --jobscript="../../../jobscript.sh" \ + --cores=256 \ + --printshellcmds \ + --rerun-incomplete \ --verbose # Snakemake report snakemake \ - --snakefile="../snakemake/Snakefile" \ + --snakefile="../../../workflow/map/Snakefile" \ --configfile="config.yaml" \ --report="snakemake_report.html" diff --git a/RUNS/JOB/prepare/config.yaml b/RUNS/JOB/prepare/config.yaml index 81a4fbb..3e1a5bb 100644 --- a/RUNS/JOB/prepare/config.yaml +++ b/RUNS/JOB/prepare/config.yaml @@ -1,6 +1,5 @@ --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS ##### # Directories # Usually there is no need to change these @@ -11,26 +10,24 @@ cluster_log: "logs/cluster" # Isomirs annotation file # Number of base pairs to add/substract from 5' (start) and 3' (end) coordinates. 
-bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts -bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts - -# List of "organism/prefix" identifiers -organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] +bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts +bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts -################### PARAMETERS SPECIFIC TO ORGANISM VERSION ################### +# List of inputs +organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] -org/pre: # One section for each list item in "organism"; names have to match precisely +#### PARAMETERS SPECIFIC TO INPUTS #### +org/pre: # One section for each list item in "organism"; names have to match precisely # URLs to genome, gene & miRNA annotations - genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style - gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style - mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style + genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style + gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style + mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style # Chromosome name mappings between UCSC <-> Ensembl - # Available at: https://github.com/dpryan79/ChromosomeMappings; e.g., `GRCh38_UCSC2ensembl.txt` - map_chr_url: # FTP/HTTP URL to mapping table + # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings + map_chr_url: # FTP/HTTP URL to mapping table # Chromosome name mapping parameters: - column: 1 # Column number from input file where to change chromosome name - delimiter: "TAB" # Delimiter of the input file - + column: 1 # Column number from input file where to 
change chromosome name + delimiter: "TAB" # Delimiter of the input file ... diff --git a/RUNS/JOB/prepare/run_workflow_slurm.sh b/RUNS/JOB/prepare/run_workflow_slurm.sh index d5d9390..f9f1f5a 100755 --- a/RUNS/JOB/prepare/run_workflow_slurm.sh +++ b/RUNS/JOB/prepare/run_workflow_slurm.sh @@ -12,17 +12,15 @@ trap cleanup EXIT set -eo pipefail # ensures that script exits at first command that exits with non-zero status set -u # ensures that script exits when unset variables are used set -x # facilitates debugging by printing out executed commands - -#### CHANGE PATHS WITH YOUR ORGANISM AND PREFIX_NAME #### -mkdir -p logs/cluster/ORGANISM/PREFIX_NAME -mkdir -p logs/local/ORGANISM/PREFIX_NAME -mkdir -p results/ORGANISM/PREFIX_NAME -######################################################### - user_dir=$PWD script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir +# Have to match directories indicated in config.yaml +mkdir -p logs/cluster +mkdir -p logs/local +mkdir -p results + # Run workflow snakemake \ --snakefile="../../../workflow/prepare/Snakefile" \ diff --git a/RUNS/JOB/quantify/cluster.json b/RUNS/JOB/quantify/cluster.json new file mode 100644 index 0000000..ef1ea5b --- /dev/null +++ b/RUNS/JOB/quantify/cluster.json @@ -0,0 +1,15 @@ +{ + "__default__" : + { + "queue": "6hours", + "time": "05:00:00", + "threads": "1", + "mem": "4G" + }, + + "sortaligment": + { + "mem":"{resources.mem}G", + "threads":"{resources.threads}" + } +} \ No newline at end of file diff --git a/RUNS/JOB/quantify/config.yaml b/RUNS/JOB/quantify/config.yaml new file mode 100644 index 0000000..73b729f --- /dev/null +++ b/RUNS/JOB/quantify/config.yaml @@ -0,0 +1,23 @@ +--- +#### GLOBAL PARAMETERS #### + +# Directories +# Usually there is no need to change these +output_dir: "results" +scripts_dir: "../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# Types of miRNAs to quantify +# Remove miRNA types you are not interested in +mir_list: 
["miRNA", "miRNA_primary_transcript", "isomirs"] + +# Resources: miR annotations, chromosome name mappings +# All of these are produced by the "prepare" workflow +mirnas_anno: "path/to/mirna_filtered.bed" +isomirs_anno: "path/to/isomirs_annotation.bed" + +# Inputs information +input_dir: "path/to/input_directory" +sample: ["sample_1", "sample_2"] # put all samples, separated by comma +... diff --git a/RUNS/JOB/quantify/run_workflow_local.sh b/RUNS/JOB/quantify/run_workflow_local.sh new file mode 100755 index 0000000..630b19d --- /dev/null +++ b/RUNS/JOB/quantify/run_workflow_local.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Tear down environment +cleanup () { + rc=$? + cd $user_dir + echo "Exit status: $rc" +} +trap cleanup EXIT + +# Set up test environment +set -eo pipefail # ensures that script exits at first command that exits with non-zero status +set -u # ensures that script exits when unset variables are used +set -x # facilitates debugging by printing out executed commands +user_dir=$PWD +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +cd $script_dir + +# Run workflow +snakemake \ + --snakefile="../../../workflow/quantify/Snakefile" \ + --configfile="config.yaml" \ + --use-singularity \ + --singularity-args "--bind ${PWD}/../../../" \ + --cores=4 \ + --printshellcmds \ + --rerun-incomplete \ + --verbose + +# Snakemake report +snakemake \ + --snakefile="../../../workflow/quantify/Snakefile" \ + --configfile="config.yaml" \ + --report="snakemake_report.html" diff --git a/RUNS/JOB/quantify/run_workflow_slurm.sh b/RUNS/JOB/quantify/run_workflow_slurm.sh new file mode 100755 index 0000000..15fd9d3 --- /dev/null +++ b/RUNS/JOB/quantify/run_workflow_slurm.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Tear down environment +cleanup () { + rc=$? 
+ cd $user_dir + echo "Exit status: $rc" +} +trap cleanup EXIT + +# Set up test environment +set -eo pipefail # ensures that script exits at first command that exits with non-zero status +set -u # ensures that script exits when unset variables are used +set -x # facilitates debugging by printing out executed commands +user_dir=$PWD +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +cd $script_dir + +# Have to match directories indicated in config.yaml +mkdir -p logs/cluster +mkdir -p logs/local +mkdir -p results + +# Run workflow +snakemake \ + --snakefile="../../../workflow/quantify/Snakefile" \ + --configfile="config.yaml" \ + --cluster-config="cluster.json" \ + --cluster "sbatch \ + --cpus-per-task={cluster.threads} \ + --mem={cluster.mem} \ + --qos={cluster.queue} \ + --time={cluster.time} \ + --export=JOB_NAME={rule} \ + -o {params.cluster_log} \ + -p scicore \ + --open-mode=append" \ + --use-singularity \ + --singularity-args="--no-home --bind ${PWD}/../../../" \ + --jobscript="../../../jobscript.sh" \ + --cores=256 \ + --printshellcmds \ + --rerun-incomplete \ + --verbose + +# Snakemake report +snakemake \ + --snakefile="../../../workflow/quantify/Snakefile" \ + --configfile="config.yaml" \ + --report="snakemake_report.html" diff --git a/environment.root.yml b/environment.root.yml index 3156841..5a2af9b 100644 --- a/environment.root.yml +++ b/environment.root.yml @@ -1,11 +1,15 @@ name: mirflowz channels: - bioconda - - defaults - conda-forge + - defaults dependencies: - graphviz=2.40.1 + - jinja2=2.11.2 + - networkx=2.5 + - pygments=2.6.1 - python=3.7.4 - singularity=3.5.2 - snakemake=6.10.0 - unzip=6.0 + - wget==1.20.1 diff --git a/environment.yml b/environment.yml index f80ec00..d261e1c 100644 --- a/environment.yml +++ b/environment.yml @@ -1,10 +1,14 @@ name: mirflowz channels: - bioconda - - defaults - conda-forge + - defaults dependencies: - graphviz=2.40.1 + - jinja2=2.11.2 + - networkx=2.5 + - pygments=2.6.1 - python=3.7.4 - 
snakemake=6.10.0 - unzip=6.0 + - wget==1.20.1 diff --git a/images/rule_graph_map.svg b/images/rule_graph_map.svg index ba79835..3f17429 100644 --- a/images/rule_graph_map.svg +++ b/images/rule_graph_map.svg @@ -12,13 +12,13 @@ <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#56d892" stroke-width="2" d="M356,-36C356,-36 326,-36 326,-36 320,-36 314,-30 314,-24 314,-24 314,-12 314,-12 314,-6 320,0 326,0 326,0 356,0 356,0 362,0 368,-6 368,-12 368,-12 368,-24 368,-24 368,-30 362,-36 356,-36"/> +<path fill="none" stroke="#afd856" stroke-width="2" d="M356,-36C356,-36 326,-36 326,-36 320,-36 314,-30 314,-24 314,-24 314,-12 314,-12 314,-6 320,0 326,0 326,0 356,0 356,0 362,0 368,-6 368,-12 368,-12 368,-24 368,-24 368,-30 362,-36 356,-36"/> <text text-anchor="middle" x="341" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#80d856" stroke-width="2" d="M364.5,-108C364.5,-108 317.5,-108 317.5,-108 311.5,-108 305.5,-102 305.5,-96 305.5,-96 305.5,-84 305.5,-84 305.5,-78 311.5,-72 317.5,-72 317.5,-72 364.5,-72 364.5,-72 370.5,-72 376.5,-78 376.5,-84 376.5,-84 376.5,-96 376.5,-96 376.5,-102 370.5,-108 364.5,-108"/> +<path fill="none" stroke="#568ad8" stroke-width="2" d="M364.5,-108C364.5,-108 317.5,-108 317.5,-108 311.5,-108 305.5,-102 305.5,-96 305.5,-96 305.5,-84 305.5,-84 305.5,-78 311.5,-72 317.5,-72 317.5,-72 364.5,-72 364.5,-72 370.5,-72 376.5,-78 376.5,-84 376.5,-84 376.5,-96 376.5,-96 376.5,-102 370.5,-108 364.5,-108"/> <text text-anchor="middle" x="341" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">index_bam</text> </g> <!-- 1->0 --> @@ -30,7 +30,7 @@ <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M377.5,-180C377.5,-180 304.5,-180 304.5,-180 298.5,-180 292.5,-174 292.5,-168 292.5,-168 292.5,-156 292.5,-156 292.5,-150 298.5,-144 304.5,-144 
304.5,-144 377.5,-144 377.5,-144 383.5,-144 389.5,-150 389.5,-156 389.5,-156 389.5,-168 389.5,-168 389.5,-174 383.5,-180 377.5,-180"/> +<path fill="none" stroke="#d86656" stroke-width="2" d="M377.5,-180C377.5,-180 304.5,-180 304.5,-180 298.5,-180 292.5,-174 292.5,-168 292.5,-168 292.5,-156 292.5,-156 292.5,-150 298.5,-144 304.5,-144 304.5,-144 377.5,-144 377.5,-144 383.5,-144 389.5,-150 389.5,-156 389.5,-156 389.5,-168 389.5,-168 389.5,-174 383.5,-180 377.5,-180"/> <text text-anchor="middle" x="341" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">sort_by_position</text> </g> <!-- 2->1 --> @@ -42,7 +42,7 @@ <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#56d85b" stroke-width="2" d="M376.5,-252C376.5,-252 305.5,-252 305.5,-252 299.5,-252 293.5,-246 293.5,-240 293.5,-240 293.5,-228 293.5,-228 293.5,-222 299.5,-216 305.5,-216 305.5,-216 376.5,-216 376.5,-216 382.5,-216 388.5,-222 388.5,-228 388.5,-228 388.5,-240 388.5,-240 388.5,-246 382.5,-252 376.5,-252"/> +<path fill="none" stroke="#c6d856" stroke-width="2" d="M376.5,-252C376.5,-252 305.5,-252 305.5,-252 299.5,-252 293.5,-246 293.5,-240 293.5,-240 293.5,-228 293.5,-228 293.5,-222 299.5,-216 305.5,-216 305.5,-216 376.5,-216 376.5,-216 382.5,-216 388.5,-222 388.5,-228 388.5,-228 388.5,-240 388.5,-240 388.5,-246 382.5,-252 376.5,-252"/> <text text-anchor="middle" x="341" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">convert_to_bam</text> </g> <!-- 3->2 --> @@ -54,7 +54,7 @@ <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M379.5,-324C379.5,-324 302.5,-324 302.5,-324 296.5,-324 290.5,-318 290.5,-312 290.5,-312 290.5,-300 290.5,-300 290.5,-294 296.5,-288 302.5,-288 302.5,-288 379.5,-288 379.5,-288 385.5,-288 391.5,-294 391.5,-300 391.5,-300 391.5,-312 391.5,-312 391.5,-318 385.5,-324 379.5,-324"/> +<path fill="none" stroke="#56d8c1" stroke-width="2" d="M379.5,-324C379.5,-324 302.5,-324 
302.5,-324 296.5,-324 290.5,-318 290.5,-312 290.5,-312 290.5,-300 290.5,-300 290.5,-294 296.5,-288 302.5,-288 302.5,-288 379.5,-288 379.5,-288 385.5,-288 391.5,-294 391.5,-300 391.5,-300 391.5,-312 391.5,-312 391.5,-318 385.5,-324 379.5,-324"/> <text text-anchor="middle" x="341" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">uncollapse_reads</text> </g> <!-- 4->3 --> @@ -66,7 +66,7 @@ <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#c6d856" stroke-width="2" d="M379,-396C379,-396 303,-396 303,-396 297,-396 291,-390 291,-384 291,-384 291,-372 291,-372 291,-366 297,-360 303,-360 303,-360 379,-360 379,-360 385,-360 391,-366 391,-372 391,-372 391,-384 391,-384 391,-390 385,-396 379,-396"/> +<path fill="none" stroke="#569ad8" stroke-width="2" d="M379,-396C379,-396 303,-396 303,-396 297,-396 291,-390 291,-384 291,-384 291,-372 291,-372 291,-366 297,-360 303,-360 303,-360 379,-360 379,-360 385,-360 391,-366 391,-372 391,-372 391,-384 391,-384 391,-390 385,-396 379,-396"/> <text text-anchor="middle" x="341" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">remove_inferiors</text> </g> <!-- 5->4 --> @@ -78,7 +78,7 @@ <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#61d856" stroke-width="2" d="M356,-468C356,-468 326,-468 326,-468 320,-468 314,-462 314,-456 314,-456 314,-444 314,-444 314,-438 320,-432 326,-432 326,-432 356,-432 356,-432 362,-432 368,-438 368,-444 368,-444 368,-456 368,-456 368,-462 362,-468 356,-468"/> +<path fill="none" stroke="#56d882" stroke-width="2" d="M356,-468C356,-468 326,-468 326,-468 320,-468 314,-462 314,-456 314,-456 314,-444 314,-444 314,-438 320,-432 326,-432 326,-432 356,-432 356,-432 362,-432 368,-438 368,-444 368,-444 368,-456 368,-456 368,-462 362,-468 356,-468"/> <text text-anchor="middle" x="341" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">sort_id</text> </g> <!-- 6->5 --> @@ -90,7 +90,7 @@ <!-- 7 --> <g id="node8" 
class="node"> <title>7</title> -<path fill="none" stroke="#d88556" stroke-width="2" d="M366,-540C366,-540 316,-540 316,-540 310,-540 304,-534 304,-528 304,-528 304,-516 304,-516 304,-510 310,-504 316,-504 316,-504 366,-504 366,-504 372,-504 378,-510 378,-516 378,-516 378,-528 378,-528 378,-534 372,-540 366,-540"/> +<path fill="none" stroke="#9fd856" stroke-width="2" d="M366,-540C366,-540 316,-540 316,-540 310,-540 304,-534 304,-528 304,-528 304,-516 304,-516 304,-510 310,-504 316,-504 316,-504 366,-504 366,-504 372,-504 378,-510 378,-516 378,-516 378,-528 378,-528 378,-534 372,-540 366,-540"/> <text text-anchor="middle" x="341" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">add_header</text> </g> <!-- 7->6 --> @@ -102,7 +102,7 @@ <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#8fd856" stroke-width="2" d="M369,-612C369,-612 313,-612 313,-612 307,-612 301,-606 301,-600 301,-600 301,-588 301,-588 301,-582 307,-576 313,-576 313,-576 369,-576 369,-576 375,-576 381,-582 381,-588 381,-588 381,-600 381,-600 381,-606 375,-612 369,-612"/> +<path fill="none" stroke="#d8ac56" stroke-width="2" d="M369,-612C369,-612 313,-612 313,-612 307,-612 301,-606 301,-600 301,-600 301,-588 301,-588 301,-582 307,-576 313,-576 313,-576 369,-576 369,-576 375,-576 381,-582 381,-588 381,-588 381,-600 381,-600 381,-606 375,-612 369,-612"/> <text text-anchor="middle" x="341" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">cat_mapping</text> </g> <!-- 8->7 --> @@ -114,7 +114,7 @@ <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#d6d856" stroke-width="2" d="M304,-684C304,-684 248,-684 248,-684 242,-684 236,-678 236,-672 236,-672 236,-660 236,-660 236,-654 242,-648 248,-648 248,-648 304,-648 304,-648 310,-648 316,-654 316,-660 316,-660 316,-672 316,-672 316,-678 310,-684 304,-684"/> +<path fill="none" stroke="#d87556" stroke-width="2" d="M304,-684C304,-684 248,-684 248,-684 242,-684 236,-678 236,-672 
236,-672 236,-660 236,-660 236,-654 242,-648 248,-648 248,-648 304,-648 304,-648 310,-648 316,-654 316,-660 316,-660 316,-672 316,-672 316,-678 310,-684 304,-684"/> <text text-anchor="middle" x="276" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">trans_to_gen</text> </g> <!-- 9->8 --> @@ -126,7 +126,7 @@ <!-- 10 --> <g id="node11" class="node"> <title>10</title> -<path fill="none" stroke="#56b9d8" stroke-width="2" d="M349,-756C349,-756 201,-756 201,-756 195,-756 189,-750 189,-744 189,-744 189,-732 189,-732 189,-726 195,-720 201,-720 201,-720 349,-720 349,-720 355,-720 361,-726 361,-732 361,-732 361,-744 361,-744 361,-750 355,-756 349,-756"/> +<path fill="none" stroke="#56d85b" stroke-width="2" d="M349,-756C349,-756 201,-756 201,-756 195,-756 189,-750 189,-744 189,-744 189,-732 189,-732 189,-726 195,-720 201,-720 201,-720 349,-720 349,-720 355,-720 361,-726 361,-732 361,-732 361,-744 361,-744 361,-750 355,-756 349,-756"/> <text text-anchor="middle" x="275" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">remove_headers_transcriptome</text> </g> <!-- 10->9 --> @@ -138,7 +138,7 @@ <!-- 11 --> <g id="node12" class="node"> <title>11</title> -<path fill="none" stroke="#56c9d8" stroke-width="2" d="M326,-828C326,-828 220,-828 220,-828 214,-828 208,-822 208,-816 208,-816 208,-804 208,-804 208,-798 214,-792 220,-792 220,-792 326,-792 326,-792 332,-792 338,-798 338,-804 338,-804 338,-816 338,-816 338,-822 332,-828 326,-828"/> +<path fill="none" stroke="#567bd8" stroke-width="2" d="M326,-828C326,-828 220,-828 220,-828 214,-828 208,-822 208,-816 208,-816 208,-804 208,-804 208,-798 214,-792 220,-792 220,-792 326,-792 326,-792 332,-792 338,-798 338,-804 338,-804 338,-816 338,-816 338,-822 332,-828 326,-828"/> <text text-anchor="middle" x="273" y="-807.5" font-family="sans" font-size="10.00" fill="#000000">filter_nh_transcriptome</text> </g> <!-- 11->10 --> @@ -150,7 +150,7 @@ <!-- 12 --> <g id="node13" class="node"> <title>12</title> -<path 
fill="none" stroke="#56d8a2" stroke-width="2" d="M317,-900C317,-900 187,-900 187,-900 181,-900 175,-894 175,-888 175,-888 175,-876 175,-876 175,-870 181,-864 187,-864 187,-864 317,-864 317,-864 323,-864 329,-870 329,-876 329,-876 329,-888 329,-888 329,-894 323,-900 317,-900"/> +<path fill="none" stroke="#d6d856" stroke-width="2" d="M317,-900C317,-900 187,-900 187,-900 181,-900 175,-894 175,-888 175,-888 175,-876 175,-876 175,-870 181,-864 187,-864 187,-864 317,-864 317,-864 323,-864 329,-870 329,-876 329,-876 329,-888 329,-888 329,-894 323,-900 317,-900"/> <text text-anchor="middle" x="252" y="-879.5" font-family="sans" font-size="10.00" fill="#000000">merge_transcriptome_maps</text> </g> <!-- 12->11 --> @@ -162,11 +162,11 @@ <!-- 13 --> <g id="node14" class="node"> <title>13</title> -<path fill="none" stroke="#56d873" stroke-width="2" d="M174,-1116C174,-1116 12,-1116 12,-1116 6,-1116 0,-1110 0,-1104 0,-1104 0,-1092 0,-1092 0,-1086 6,-1080 12,-1080 12,-1080 174,-1080 174,-1080 180,-1080 186,-1086 186,-1092 186,-1092 186,-1104 186,-1104 186,-1110 180,-1116 174,-1116"/> +<path fill="none" stroke="#d8bc56" stroke-width="2" d="M174,-1116C174,-1116 12,-1116 12,-1116 6,-1116 0,-1110 0,-1104 0,-1104 0,-1092 0,-1092 0,-1086 6,-1080 12,-1080 12,-1080 174,-1080 174,-1080 180,-1080 186,-1086 186,-1092 186,-1092 186,-1104 186,-1104 186,-1110 180,-1116 174,-1116"/> <text text-anchor="middle" x="93" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_transcriptome_segemehl</text> </g> <!-- 13->12 --> -<g id="edge14" class="edge"> +<g id="edge15" class="edge"> <title>13->12</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M96.0562,-1079.8994C102.321,-1047.6053 119.327,-979.3819 157,-936 168.0222,-923.3075 182.7958,-912.932 197.4032,-904.7901"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="199.2392,-907.7785 206.442,-900.0086 195.9659,-901.5909 199.2392,-907.7785"/> @@ -174,7 +174,7 @@ <!-- 14 --> <g id="node15" class="node"> 
<title>14</title> -<path fill="none" stroke="#afd856" stroke-width="2" d="M361.5,-1260C361.5,-1260 294.5,-1260 294.5,-1260 288.5,-1260 282.5,-1254 282.5,-1248 282.5,-1248 282.5,-1236 282.5,-1236 282.5,-1230 288.5,-1224 294.5,-1224 294.5,-1224 361.5,-1224 361.5,-1224 367.5,-1224 373.5,-1230 373.5,-1236 373.5,-1236 373.5,-1248 373.5,-1248 373.5,-1254 367.5,-1260 361.5,-1260"/> +<path fill="none" stroke="#d85656" stroke-width="2" d="M361.5,-1260C361.5,-1260 294.5,-1260 294.5,-1260 288.5,-1260 282.5,-1254 282.5,-1248 282.5,-1248 282.5,-1236 282.5,-1236 282.5,-1230 288.5,-1224 294.5,-1224 294.5,-1224 361.5,-1224 361.5,-1224 367.5,-1224 373.5,-1230 373.5,-1236 373.5,-1236 373.5,-1248 373.5,-1248 373.5,-1254 367.5,-1260 361.5,-1260"/> <text text-anchor="middle" x="328" y="-1239.5" font-family="sans" font-size="10.00" fill="#000000">fastx_collapser</text> </g> <!-- 14->13 --> @@ -186,7 +186,7 @@ <!-- 20 --> <g id="node21" class="node"> <title>20</title> -<path fill="none" stroke="#569ad8" stroke-width="2" d="M385.5,-1188C385.5,-1188 270.5,-1188 270.5,-1188 264.5,-1188 258.5,-1182 258.5,-1176 258.5,-1176 258.5,-1164 258.5,-1164 258.5,-1158 264.5,-1152 270.5,-1152 270.5,-1152 385.5,-1152 385.5,-1152 391.5,-1152 397.5,-1158 397.5,-1164 397.5,-1164 397.5,-1176 397.5,-1176 397.5,-1182 391.5,-1188 385.5,-1188"/> +<path fill="none" stroke="#d8cb56" stroke-width="2" d="M385.5,-1188C385.5,-1188 270.5,-1188 270.5,-1188 264.5,-1188 258.5,-1182 258.5,-1176 258.5,-1176 258.5,-1164 258.5,-1164 258.5,-1158 264.5,-1152 270.5,-1152 270.5,-1152 385.5,-1152 385.5,-1152 391.5,-1152 397.5,-1158 397.5,-1164 397.5,-1164 397.5,-1176 397.5,-1176 397.5,-1182 391.5,-1188 385.5,-1188"/> <text text-anchor="middle" x="328" y="-1167.5" font-family="sans" font-size="10.00" fill="#000000">filter_fasta_for_oligomap</text> </g> <!-- 14->20 --> @@ -198,7 +198,7 @@ <!-- 25 --> <g id="node26" class="node"> <title>25</title> -<path fill="none" stroke="#567bd8" stroke-width="2" d="M626,-1188C626,-1188 492,-1188 
492,-1188 486,-1188 480,-1182 480,-1176 480,-1176 480,-1164 480,-1164 480,-1158 486,-1152 492,-1152 492,-1152 626,-1152 626,-1152 632,-1152 638,-1158 638,-1164 638,-1164 638,-1176 638,-1176 638,-1182 632,-1188 626,-1188"/> +<path fill="none" stroke="#56a9d8" stroke-width="2" d="M626,-1188C626,-1188 492,-1188 492,-1188 486,-1188 480,-1182 480,-1176 480,-1176 480,-1164 480,-1164 480,-1158 486,-1152 492,-1152 492,-1152 626,-1152 626,-1152 632,-1152 638,-1158 638,-1164 638,-1164 638,-1176 638,-1176 638,-1182 632,-1188 626,-1188"/> <text text-anchor="middle" x="559" y="-1167.5" font-family="sans" font-size="10.00" fill="#000000">mapping_genome_segemehl</text> </g> <!-- 14->25 --> @@ -210,7 +210,7 @@ <!-- 15 --> <g id="node16" class="node"> <title>15</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M345.5,-1332C345.5,-1332 310.5,-1332 310.5,-1332 304.5,-1332 298.5,-1326 298.5,-1320 298.5,-1320 298.5,-1308 298.5,-1308 298.5,-1302 304.5,-1296 310.5,-1296 310.5,-1296 345.5,-1296 345.5,-1296 351.5,-1296 357.5,-1302 357.5,-1308 357.5,-1308 357.5,-1320 357.5,-1320 357.5,-1326 351.5,-1332 345.5,-1332"/> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M345.5,-1332C345.5,-1332 310.5,-1332 310.5,-1332 304.5,-1332 298.5,-1326 298.5,-1320 298.5,-1320 298.5,-1308 298.5,-1308 298.5,-1302 304.5,-1296 310.5,-1296 310.5,-1296 345.5,-1296 345.5,-1296 351.5,-1296 357.5,-1302 357.5,-1308 357.5,-1308 357.5,-1320 357.5,-1320 357.5,-1326 351.5,-1332 345.5,-1332"/> <text text-anchor="middle" x="328" y="-1311.5" font-family="sans" font-size="10.00" fill="#000000">cutadapt</text> </g> <!-- 15->14 --> @@ -222,7 +222,7 @@ <!-- 16 --> <g id="node17" class="node"> <title>16</title> -<path fill="none" stroke="#56a9d8" stroke-width="2" d="M362.5,-1404C362.5,-1404 293.5,-1404 293.5,-1404 287.5,-1404 281.5,-1398 281.5,-1392 281.5,-1392 281.5,-1380 281.5,-1380 281.5,-1374 287.5,-1368 293.5,-1368 293.5,-1368 362.5,-1368 362.5,-1368 368.5,-1368 374.5,-1374 374.5,-1380 374.5,-1380 
374.5,-1392 374.5,-1392 374.5,-1398 368.5,-1404 362.5,-1404"/> +<path fill="none" stroke="#d88556" stroke-width="2" d="M362.5,-1404C362.5,-1404 293.5,-1404 293.5,-1404 287.5,-1404 281.5,-1398 281.5,-1392 281.5,-1392 281.5,-1380 281.5,-1380 281.5,-1374 287.5,-1368 293.5,-1368 293.5,-1368 362.5,-1368 362.5,-1368 368.5,-1368 374.5,-1374 374.5,-1380 374.5,-1380 374.5,-1392 374.5,-1392 374.5,-1398 368.5,-1404 362.5,-1404"/> <text text-anchor="middle" x="328" y="-1383.5" font-family="sans" font-size="10.00" fill="#000000">fasta_formatter</text> </g> <!-- 16->15 --> @@ -234,7 +234,7 @@ <!-- 17 --> <g id="node18" class="node"> <title>17</title> -<path fill="none" stroke="#d86656" stroke-width="2" d="M385.5,-1476C385.5,-1476 270.5,-1476 270.5,-1476 264.5,-1476 258.5,-1470 258.5,-1464 258.5,-1464 258.5,-1452 258.5,-1452 258.5,-1446 264.5,-1440 270.5,-1440 270.5,-1440 385.5,-1440 385.5,-1440 391.5,-1440 397.5,-1446 397.5,-1452 397.5,-1452 397.5,-1464 397.5,-1464 397.5,-1470 391.5,-1476 385.5,-1476"/> +<path fill="none" stroke="#56d8d0" stroke-width="2" d="M385.5,-1476C385.5,-1476 270.5,-1476 270.5,-1476 264.5,-1476 258.5,-1470 258.5,-1464 258.5,-1464 258.5,-1452 258.5,-1452 258.5,-1446 264.5,-1440 270.5,-1440 270.5,-1440 385.5,-1440 385.5,-1440 391.5,-1440 397.5,-1446 397.5,-1452 397.5,-1452 397.5,-1464 397.5,-1464 397.5,-1470 391.5,-1476 385.5,-1476"/> <text text-anchor="middle" x="328" y="-1455.5" font-family="sans" font-size="10.00" fill="#000000">uncompress_zipped_files</text> </g> <!-- 17->16 --> @@ -246,11 +246,11 @@ <!-- 18 --> <g id="node19" class="node"> <title>18</title> -<path fill="none" stroke="#d8ac56" stroke-width="2" d="M326,-972C326,-972 178,-972 178,-972 172,-972 166,-966 166,-960 166,-960 166,-948 166,-948 166,-942 172,-936 178,-936 178,-936 326,-936 326,-936 332,-936 338,-942 338,-948 338,-948 338,-960 338,-960 338,-966 332,-972 326,-972"/> +<path fill="none" stroke="#56d8b1" stroke-width="2" d="M326,-972C326,-972 178,-972 178,-972 172,-972 166,-966 
166,-960 166,-960 166,-948 166,-948 166,-942 172,-936 178,-936 178,-936 326,-936 326,-936 332,-936 338,-942 338,-948 338,-948 338,-960 338,-960 338,-966 332,-972 326,-972"/> <text text-anchor="middle" x="252" y="-951.5" font-family="sans" font-size="10.00" fill="#000000">oligomap_transcriptome_toSAM</text> </g> <!-- 18->12 --> -<g id="edge15" class="edge"> +<g id="edge14" class="edge"> <title>18->12</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M252,-935.8314C252,-928.131 252,-918.9743 252,-910.4166"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="255.5001,-910.4132 252,-900.4133 248.5001,-910.4133 255.5001,-910.4132"/> @@ -258,11 +258,11 @@ <!-- 19 --> <g id="node20" class="node"> <title>19</title> -<path fill="none" stroke="#56d8d0" stroke-width="2" d="M376,-1116C376,-1116 216,-1116 216,-1116 210,-1116 204,-1110 204,-1104 204,-1104 204,-1092 204,-1092 204,-1086 210,-1080 216,-1080 216,-1080 376,-1080 376,-1080 382,-1080 388,-1086 388,-1092 388,-1092 388,-1104 388,-1104 388,-1110 382,-1116 376,-1116"/> +<path fill="none" stroke="#61d856" stroke-width="2" d="M376,-1116C376,-1116 216,-1116 216,-1116 210,-1116 204,-1110 204,-1104 204,-1104 204,-1092 204,-1092 204,-1086 210,-1080 216,-1080 216,-1080 376,-1080 376,-1080 382,-1080 388,-1086 388,-1092 388,-1092 388,-1104 388,-1104 388,-1110 382,-1116 376,-1116"/> <text text-anchor="middle" x="296" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_transcriptome_oligomap</text> </g> <!-- 19->18 --> -<g id="edge21" class="edge"> +<g id="edge20" class="edge"> <title>19->18</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M246.4713,-1079.9463C230.6746,-1071.5649 214.9778,-1059.8565 206,-1044 193.896,-1022.6221 208.7881,-997.7098 224.7693,-979.5223"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="227.3609,-981.8751 231.6095,-972.1694 222.2357,-977.1072 227.3609,-981.8751"/> @@ -270,7 +270,7 @@ <!-- 21 --> <g id="node22" class="node"> 
<title>21</title> -<path fill="none" stroke="#9fd856" stroke-width="2" d="M363,-1044C363,-1044 227,-1044 227,-1044 221,-1044 215,-1038 215,-1032 215,-1032 215,-1020 215,-1020 215,-1014 221,-1008 227,-1008 227,-1008 363,-1008 363,-1008 369,-1008 375,-1014 375,-1020 375,-1020 375,-1032 375,-1032 375,-1038 369,-1044 363,-1044"/> +<path fill="none" stroke="#8fd856" stroke-width="2" d="M363,-1044C363,-1044 227,-1044 227,-1044 221,-1044 215,-1038 215,-1032 215,-1032 215,-1020 215,-1020 215,-1014 221,-1008 227,-1008 227,-1008 363,-1008 363,-1008 369,-1008 375,-1014 375,-1020 375,-1020 375,-1032 375,-1032 375,-1038 369,-1044 363,-1044"/> <text text-anchor="middle" x="295" y="-1023.5" font-family="sans" font-size="10.00" fill="#000000">sort_transcriptome_oligomap</text> </g> <!-- 19->21 --> @@ -288,7 +288,7 @@ <!-- 27 --> <g id="node28" class="node"> <title>27</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M549.5,-1116C549.5,-1116 418.5,-1116 418.5,-1116 412.5,-1116 406.5,-1110 406.5,-1104 406.5,-1104 406.5,-1092 406.5,-1092 406.5,-1086 412.5,-1080 418.5,-1080 418.5,-1080 549.5,-1080 549.5,-1080 555.5,-1080 561.5,-1086 561.5,-1092 561.5,-1092 561.5,-1104 561.5,-1104 561.5,-1110 555.5,-1116 549.5,-1116"/> +<path fill="none" stroke="#d89c56" stroke-width="2" d="M549.5,-1116C549.5,-1116 418.5,-1116 418.5,-1116 412.5,-1116 406.5,-1110 406.5,-1104 406.5,-1104 406.5,-1092 406.5,-1092 406.5,-1086 412.5,-1080 418.5,-1080 418.5,-1080 549.5,-1080 549.5,-1080 555.5,-1080 561.5,-1086 561.5,-1092 561.5,-1092 561.5,-1104 561.5,-1104 561.5,-1110 555.5,-1116 549.5,-1116"/> <text text-anchor="middle" x="484" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_genome_oligomap</text> </g> <!-- 20->27 --> @@ -298,7 +298,7 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="437.3048,-1123.4065 444.9177,-1116.038 434.3714,-1117.0508 437.3048,-1123.4065"/> </g> <!-- 21->18 --> -<g id="edge20" class="edge"> +<g id="edge21" class="edge"> 
<title>21->18</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M284.1493,-1007.8314C279.2977,-999.7079 273.4783,-989.9637 268.132,-981.0118"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="271.1292,-979.2041 262.9968,-972.4133 265.1194,-982.7933 271.1292,-979.2041"/> @@ -306,7 +306,7 @@ <!-- 22 --> <g id="node23" class="node"> <title>22</title> -<path fill="none" stroke="#56d8b1" stroke-width="2" d="M465.5,-684C465.5,-684 346.5,-684 346.5,-684 340.5,-684 334.5,-678 334.5,-672 334.5,-672 334.5,-660 334.5,-660 334.5,-654 340.5,-648 346.5,-648 346.5,-648 465.5,-648 465.5,-648 471.5,-648 477.5,-654 477.5,-660 477.5,-660 477.5,-672 477.5,-672 477.5,-678 471.5,-684 465.5,-684"/> +<path fill="none" stroke="#56d8a2" stroke-width="2" d="M465.5,-684C465.5,-684 346.5,-684 346.5,-684 340.5,-684 334.5,-678 334.5,-672 334.5,-672 334.5,-660 334.5,-660 334.5,-654 340.5,-648 346.5,-648 346.5,-648 465.5,-648 465.5,-648 471.5,-648 477.5,-654 477.5,-660 477.5,-660 477.5,-672 477.5,-672 477.5,-678 471.5,-684 465.5,-684"/> <text text-anchor="middle" x="406" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">remove_headers_genome</text> </g> <!-- 22->8 --> @@ -318,7 +318,7 @@ <!-- 23 --> <g id="node24" class="node"> <title>23</title> -<path fill="none" stroke="#5663d8" stroke-width="2" d="M455.5,-828C455.5,-828 378.5,-828 378.5,-828 372.5,-828 366.5,-822 366.5,-816 366.5,-816 366.5,-804 366.5,-804 366.5,-798 372.5,-792 378.5,-792 378.5,-792 455.5,-792 455.5,-792 461.5,-792 467.5,-798 467.5,-804 467.5,-804 467.5,-816 467.5,-816 467.5,-822 461.5,-828 455.5,-828"/> +<path fill="none" stroke="#56b9d8" stroke-width="2" d="M455.5,-828C455.5,-828 378.5,-828 378.5,-828 372.5,-828 366.5,-822 366.5,-816 366.5,-816 366.5,-804 366.5,-804 366.5,-798 372.5,-792 378.5,-792 378.5,-792 455.5,-792 455.5,-792 461.5,-792 467.5,-798 467.5,-804 467.5,-804 467.5,-816 467.5,-816 467.5,-822 461.5,-828 455.5,-828"/> <text text-anchor="middle" x="417" y="-807.5" 
font-family="sans" font-size="10.00" fill="#000000">nh_filter_genome</text> </g> <!-- 23->22 --> @@ -330,7 +330,7 @@ <!-- 24 --> <g id="node25" class="node"> <title>24</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" d="M496.5,-900C496.5,-900 395.5,-900 395.5,-900 389.5,-900 383.5,-894 383.5,-888 383.5,-888 383.5,-876 383.5,-876 383.5,-870 389.5,-864 395.5,-864 395.5,-864 496.5,-864 496.5,-864 502.5,-864 508.5,-870 508.5,-876 508.5,-876 508.5,-888 508.5,-888 508.5,-894 502.5,-900 496.5,-900"/> +<path fill="none" stroke="#70d856" stroke-width="2" d="M496.5,-900C496.5,-900 395.5,-900 395.5,-900 389.5,-900 383.5,-894 383.5,-888 383.5,-888 383.5,-876 383.5,-876 383.5,-870 389.5,-864 395.5,-864 395.5,-864 496.5,-864 496.5,-864 502.5,-864 508.5,-870 508.5,-876 508.5,-876 508.5,-888 508.5,-888 508.5,-894 502.5,-900 496.5,-900"/> <text text-anchor="middle" x="446" y="-879.5" font-family="sans" font-size="10.00" fill="#000000">merge_genome_maps</text> </g> <!-- 24->23 --> @@ -340,7 +340,7 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="431.3992,-836.3815 424.4165,-828.4133 424.9061,-838.9968 431.3992,-836.3815"/> </g> <!-- 25->24 --> -<g id="edge28" class="edge"> +<g id="edge27" class="edge"> <title>25->24</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M564.4447,-1151.5968C572.5737,-1121.0938 585.4612,-1058.4231 571,-1008 560.5969,-971.7264 552.2103,-963.1481 526,-936 515.0304,-924.6379 501.2945,-914.3226 488.316,-905.8234"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="489.8165,-902.6309 479.4952,-900.239 486.0721,-908.5453 489.8165,-902.6309"/> @@ -348,11 +348,11 @@ <!-- 26 --> <g id="node27" class="node"> <title>26</title> -<path fill="none" stroke="#d89c56" stroke-width="2" d="M505.5,-972C505.5,-972 386.5,-972 386.5,-972 380.5,-972 374.5,-966 374.5,-960 374.5,-960 374.5,-948 374.5,-948 374.5,-942 380.5,-936 386.5,-936 386.5,-936 505.5,-936 505.5,-936 511.5,-936 517.5,-942 517.5,-948 517.5,-948 
517.5,-960 517.5,-960 517.5,-966 511.5,-972 505.5,-972"/> +<path fill="none" stroke="#56d892" stroke-width="2" d="M505.5,-972C505.5,-972 386.5,-972 386.5,-972 380.5,-972 374.5,-966 374.5,-960 374.5,-960 374.5,-948 374.5,-948 374.5,-942 380.5,-936 386.5,-936 386.5,-936 505.5,-936 505.5,-936 511.5,-936 517.5,-942 517.5,-948 517.5,-948 517.5,-960 517.5,-960 517.5,-966 511.5,-972 505.5,-972"/> <text text-anchor="middle" x="446" y="-951.5" font-family="sans" font-size="10.00" fill="#000000">oligomap_genome_toSAM</text> </g> <!-- 26->24 --> -<g id="edge27" class="edge"> +<g id="edge28" class="edge"> <title>26->24</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M446,-935.8314C446,-928.131 446,-918.9743 446,-910.4166"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="449.5001,-910.4132 446,-900.4133 442.5001,-910.4133 449.5001,-910.4132"/> @@ -366,7 +366,7 @@ <!-- 28 --> <g id="node29" class="node"> <title>28</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M550.5,-1044C550.5,-1044 443.5,-1044 443.5,-1044 437.5,-1044 431.5,-1038 431.5,-1032 431.5,-1032 431.5,-1020 431.5,-1020 431.5,-1014 437.5,-1008 443.5,-1008 443.5,-1008 550.5,-1008 550.5,-1008 556.5,-1008 562.5,-1014 562.5,-1020 562.5,-1020 562.5,-1032 562.5,-1032 562.5,-1038 556.5,-1044 550.5,-1044"/> +<path fill="none" stroke="#80d856" stroke-width="2" d="M550.5,-1044C550.5,-1044 443.5,-1044 443.5,-1044 437.5,-1044 431.5,-1038 431.5,-1032 431.5,-1032 431.5,-1020 431.5,-1020 431.5,-1014 437.5,-1008 443.5,-1008 443.5,-1008 550.5,-1008 550.5,-1008 556.5,-1008 562.5,-1014 562.5,-1020 562.5,-1020 562.5,-1032 562.5,-1032 562.5,-1038 556.5,-1044 550.5,-1044"/> <text text-anchor="middle" x="497" y="-1023.5" font-family="sans" font-size="10.00" fill="#000000">sort_genome_oligomap</text> </g> <!-- 27->28 --> diff --git a/images/rule_graph_prepare.svg b/images/rule_graph_prepare.svg index 16daf3a..e1fb5f0 100644 --- a/images/rule_graph_prepare.svg +++ 
b/images/rule_graph_prepare.svg @@ -12,13 +12,13 @@ <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#ced856" stroke-width="2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> +<path fill="none" stroke="#5682d8" stroke-width="2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> <text text-anchor="middle" x="342.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#56b1d8" stroke-width="2" d="M362,-108C362,-108 167,-108 167,-108 161,-108 155,-102 155,-96 155,-96 155,-84 155,-84 155,-78 161,-72 167,-72 167,-72 362,-72 362,-72 368,-72 374,-78 374,-84 374,-84 374,-96 374,-96 374,-102 368,-108 362,-108"/> +<path fill="none" stroke="#d88556" stroke-width="2" d="M362,-108C362,-108 167,-108 167,-108 161,-108 155,-102 155,-96 155,-96 155,-84 155,-84 155,-78 161,-72 167,-72 167,-72 362,-72 362,-72 368,-72 374,-78 374,-84 374,-84 374,-96 374,-96 374,-102 368,-108 362,-108"/> <text text-anchor="middle" x="264.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> </g> <!-- 1->0 --> @@ -30,7 +30,7 @@ <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" d="M289,-180C289,-180 246,-180 246,-180 240,-180 234,-174 234,-168 234,-168 234,-156 234,-156 234,-150 240,-144 246,-144 246,-144 289,-144 289,-144 295,-144 301,-150 301,-156 301,-156 301,-168 301,-168 301,-174 295,-180 289,-180"/> +<path fill="none" stroke="#56d8b1" stroke-width="2" 
d="M289,-180C289,-180 246,-180 246,-180 240,-180 234,-174 234,-168 234,-168 234,-156 234,-156 234,-150 240,-144 246,-144 246,-144 289,-144 289,-144 295,-144 301,-150 301,-156 301,-156 301,-168 301,-168 301,-174 295,-180 289,-180"/> <text text-anchor="middle" x="267.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> </g> <!-- 2->1 --> @@ -42,7 +42,7 @@ <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#569ad8" stroke-width="2" d="M334.5,-252C334.5,-252 206.5,-252 206.5,-252 200.5,-252 194.5,-246 194.5,-240 194.5,-240 194.5,-228 194.5,-228 194.5,-222 200.5,-216 206.5,-216 206.5,-216 334.5,-216 334.5,-216 340.5,-216 346.5,-222 346.5,-228 346.5,-228 346.5,-240 346.5,-240 346.5,-246 340.5,-252 334.5,-252"/> +<path fill="none" stroke="#d8cb56" stroke-width="2" d="M334.5,-252C334.5,-252 206.5,-252 206.5,-252 200.5,-252 194.5,-246 194.5,-240 194.5,-240 194.5,-228 194.5,-228 194.5,-222 200.5,-216 206.5,-216 206.5,-216 334.5,-216 334.5,-216 340.5,-216 346.5,-222 346.5,-228 346.5,-228 346.5,-240 346.5,-240 346.5,-246 340.5,-252 334.5,-252"/> <text text-anchor="middle" x="270.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> </g> <!-- 3->2 --> @@ -54,7 +54,7 @@ <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#88d856" stroke-width="2" d="M346,-756C346,-756 271,-756 271,-756 265,-756 259,-750 259,-744 259,-744 259,-732 259,-732 259,-726 265,-720 271,-720 271,-720 346,-720 346,-720 352,-720 358,-726 358,-732 358,-732 358,-744 358,-744 358,-750 352,-756 346,-756"/> +<path fill="none" stroke="#569ad8" stroke-width="2" d="M346,-756C346,-756 271,-756 271,-756 265,-756 259,-750 259,-744 259,-744 259,-732 259,-732 259,-726 265,-720 271,-720 271,-720 346,-720 346,-720 352,-720 358,-726 358,-732 358,-732 358,-744 358,-744 358,-750 352,-756 346,-756"/> <text text-anchor="middle" x="308.5" y="-735.5" font-family="sans" font-size="10.00" 
fill="#000000">genome_process</text> </g> <!-- 4->3 --> @@ -66,7 +66,7 @@ <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#5682d8" stroke-width="2" d="M179,-540C179,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 179,-504 179,-504 185,-504 191,-510 191,-516 191,-516 191,-528 191,-528 191,-534 185,-540 179,-540"/> +<path fill="none" stroke="#56b1d8" stroke-width="2" d="M179,-540C179,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 179,-504 179,-504 185,-504 191,-510 191,-516 191,-516 191,-528 191,-528 191,-534 185,-540 179,-540"/> <text text-anchor="middle" x="95.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> </g> <!-- 4->6 --> @@ -78,7 +78,7 @@ <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#56d8c9" stroke-width="2" d="M236,-468C236,-468 129,-468 129,-468 123,-468 117,-462 117,-456 117,-456 117,-444 117,-444 117,-438 123,-432 129,-432 129,-432 236,-432 236,-432 242,-432 248,-438 248,-444 248,-444 248,-456 248,-456 248,-462 242,-468 236,-468"/> +<path fill="none" stroke="#566bd8" stroke-width="2" d="M236,-468C236,-468 129,-468 129,-468 123,-468 117,-462 117,-456 117,-456 117,-444 117,-444 117,-438 123,-432 129,-432 129,-432 236,-432 236,-432 242,-432 248,-438 248,-444 248,-444 248,-456 248,-456 248,-462 242,-468 236,-468"/> <text text-anchor="middle" x="182.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> </g> <!-- 4->9 --> @@ -90,7 +90,7 @@ <!-- 13 --> <g id="node14" class="node"> <title>13</title> -<path fill="none" stroke="#9fd856" stroke-width="2" d="M366.5,-684C366.5,-684 316.5,-684 316.5,-684 310.5,-684 304.5,-678 304.5,-672 304.5,-672 304.5,-660 304.5,-660 304.5,-654 310.5,-648 316.5,-648 316.5,-648 366.5,-648 366.5,-648 372.5,-648 378.5,-654 378.5,-660 378.5,-660 378.5,-672 378.5,-672 378.5,-678 
372.5,-684 366.5,-684"/> +<path fill="none" stroke="#b6d856" stroke-width="2" d="M366.5,-684C366.5,-684 316.5,-684 316.5,-684 310.5,-684 304.5,-678 304.5,-672 304.5,-672 304.5,-660 304.5,-660 304.5,-654 310.5,-648 316.5,-648 316.5,-648 366.5,-648 366.5,-648 372.5,-648 378.5,-654 378.5,-660 378.5,-660 378.5,-672 378.5,-672 378.5,-678 372.5,-684 366.5,-684"/> <text text-anchor="middle" x="341.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">mirna_anno</text> </g> <!-- 4->13 --> @@ -102,7 +102,7 @@ <!-- 14 --> <g id="node15" class="node"> <title>14</title> -<path fill="none" stroke="#d86e56" stroke-width="2" d="M440,-684C440,-684 409,-684 409,-684 403,-684 397,-678 397,-672 397,-672 397,-660 397,-660 397,-654 403,-648 409,-648 409,-648 440,-648 440,-648 446,-648 452,-654 452,-660 452,-660 452,-672 452,-672 452,-678 446,-684 440,-684"/> +<path fill="none" stroke="#d85656" stroke-width="2" d="M440,-684C440,-684 409,-684 409,-684 403,-684 397,-678 397,-672 397,-672 397,-660 397,-660 397,-654 403,-648 409,-648 409,-648 440,-648 440,-648 446,-648 452,-654 452,-660 452,-660 452,-672 452,-672 452,-678 446,-684 440,-684"/> <text text-anchor="middle" x="424.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">dict_chr</text> </g> <!-- 4->14 --> @@ -114,7 +114,7 @@ <!-- 21 --> <g id="node22" class="node"> <title>21</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M592.5,-612C592.5,-612 506.5,-612 506.5,-612 500.5,-612 494.5,-606 494.5,-600 494.5,-600 494.5,-588 494.5,-588 494.5,-582 500.5,-576 506.5,-576 506.5,-576 592.5,-576 592.5,-576 598.5,-576 604.5,-582 604.5,-588 604.5,-588 604.5,-600 604.5,-600 604.5,-606 598.5,-612 592.5,-612"/> +<path fill="none" stroke="#88d856" stroke-width="2" d="M592.5,-612C592.5,-612 506.5,-612 506.5,-612 500.5,-612 494.5,-606 494.5,-600 494.5,-600 494.5,-588 494.5,-588 494.5,-582 500.5,-576 506.5,-576 506.5,-576 592.5,-576 592.5,-576 598.5,-576 604.5,-582 604.5,-588 604.5,-588 604.5,-600 604.5,-600 
604.5,-606 598.5,-612 592.5,-612"/> <text text-anchor="middle" x="549.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">create_index_fasta</text> </g> <!-- 4->21 --> @@ -126,7 +126,7 @@ <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#d89c56" stroke-width="2" d="M409,-324C409,-324 346,-324 346,-324 340,-324 334,-318 334,-312 334,-312 334,-300 334,-300 334,-294 340,-288 346,-288 346,-288 409,-288 409,-288 415,-288 421,-294 421,-300 421,-300 421,-312 421,-312 421,-318 415,-324 409,-324"/> +<path fill="none" stroke="#56d86b" stroke-width="2" d="M409,-324C409,-324 346,-324 346,-324 340,-324 334,-318 334,-312 334,-312 334,-300 334,-300 334,-294 340,-288 346,-288 346,-288 409,-288 409,-288 415,-288 421,-294 421,-300 421,-300 421,-312 421,-312 421,-318 415,-324 409,-324"/> <text text-anchor="middle" x="377.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> </g> <!-- 5->3 --> @@ -138,7 +138,7 @@ <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M438.5,-252C438.5,-252 376.5,-252 376.5,-252 370.5,-252 364.5,-246 364.5,-240 364.5,-240 364.5,-228 364.5,-228 364.5,-222 370.5,-216 376.5,-216 376.5,-216 438.5,-216 438.5,-216 444.5,-216 450.5,-222 450.5,-228 450.5,-228 450.5,-240 450.5,-240 450.5,-246 444.5,-252 438.5,-252"/> +<path fill="none" stroke="#d89c56" stroke-width="2" d="M438.5,-252C438.5,-252 376.5,-252 376.5,-252 370.5,-252 364.5,-246 364.5,-240 364.5,-240 364.5,-228 364.5,-228 364.5,-222 370.5,-216 376.5,-216 376.5,-216 438.5,-216 438.5,-216 444.5,-216 450.5,-222 450.5,-228 450.5,-228 450.5,-240 450.5,-240 450.5,-246 444.5,-252 438.5,-252"/> <text text-anchor="middle" x="407.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> </g> <!-- 5->8 --> @@ -148,7 +148,7 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="399.2124,-262.9902 399.8278,-252.4133 392.7508,-260.2979 
399.2124,-262.9902"/> </g> <!-- 6->0 --> -<g id="edge5" class="edge"> +<g id="edge4" class="edge"> <title>6->0</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M94.0496,-503.8284C91.9993,-476.5296 88.5,-423.3034 88.5,-378 88.5,-378 88.5,-378 88.5,-162 88.5,-119.2408 90.4989,-100.3597 122.5,-72 149.3909,-48.169 249.9753,-30.8466 305.3538,-22.8826"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="306.0511,-26.319 315.466,-21.4601 305.0759,-19.3873 306.0511,-26.319"/> @@ -156,11 +156,11 @@ <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M438.5,-108C438.5,-108 404.5,-108 404.5,-108 398.5,-108 392.5,-102 392.5,-96 392.5,-96 392.5,-84 392.5,-84 392.5,-78 398.5,-72 404.5,-72 404.5,-72 438.5,-72 438.5,-72 444.5,-72 450.5,-78 450.5,-84 450.5,-84 450.5,-96 450.5,-96 450.5,-102 444.5,-108 438.5,-108"/> +<path fill="none" stroke="#56d8c9" stroke-width="2" d="M438.5,-108C438.5,-108 404.5,-108 404.5,-108 398.5,-108 392.5,-102 392.5,-96 392.5,-96 392.5,-84 392.5,-84 392.5,-78 398.5,-72 404.5,-72 404.5,-72 438.5,-72 438.5,-72 444.5,-72 450.5,-78 450.5,-84 450.5,-84 450.5,-96 450.5,-96 450.5,-102 444.5,-108 438.5,-108"/> <text text-anchor="middle" x="421.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> </g> <!-- 7->0 --> -<g id="edge4" class="edge"> +<g id="edge6" class="edge"> <title>7->0</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M401.565,-71.8314C392.0217,-63.1337 380.4401,-52.5783 370.0694,-43.1265"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="372.1772,-40.3121 362.4287,-36.1628 367.462,-45.4857 372.1772,-40.3121"/> @@ -172,7 +172,7 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="422.2571,-118.3814 419.7413,-108.0896 415.29,-117.704 422.2571,-118.3814"/> </g> <!-- 9->0 --> -<g id="edge2" class="edge"> +<g id="edge5" class="edge"> <title>9->0</title> <path fill="none" stroke="#c0c0c0" 
stroke-width="2" d="M170.3488,-431.7857C153.8788,-405.2865 126.5,-354.0148 126.5,-306 126.5,-306 126.5,-306 126.5,-162 126.5,-121.1184 117.4647,-101.7545 145.5,-72 167.2281,-48.9395 254.6042,-31.7622 305.3969,-23.4905"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="306.0144,-26.9364 315.3404,-21.9087 304.9147,-20.0233 306.0144,-26.9364"/> @@ -180,11 +180,11 @@ <!-- 10 --> <g id="node11" class="node"> <title>10</title> -<path fill="none" stroke="#56d882" stroke-width="2" d="M495,-468C495,-468 462,-468 462,-468 456,-468 450,-462 450,-456 450,-456 450,-444 450,-444 450,-438 456,-432 462,-432 462,-432 495,-432 495,-432 501,-432 507,-438 507,-444 507,-444 507,-456 507,-456 507,-462 501,-468 495,-468"/> +<path fill="none" stroke="#d86e56" stroke-width="2" d="M495,-468C495,-468 462,-468 462,-468 456,-468 450,-462 450,-456 450,-456 450,-444 450,-444 450,-438 456,-432 462,-432 462,-432 495,-432 495,-432 501,-432 507,-438 507,-444 507,-444 507,-456 507,-456 507,-462 501,-468 495,-468"/> <text text-anchor="middle" x="478.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">gfftobed</text> </g> <!-- 10->0 --> -<g id="edge1" class="edge"> +<g id="edge2" class="edge"> <title>10->0</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M478.5,-431.8146C478.5,-404.4983 478.5,-351.25 478.5,-306 478.5,-306 478.5,-306 478.5,-162 478.5,-121.1184 484.9901,-103.9619 459.5,-72 439.6741,-47.1403 405.7974,-33.2394 379.5835,-25.7916"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="380.1623,-22.3246 369.6002,-23.1563 378.3757,-29.0927 380.1623,-22.3246"/> @@ -192,7 +192,7 @@ <!-- 19 --> <g id="node20" class="node"> <title>19</title> -<path fill="none" stroke="#d8b456" stroke-width="2" d="M602,-396C602,-396 519,-396 519,-396 513,-396 507,-390 507,-384 507,-384 507,-372 507,-372 507,-366 513,-360 519,-360 519,-360 602,-360 602,-360 608,-360 614,-366 614,-372 614,-372 614,-384 614,-384 614,-390 608,-396 602,-396"/> +<path fill="none" 
stroke="#56d89a" stroke-width="2" d="M602,-396C602,-396 519,-396 519,-396 513,-396 507,-390 507,-384 507,-384 507,-372 507,-372 507,-366 513,-360 519,-360 519,-360 602,-360 602,-360 608,-360 614,-366 614,-372 614,-372 614,-384 614,-384 614,-390 608,-396 602,-396"/> <text text-anchor="middle" x="560.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">filter_mature_mirs</text> </g> <!-- 10->19 --> @@ -204,7 +204,7 @@ <!-- 11 --> <g id="node12" class="node"> <title>11</title> -<path fill="none" stroke="#56c9d8" stroke-width="2" d="M489,-540C489,-540 412,-540 412,-540 406,-540 400,-534 400,-528 400,-528 400,-516 400,-516 400,-510 406,-504 412,-504 412,-504 489,-504 489,-504 495,-504 501,-510 501,-516 501,-516 501,-528 501,-528 501,-534 495,-540 489,-540"/> +<path fill="none" stroke="#56d882" stroke-width="2" d="M489,-540C489,-540 412,-540 412,-540 406,-540 400,-534 400,-528 400,-528 400,-516 400,-516 400,-510 406,-504 412,-504 412,-504 489,-504 489,-504 495,-504 501,-510 501,-516 501,-516 501,-528 501,-528 501,-534 495,-540 489,-540"/> <text text-anchor="middle" x="450.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">filter_mir_1_anno</text> </g> <!-- 11->10 --> @@ -216,7 +216,7 @@ <!-- 12 --> <g id="node13" class="node"> <title>12</title> -<path fill="none" stroke="#566bd8" stroke-width="2" d="M461,-612C461,-612 388,-612 388,-612 382,-612 376,-606 376,-600 376,-600 376,-588 376,-588 376,-582 382,-576 388,-576 388,-576 461,-576 461,-576 467,-576 473,-582 473,-588 473,-588 473,-600 473,-600 473,-606 467,-612 461,-612"/> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M461,-612C461,-612 388,-612 388,-612 382,-612 376,-606 376,-600 376,-600 376,-588 376,-588 376,-582 382,-576 388,-576 388,-576 461,-576 461,-576 467,-576 473,-582 473,-588 473,-588 473,-600 473,-600 473,-606 467,-612 461,-612"/> <text text-anchor="middle" x="424.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">map_chr_names</text> </g> <!-- 12->11 --> 
@@ -226,13 +226,13 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="443.7462,-551.0076 443.8508,-540.4133 437.1623,-548.63 443.7462,-551.0076"/> </g> <!-- 13->12 --> -<g id="edge18" class="edge"> +<g id="edge17" class="edge"> <title>13->12</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M362.4444,-647.8314C372.5691,-639.0485 384.8777,-628.3712 395.8548,-618.8489"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="398.3019,-621.3595 403.5623,-612.1628 393.7149,-616.0718 398.3019,-621.3595"/> </g> <!-- 14->12 --> -<g id="edge17" class="edge"> +<g id="edge18" class="edge"> <title>14->12</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M424.5,-647.8314C424.5,-640.131 424.5,-630.9743 424.5,-622.4166"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="428.0001,-622.4132 424.5,-612.4133 421.0001,-622.4133 428.0001,-622.4132"/> @@ -240,11 +240,11 @@ <!-- 15 --> <g id="node16" class="node"> <title>15</title> -<path fill="none" stroke="#56d86b" stroke-width="2" d="M584.5,-108C584.5,-108 522.5,-108 522.5,-108 516.5,-108 510.5,-102 510.5,-96 510.5,-96 510.5,-84 510.5,-84 510.5,-78 516.5,-72 522.5,-72 522.5,-72 584.5,-72 584.5,-72 590.5,-72 596.5,-78 596.5,-84 596.5,-84 596.5,-96 596.5,-96 596.5,-102 590.5,-108 584.5,-108"/> +<path fill="none" stroke="#9fd856" stroke-width="2" d="M584.5,-108C584.5,-108 522.5,-108 522.5,-108 516.5,-108 510.5,-102 510.5,-96 510.5,-96 510.5,-84 510.5,-84 510.5,-78 516.5,-72 522.5,-72 522.5,-72 584.5,-72 584.5,-72 590.5,-72 596.5,-78 596.5,-84 596.5,-84 596.5,-96 596.5,-96 596.5,-102 590.5,-108 584.5,-108"/> <text text-anchor="middle" x="553.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_final</text> </g> <!-- 15->0 --> -<g id="edge6" class="edge"> +<g id="edge1" class="edge"> <title>15->0</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M510.2918,-75.256C471.7195,-62.0939 415.8391,-43.0257 379.3404,-30.5711"/> <polygon 
fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="380.1849,-27.1612 369.5904,-27.2441 377.9242,-33.7861 380.1849,-27.1612"/> @@ -252,7 +252,7 @@ <!-- 16 --> <g id="node17" class="node"> <title>16</title> -<path fill="none" stroke="#d88556" stroke-width="2" d="M594.5,-180C594.5,-180 520.5,-180 520.5,-180 514.5,-180 508.5,-174 508.5,-168 508.5,-168 508.5,-156 508.5,-156 508.5,-150 514.5,-144 520.5,-144 520.5,-144 594.5,-144 594.5,-144 600.5,-144 606.5,-150 606.5,-156 606.5,-156 606.5,-168 606.5,-168 606.5,-174 600.5,-180 594.5,-180"/> +<path fill="none" stroke="#59d856" stroke-width="2" d="M594.5,-180C594.5,-180 520.5,-180 520.5,-180 514.5,-180 508.5,-174 508.5,-168 508.5,-168 508.5,-156 508.5,-156 508.5,-150 514.5,-144 520.5,-144 520.5,-144 594.5,-144 594.5,-144 600.5,-144 606.5,-150 606.5,-156 606.5,-156 606.5,-168 606.5,-168 606.5,-174 600.5,-180 594.5,-180"/> <text text-anchor="middle" x="557.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_concat</text> </g> <!-- 16->15 --> @@ -264,7 +264,7 @@ <!-- 17 --> <g id="node18" class="node"> <title>17</title> -<path fill="none" stroke="#b6d856" stroke-width="2" d="M599,-252C599,-252 520,-252 520,-252 514,-252 508,-246 508,-240 508,-240 508,-228 508,-228 508,-222 514,-216 520,-216 520,-216 599,-216 599,-216 605,-216 611,-222 611,-228 611,-228 611,-240 611,-240 611,-246 605,-252 599,-252"/> +<path fill="none" stroke="#70d856" stroke-width="2" d="M599,-252C599,-252 520,-252 520,-252 514,-252 508,-246 508,-240 508,-240 508,-228 508,-228 508,-222 514,-216 520,-216 520,-216 599,-216 599,-216 605,-216 611,-222 611,-228 611,-228 611,-240 611,-240 611,-246 605,-252 599,-252"/> <text text-anchor="middle" x="559.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 17->16 --> @@ -276,7 +276,7 @@ <!-- 18 --> <g id="node19" class="node"> <title>18</title> -<path fill="none" stroke="#56d89a" stroke-width="2" d="M578.5,-324C578.5,-324 542.5,-324 542.5,-324 
536.5,-324 530.5,-318 530.5,-312 530.5,-312 530.5,-300 530.5,-300 530.5,-294 536.5,-288 542.5,-288 542.5,-288 578.5,-288 578.5,-288 584.5,-288 590.5,-294 590.5,-300 590.5,-300 590.5,-312 590.5,-312 590.5,-318 584.5,-324 578.5,-324"/> +<path fill="none" stroke="#d8b456" stroke-width="2" d="M578.5,-324C578.5,-324 542.5,-324 542.5,-324 536.5,-324 530.5,-318 530.5,-312 530.5,-312 530.5,-300 530.5,-300 530.5,-294 536.5,-288 542.5,-288 542.5,-288 578.5,-288 578.5,-288 584.5,-288 590.5,-294 590.5,-300 590.5,-300 590.5,-312 590.5,-312 590.5,-318 584.5,-324 578.5,-324"/> <text text-anchor="middle" x="560.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> </g> <!-- 18->17 --> @@ -286,7 +286,7 @@ <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="563.3944,-262.3637 559.7557,-252.4133 556.3951,-262.4609 563.3944,-262.3637"/> </g> <!-- 19->18 --> -<g id="edge25" class="edge"> +<g id="edge24" class="edge"> <title>19->18</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M560.5,-359.8314C560.5,-352.131 560.5,-342.9743 560.5,-334.4166"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="564.0001,-334.4132 560.5,-324.4133 557.0001,-334.4133 564.0001,-334.4132"/> @@ -294,11 +294,11 @@ <!-- 20 --> <g id="node21" class="node"> <title>20</title> -<path fill="none" stroke="#56d8b1" stroke-width="2" d="M712.5,-396C712.5,-396 644.5,-396 644.5,-396 638.5,-396 632.5,-390 632.5,-384 632.5,-384 632.5,-372 632.5,-372 632.5,-366 638.5,-360 644.5,-360 644.5,-360 712.5,-360 712.5,-360 718.5,-360 724.5,-366 724.5,-372 724.5,-372 724.5,-384 724.5,-384 724.5,-390 718.5,-396 712.5,-396"/> +<path fill="none" stroke="#ced856" stroke-width="2" d="M712.5,-396C712.5,-396 644.5,-396 644.5,-396 638.5,-396 632.5,-390 632.5,-384 632.5,-384 632.5,-372 632.5,-372 632.5,-366 638.5,-360 644.5,-360 644.5,-360 712.5,-360 712.5,-360 718.5,-360 724.5,-366 724.5,-372 724.5,-372 724.5,-384 724.5,-384 724.5,-390 718.5,-396 712.5,-396"/> <text 
text-anchor="middle" x="678.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">extract_chr_len</text> </g> <!-- 20->18 --> -<g id="edge24" class="edge"> +<g id="edge25" class="edge"> <title>20->18</title> <path fill="none" stroke="#c0c0c0" stroke-width="2" d="M648.7236,-359.8314C633.6307,-350.6221 615.1243,-339.3301 598.9657,-329.4706"/> <polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="600.6263,-326.3838 590.2669,-324.1628 596.9802,-332.3593 600.6263,-326.3838"/> diff --git a/images/rule_graph_quantify.svg b/images/rule_graph_quantify.svg new file mode 100644 index 0000000..d723edc --- /dev/null +++ b/images/rule_graph_quantify.svg @@ -0,0 +1,97 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Generated by graphviz version 2.40.1 (20161225.0304) + --> +<!-- Title: snakemake_dag Pages: 1 --> +<svg width="200pt" height="404pt" + viewBox="0.00 0.00 200.00 404.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 400)"> +<title>snakemake_dag</title> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-400 196,-400 196,4 -4,4"/> +<!-- 0 --> +<g id="node1" class="node"> +<title>0</title> +<path fill="none" stroke="#56d8c9" stroke-width="2" d="M106,-36C106,-36 76,-36 76,-36 70,-36 64,-30 64,-24 64,-24 64,-12 64,-12 64,-6 70,0 76,0 76,0 106,0 106,0 112,0 118,-6 118,-12 118,-12 118,-24 118,-24 118,-30 112,-36 106,-36"/> +<text text-anchor="middle" x="91" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> +</g> +<!-- 1 --> +<g id="node2" class="node"> +<title>1</title> +<path fill="none" stroke="#56a2d8" stroke-width="2" d="M120.5,-108C120.5,-108 61.5,-108 61.5,-108 55.5,-108 49.5,-102 49.5,-96 49.5,-96 49.5,-84 49.5,-84 49.5,-78 55.5,-72 61.5,-72 61.5,-72 120.5,-72 120.5,-72 126.5,-72 
132.5,-78 132.5,-84 132.5,-84 132.5,-96 132.5,-96 132.5,-102 126.5,-108 120.5,-108"/> +<text text-anchor="middle" x="91" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">merge_tables</text> +</g> +<!-- 1->0 --> +<g id="edge1" class="edge"> +<title>1->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M91,-71.8314C91,-64.131 91,-54.9743 91,-46.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="94.5001,-46.4132 91,-36.4133 87.5001,-46.4133 94.5001,-46.4132"/> +</g> +<!-- 2 --> +<g id="node3" class="node"> +<title>2</title> +<path fill="none" stroke="#78d856" stroke-width="2" d="M66,-180C66,-180 12,-180 12,-180 6,-180 0,-174 0,-168 0,-168 0,-156 0,-156 0,-150 6,-144 12,-144 12,-144 66,-144 66,-144 72,-144 78,-150 78,-156 78,-156 78,-168 78,-168 78,-174 72,-180 66,-180"/> +<text text-anchor="middle" x="39" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna</text> +</g> +<!-- 2->1 --> +<g id="edge2" class="edge"> +<title>2->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M52.1218,-143.8314C58.0499,-135.6232 65.1729,-125.7606 71.6933,-116.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="74.684,-118.5693 77.7015,-108.4133 69.0092,-114.4708 74.684,-118.5693"/> +</g> +<!-- 3 --> +<g id="node4" class="node"> +<title>3</title> +<path fill="none" stroke="#56d87b" stroke-width="2" d="M125.5,-252C125.5,-252 56.5,-252 56.5,-252 50.5,-252 44.5,-246 44.5,-240 44.5,-240 44.5,-228 44.5,-228 44.5,-222 50.5,-216 56.5,-216 56.5,-216 125.5,-216 125.5,-216 131.5,-216 137.5,-222 137.5,-228 137.5,-228 137.5,-240 137.5,-240 137.5,-246 131.5,-252 125.5,-252"/> +<text text-anchor="middle" x="91" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">intersect_mirna</text> +</g> +<!-- 3->2 --> +<g id="edge4" class="edge"> +<title>3->2</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M77.8782,-215.8314C71.9501,-207.6232 64.8271,-197.7606 
58.3067,-188.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="60.9908,-186.4708 52.2985,-180.4133 55.316,-190.5693 60.9908,-186.4708"/> +</g> +<!-- 6 --> +<g id="node7" class="node"> +<title>6</title> +<path fill="none" stroke="#c6d856" stroke-width="2" d="M180,-180C180,-180 108,-180 108,-180 102,-180 96,-174 96,-168 96,-168 96,-156 96,-156 96,-150 102,-144 108,-144 108,-144 180,-144 180,-144 186,-144 192,-150 192,-156 192,-156 192,-168 192,-168 192,-174 186,-180 180,-180"/> +<text text-anchor="middle" x="144" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna_pri</text> +</g> +<!-- 3->6 --> +<g id="edge7" class="edge"> +<title>3->6</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M104.3741,-215.8314C110.4162,-207.6232 117.6763,-197.7606 124.3221,-188.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="127.3363,-190.5415 130.4458,-180.4133 121.6989,-186.3918 127.3363,-190.5415"/> +</g> +<!-- 4 --> +<g id="node5" class="node"> +<title>4</title> +<path fill="none" stroke="#d85656" stroke-width="2" d="M126.5,-324C126.5,-324 55.5,-324 55.5,-324 49.5,-324 43.5,-318 43.5,-312 43.5,-312 43.5,-300 43.5,-300 43.5,-294 49.5,-288 55.5,-288 55.5,-288 126.5,-288 126.5,-288 132.5,-288 138.5,-294 138.5,-300 138.5,-300 138.5,-312 138.5,-312 138.5,-318 132.5,-324 126.5,-324"/> +<text text-anchor="middle" x="91" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">sort_alignments</text> +</g> +<!-- 4->3 --> +<g id="edge5" class="edge"> +<title>4->3</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M91,-287.8314C91,-280.131 91,-270.9743 91,-262.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="94.5001,-262.4132 91,-252.4133 87.5001,-262.4133 94.5001,-262.4132"/> +</g> +<!-- 5 --> +<g id="node6" class="node"> +<title>5</title> +<path fill="none" stroke="#d8a456" stroke-width="2" d="M112,-396C112,-396 70,-396 70,-396 64,-396 58,-390 58,-384 58,-384 
58,-372 58,-372 58,-366 64,-360 70,-360 70,-360 112,-360 112,-360 118,-360 124,-366 124,-372 124,-372 124,-384 124,-384 124,-390 118,-396 112,-396"/> +<text text-anchor="middle" x="91" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">bamtobed</text> +</g> +<!-- 5->4 --> +<g id="edge6" class="edge"> +<title>5->4</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M91,-359.8314C91,-352.131 91,-342.9743 91,-334.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="94.5001,-334.4132 91,-324.4133 87.5001,-334.4133 94.5001,-334.4132"/> +</g> +<!-- 6->1 --> +<g id="edge3" class="edge"> +<title>6->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M130.6259,-143.8314C124.5838,-135.6232 117.3237,-125.7606 110.6779,-116.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="113.3011,-114.3918 104.5542,-108.4133 107.6637,-118.5415 113.3011,-114.3918"/> +</g> +</g> +</svg> diff --git a/images/workflow_dag_map.svg b/images/workflow_dag_map.svg index bfbb331..892c700 100644 --- a/images/workflow_dag_map.svg +++ b/images/workflow_dag_map.svg @@ -12,13 +12,13 @@ <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#569ad8" stroke-width="2" d="M356,-36C356,-36 326,-36 326,-36 320,-36 314,-30 314,-24 314,-24 314,-12 314,-12 314,-6 320,0 326,0 326,0 356,0 356,0 362,0 368,-6 368,-12 368,-12 368,-24 368,-24 368,-30 362,-36 356,-36"/> +<path fill="none" stroke="#c6d856" stroke-width="2" stroke-dasharray="5,2" d="M356,-36C356,-36 326,-36 326,-36 320,-36 314,-30 314,-24 314,-24 314,-12 314,-12 314,-6 320,0 326,0 326,0 356,0 356,0 362,0 368,-6 368,-12 368,-12 368,-24 368,-24 368,-30 362,-36 356,-36"/> <text text-anchor="middle" x="341" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#56b9d8" stroke-width="2" d="M364.5,-108C364.5,-108 317.5,-108 317.5,-108 311.5,-108 
305.5,-102 305.5,-96 305.5,-96 305.5,-84 305.5,-84 305.5,-78 311.5,-72 317.5,-72 317.5,-72 364.5,-72 364.5,-72 370.5,-72 376.5,-78 376.5,-84 376.5,-84 376.5,-96 376.5,-96 376.5,-102 370.5,-108 364.5,-108"/> +<path fill="none" stroke="#5663d8" stroke-width="2" stroke-dasharray="5,2" d="M364.5,-108C364.5,-108 317.5,-108 317.5,-108 311.5,-108 305.5,-102 305.5,-96 305.5,-96 305.5,-84 305.5,-84 305.5,-78 311.5,-72 317.5,-72 317.5,-72 364.5,-72 364.5,-72 370.5,-72 376.5,-78 376.5,-84 376.5,-84 376.5,-96 376.5,-96 376.5,-102 370.5,-108 364.5,-108"/> <text text-anchor="middle" x="341" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">index_bam</text> </g> <!-- 1->0 --> @@ -30,7 +30,7 @@ <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#9fd856" stroke-width="2" d="M377.5,-180C377.5,-180 304.5,-180 304.5,-180 298.5,-180 292.5,-174 292.5,-168 292.5,-168 292.5,-156 292.5,-156 292.5,-150 298.5,-144 304.5,-144 304.5,-144 377.5,-144 377.5,-144 383.5,-144 389.5,-150 389.5,-156 389.5,-156 389.5,-168 389.5,-168 389.5,-174 383.5,-180 377.5,-180"/> +<path fill="none" stroke="#56d873" stroke-width="2" stroke-dasharray="5,2" d="M377.5,-180C377.5,-180 304.5,-180 304.5,-180 298.5,-180 292.5,-174 292.5,-168 292.5,-168 292.5,-156 292.5,-156 292.5,-150 298.5,-144 304.5,-144 304.5,-144 377.5,-144 377.5,-144 383.5,-144 389.5,-150 389.5,-156 389.5,-156 389.5,-168 389.5,-168 389.5,-174 383.5,-180 377.5,-180"/> <text text-anchor="middle" x="341" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">sort_by_position</text> </g> <!-- 2->1 --> @@ -42,7 +42,7 @@ <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" d="M376.5,-252C376.5,-252 305.5,-252 305.5,-252 299.5,-252 293.5,-246 293.5,-240 293.5,-240 293.5,-228 293.5,-228 293.5,-222 299.5,-216 305.5,-216 305.5,-216 376.5,-216 376.5,-216 382.5,-216 388.5,-222 388.5,-228 388.5,-228 388.5,-240 388.5,-240 388.5,-246 382.5,-252 376.5,-252"/> 
+<path fill="none" stroke="#56b9d8" stroke-width="2" stroke-dasharray="5,2" d="M376.5,-252C376.5,-252 305.5,-252 305.5,-252 299.5,-252 293.5,-246 293.5,-240 293.5,-240 293.5,-228 293.5,-228 293.5,-222 299.5,-216 305.5,-216 305.5,-216 376.5,-216 376.5,-216 382.5,-216 388.5,-222 388.5,-228 388.5,-228 388.5,-240 388.5,-240 388.5,-246 382.5,-252 376.5,-252"/> <text text-anchor="middle" x="341" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">convert_to_bam</text> </g> <!-- 3->2 --> @@ -54,7 +54,7 @@ <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#70d856" stroke-width="2" d="M379.5,-324C379.5,-324 302.5,-324 302.5,-324 296.5,-324 290.5,-318 290.5,-312 290.5,-312 290.5,-300 290.5,-300 290.5,-294 296.5,-288 302.5,-288 302.5,-288 379.5,-288 379.5,-288 385.5,-288 391.5,-294 391.5,-300 391.5,-300 391.5,-312 391.5,-312 391.5,-318 385.5,-324 379.5,-324"/> +<path fill="none" stroke="#568ad8" stroke-width="2" stroke-dasharray="5,2" d="M379.5,-324C379.5,-324 302.5,-324 302.5,-324 296.5,-324 290.5,-318 290.5,-312 290.5,-312 290.5,-300 290.5,-300 290.5,-294 296.5,-288 302.5,-288 302.5,-288 379.5,-288 379.5,-288 385.5,-288 391.5,-294 391.5,-300 391.5,-300 391.5,-312 391.5,-312 391.5,-318 385.5,-324 379.5,-324"/> <text text-anchor="middle" x="341" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">uncollapse_reads</text> </g> <!-- 4->3 --> @@ -66,7 +66,7 @@ <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#d87556" stroke-width="2" d="M379,-396C379,-396 303,-396 303,-396 297,-396 291,-390 291,-384 291,-384 291,-372 291,-372 291,-366 297,-360 303,-360 303,-360 379,-360 379,-360 385,-360 391,-366 391,-372 391,-372 391,-384 391,-384 391,-390 385,-396 379,-396"/> +<path fill="none" stroke="#56d892" stroke-width="2" stroke-dasharray="5,2" d="M379,-396C379,-396 303,-396 303,-396 297,-396 291,-390 291,-384 291,-384 291,-372 291,-372 291,-366 297,-360 303,-360 303,-360 379,-360 379,-360 385,-360 
391,-366 391,-372 391,-372 391,-384 391,-384 391,-390 385,-396 379,-396"/> <text text-anchor="middle" x="341" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">remove_inferiors</text> </g> <!-- 5->4 --> @@ -78,7 +78,7 @@ <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#56d8a2" stroke-width="2" d="M356,-468C356,-468 326,-468 326,-468 320,-468 314,-462 314,-456 314,-456 314,-444 314,-444 314,-438 320,-432 326,-432 326,-432 356,-432 356,-432 362,-432 368,-438 368,-444 368,-444 368,-456 368,-456 368,-462 362,-468 356,-468"/> +<path fill="none" stroke="#d6d856" stroke-width="2" stroke-dasharray="5,2" d="M356,-468C356,-468 326,-468 326,-468 320,-468 314,-462 314,-456 314,-456 314,-444 314,-444 314,-438 320,-432 326,-432 326,-432 356,-432 356,-432 362,-432 368,-438 368,-444 368,-444 368,-456 368,-456 368,-462 362,-468 356,-468"/> <text text-anchor="middle" x="341" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">sort_id</text> </g> <!-- 6->5 --> @@ -90,7 +90,7 @@ <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#56a9d8" stroke-width="2" d="M366,-540C366,-540 316,-540 316,-540 310,-540 304,-534 304,-528 304,-528 304,-516 304,-516 304,-510 310,-504 316,-504 316,-504 366,-504 366,-504 372,-504 378,-510 378,-516 378,-516 378,-528 378,-528 378,-534 372,-540 366,-540"/> +<path fill="none" stroke="#56c9d8" stroke-width="2" stroke-dasharray="5,2" d="M366,-540C366,-540 316,-540 316,-540 310,-540 304,-534 304,-528 304,-528 304,-516 304,-516 304,-510 310,-504 316,-504 316,-504 366,-504 366,-504 372,-504 378,-510 378,-516 378,-516 378,-528 378,-528 378,-534 372,-540 366,-540"/> <text text-anchor="middle" x="341" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">add_header</text> </g> <!-- 7->6 --> @@ -102,7 +102,7 @@ <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#d8ac56" stroke-width="2" d="M369,-612C369,-612 313,-612 313,-612 307,-612 
301,-606 301,-600 301,-600 301,-588 301,-588 301,-582 307,-576 313,-576 313,-576 369,-576 369,-576 375,-576 381,-582 381,-588 381,-588 381,-600 381,-600 381,-606 375,-612 369,-612"/> +<path fill="none" stroke="#567bd8" stroke-width="2" stroke-dasharray="5,2" d="M369,-612C369,-612 313,-612 313,-612 307,-612 301,-606 301,-600 301,-600 301,-588 301,-588 301,-582 307,-576 313,-576 313,-576 369,-576 369,-576 375,-576 381,-582 381,-588 381,-588 381,-600 381,-600 381,-606 375,-612 369,-612"/> <text text-anchor="middle" x="341" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">cat_mapping</text> </g> <!-- 8->7 --> @@ -114,7 +114,7 @@ <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M304,-684C304,-684 248,-684 248,-684 242,-684 236,-678 236,-672 236,-672 236,-660 236,-660 236,-654 242,-648 248,-648 248,-648 304,-648 304,-648 310,-648 316,-654 316,-660 316,-660 316,-672 316,-672 316,-678 310,-684 304,-684"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M304,-684C304,-684 248,-684 248,-684 242,-684 236,-678 236,-672 236,-672 236,-660 236,-660 236,-654 242,-648 248,-648 248,-648 304,-648 304,-648 310,-648 316,-654 316,-660 316,-660 316,-672 316,-672 316,-678 310,-684 304,-684"/> <text text-anchor="middle" x="276" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">trans_to_gen</text> </g> <!-- 9->8 --> @@ -126,7 +126,7 @@ <!-- 10 --> <g id="node11" class="node"> <title>10</title> -<path fill="none" stroke="#56d8b1" stroke-width="2" d="M349,-756C349,-756 201,-756 201,-756 195,-756 189,-750 189,-744 189,-744 189,-732 189,-732 189,-726 195,-720 201,-720 201,-720 349,-720 349,-720 355,-720 361,-726 361,-732 361,-732 361,-744 361,-744 361,-750 355,-756 349,-756"/> +<path fill="none" stroke="#56d8c1" stroke-width="2" stroke-dasharray="5,2" d="M349,-756C349,-756 201,-756 201,-756 195,-756 189,-750 189,-744 189,-744 189,-732 189,-732 189,-726 195,-720 201,-720 
201,-720 349,-720 349,-720 355,-720 361,-726 361,-732 361,-732 361,-744 361,-744 361,-750 355,-756 349,-756"/> <text text-anchor="middle" x="275" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">remove_headers_transcriptome</text> </g> <!-- 10->9 --> @@ -138,7 +138,7 @@ <!-- 11 --> <g id="node12" class="node"> <title>11</title> -<path fill="none" stroke="#d88556" stroke-width="2" d="M326,-828C326,-828 220,-828 220,-828 214,-828 208,-822 208,-816 208,-816 208,-804 208,-804 208,-798 214,-792 220,-792 220,-792 326,-792 326,-792 332,-792 338,-798 338,-804 338,-804 338,-816 338,-816 338,-822 332,-828 326,-828"/> +<path fill="none" stroke="#569ad8" stroke-width="2" stroke-dasharray="5,2" d="M326,-828C326,-828 220,-828 220,-828 214,-828 208,-822 208,-816 208,-816 208,-804 208,-804 208,-798 214,-792 220,-792 220,-792 326,-792 326,-792 332,-792 338,-798 338,-804 338,-804 338,-816 338,-816 338,-822 332,-828 326,-828"/> <text text-anchor="middle" x="273" y="-807.5" font-family="sans" font-size="10.00" fill="#000000">filter_nh_transcriptome</text> </g> <!-- 11->10 --> @@ -150,7 +150,7 @@ <!-- 12 --> <g id="node13" class="node"> <title>12</title> -<path fill="none" stroke="#56d892" stroke-width="2" d="M317,-900C317,-900 187,-900 187,-900 181,-900 175,-894 175,-888 175,-888 175,-876 175,-876 175,-870 181,-864 187,-864 187,-864 317,-864 317,-864 323,-864 329,-870 329,-876 329,-876 329,-888 329,-888 329,-894 323,-900 317,-900"/> +<path fill="none" stroke="#8fd856" stroke-width="2" stroke-dasharray="5,2" d="M317,-900C317,-900 187,-900 187,-900 181,-900 175,-894 175,-888 175,-888 175,-876 175,-876 175,-870 181,-864 187,-864 187,-864 317,-864 317,-864 323,-864 329,-870 329,-876 329,-876 329,-888 329,-888 329,-894 323,-900 317,-900"/> <text text-anchor="middle" x="252" y="-879.5" font-family="sans" font-size="10.00" fill="#000000">merge_transcriptome_maps</text> </g> <!-- 12->11 --> @@ -162,7 +162,7 @@ <!-- 13 --> <g id="node14" class="node"> <title>13</title> -<path 
fill="none" stroke="#61d856" stroke-width="2" d="M174,-1116C174,-1116 12,-1116 12,-1116 6,-1116 0,-1110 0,-1104 0,-1104 0,-1092 0,-1092 0,-1086 6,-1080 12,-1080 12,-1080 174,-1080 174,-1080 180,-1080 186,-1086 186,-1092 186,-1092 186,-1104 186,-1104 186,-1110 180,-1116 174,-1116"/> +<path fill="none" stroke="#80d856" stroke-width="2" stroke-dasharray="5,2" d="M174,-1116C174,-1116 12,-1116 12,-1116 6,-1116 0,-1110 0,-1104 0,-1104 0,-1092 0,-1092 0,-1086 6,-1080 12,-1080 12,-1080 174,-1080 174,-1080 180,-1080 186,-1086 186,-1092 186,-1092 186,-1104 186,-1104 186,-1110 180,-1116 174,-1116"/> <text text-anchor="middle" x="93" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_transcriptome_segemehl</text> </g> <!-- 13->12 --> @@ -174,7 +174,7 @@ <!-- 14 --> <g id="node15" class="node"> <title>14</title> -<path fill="none" stroke="#c6d856" stroke-width="2" d="M361.5,-1260C361.5,-1260 294.5,-1260 294.5,-1260 288.5,-1260 282.5,-1254 282.5,-1248 282.5,-1248 282.5,-1236 282.5,-1236 282.5,-1230 288.5,-1224 294.5,-1224 294.5,-1224 361.5,-1224 361.5,-1224 367.5,-1224 373.5,-1230 373.5,-1236 373.5,-1236 373.5,-1248 373.5,-1248 373.5,-1254 367.5,-1260 361.5,-1260"/> +<path fill="none" stroke="#61d856" stroke-width="2" stroke-dasharray="5,2" d="M361.5,-1260C361.5,-1260 294.5,-1260 294.5,-1260 288.5,-1260 282.5,-1254 282.5,-1248 282.5,-1248 282.5,-1236 282.5,-1236 282.5,-1230 288.5,-1224 294.5,-1224 294.5,-1224 361.5,-1224 361.5,-1224 367.5,-1224 373.5,-1230 373.5,-1236 373.5,-1236 373.5,-1248 373.5,-1248 373.5,-1254 367.5,-1260 361.5,-1260"/> <text text-anchor="middle" x="328" y="-1239.5" font-family="sans" font-size="10.00" fill="#000000">fastx_collapser</text> </g> <!-- 14->13 --> @@ -186,7 +186,7 @@ <!-- 20 --> <g id="node21" class="node"> <title>20</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M385.5,-1188C385.5,-1188 270.5,-1188 270.5,-1188 264.5,-1188 258.5,-1182 258.5,-1176 258.5,-1176 258.5,-1164 258.5,-1164 258.5,-1158 264.5,-1152 
270.5,-1152 270.5,-1152 385.5,-1152 385.5,-1152 391.5,-1152 397.5,-1158 397.5,-1164 397.5,-1164 397.5,-1176 397.5,-1176 397.5,-1182 391.5,-1188 385.5,-1188"/> +<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M385.5,-1188C385.5,-1188 270.5,-1188 270.5,-1188 264.5,-1188 258.5,-1182 258.5,-1176 258.5,-1176 258.5,-1164 258.5,-1164 258.5,-1158 264.5,-1152 270.5,-1152 270.5,-1152 385.5,-1152 385.5,-1152 391.5,-1152 397.5,-1158 397.5,-1164 397.5,-1164 397.5,-1176 397.5,-1176 397.5,-1182 391.5,-1188 385.5,-1188"/> <text text-anchor="middle" x="328" y="-1167.5" font-family="sans" font-size="10.00" fill="#000000">filter_fasta_for_oligomap</text> </g> <!-- 14->20 --> @@ -198,7 +198,7 @@ <!-- 25 --> <g id="node26" class="node"> <title>25</title> -<path fill="none" stroke="#5663d8" stroke-width="2" d="M626,-1188C626,-1188 492,-1188 492,-1188 486,-1188 480,-1182 480,-1176 480,-1176 480,-1164 480,-1164 480,-1158 486,-1152 492,-1152 492,-1152 626,-1152 626,-1152 632,-1152 638,-1158 638,-1164 638,-1164 638,-1176 638,-1176 638,-1182 632,-1188 626,-1188"/> +<path fill="none" stroke="#56a9d8" stroke-width="2" stroke-dasharray="5,2" d="M626,-1188C626,-1188 492,-1188 492,-1188 486,-1188 480,-1182 480,-1176 480,-1176 480,-1164 480,-1164 480,-1158 486,-1152 492,-1152 492,-1152 626,-1152 626,-1152 632,-1152 638,-1158 638,-1164 638,-1164 638,-1176 638,-1176 638,-1182 632,-1188 626,-1188"/> <text text-anchor="middle" x="559" y="-1167.5" font-family="sans" font-size="10.00" fill="#000000">mapping_genome_segemehl</text> </g> <!-- 14->25 --> @@ -210,7 +210,7 @@ <!-- 15 --> <g id="node16" class="node"> <title>15</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M345.5,-1332C345.5,-1332 310.5,-1332 310.5,-1332 304.5,-1332 298.5,-1326 298.5,-1320 298.5,-1320 298.5,-1308 298.5,-1308 298.5,-1302 304.5,-1296 310.5,-1296 310.5,-1296 345.5,-1296 345.5,-1296 351.5,-1296 357.5,-1302 357.5,-1308 357.5,-1308 357.5,-1320 357.5,-1320 357.5,-1326 351.5,-1332 
345.5,-1332"/> +<path fill="none" stroke="#d8ac56" stroke-width="2" stroke-dasharray="5,2" d="M345.5,-1332C345.5,-1332 310.5,-1332 310.5,-1332 304.5,-1332 298.5,-1326 298.5,-1320 298.5,-1320 298.5,-1308 298.5,-1308 298.5,-1302 304.5,-1296 310.5,-1296 310.5,-1296 345.5,-1296 345.5,-1296 351.5,-1296 357.5,-1302 357.5,-1308 357.5,-1308 357.5,-1320 357.5,-1320 357.5,-1326 351.5,-1332 345.5,-1332"/> <text text-anchor="middle" x="328" y="-1311.5" font-family="sans" font-size="10.00" fill="#000000">cutadapt</text> </g> <!-- 15->14 --> @@ -222,7 +222,7 @@ <!-- 16 --> <g id="node17" class="node"> <title>16</title> -<path fill="none" stroke="#d86656" stroke-width="2" d="M362.5,-1404C362.5,-1404 293.5,-1404 293.5,-1404 287.5,-1404 281.5,-1398 281.5,-1392 281.5,-1392 281.5,-1380 281.5,-1380 281.5,-1374 287.5,-1368 293.5,-1368 293.5,-1368 362.5,-1368 362.5,-1368 368.5,-1368 374.5,-1374 374.5,-1380 374.5,-1380 374.5,-1392 374.5,-1392 374.5,-1398 368.5,-1404 362.5,-1404"/> +<path fill="none" stroke="#d86656" stroke-width="2" stroke-dasharray="5,2" d="M362.5,-1404C362.5,-1404 293.5,-1404 293.5,-1404 287.5,-1404 281.5,-1398 281.5,-1392 281.5,-1392 281.5,-1380 281.5,-1380 281.5,-1374 287.5,-1368 293.5,-1368 293.5,-1368 362.5,-1368 362.5,-1368 368.5,-1368 374.5,-1374 374.5,-1380 374.5,-1380 374.5,-1392 374.5,-1392 374.5,-1398 368.5,-1404 362.5,-1404"/> <text text-anchor="middle" x="328" y="-1383.5" font-family="sans" font-size="10.00" fill="#000000">fasta_formatter</text> </g> <!-- 16->15 --> @@ -234,7 +234,7 @@ <!-- 17 --> <g id="node18" class="node"> <title>17</title> -<path fill="none" stroke="#d6d856" stroke-width="2" d="M385.5,-1481C385.5,-1481 270.5,-1481 270.5,-1481 264.5,-1481 258.5,-1475 258.5,-1469 258.5,-1469 258.5,-1452 258.5,-1452 258.5,-1446 264.5,-1440 270.5,-1440 270.5,-1440 385.5,-1440 385.5,-1440 391.5,-1440 397.5,-1446 397.5,-1452 397.5,-1452 397.5,-1469 397.5,-1469 397.5,-1475 391.5,-1481 385.5,-1481"/> +<path fill="none" stroke="#afd856" stroke-width="2" 
stroke-dasharray="5,2" d="M385.5,-1481C385.5,-1481 270.5,-1481 270.5,-1481 264.5,-1481 258.5,-1475 258.5,-1469 258.5,-1469 258.5,-1452 258.5,-1452 258.5,-1446 264.5,-1440 270.5,-1440 270.5,-1440 385.5,-1440 385.5,-1440 391.5,-1440 397.5,-1446 397.5,-1452 397.5,-1452 397.5,-1469 397.5,-1469 397.5,-1475 391.5,-1481 385.5,-1481"/> <text text-anchor="middle" x="328" y="-1469" font-family="sans" font-size="10.00" fill="#000000">uncompress_zipped_files</text> <text text-anchor="middle" x="328" y="-1458" font-family="sans" font-size="10.00" fill="#000000">format: fa</text> <text text-anchor="middle" x="328" y="-1447" font-family="sans" font-size="10.00" fill="#000000">sample: test_lib</text> @@ -248,7 +248,7 @@ <!-- 18 --> <g id="node19" class="node"> <title>18</title> -<path fill="none" stroke="#56d873" stroke-width="2" d="M326,-972C326,-972 178,-972 178,-972 172,-972 166,-966 166,-960 166,-960 166,-948 166,-948 166,-942 172,-936 178,-936 178,-936 326,-936 326,-936 332,-936 338,-942 338,-948 338,-948 338,-960 338,-960 338,-966 332,-972 326,-972"/> +<path fill="none" stroke="#56d85b" stroke-width="2" stroke-dasharray="5,2" d="M326,-972C326,-972 178,-972 178,-972 172,-972 166,-966 166,-960 166,-960 166,-948 166,-948 166,-942 172,-936 178,-936 178,-936 326,-936 326,-936 332,-936 338,-942 338,-948 338,-948 338,-960 338,-960 338,-966 332,-972 326,-972"/> <text text-anchor="middle" x="252" y="-951.5" font-family="sans" font-size="10.00" fill="#000000">oligomap_transcriptome_toSAM</text> </g> <!-- 18->12 --> @@ -260,7 +260,7 @@ <!-- 19 --> <g id="node20" class="node"> <title>19</title> -<path fill="none" stroke="#d89c56" stroke-width="2" d="M376,-1116C376,-1116 216,-1116 216,-1116 210,-1116 204,-1110 204,-1104 204,-1104 204,-1092 204,-1092 204,-1086 210,-1080 216,-1080 216,-1080 376,-1080 376,-1080 382,-1080 388,-1086 388,-1092 388,-1092 388,-1104 388,-1104 388,-1110 382,-1116 376,-1116"/> +<path fill="none" stroke="#d85656" stroke-width="2" stroke-dasharray="5,2" 
d="M376,-1116C376,-1116 216,-1116 216,-1116 210,-1116 204,-1110 204,-1104 204,-1104 204,-1092 204,-1092 204,-1086 210,-1080 216,-1080 216,-1080 376,-1080 376,-1080 382,-1080 388,-1086 388,-1092 388,-1092 388,-1104 388,-1104 388,-1110 382,-1116 376,-1116"/> <text text-anchor="middle" x="296" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_transcriptome_oligomap</text> </g> <!-- 19->18 --> @@ -272,7 +272,7 @@ <!-- 21 --> <g id="node22" class="node"> <title>21</title> -<path fill="none" stroke="#567bd8" stroke-width="2" d="M363,-1044C363,-1044 227,-1044 227,-1044 221,-1044 215,-1038 215,-1032 215,-1032 215,-1020 215,-1020 215,-1014 221,-1008 227,-1008 227,-1008 363,-1008 363,-1008 369,-1008 375,-1014 375,-1020 375,-1020 375,-1032 375,-1032 375,-1038 369,-1044 363,-1044"/> +<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M363,-1044C363,-1044 227,-1044 227,-1044 221,-1044 215,-1038 215,-1032 215,-1032 215,-1020 215,-1020 215,-1014 221,-1008 227,-1008 227,-1008 363,-1008 363,-1008 369,-1008 375,-1014 375,-1020 375,-1020 375,-1032 375,-1032 375,-1038 369,-1044 363,-1044"/> <text text-anchor="middle" x="295" y="-1023.5" font-family="sans" font-size="10.00" fill="#000000">sort_transcriptome_oligomap</text> </g> <!-- 19->21 --> @@ -290,7 +290,7 @@ <!-- 27 --> <g id="node28" class="node"> <title>27</title> -<path fill="none" stroke="#8fd856" stroke-width="2" d="M549.5,-1116C549.5,-1116 418.5,-1116 418.5,-1116 412.5,-1116 406.5,-1110 406.5,-1104 406.5,-1104 406.5,-1092 406.5,-1092 406.5,-1086 412.5,-1080 418.5,-1080 418.5,-1080 549.5,-1080 549.5,-1080 555.5,-1080 561.5,-1086 561.5,-1092 561.5,-1092 561.5,-1104 561.5,-1104 561.5,-1110 555.5,-1116 549.5,-1116"/> +<path fill="none" stroke="#56d8d0" stroke-width="2" stroke-dasharray="5,2" d="M549.5,-1116C549.5,-1116 418.5,-1116 418.5,-1116 412.5,-1116 406.5,-1110 406.5,-1104 406.5,-1104 406.5,-1092 406.5,-1092 406.5,-1086 412.5,-1080 418.5,-1080 418.5,-1080 549.5,-1080 
549.5,-1080 555.5,-1080 561.5,-1086 561.5,-1092 561.5,-1092 561.5,-1104 561.5,-1104 561.5,-1110 555.5,-1116 549.5,-1116"/> <text text-anchor="middle" x="484" y="-1095.5" font-family="sans" font-size="10.00" fill="#000000">mapping_genome_oligomap</text> </g> <!-- 20->27 --> @@ -308,7 +308,7 @@ <!-- 22 --> <g id="node23" class="node"> <title>22</title> -<path fill="none" stroke="#56c9d8" stroke-width="2" d="M465.5,-684C465.5,-684 346.5,-684 346.5,-684 340.5,-684 334.5,-678 334.5,-672 334.5,-672 334.5,-660 334.5,-660 334.5,-654 340.5,-648 346.5,-648 346.5,-648 465.5,-648 465.5,-648 471.5,-648 477.5,-654 477.5,-660 477.5,-660 477.5,-672 477.5,-672 477.5,-678 471.5,-684 465.5,-684"/> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M465.5,-684C465.5,-684 346.5,-684 346.5,-684 340.5,-684 334.5,-678 334.5,-672 334.5,-672 334.5,-660 334.5,-660 334.5,-654 340.5,-648 346.5,-648 346.5,-648 465.5,-648 465.5,-648 471.5,-648 477.5,-654 477.5,-660 477.5,-660 477.5,-672 477.5,-672 477.5,-678 471.5,-684 465.5,-684"/> <text text-anchor="middle" x="406" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">remove_headers_genome</text> </g> <!-- 22->8 --> @@ -320,7 +320,7 @@ <!-- 23 --> <g id="node24" class="node"> <title>23</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M455.5,-828C455.5,-828 378.5,-828 378.5,-828 372.5,-828 366.5,-822 366.5,-816 366.5,-816 366.5,-804 366.5,-804 366.5,-798 372.5,-792 378.5,-792 378.5,-792 455.5,-792 455.5,-792 461.5,-792 467.5,-798 467.5,-804 467.5,-804 467.5,-816 467.5,-816 467.5,-822 461.5,-828 455.5,-828"/> +<path fill="none" stroke="#d87556" stroke-width="2" stroke-dasharray="5,2" d="M455.5,-828C455.5,-828 378.5,-828 378.5,-828 372.5,-828 366.5,-822 366.5,-816 366.5,-816 366.5,-804 366.5,-804 366.5,-798 372.5,-792 378.5,-792 378.5,-792 455.5,-792 455.5,-792 461.5,-792 467.5,-798 467.5,-804 467.5,-804 467.5,-816 467.5,-816 467.5,-822 461.5,-828 455.5,-828"/> <text text-anchor="middle" x="417" 
y="-807.5" font-family="sans" font-size="10.00" fill="#000000">nh_filter_genome</text> </g> <!-- 23->22 --> @@ -332,7 +332,7 @@ <!-- 24 --> <g id="node25" class="node"> <title>24</title> -<path fill="none" stroke="#afd856" stroke-width="2" d="M496.5,-900C496.5,-900 395.5,-900 395.5,-900 389.5,-900 383.5,-894 383.5,-888 383.5,-888 383.5,-876 383.5,-876 383.5,-870 389.5,-864 395.5,-864 395.5,-864 496.5,-864 496.5,-864 502.5,-864 508.5,-870 508.5,-876 508.5,-876 508.5,-888 508.5,-888 508.5,-894 502.5,-900 496.5,-900"/> +<path fill="none" stroke="#56d8a2" stroke-width="2" stroke-dasharray="5,2" d="M496.5,-900C496.5,-900 395.5,-900 395.5,-900 389.5,-900 383.5,-894 383.5,-888 383.5,-888 383.5,-876 383.5,-876 383.5,-870 389.5,-864 395.5,-864 395.5,-864 496.5,-864 496.5,-864 502.5,-864 508.5,-870 508.5,-876 508.5,-876 508.5,-888 508.5,-888 508.5,-894 502.5,-900 496.5,-900"/> <text text-anchor="middle" x="446" y="-879.5" font-family="sans" font-size="10.00" fill="#000000">merge_genome_maps</text> </g> <!-- 24->23 --> @@ -350,7 +350,7 @@ <!-- 26 --> <g id="node27" class="node"> <title>26</title> -<path fill="none" stroke="#56d85b" stroke-width="2" d="M505.5,-972C505.5,-972 386.5,-972 386.5,-972 380.5,-972 374.5,-966 374.5,-960 374.5,-960 374.5,-948 374.5,-948 374.5,-942 380.5,-936 386.5,-936 386.5,-936 505.5,-936 505.5,-936 511.5,-936 517.5,-942 517.5,-948 517.5,-948 517.5,-960 517.5,-960 517.5,-966 511.5,-972 505.5,-972"/> +<path fill="none" stroke="#70d856" stroke-width="2" stroke-dasharray="5,2" d="M505.5,-972C505.5,-972 386.5,-972 386.5,-972 380.5,-972 374.5,-966 374.5,-960 374.5,-960 374.5,-948 374.5,-948 374.5,-942 380.5,-936 386.5,-936 386.5,-936 505.5,-936 505.5,-936 511.5,-936 517.5,-942 517.5,-948 517.5,-948 517.5,-960 517.5,-960 517.5,-966 511.5,-972 505.5,-972"/> <text text-anchor="middle" x="446" y="-951.5" font-family="sans" font-size="10.00" fill="#000000">oligomap_genome_toSAM</text> </g> <!-- 26->24 --> @@ -368,7 +368,7 @@ <!-- 28 --> <g id="node29" 
class="node"> <title>28</title> -<path fill="none" stroke="#56d8d0" stroke-width="2" d="M550.5,-1044C550.5,-1044 443.5,-1044 443.5,-1044 437.5,-1044 431.5,-1038 431.5,-1032 431.5,-1032 431.5,-1020 431.5,-1020 431.5,-1014 437.5,-1008 443.5,-1008 443.5,-1008 550.5,-1008 550.5,-1008 556.5,-1008 562.5,-1014 562.5,-1020 562.5,-1020 562.5,-1032 562.5,-1032 562.5,-1038 556.5,-1044 550.5,-1044"/> +<path fill="none" stroke="#9fd856" stroke-width="2" stroke-dasharray="5,2" d="M550.5,-1044C550.5,-1044 443.5,-1044 443.5,-1044 437.5,-1044 431.5,-1038 431.5,-1032 431.5,-1032 431.5,-1020 431.5,-1020 431.5,-1014 437.5,-1008 443.5,-1008 443.5,-1008 550.5,-1008 550.5,-1008 556.5,-1008 562.5,-1014 562.5,-1020 562.5,-1020 562.5,-1032 562.5,-1032 562.5,-1038 556.5,-1044 550.5,-1044"/> <text text-anchor="middle" x="497" y="-1023.5" font-family="sans" font-size="10.00" fill="#000000">sort_genome_oligomap</text> </g> <!-- 27->28 --> diff --git a/images/workflow_dag_prepare.svg b/images/workflow_dag_prepare.svg index 76a83c7..9ef1678 100644 --- a/images/workflow_dag_prepare.svg +++ b/images/workflow_dag_prepare.svg @@ -12,13 +12,13 @@ <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#569ad8" stroke-width="2" stroke-dasharray="5,2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> +<path fill="none" stroke="#9fd856" stroke-width="2" stroke-dasharray="5,2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> <text text-anchor="middle" x="342.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> 
-<path fill="none" stroke="#566bd8" stroke-width="2" stroke-dasharray="5,2" d="M207,-108C207,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 207,-72 207,-72 213,-72 219,-78 219,-84 219,-84 219,-96 219,-96 219,-102 213,-108 207,-108"/> +<path fill="none" stroke="#88d856" stroke-width="2" stroke-dasharray="5,2" d="M207,-108C207,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 207,-72 207,-72 213,-72 219,-78 219,-84 219,-84 219,-96 219,-96 219,-102 213,-108 207,-108"/> <text text-anchor="middle" x="109.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> </g> <!-- 1->0 --> @@ -30,7 +30,7 @@ <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#d8b456" stroke-width="2" stroke-dasharray="5,2" d="M131,-180C131,-180 88,-180 88,-180 82,-180 76,-174 76,-168 76,-168 76,-156 76,-156 76,-150 82,-144 88,-144 88,-144 131,-144 131,-144 137,-144 143,-150 143,-156 143,-156 143,-168 143,-168 143,-174 137,-180 131,-180"/> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M131,-180C131,-180 88,-180 88,-180 82,-180 76,-174 76,-168 76,-168 76,-156 76,-156 76,-150 82,-144 88,-144 88,-144 131,-144 131,-144 137,-144 143,-150 143,-156 143,-156 143,-168 143,-168 143,-174 137,-180 131,-180"/> <text text-anchor="middle" x="109.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> </g> <!-- 2->1 --> @@ -42,7 +42,7 @@ <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#88d856" stroke-width="2" stroke-dasharray="5,2" d="M173.5,-252C173.5,-252 45.5,-252 45.5,-252 39.5,-252 33.5,-246 33.5,-240 33.5,-240 33.5,-228 33.5,-228 33.5,-222 39.5,-216 45.5,-216 45.5,-216 173.5,-216 173.5,-216 179.5,-216 185.5,-222 185.5,-228 185.5,-228 185.5,-240 185.5,-240 185.5,-246 179.5,-252 173.5,-252"/> +<path fill="none" stroke="#d89c56" stroke-width="2" 
stroke-dasharray="5,2" d="M173.5,-252C173.5,-252 45.5,-252 45.5,-252 39.5,-252 33.5,-246 33.5,-240 33.5,-240 33.5,-228 33.5,-228 33.5,-222 39.5,-216 45.5,-216 45.5,-216 173.5,-216 173.5,-216 179.5,-216 185.5,-222 185.5,-228 185.5,-228 185.5,-240 185.5,-240 185.5,-246 179.5,-252 173.5,-252"/> <text text-anchor="middle" x="109.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> </g> <!-- 3->2 --> @@ -54,7 +54,7 @@ <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#9fd856" stroke-width="2" stroke-dasharray="5,2" d="M541,-761C541,-761 402,-761 402,-761 396,-761 390,-755 390,-749 390,-749 390,-737 390,-737 390,-731 396,-725 402,-725 402,-725 541,-725 541,-725 547,-725 553,-731 553,-737 553,-737 553,-749 553,-749 553,-755 547,-761 541,-761"/> +<path fill="none" stroke="#59d856" stroke-width="2" stroke-dasharray="5,2" d="M541,-761C541,-761 402,-761 402,-761 396,-761 390,-755 390,-749 390,-749 390,-737 390,-737 390,-731 396,-725 402,-725 402,-725 541,-725 541,-725 547,-725 553,-731 553,-737 553,-737 553,-749 553,-749 553,-755 547,-761 541,-761"/> <text text-anchor="middle" x="471.5" y="-746" font-family="sans" font-size="10.00" fill="#000000">genome_process</text> <text text-anchor="middle" x="471.5" y="-735" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens/chrY</text> </g> @@ -67,7 +67,7 @@ <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M399,-617C399,-617 232,-617 232,-617 226,-617 220,-611 220,-605 220,-605 220,-593 220,-593 220,-587 226,-581 232,-581 232,-581 399,-581 399,-581 405,-581 411,-587 411,-593 411,-593 411,-605 411,-605 411,-611 405,-617 399,-617"/> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M399,-617C399,-617 232,-617 232,-617 226,-617 220,-611 220,-605 220,-605 220,-593 220,-593 220,-587 226,-581 232,-581 232,-581 399,-581 399,-581 
405,-581 411,-587 411,-593 411,-593 411,-605 411,-605 411,-611 405,-617 399,-617"/> <text text-anchor="middle" x="315.5" y="-596.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> </g> <!-- 4->6 --> @@ -79,7 +79,7 @@ <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#ced856" stroke-width="2" stroke-dasharray="5,2" d="M486,-545C486,-545 379,-545 379,-545 373,-545 367,-539 367,-533 367,-533 367,-521 367,-521 367,-515 373,-509 379,-509 379,-509 486,-509 486,-509 492,-509 498,-515 498,-521 498,-521 498,-533 498,-533 498,-539 492,-545 486,-545"/> +<path fill="none" stroke="#56b1d8" stroke-width="2" stroke-dasharray="5,2" d="M486,-545C486,-545 379,-545 379,-545 373,-545 367,-539 367,-533 367,-533 367,-521 367,-521 367,-515 373,-509 379,-509 379,-509 486,-509 486,-509 492,-509 498,-515 498,-521 498,-521 498,-533 498,-533 498,-539 492,-545 486,-545"/> <text text-anchor="middle" x="432.5" y="-524.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> </g> <!-- 4->9 --> @@ -91,7 +91,7 @@ <!-- 13 --> <g id="node14" class="node"> <title>13</title> -<path fill="none" stroke="#d85656" stroke-width="2" stroke-dasharray="5,2" d="M529.5,-689C529.5,-689 479.5,-689 479.5,-689 473.5,-689 467.5,-683 467.5,-677 467.5,-677 467.5,-665 467.5,-665 467.5,-659 473.5,-653 479.5,-653 479.5,-653 529.5,-653 529.5,-653 535.5,-653 541.5,-659 541.5,-665 541.5,-665 541.5,-677 541.5,-677 541.5,-683 535.5,-689 529.5,-689"/> +<path fill="none" stroke="#b6d856" stroke-width="2" stroke-dasharray="5,2" d="M529.5,-689C529.5,-689 479.5,-689 479.5,-689 473.5,-689 467.5,-683 467.5,-677 467.5,-677 467.5,-665 467.5,-665 467.5,-659 473.5,-653 479.5,-653 479.5,-653 529.5,-653 529.5,-653 535.5,-653 541.5,-659 541.5,-665 541.5,-665 541.5,-677 541.5,-677 541.5,-683 535.5,-689 529.5,-689"/> <text text-anchor="middle" x="504.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">mirna_anno</text> </g> <!-- 
4->13 --> @@ -103,7 +103,7 @@ <!-- 14 --> <g id="node15" class="node"> <title>14</title> -<path fill="none" stroke="#70d856" stroke-width="2" stroke-dasharray="5,2" d="M603,-689C603,-689 572,-689 572,-689 566,-689 560,-683 560,-677 560,-677 560,-665 560,-665 560,-659 566,-653 572,-653 572,-653 603,-653 603,-653 609,-653 615,-659 615,-665 615,-665 615,-677 615,-677 615,-683 609,-689 603,-689"/> +<path fill="none" stroke="#566bd8" stroke-width="2" stroke-dasharray="5,2" d="M603,-689C603,-689 572,-689 572,-689 566,-689 560,-683 560,-677 560,-677 560,-665 560,-665 560,-659 566,-653 572,-653 572,-653 603,-653 603,-653 609,-653 615,-659 615,-665 615,-665 615,-677 615,-677 615,-683 609,-689 603,-689"/> <text text-anchor="middle" x="587.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">dict_chr</text> </g> <!-- 4->14 --> @@ -115,7 +115,7 @@ <!-- 21 --> <g id="node22" class="node"> <title>21</title> -<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M858.5,-689C858.5,-689 772.5,-689 772.5,-689 766.5,-689 760.5,-683 760.5,-677 760.5,-677 760.5,-665 760.5,-665 760.5,-659 766.5,-653 772.5,-653 772.5,-653 858.5,-653 858.5,-653 864.5,-653 870.5,-659 870.5,-665 870.5,-665 870.5,-677 870.5,-677 870.5,-683 864.5,-689 858.5,-689"/> +<path fill="none" stroke="#569ad8" stroke-width="2" stroke-dasharray="5,2" d="M858.5,-689C858.5,-689 772.5,-689 772.5,-689 766.5,-689 760.5,-683 760.5,-677 760.5,-677 760.5,-665 760.5,-665 760.5,-659 766.5,-653 772.5,-653 772.5,-653 858.5,-653 858.5,-653 864.5,-653 870.5,-659 870.5,-665 870.5,-665 870.5,-677 870.5,-677 870.5,-683 864.5,-689 858.5,-689"/> <text text-anchor="middle" x="815.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">create_index_fasta</text> </g> <!-- 4->21 --> @@ -127,7 +127,7 @@ <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#56b1d8" stroke-width="2" stroke-dasharray="5,2" d="M283,-326.5C283,-326.5 144,-326.5 144,-326.5 138,-326.5 
132,-320.5 132,-314.5 132,-314.5 132,-302.5 132,-302.5 132,-296.5 138,-290.5 144,-290.5 144,-290.5 283,-290.5 283,-290.5 289,-290.5 295,-296.5 295,-302.5 295,-302.5 295,-314.5 295,-314.5 295,-320.5 289,-326.5 283,-326.5"/> +<path fill="none" stroke="#d8b456" stroke-width="2" stroke-dasharray="5,2" d="M283,-326.5C283,-326.5 144,-326.5 144,-326.5 138,-326.5 132,-320.5 132,-314.5 132,-314.5 132,-302.5 132,-302.5 132,-296.5 138,-290.5 144,-290.5 144,-290.5 283,-290.5 283,-290.5 289,-290.5 295,-296.5 295,-302.5 295,-302.5 295,-314.5 295,-314.5 295,-320.5 289,-326.5 283,-326.5"/> <text text-anchor="middle" x="213.5" y="-311.5" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> <text text-anchor="middle" x="213.5" y="-300.5" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens/chrY</text> </g> @@ -140,7 +140,7 @@ <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#d86e56" stroke-width="2" stroke-dasharray="5,2" d="M280.5,-252C280.5,-252 218.5,-252 218.5,-252 212.5,-252 206.5,-246 206.5,-240 206.5,-240 206.5,-228 206.5,-228 206.5,-222 212.5,-216 218.5,-216 218.5,-216 280.5,-216 280.5,-216 286.5,-216 292.5,-222 292.5,-228 292.5,-228 292.5,-240 292.5,-240 292.5,-246 286.5,-252 280.5,-252"/> +<path fill="none" stroke="#56c9d8" stroke-width="2" stroke-dasharray="5,2" d="M280.5,-252C280.5,-252 218.5,-252 218.5,-252 212.5,-252 206.5,-246 206.5,-240 206.5,-240 206.5,-228 206.5,-228 206.5,-222 212.5,-216 218.5,-216 218.5,-216 280.5,-216 280.5,-216 286.5,-216 292.5,-222 292.5,-228 292.5,-228 292.5,-240 292.5,-240 292.5,-246 286.5,-252 280.5,-252"/> <text text-anchor="middle" x="249.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> </g> <!-- 5->8 --> @@ -158,7 +158,7 @@ <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#56d8c9" stroke-width="2" stroke-dasharray="5,2" d="M283.5,-108C283.5,-108 249.5,-108 249.5,-108 243.5,-108 237.5,-102 
237.5,-96 237.5,-96 237.5,-84 237.5,-84 237.5,-78 243.5,-72 249.5,-72 249.5,-72 283.5,-72 283.5,-72 289.5,-72 295.5,-78 295.5,-84 295.5,-84 295.5,-96 295.5,-96 295.5,-102 289.5,-108 283.5,-108"/> +<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M283.5,-108C283.5,-108 249.5,-108 249.5,-108 243.5,-108 237.5,-102 237.5,-96 237.5,-96 237.5,-84 237.5,-84 237.5,-78 243.5,-72 249.5,-72 249.5,-72 283.5,-72 283.5,-72 289.5,-72 295.5,-78 295.5,-84 295.5,-84 295.5,-96 295.5,-96 295.5,-102 289.5,-108 283.5,-108"/> <text text-anchor="middle" x="266.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> </g> <!-- 7->0 --> @@ -182,7 +182,7 @@ <!-- 10 --> <g id="node11" class="node"> <title>10</title> -<path fill="none" stroke="#59d856" stroke-width="2" stroke-dasharray="5,2" d="M590,-473C590,-473 557,-473 557,-473 551,-473 545,-467 545,-461 545,-461 545,-449 545,-449 545,-443 551,-437 557,-437 557,-437 590,-437 590,-437 596,-437 602,-443 602,-449 602,-449 602,-461 602,-461 602,-467 596,-473 590,-473"/> +<path fill="none" stroke="#56d8c9" stroke-width="2" stroke-dasharray="5,2" d="M590,-473C590,-473 557,-473 557,-473 551,-473 545,-467 545,-461 545,-461 545,-449 545,-449 545,-443 551,-437 557,-437 557,-437 590,-437 590,-437 596,-437 602,-443 602,-449 602,-449 602,-461 602,-461 602,-467 596,-473 590,-473"/> <text text-anchor="middle" x="573.5" y="-452.5" font-family="sans" font-size="10.00" fill="#000000">gfftobed</text> </g> <!-- 10->0 --> @@ -194,7 +194,7 @@ <!-- 19 --> <g id="node20" class="node"> <title>19</title> -<path fill="none" stroke="#b6d856" stroke-width="2" stroke-dasharray="5,2" d="M864,-401C864,-401 781,-401 781,-401 775,-401 769,-395 769,-389 769,-389 769,-377 769,-377 769,-371 775,-365 781,-365 781,-365 864,-365 864,-365 870,-365 876,-371 876,-377 876,-377 876,-389 876,-389 876,-395 870,-401 864,-401"/> +<path fill="none" stroke="#ced856" stroke-width="2" stroke-dasharray="5,2" d="M864,-401C864,-401 781,-401 
781,-401 775,-401 769,-395 769,-389 769,-389 769,-377 769,-377 769,-371 775,-365 781,-365 781,-365 864,-365 864,-365 870,-365 876,-371 876,-377 876,-377 876,-389 876,-389 876,-395 870,-401 864,-401"/> <text text-anchor="middle" x="822.5" y="-380.5" font-family="sans" font-size="10.00" fill="#000000">filter_mature_mirs</text> </g> <!-- 10->19 --> @@ -206,7 +206,7 @@ <!-- 11 --> <g id="node12" class="node"> <title>11</title> -<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M612,-545C612,-545 535,-545 535,-545 529,-545 523,-539 523,-533 523,-533 523,-521 523,-521 523,-515 529,-509 535,-509 535,-509 612,-509 612,-509 618,-509 624,-515 624,-521 624,-521 624,-533 624,-533 624,-539 618,-545 612,-545"/> +<path fill="none" stroke="#d85656" stroke-width="2" stroke-dasharray="5,2" d="M612,-545C612,-545 535,-545 535,-545 529,-545 523,-539 523,-533 523,-533 523,-521 523,-521 523,-515 529,-509 535,-509 535,-509 612,-509 612,-509 618,-509 624,-515 624,-521 624,-521 624,-533 624,-533 624,-539 618,-545 612,-545"/> <text text-anchor="middle" x="573.5" y="-524.5" font-family="sans" font-size="10.00" fill="#000000">filter_mir_1_anno</text> </g> <!-- 11->10 --> @@ -218,7 +218,7 @@ <!-- 12 --> <g id="node13" class="node"> <title>12</title> -<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M610,-617C610,-617 537,-617 537,-617 531,-617 525,-611 525,-605 525,-605 525,-593 525,-593 525,-587 531,-581 537,-581 537,-581 610,-581 610,-581 616,-581 622,-587 622,-593 622,-593 622,-605 622,-605 622,-611 616,-617 610,-617"/> +<path fill="none" stroke="#5682d8" stroke-width="2" stroke-dasharray="5,2" d="M610,-617C610,-617 537,-617 537,-617 531,-617 525,-611 525,-605 525,-605 525,-593 525,-593 525,-587 531,-581 537,-581 537,-581 610,-581 610,-581 616,-581 622,-587 622,-593 622,-593 622,-605 622,-605 622,-611 616,-617 610,-617"/> <text text-anchor="middle" x="573.5" y="-596.5" font-family="sans" font-size="10.00" 
fill="#000000">map_chr_names</text> </g> <!-- 12->11 --> @@ -242,7 +242,7 @@ <!-- 15 --> <g id="node16" class="node"> <title>15</title> -<path fill="none" stroke="#56c9d8" stroke-width="2" stroke-dasharray="5,2" d="M747.5,-108C747.5,-108 685.5,-108 685.5,-108 679.5,-108 673.5,-102 673.5,-96 673.5,-96 673.5,-84 673.5,-84 673.5,-78 679.5,-72 685.5,-72 685.5,-72 747.5,-72 747.5,-72 753.5,-72 759.5,-78 759.5,-84 759.5,-84 759.5,-96 759.5,-96 759.5,-102 753.5,-108 747.5,-108"/> +<path fill="none" stroke="#56d86b" stroke-width="2" stroke-dasharray="5,2" d="M747.5,-108C747.5,-108 685.5,-108 685.5,-108 679.5,-108 673.5,-102 673.5,-96 673.5,-96 673.5,-84 673.5,-84 673.5,-78 679.5,-72 685.5,-72 685.5,-72 747.5,-72 747.5,-72 753.5,-72 759.5,-78 759.5,-84 759.5,-84 759.5,-96 759.5,-96 759.5,-102 753.5,-108 747.5,-108"/> <text text-anchor="middle" x="716.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_final</text> </g> <!-- 15->0 --> @@ -254,7 +254,7 @@ <!-- 16 --> <g id="node17" class="node"> <title>16</title> -<path fill="none" stroke="#5682d8" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-180C939.5,-180 865.5,-180 865.5,-180 859.5,-180 853.5,-174 853.5,-168 853.5,-168 853.5,-156 853.5,-156 853.5,-150 859.5,-144 865.5,-144 865.5,-144 939.5,-144 939.5,-144 945.5,-144 951.5,-150 951.5,-156 951.5,-156 951.5,-168 951.5,-168 951.5,-174 945.5,-180 939.5,-180"/> +<path fill="none" stroke="#d86e56" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-180C939.5,-180 865.5,-180 865.5,-180 859.5,-180 853.5,-174 853.5,-168 853.5,-168 853.5,-156 853.5,-156 853.5,-150 859.5,-144 865.5,-144 865.5,-144 939.5,-144 939.5,-144 945.5,-144 951.5,-150 951.5,-156 951.5,-156 951.5,-168 951.5,-168 951.5,-174 945.5,-180 939.5,-180"/> <text text-anchor="middle" x="902.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_concat</text> </g> <!-- 16->15 --> @@ -266,7 +266,7 @@ <!-- 17 --> <g id="node18" class="node"> <title>17</title> -<path fill="none" 
stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M1245,-252C1245,-252 1166,-252 1166,-252 1160,-252 1154,-246 1154,-240 1154,-240 1154,-228 1154,-228 1154,-222 1160,-216 1166,-216 1166,-216 1245,-216 1245,-216 1251,-216 1257,-222 1257,-228 1257,-228 1257,-240 1257,-240 1257,-246 1251,-252 1245,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M1245,-252C1245,-252 1166,-252 1166,-252 1160,-252 1154,-246 1154,-240 1154,-240 1154,-228 1154,-228 1154,-222 1160,-216 1166,-216 1166,-216 1245,-216 1245,-216 1251,-216 1257,-222 1257,-228 1257,-228 1257,-240 1257,-240 1257,-246 1251,-252 1245,-252"/> <text text-anchor="middle" x="1205.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 17->16 --> @@ -278,7 +278,7 @@ <!-- 18 --> <g id="node19" class="node"> <title>18</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M1097.5,-329C1097.5,-329 1059.5,-329 1059.5,-329 1053.5,-329 1047.5,-323 1047.5,-317 1047.5,-317 1047.5,-300 1047.5,-300 1047.5,-294 1053.5,-288 1059.5,-288 1059.5,-288 1097.5,-288 1097.5,-288 1103.5,-288 1109.5,-294 1109.5,-300 1109.5,-300 1109.5,-317 1109.5,-317 1109.5,-323 1103.5,-329 1097.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M1097.5,-329C1097.5,-329 1059.5,-329 1059.5,-329 1053.5,-329 1047.5,-323 1047.5,-317 1047.5,-317 1047.5,-300 1047.5,-300 1047.5,-294 1053.5,-288 1059.5,-288 1059.5,-288 1097.5,-288 1097.5,-288 1103.5,-288 1109.5,-294 1109.5,-300 1109.5,-300 1109.5,-317 1109.5,-317 1109.5,-323 1103.5,-329 1097.5,-329"/> <text text-anchor="middle" x="1078.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="1078.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> <text text-anchor="middle" x="1078.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> @@ -298,7 
+298,7 @@ <!-- 23 --> <g id="node24" class="node"> <title>23</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M1177.5,-329C1177.5,-329 1139.5,-329 1139.5,-329 1133.5,-329 1127.5,-323 1127.5,-317 1127.5,-317 1127.5,-300 1127.5,-300 1127.5,-294 1133.5,-288 1139.5,-288 1139.5,-288 1177.5,-288 1177.5,-288 1183.5,-288 1189.5,-294 1189.5,-300 1189.5,-300 1189.5,-317 1189.5,-317 1189.5,-323 1183.5,-329 1177.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M1177.5,-329C1177.5,-329 1139.5,-329 1139.5,-329 1133.5,-329 1127.5,-323 1127.5,-317 1127.5,-317 1127.5,-300 1127.5,-300 1127.5,-294 1133.5,-288 1139.5,-288 1139.5,-288 1177.5,-288 1177.5,-288 1183.5,-288 1189.5,-294 1189.5,-300 1189.5,-300 1189.5,-317 1189.5,-317 1189.5,-323 1183.5,-329 1177.5,-329"/> <text text-anchor="middle" x="1158.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="1158.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> <text text-anchor="middle" x="1158.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> @@ -312,7 +312,7 @@ <!-- 25 --> <g id="node26" class="node"> <title>25</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M1257.5,-329C1257.5,-329 1219.5,-329 1219.5,-329 1213.5,-329 1207.5,-323 1207.5,-317 1207.5,-317 1207.5,-300 1207.5,-300 1207.5,-294 1213.5,-288 1219.5,-288 1219.5,-288 1257.5,-288 1257.5,-288 1263.5,-288 1269.5,-294 1269.5,-300 1269.5,-300 1269.5,-317 1269.5,-317 1269.5,-323 1263.5,-329 1257.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M1257.5,-329C1257.5,-329 1219.5,-329 1219.5,-329 1213.5,-329 1207.5,-323 1207.5,-317 1207.5,-317 1207.5,-300 1207.5,-300 1207.5,-294 1213.5,-288 1219.5,-288 1219.5,-288 1257.5,-288 1257.5,-288 1263.5,-288 1269.5,-294 1269.5,-300 1269.5,-300 1269.5,-317 1269.5,-317 1269.5,-323 
1263.5,-329 1257.5,-329"/> <text text-anchor="middle" x="1238.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="1238.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> <text text-anchor="middle" x="1238.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> @@ -326,7 +326,7 @@ <!-- 27 --> <g id="node28" class="node"> <title>27</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M625.5,-329C625.5,-329 587.5,-329 587.5,-329 581.5,-329 575.5,-323 575.5,-317 575.5,-317 575.5,-300 575.5,-300 575.5,-294 581.5,-288 587.5,-288 587.5,-288 625.5,-288 625.5,-288 631.5,-288 637.5,-294 637.5,-300 637.5,-300 637.5,-317 637.5,-317 637.5,-323 631.5,-329 625.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M625.5,-329C625.5,-329 587.5,-329 587.5,-329 581.5,-329 575.5,-323 575.5,-317 575.5,-317 575.5,-300 575.5,-300 575.5,-294 581.5,-288 587.5,-288 587.5,-288 625.5,-288 625.5,-288 631.5,-288 637.5,-294 637.5,-300 637.5,-300 637.5,-317 637.5,-317 637.5,-323 631.5,-329 625.5,-329"/> <text text-anchor="middle" x="606.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="606.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> <text text-anchor="middle" x="606.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> @@ -340,7 +340,7 @@ <!-- 29 --> <g id="node30" class="node"> <title>29</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M703.5,-329C703.5,-329 667.5,-329 667.5,-329 661.5,-329 655.5,-323 655.5,-317 655.5,-317 655.5,-300 655.5,-300 655.5,-294 661.5,-288 667.5,-288 667.5,-288 703.5,-288 703.5,-288 709.5,-288 715.5,-294 715.5,-300 715.5,-300 715.5,-317 715.5,-317 715.5,-323 709.5,-329 703.5,-329"/> +<path fill="none" stroke="#d88556" 
stroke-width="2" stroke-dasharray="5,2" d="M703.5,-329C703.5,-329 667.5,-329 667.5,-329 661.5,-329 655.5,-323 655.5,-317 655.5,-317 655.5,-300 655.5,-300 655.5,-294 661.5,-288 667.5,-288 667.5,-288 703.5,-288 703.5,-288 709.5,-288 715.5,-294 715.5,-300 715.5,-300 715.5,-317 715.5,-317 715.5,-323 709.5,-329 703.5,-329"/> <text text-anchor="middle" x="685.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="685.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> <text text-anchor="middle" x="685.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> @@ -354,7 +354,7 @@ <!-- 31 --> <g id="node32" class="node"> <title>31</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M781.5,-329C781.5,-329 745.5,-329 745.5,-329 739.5,-329 733.5,-323 733.5,-317 733.5,-317 733.5,-300 733.5,-300 733.5,-294 739.5,-288 745.5,-288 745.5,-288 781.5,-288 781.5,-288 787.5,-288 793.5,-294 793.5,-300 793.5,-300 793.5,-317 793.5,-317 793.5,-323 787.5,-329 781.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M781.5,-329C781.5,-329 745.5,-329 745.5,-329 739.5,-329 733.5,-323 733.5,-317 733.5,-317 733.5,-300 733.5,-300 733.5,-294 739.5,-288 745.5,-288 745.5,-288 781.5,-288 781.5,-288 787.5,-288 793.5,-294 793.5,-300 793.5,-300 793.5,-317 793.5,-317 793.5,-323 787.5,-329 781.5,-329"/> <text text-anchor="middle" x="763.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="763.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> <text text-anchor="middle" x="763.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> @@ -368,7 +368,7 @@ <!-- 33 --> <g id="node34" class="node"> <title>33</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M861.5,-329C861.5,-329 823.5,-329 
823.5,-329 817.5,-329 811.5,-323 811.5,-317 811.5,-317 811.5,-300 811.5,-300 811.5,-294 817.5,-288 823.5,-288 823.5,-288 861.5,-288 861.5,-288 867.5,-288 873.5,-294 873.5,-300 873.5,-300 873.5,-317 873.5,-317 873.5,-323 867.5,-329 861.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M861.5,-329C861.5,-329 823.5,-329 823.5,-329 817.5,-329 811.5,-323 811.5,-317 811.5,-317 811.5,-300 811.5,-300 811.5,-294 817.5,-288 823.5,-288 823.5,-288 861.5,-288 861.5,-288 867.5,-288 873.5,-294 873.5,-300 873.5,-300 873.5,-317 873.5,-317 873.5,-323 867.5,-329 861.5,-329"/> <text text-anchor="middle" x="842.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="842.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> <text text-anchor="middle" x="842.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> @@ -382,7 +382,7 @@ <!-- 35 --> <g id="node36" class="node"> <title>35</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-329C939.5,-329 903.5,-329 903.5,-329 897.5,-329 891.5,-323 891.5,-317 891.5,-317 891.5,-300 891.5,-300 891.5,-294 897.5,-288 903.5,-288 903.5,-288 939.5,-288 939.5,-288 945.5,-288 951.5,-294 951.5,-300 951.5,-300 951.5,-317 951.5,-317 951.5,-323 945.5,-329 939.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-329C939.5,-329 903.5,-329 903.5,-329 897.5,-329 891.5,-323 891.5,-317 891.5,-317 891.5,-300 891.5,-300 891.5,-294 897.5,-288 903.5,-288 903.5,-288 939.5,-288 939.5,-288 945.5,-288 951.5,-294 951.5,-300 951.5,-300 951.5,-317 951.5,-317 951.5,-323 945.5,-329 939.5,-329"/> <text text-anchor="middle" x="921.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="921.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> <text text-anchor="middle" 
x="921.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> @@ -396,7 +396,7 @@ <!-- 37 --> <g id="node38" class="node"> <title>37</title> -<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M1017.5,-329C1017.5,-329 981.5,-329 981.5,-329 975.5,-329 969.5,-323 969.5,-317 969.5,-317 969.5,-300 969.5,-300 969.5,-294 975.5,-288 981.5,-288 981.5,-288 1017.5,-288 1017.5,-288 1023.5,-288 1029.5,-294 1029.5,-300 1029.5,-300 1029.5,-317 1029.5,-317 1029.5,-323 1023.5,-329 1017.5,-329"/> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M1017.5,-329C1017.5,-329 981.5,-329 981.5,-329 975.5,-329 969.5,-323 969.5,-317 969.5,-317 969.5,-300 969.5,-300 969.5,-294 975.5,-288 981.5,-288 981.5,-288 1017.5,-288 1017.5,-288 1023.5,-288 1029.5,-294 1029.5,-300 1029.5,-300 1029.5,-317 1029.5,-317 1029.5,-323 1023.5,-329 1017.5,-329"/> <text text-anchor="middle" x="999.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> <text text-anchor="middle" x="999.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> <text text-anchor="middle" x="999.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> @@ -410,7 +410,7 @@ <!-- 20 --> <g id="node21" class="node"> <title>20</title> -<path fill="none" stroke="#56d86b" stroke-width="2" stroke-dasharray="5,2" d="M974.5,-401C974.5,-401 906.5,-401 906.5,-401 900.5,-401 894.5,-395 894.5,-389 894.5,-389 894.5,-377 894.5,-377 894.5,-371 900.5,-365 906.5,-365 906.5,-365 974.5,-365 974.5,-365 980.5,-365 986.5,-371 986.5,-377 986.5,-377 986.5,-389 986.5,-389 986.5,-395 980.5,-401 974.5,-401"/> +<path fill="none" stroke="#70d856" stroke-width="2" stroke-dasharray="5,2" d="M974.5,-401C974.5,-401 906.5,-401 906.5,-401 900.5,-401 894.5,-395 894.5,-389 894.5,-389 894.5,-377 894.5,-377 894.5,-371 900.5,-365 906.5,-365 906.5,-365 974.5,-365 974.5,-365 980.5,-365 986.5,-371 986.5,-377 986.5,-377 986.5,-389 
986.5,-389 986.5,-395 980.5,-401 974.5,-401"/> <text text-anchor="middle" x="940.5" y="-380.5" font-family="sans" font-size="10.00" fill="#000000">extract_chr_len</text> </g> <!-- 20->18 --> @@ -476,7 +476,7 @@ <!-- 22 --> <g id="node23" class="node"> <title>22</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M1366,-252C1366,-252 1287,-252 1287,-252 1281,-252 1275,-246 1275,-240 1275,-240 1275,-228 1275,-228 1275,-222 1281,-216 1287,-216 1287,-216 1366,-216 1366,-216 1372,-216 1378,-222 1378,-228 1378,-228 1378,-240 1378,-240 1378,-246 1372,-252 1366,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M1366,-252C1366,-252 1287,-252 1287,-252 1281,-252 1275,-246 1275,-240 1275,-240 1275,-228 1275,-228 1275,-222 1281,-216 1287,-216 1287,-216 1366,-216 1366,-216 1372,-216 1378,-222 1378,-228 1378,-228 1378,-240 1378,-240 1378,-246 1372,-252 1366,-252"/> <text text-anchor="middle" x="1326.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 22->16 --> @@ -494,7 +494,7 @@ <!-- 24 --> <g id="node25" class="node"> <title>24</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M1487,-252C1487,-252 1408,-252 1408,-252 1402,-252 1396,-246 1396,-240 1396,-240 1396,-228 1396,-228 1396,-222 1402,-216 1408,-216 1408,-216 1487,-216 1487,-216 1493,-216 1499,-222 1499,-228 1499,-228 1499,-240 1499,-240 1499,-246 1493,-252 1487,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M1487,-252C1487,-252 1408,-252 1408,-252 1402,-252 1396,-246 1396,-240 1396,-240 1396,-228 1396,-228 1396,-222 1402,-216 1408,-216 1408,-216 1487,-216 1487,-216 1493,-216 1499,-222 1499,-228 1499,-228 1499,-240 1499,-240 1499,-246 1493,-252 1487,-252"/> <text text-anchor="middle" x="1447.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 24->16 --> @@ -512,7 +512,7 @@ <!-- 26 
--> <g id="node27" class="node"> <title>26</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M519,-252C519,-252 440,-252 440,-252 434,-252 428,-246 428,-240 428,-240 428,-228 428,-228 428,-222 434,-216 440,-216 440,-216 519,-216 519,-216 525,-216 531,-222 531,-228 531,-228 531,-240 531,-240 531,-246 525,-252 519,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M519,-252C519,-252 440,-252 440,-252 434,-252 428,-246 428,-240 428,-240 428,-228 428,-228 428,-222 434,-216 440,-216 440,-216 519,-216 519,-216 525,-216 531,-222 531,-228 531,-228 531,-240 531,-240 531,-246 525,-252 519,-252"/> <text text-anchor="middle" x="479.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 26->16 --> @@ -530,7 +530,7 @@ <!-- 28 --> <g id="node29" class="node"> <title>28</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M640,-252C640,-252 561,-252 561,-252 555,-252 549,-246 549,-240 549,-240 549,-228 549,-228 549,-222 555,-216 561,-216 561,-216 640,-216 640,-216 646,-216 652,-222 652,-228 652,-228 652,-240 652,-240 652,-246 646,-252 640,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M640,-252C640,-252 561,-252 561,-252 555,-252 549,-246 549,-240 549,-240 549,-228 549,-228 549,-222 555,-216 561,-216 561,-216 640,-216 640,-216 646,-216 652,-222 652,-228 652,-228 652,-240 652,-240 652,-246 646,-252 640,-252"/> <text text-anchor="middle" x="600.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 28->16 --> @@ -548,7 +548,7 @@ <!-- 30 --> <g id="node31" class="node"> <title>30</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M761,-252C761,-252 682,-252 682,-252 676,-252 670,-246 670,-240 670,-240 670,-228 670,-228 670,-222 676,-216 682,-216 682,-216 761,-216 761,-216 767,-216 773,-222 773,-228 773,-228 773,-240 
773,-240 773,-246 767,-252 761,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M761,-252C761,-252 682,-252 682,-252 676,-252 670,-246 670,-240 670,-240 670,-228 670,-228 670,-222 676,-216 682,-216 682,-216 761,-216 761,-216 767,-216 773,-222 773,-228 773,-228 773,-240 773,-240 773,-246 767,-252 761,-252"/> <text text-anchor="middle" x="721.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 30->16 --> @@ -566,7 +566,7 @@ <!-- 32 --> <g id="node33" class="node"> <title>32</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M882,-252C882,-252 803,-252 803,-252 797,-252 791,-246 791,-240 791,-240 791,-228 791,-228 791,-222 797,-216 803,-216 803,-216 882,-216 882,-216 888,-216 894,-222 894,-228 894,-228 894,-240 894,-240 894,-246 888,-252 882,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M882,-252C882,-252 803,-252 803,-252 797,-252 791,-246 791,-240 791,-240 791,-228 791,-228 791,-222 797,-216 803,-216 803,-216 882,-216 882,-216 888,-216 894,-222 894,-228 894,-228 894,-240 894,-240 894,-246 888,-252 882,-252"/> <text text-anchor="middle" x="842.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 32->16 --> @@ -584,7 +584,7 @@ <!-- 34 --> <g id="node35" class="node"> <title>34</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M1003,-252C1003,-252 924,-252 924,-252 918,-252 912,-246 912,-240 912,-240 912,-228 912,-228 912,-222 918,-216 924,-216 924,-216 1003,-216 1003,-216 1009,-216 1015,-222 1015,-228 1015,-228 1015,-240 1015,-240 1015,-246 1009,-252 1003,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M1003,-252C1003,-252 924,-252 924,-252 918,-252 912,-246 912,-240 912,-240 912,-228 912,-228 912,-222 918,-216 924,-216 924,-216 1003,-216 1003,-216 1009,-216 1015,-222 1015,-228 1015,-228 
1015,-240 1015,-240 1015,-246 1009,-252 1003,-252"/> <text text-anchor="middle" x="963.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 34->16 --> @@ -602,7 +602,7 @@ <!-- 36 --> <g id="node37" class="node"> <title>36</title> -<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M1124,-252C1124,-252 1045,-252 1045,-252 1039,-252 1033,-246 1033,-240 1033,-240 1033,-228 1033,-228 1033,-222 1039,-216 1045,-216 1045,-216 1124,-216 1124,-216 1130,-216 1136,-222 1136,-228 1136,-228 1136,-240 1136,-240 1136,-246 1130,-252 1124,-252"/> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M1124,-252C1124,-252 1045,-252 1045,-252 1039,-252 1033,-246 1033,-240 1033,-240 1033,-228 1033,-228 1033,-222 1039,-216 1045,-216 1045,-216 1124,-216 1124,-216 1130,-216 1136,-222 1136,-228 1136,-228 1136,-240 1136,-240 1136,-246 1130,-252 1124,-252"/> <text text-anchor="middle" x="1084.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> </g> <!-- 36->16 --> diff --git a/images/workflow_dag_quantify.svg b/images/workflow_dag_quantify.svg new file mode 100644 index 0000000..841bac1 --- /dev/null +++ b/images/workflow_dag_quantify.svg @@ -0,0 +1,228 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Generated by graphviz version 2.40.1 (20161225.0304) + --> +<!-- Title: snakemake_dag Pages: 1 --> +<svg width="410pt" height="404pt" + viewBox="0.00 0.00 410.00 404.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 400)"> +<title>snakemake_dag</title> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-400 406,-400 406,4 -4,4"/> +<!-- 0 --> +<g id="node1" class="node"> +<title>0</title> +<path fill="none" stroke="#d85656" 
stroke-width="2" stroke-dasharray="5,2" d="M211,-36C211,-36 181,-36 181,-36 175,-36 169,-30 169,-24 169,-24 169,-12 169,-12 169,-6 175,0 181,0 181,0 211,0 211,0 217,0 223,-6 223,-12 223,-12 223,-24 223,-24 223,-30 217,-36 211,-36"/> +<text text-anchor="middle" x="196" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> +</g> +<!-- 1 --> +<g id="node2" class="node"> +<title>1</title> +<path fill="none" stroke="#56d87b" stroke-width="2" stroke-dasharray="5,2" d="M81.5,-108C81.5,-108 22.5,-108 22.5,-108 16.5,-108 10.5,-102 10.5,-96 10.5,-96 10.5,-84 10.5,-84 10.5,-78 16.5,-72 22.5,-72 22.5,-72 81.5,-72 81.5,-72 87.5,-72 93.5,-78 93.5,-84 93.5,-84 93.5,-96 93.5,-96 93.5,-102 87.5,-108 81.5,-108"/> +<text text-anchor="middle" x="52" y="-93" font-family="sans" font-size="10.00" fill="#000000">merge_tables</text> +<text text-anchor="middle" x="52" y="-82" font-family="sans" font-size="10.00" fill="#000000">mir: miRNA</text> +</g> +<!-- 1->0 --> +<g id="edge1" class="edge"> +<title>1->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M88.3373,-71.8314C110.2083,-60.8959 137.9528,-47.0236 159.7992,-36.1004"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="161.3876,-39.2194 168.7666,-31.6167 158.257,-32.9584 161.3876,-39.2194"/> +</g> +<!-- 2 --> +<g id="node3" class="node"> +<title>2</title> +<path fill="none" stroke="#56a2d8" stroke-width="2" stroke-dasharray="5,2" d="M66,-180C66,-180 12,-180 12,-180 6,-180 0,-174 0,-168 0,-168 0,-156 0,-156 0,-150 6,-144 12,-144 12,-144 66,-144 66,-144 72,-144 78,-150 78,-156 78,-156 78,-168 78,-168 78,-174 72,-180 66,-180"/> +<text text-anchor="middle" x="39" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna</text> +</g> +<!-- 2->1 --> +<g id="edge4" class="edge"> +<title>2->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M42.2804,-143.8314C43.6708,-136.131 45.3241,-126.9743 46.8692,-118.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" 
stroke-width="2" points="50.3428,-118.8761 48.6754,-108.4133 43.4542,-117.6322 50.3428,-118.8761"/> +</g> +<!-- 11 --> +<g id="node12" class="node"> +<title>11</title> +<path fill="none" stroke="#56d87b" stroke-width="2" stroke-dasharray="5,2" d="M268,-108C268,-108 124,-108 124,-108 118,-108 112,-102 112,-96 112,-96 112,-84 112,-84 112,-78 118,-72 124,-72 124,-72 268,-72 268,-72 274,-72 280,-78 280,-84 280,-84 280,-96 280,-96 280,-102 274,-108 268,-108"/> +<text text-anchor="middle" x="196" y="-93" font-family="sans" font-size="10.00" fill="#000000">merge_tables</text> +<text text-anchor="middle" x="196" y="-82" font-family="sans" font-size="10.00" fill="#000000">mir: miRNA_primary_transcript</text> +</g> +<!-- 2->11 --> +<g id="edge15" class="edge"> +<title>2->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M78.2127,-144.0171C99.1933,-134.3954 125.2185,-122.4603 147.4656,-112.2578"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="149.1123,-115.3532 156.743,-108.0032 146.1943,-108.9904 149.1123,-115.3532"/> +</g> +<!-- 12 --> +<g id="node13" class="node"> +<title>12</title> +<path fill="none" stroke="#56d87b" stroke-width="2" stroke-dasharray="5,2" d="M369.5,-108C369.5,-108 310.5,-108 310.5,-108 304.5,-108 298.5,-102 298.5,-96 298.5,-96 298.5,-84 298.5,-84 298.5,-78 304.5,-72 310.5,-72 310.5,-72 369.5,-72 369.5,-72 375.5,-72 381.5,-78 381.5,-84 381.5,-84 381.5,-96 381.5,-96 381.5,-102 375.5,-108 369.5,-108"/> +<text text-anchor="middle" x="340" y="-93" font-family="sans" font-size="10.00" fill="#000000">merge_tables</text> +<text text-anchor="middle" x="340" y="-82" font-family="sans" font-size="10.00" fill="#000000">mir: isomirs</text> +</g> +<!-- 2->12 --> +<g id="edge19" class="edge"> +<title>2->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M78.101,-146.6731C81.0918,-145.7051 84.0817,-144.8 87,-144 171.9414,-120.714 198.9806,-130.5171 288.662,-107.8291"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" 
stroke-width="2" points="289.624,-111.1955 298.424,-105.2954 287.8654,-104.42 289.624,-111.1955"/> +</g> +<!-- 3 --> +<g id="node4" class="node"> +<title>3</title> +<path fill="none" stroke="#d8a456" stroke-width="2" stroke-dasharray="5,2" d="M148.5,-252C148.5,-252 79.5,-252 79.5,-252 73.5,-252 67.5,-246 67.5,-240 67.5,-240 67.5,-228 67.5,-228 67.5,-222 73.5,-216 79.5,-216 79.5,-216 148.5,-216 148.5,-216 154.5,-216 160.5,-222 160.5,-228 160.5,-228 160.5,-240 160.5,-240 160.5,-246 154.5,-252 148.5,-252"/> +<text text-anchor="middle" x="114" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">intersect_mirna</text> +</g> +<!-- 3->2 --> +<g id="edge8" class="edge"> +<title>3->2</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M95.0743,-215.8314C86.0143,-207.1337 75.0191,-196.5783 65.1735,-187.1265"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="67.5574,-184.5633 57.9196,-180.1628 62.7096,-189.613 67.5574,-184.5633"/> +</g> +<!-- 6 --> +<g id="node7" class="node"> +<title>6</title> +<path fill="none" stroke="#c6d856" stroke-width="2" stroke-dasharray="5,2" d="M180,-180C180,-180 108,-180 108,-180 102,-180 96,-174 96,-168 96,-168 96,-156 96,-156 96,-150 102,-144 108,-144 108,-144 180,-144 180,-144 186,-144 192,-150 192,-156 192,-156 192,-168 192,-168 192,-174 186,-180 180,-180"/> +<text text-anchor="middle" x="144" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna_pri</text> +</g> +<!-- 3->6 --> +<g id="edge11" class="edge"> +<title>3->6</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M121.5703,-215.8314C124.8493,-207.9617 128.762,-198.5712 132.3945,-189.8533"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="135.7124,-190.9902 136.3278,-180.4133 129.2508,-188.2979 135.7124,-190.9902"/> +</g> +<!-- 4 --> +<g id="node5" class="node"> +<title>4</title> +<path fill="none" stroke="#56d8c9" stroke-width="2" stroke-dasharray="5,2" d="M149.5,-324C149.5,-324 78.5,-324 
78.5,-324 72.5,-324 66.5,-318 66.5,-312 66.5,-312 66.5,-300 66.5,-300 66.5,-294 72.5,-288 78.5,-288 78.5,-288 149.5,-288 149.5,-288 155.5,-288 161.5,-294 161.5,-300 161.5,-300 161.5,-312 161.5,-312 161.5,-318 155.5,-324 149.5,-324"/> +<text text-anchor="middle" x="114" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">sort_alignments</text> +</g> +<!-- 4->3 --> +<g id="edge9" class="edge"> +<title>4->3</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M114,-287.8314C114,-280.131 114,-270.9743 114,-262.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="117.5001,-262.4132 114,-252.4133 110.5001,-262.4133 117.5001,-262.4132"/> +</g> +<!-- 5 --> +<g id="node6" class="node"> +<title>5</title> +<path fill="none" stroke="#78d856" stroke-width="2" stroke-dasharray="5,2" d="M149.5,-396C149.5,-396 78.5,-396 78.5,-396 72.5,-396 66.5,-390 66.5,-384 66.5,-384 66.5,-372 66.5,-372 66.5,-366 72.5,-360 78.5,-360 78.5,-360 149.5,-360 149.5,-360 155.5,-360 161.5,-366 161.5,-372 161.5,-372 161.5,-384 161.5,-384 161.5,-390 155.5,-396 149.5,-396"/> +<text text-anchor="middle" x="114" y="-381" font-family="sans" font-size="10.00" fill="#000000">bamtobed</text> +<text text-anchor="middle" x="114" y="-370" font-family="sans" font-size="10.00" fill="#000000">sample: test_lib</text> +</g> +<!-- 5->4 --> +<g id="edge10" class="edge"> +<title>5->4</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M114,-359.8314C114,-352.131 114,-342.9743 114,-334.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="117.5001,-334.4132 114,-324.4133 110.5001,-334.4133 117.5001,-334.4132"/> +</g> +<!-- 6->1 --> +<g id="edge5" class="edge"> +<title>6->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M120.7845,-143.8314C109.453,-134.9632 95.6536,-124.1637 83.3973,-114.5718"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="85.2402,-111.5697 75.2081,-108.1628 80.926,-117.0822 
85.2402,-111.5697"/> +</g> +<!-- 6->11 --> +<g id="edge16" class="edge"> +<title>6->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M157.1218,-143.8314C163.0499,-135.6232 170.1729,-125.7606 176.6933,-116.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="179.684,-118.5693 182.7015,-108.4133 174.0092,-114.4708 179.684,-118.5693"/> +</g> +<!-- 6->12 --> +<g id="edge20" class="edge"> +<title>6->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M192.198,-144.2946C221.5518,-133.5116 259.0246,-119.7461 288.8932,-108.7739"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="290.1239,-112.0506 298.3037,-105.317 287.7101,-105.4799 290.1239,-112.0506"/> +</g> +<!-- 7 --> +<g id="node8" class="node"> +<title>7</title> +<path fill="none" stroke="#56a2d8" stroke-width="2" stroke-dasharray="5,2" d="M276,-180C276,-180 222,-180 222,-180 216,-180 210,-174 210,-168 210,-168 210,-156 210,-156 210,-150 216,-144 222,-144 222,-144 276,-144 276,-144 282,-144 288,-150 288,-156 288,-156 288,-168 288,-168 288,-174 282,-180 276,-180"/> +<text text-anchor="middle" x="249" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna</text> +</g> +<!-- 7->1 --> +<g id="edge6" class="edge"> +<title>7->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M209.9807,-147.3266C206.9434,-146.197 203.9227,-145.0776 201,-144 168.5287,-132.028 131.996,-118.7844 103.2318,-108.409"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="104.3862,-105.1047 93.7918,-105.0063 102.0125,-111.69 104.3862,-105.1047"/> +</g> +<!-- 7->11 --> +<g id="edge17" class="edge"> +<title>7->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M235.6259,-143.8314C229.5838,-135.6232 222.3237,-125.7606 215.6779,-116.7323"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="218.3011,-114.3918 209.5542,-108.4133 212.6637,-118.5415 218.3011,-114.3918"/> +</g> +<!-- 7->12 --> +<g 
id="edge21" class="edge"> +<title>7->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M271.9631,-143.8314C283.1715,-134.9632 296.8209,-124.1637 308.9439,-114.5718"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="311.3737,-117.1125 317.0442,-108.1628 307.0303,-111.6229 311.3737,-117.1125"/> +</g> +<!-- 8 --> +<g id="node9" class="node"> +<title>8</title> +<path fill="none" stroke="#d8a456" stroke-width="2" stroke-dasharray="5,2" d="M312.5,-252C312.5,-252 243.5,-252 243.5,-252 237.5,-252 231.5,-246 231.5,-240 231.5,-240 231.5,-228 231.5,-228 231.5,-222 237.5,-216 243.5,-216 243.5,-216 312.5,-216 312.5,-216 318.5,-216 324.5,-222 324.5,-228 324.5,-228 324.5,-240 324.5,-240 324.5,-246 318.5,-252 312.5,-252"/> +<text text-anchor="middle" x="278" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">intersect_mirna</text> +</g> +<!-- 8->7 --> +<g id="edge12" class="edge"> +<title>8->7</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M270.6821,-215.8314C267.5124,-207.9617 263.73,-198.5712 260.2187,-189.8533"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="263.3992,-188.3815 256.4165,-180.4133 256.9061,-190.9968 263.3992,-188.3815"/> +</g> +<!-- 10 --> +<g id="node11" class="node"> +<title>10</title> +<path fill="none" stroke="#c6d856" stroke-width="2" stroke-dasharray="5,2" d="M390,-180C390,-180 318,-180 318,-180 312,-180 306,-174 306,-168 306,-168 306,-156 306,-156 306,-150 312,-144 318,-144 318,-144 390,-144 390,-144 396,-144 402,-150 402,-156 402,-156 402,-168 402,-168 402,-174 396,-180 390,-180"/> +<text text-anchor="middle" x="354" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">quant_mirna_pri</text> +</g> +<!-- 8->10 --> +<g id="edge14" class="edge"> +<title>8->10</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M297.178,-215.8314C306.3588,-207.1337 317.5006,-196.5783 327.4776,-187.1265"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" 
points="329.9757,-189.5811 334.8281,-180.1628 325.1615,-184.4995 329.9757,-189.5811"/> +</g> +<!-- 9 --> +<g id="node10" class="node"> +<title>9</title> +<path fill="none" stroke="#56d8c9" stroke-width="2" stroke-dasharray="5,2" d="M319,-324C319,-324 237,-324 237,-324 231,-324 225,-318 225,-312 225,-312 225,-300 225,-300 225,-294 231,-288 237,-288 237,-288 319,-288 319,-288 325,-288 331,-294 331,-300 331,-300 331,-312 331,-312 331,-318 325,-324 319,-324"/> +<text text-anchor="middle" x="278" y="-309" font-family="sans" font-size="10.00" fill="#000000">sort_alignments</text> +<text text-anchor="middle" x="278" y="-298" font-family="sans" font-size="10.00" fill="#000000">sample: test_lib_2</text> +</g> +<!-- 9->8 --> +<g id="edge13" class="edge"> +<title>9->8</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M278,-287.8314C278,-280.131 278,-270.9743 278,-262.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="281.5001,-262.4132 278,-252.4133 274.5001,-262.4133 281.5001,-262.4132"/> +</g> +<!-- 10->1 --> +<g id="edge7" class="edge"> +<title>10->1</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M305.916,-146.3603C302.9096,-145.5239 299.9198,-144.729 297,-144 214.9089,-123.5033 189.4924,-130.1421 103.257,-107.8279"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="104.145,-104.4424 93.5826,-105.2706 102.3561,-111.21 104.145,-104.4424"/> +</g> +<!-- 10->11 --> +<g id="edge18" class="edge"> +<title>10->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M314.1299,-143.8314C293.0923,-134.2446 267.1017,-122.4008 244.8604,-112.2655"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="246.1345,-108.9999 235.5834,-108.038 243.2317,-115.3697 246.1345,-108.9999"/> +</g> +<!-- 10->12 --> +<g id="edge22" class="edge"> +<title>10->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M350.4672,-143.8314C348.9699,-136.131 347.1895,-126.9743 345.5255,-118.4166"/> 
+<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="348.9248,-117.5614 343.5804,-108.4133 342.0535,-118.8975 348.9248,-117.5614"/> +</g> +<!-- 11->0 --> +<g id="edge2" class="edge"> +<title>11->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M196,-71.8314C196,-64.131 196,-54.9743 196,-46.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="199.5001,-46.4132 196,-36.4133 192.5001,-46.4133 199.5001,-46.4132"/> +</g> +<!-- 12->0 --> +<g id="edge3" class="edge"> +<title>12->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M303.6627,-71.8314C281.7917,-60.8959 254.0472,-47.0236 232.2008,-36.1004"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="233.743,-32.9584 223.2334,-31.6167 230.6124,-39.2194 233.743,-32.9584"/> +</g> +</g> +</svg> diff --git a/scripts/merge_tables.R b/scripts/merge_tables.R new file mode 100755 index 0000000..69416f3 --- /dev/null +++ b/scripts/merge_tables.R @@ -0,0 +1,134 @@ +#!/usr/bin/env Rscript + +# (c) 2019 Paula Iborra, Biozentrum, University of Basel + +################# +### IMPORTS ### +################# + +# Import required packages +if ( suppressWarnings(suppressPackageStartupMessages(require("optparse"))) == FALSE ) { stop("[ERROR] Package 'optparse' required! Aborted.") } +if ( suppressWarnings(suppressPackageStartupMessages(require("dplyr"))) == FALSE ) { stop("[ERROR] Package 'dplyr' required! 
Aborted.") } + + +####################### +### PARSE OPTIONS ### +####################### + +# Get script name +script <- sub("--file=", "", basename(commandArgs(trailingOnly=FALSE)[4])) + +# Build description message +description <- "Merge miRNAs quantification tables.\n" +author <- "Author: Paula Iborra, Biozentrum, University of Basel" +version <- "Version: 1.0.0 (JUN-2019)" +requirements <- "Requires: optparse" +msg <- paste(description, author, version, requirements, sep="\n") + +# Define list of arguments +option_list <- list( + make_option( + "--input_dir", + action="store", + type="character", + default=getwd(), + help="Absolute path from where input files shall be readed. Required!", + metavar="directory" + ), + make_option( + "--output_file", + action="store", + type="character", + default=file.path(getwd(), "counts.tab"), + help="Table output file path. Default: $PWD/counts.tab", + metavar="directory" + ), + make_option( + c("--prefix"), + action="store_true", + type="character", + default=NULL, + help="Prefix for reading input files. Default: NULL.", + metavar="file" + ), + make_option( + c("-h", "--help"), + action="store_true", + default=FALSE, + help="Show this information and die." + ), + make_option( + c("-u", "--usage"), + action="store_true", + default=FALSE, + dest="help", + help="Show this information and die." + ), + make_option( + c("-v", "--verbose"), + action="store_true", + default=FALSE, + help="Print log messages to STDOUT." + ) +) + +# Parse command-line arguments +opt_parser <- OptionParser(usage=paste("Usage:", script, "[OPTIONS] --input_dir <path/to/input/files>\n", sep=" "), option_list = option_list, add_help_option=FALSE, description=msg) +opt <- parse_args(opt_parser) + +# Re-assign variables +in.dir <- opt$`input_dir` +prefix <- opt$`prefix` +out.file <- opt$`output_file` +verb <- opt$`verbose` + +# Validate required arguments +if ( is.null(in.dir) ) { + print_help(opt_parser) + stop("[ERROR] Required argument missing! 
# Merge per-sample count tables into one wide table.
#
# Every file in `cwd` whose name matches `prefix` is read as a header-less,
# tab-separated table (column 1 = feature ID, column 2 = value). The tables are
# full-joined on the ID column; each value column is named after its file with
# the prefix removed.
#
# Args:
#   cwd:    Directory containing the input tables.
#   prefix: Regular expression selecting the input files (passed to `dir()` as
#           `pattern`) and stripped from the file names to build the column
#           names. NULL selects every file and keeps the file names unchanged.
#
# Returns:
#   A data frame with an "ID" column followed by one column per input file,
#   or NULL when no file matches.
merge_tables <- function(cwd, prefix){
    dataFiles <- dir(cwd, prefix, full.names=TRUE)
    if (!length(dataFiles)) {
        return(NULL)
    }

    # Keep IDs as character: joining factor keys with different levels makes
    # full_join() coerce them with a warning for every table (R < 4.0 default).
    read_counts <- function(path) {
        read.table(path, sep="\t", stringsAsFactors=FALSE)
    }

    # Derive the column label from the file name only, so that a directory
    # name matching `prefix` cannot alter it; gsub() rejects a NULL pattern,
    # hence the explicit guard.
    sample_name <- function(path) {
        name <- basename(path)
        if (!is.null(prefix)) {
            name <- gsub(prefix, "", name)
        }
        name
    }

    mat <- read_counts(dataFiles[1])
    colnames(mat)[2] <- sample_name(dataFiles[1])

    # The key column keeps its default name "V1" until all joins are done.
    for (i in seq_along(dataFiles)[-1]) {
        mat <- full_join(mat, read_counts(dataFiles[i]), by="V1")
        colnames(mat)[i + 1] <- sample_name(dataFiles[i])
    }

    colnames(mat)[1] <- "ID"
    return(mat)
}
+ ) +parser.add_argument( + '-v','--version', + action='version', + version='%(prog)s 1.0', + help="Show program's version number and exit" + ) +parser.add_argument( + '-u','--uniq', + type=str, + help="Get the counting table of specific miR type. Name output: prefix + mirna type", + ) +parser.add_argument( + '-s','--split', + action='store_true', + help="Split the counting tables based on miR type. Name outputs: prefix + mirna type", + ) +parser.add_argument( + '-c','--column', + help="Column of miR type in bed file. Default: 8", + type=int, + default='8' + ) +parser.add_argument( + '--rid', + help="Read ID column in intersection bed file. Default: 14", + type=int, + default='14' + ) +parser.add_argument( + '--mid', + help="miRNA ID;alias;name column in intersection bed file. Default: 10 ", + type=int, + default='10' + ) +parser.add_argument( + '--nh', + help="read count column in intersection bed file. Default: 15 ", + type=int, + default='15' + ) +parser.add_argument( + '-i','--intersection', + required=True, + help="miRNA intersection bed file" + ) +parser.add_argument( + '-p','--prefix', + type=str, + default='', + help="Prefix output name of quantification table." 
def count_dictionary(file, options=None):
    """Tally per-miRNA read counts from an intersection table, by miRNA type.

    Each non-blank line of *file* is one tab-separated record. The column
    selected by ``options.mid`` holds ``;``-separated ``key=value`` entries;
    the value of the second entry is logged as the miRNA ID and the value of
    the third entry is used as the miRNA name. A record whose read ID is
    ``'.'`` contributes nothing (presumably an annotation without an
    overlapping read -- confirm against the intersection step); any other
    record contributes ``1 / <value of the nh column>``.

    Args:
        file: Iterable of text lines, e.g. an open file handle.
        options: Object providing the 1-based column numbers ``mid``, ``rid``,
            ``column`` and ``nh`` and the optional type filter ``uniq``.
            Defaults to the module-level ``args`` parsed from the command line.

    Returns:
        dict: ``{mirna_type: {mirna_name: count}}``. A name that was only seen
        on ``'.'`` records keeps the integer count ``0``. When
        ``options.uniq`` is set, only that miRNA type is collected (and the
        result is empty if it never occurs).

    Raises:
        IndexError: If a record has too few columns or attribute entries.
        ValueError: If the nh column of a counted record is not an integer.
    """
    if options is None:
        # Fall back to the command-line arguments parsed at module level.
        options = args

    # Column numbers are given 1-based on the command line.
    mid_idx = options.mid - 1
    rid_idx = options.rid - 1
    type_idx = options.column - 1
    nh_idx = options.nh - 1
    wanted_type = options.uniq

    total_counts = {}

    sys.stderr.write("##### CREATING COUNTING DICTIONARY #####\n")

    for line in file:
        if not line.strip():
            # Tolerate blank (e.g. trailing) lines instead of failing on them.
            continue

        # Drop the line terminator so the last column compares cleanly.
        fields = line.rstrip('\r\n').split('\t')
        attributes = fields[mid_idx].split(';')
        name = attributes[2].split('=')[1]
        mirnaid = attributes[1].split('=')[1]
        readid = fields[rid_idx]
        mirna_type = fields[type_idx]

        if wanted_type and mirna_type != wanted_type:
            continue

        sys.stderr.write(
            "%s\t%s\t%s\t%s\n" % (readid, name, mirnaid, mirna_type)
        )

        type_counts = total_counts.setdefault(mirna_type, {})
        # Register the miRNA even when no read supports it (count stays 0).
        type_counts.setdefault(name, 0)
        if readid != '.':
            # 1.0 forces true division regardless of the interpreter version.
            type_counts[name] += 1.0 / int(fields[nh_idx])

    return total_counts
TO CREATE JUST ONE SPECIFIC MIRNA TYPE COUNT TABLE + +sys.stderr.write("##### WRITING COUNTING TABLES #####\n") + +if (args.uniq): + i = (args.uniq) + out = open(args.prefix, 'w') + for x in counts[i].keys(): + cnt = str(counts[i][x]) + print(x,cnt) + out.write(x + '\t' + cnt + '\n') + +### SPLIT. CREATE A SEPARE COUNT TABLES FILES FOR EACH MIRNA TYPE +if (args.split): + for i in counts.keys(): + out = open(args.prefix+i, 'w') + for x in counts[i].keys(): + cnt = str(counts[i][x]) + out.write(x + '\t' + cnt + '\n') + +### CREATE ONE TABLE WITH ALL MIRNA TYPE COUNTS +if not (args.uniq) and not (args.split): + out = open(args.prefix, 'w') + for i in counts.keys(): + for x in counts[i].keys(): + cnt = str(counts[i][x]) + out.write(x + '\t' + cnt + '\n') + +sys.stderr.write("##### DONE! #####\n") diff --git a/test/cluster_map.json b/test/cluster_map.json deleted file mode 100644 index f771d03..0000000 --- a/test/cluster_map.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "__default__" : - { - "queue": "6hours", - "time": "05:00:00", - "threads": "1", - "mem": "4G" - }, - - "cutadapt": - { - "threads":"{resources.threads}" - }, - - "mapping_genome_segemehl": - { - "queue": "1day", - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - }, - - "mapping_transcriptome_segemehl": - { - "queue": "1day", - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - }, - - "mapping_genome_oligomap": - { - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - }, - - "mapping_transcriptome_oligomap": - { - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - }, - - "sort_transcriptome_oligomap": - { - "threads":"{resources.threads}" - }, - - "sort_genome_oligomap": - { - "time": "{resources.time}:00:00", - "threads":"{resources.threads}" - }, - - "oligomap_genome_toSAM": - { - "time": "{resources.time}-00:00:00", - 
"queue": "{resources.queue}day" - }, - - "remove_inferiors": - { - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - } -} diff --git a/test/cluster_prepare.json b/test/cluster_prepare.json deleted file mode 100644 index eb88fbf..0000000 --- a/test/cluster_prepare.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "__default__" : - { - "queue": "6hours", - "time": "05:00:00", - "threads": "1", - "mem": "4G" - }, - - "generate_segemehl_index_transcriptome": - { - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - }, - - "generate_segemehl_index_genome": - { - "time": "{resources.time}:00:00", - "threads":"{resources.threads}", - "mem":"{resources.mem}G" - } -} diff --git a/test/config_map.yaml b/test/config_map.yaml index d082d98..4ad569e 100644 --- a/test/config_map.yaml +++ b/test/config_map.yaml @@ -1,8 +1,8 @@ --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS #### # Directories +# Usually there is no need to change these output_dir: "results/" local_log: "logs/local" cluster_log: "logs/cluster" @@ -32,14 +32,13 @@ max_n: 0 max_length_reads: 30 nh: 100 -# Sample information +# Inputs information input_dir: "test_files" sample: ["test_lib"] -######################## PARAMETERS SPECIFIC TO SAMPLE ######################## +#### PARAMETERS SPECIFIC TO INPUTS #### test_lib: adapter: "AACTGTAGGCACCATCAAT" format: "fa" - ... 
diff --git a/test/config_prepare.yaml b/test/config_prepare.yaml index ed62e83..a74dba6 100644 --- a/test/config_prepare.yaml +++ b/test/config_prepare.yaml @@ -1,8 +1,8 @@ --- - -############################## GLOBAL PARAMETERS ############################## +#### GLOBAL PARAMETERS ##### # Directories +# Usually there is no need to change these output_dir: "results" scripts_dir: "../scripts" local_log: "logs/local" @@ -10,16 +10,15 @@ cluster_log: "logs/cluster" # Isomirs annotation file # Number of base pairs to add/substract from 5' (start) and 3' (end) coordinates. -bp_5p: [-1,0,+1] -bp_3p: [-1,0,+1] +bp_5p: [-1, 0, +1] +bp_3p: [-1, 0, +1] -# List of "organism/prefix" +# List of inputs organism: ["homo_sapiens/chrY"] -################### PARAMETERS SPECIFIC TO ORGANISM VERSION ################### +#### PARAMETERS SPECIFIC TO INPUTS ##### homo_sapiens/chrY: - # URLs to genome, gene & miRNA annotations genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz" gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz" @@ -31,5 +30,4 @@ homo_sapiens/chrY: # Chromosome name mapping parameters: column: 1 delimiter: "TAB" - ... 
diff --git a/test/config_quantify.yaml b/test/config_quantify.yaml new file mode 100644 index 0000000..691cedf --- /dev/null +++ b/test/config_quantify.yaml @@ -0,0 +1,23 @@ +--- +#### GLOBAL PARAMETERS #### + +# Directories +# Usually there is no need to change these +output_dir: "results" +scripts_dir: "../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# Types of miRNAs to quantify +#mir_list: ["miRNA", "miRNA_primary_transcript", "isomirs"] +mir_list: ["miRNA", "miRNA_primary_transcript"] + +# Resources: miR annotations, chromosome name mappings +# All of these are produced by the "prepare" workflow +mirnas_anno: "results/homo_sapiens/chrY/mirna_filtered.bed" +isomirs_anno: "results/homo_sapiens/chrY/isomirs_annotation.bed" + +# Inputs information +input_dir: "results" +sample: ["test_lib"] # put all samples, separated by comma +... diff --git a/test/expected_output.files b/test/expected_output.files deleted file mode 100644 index c20e63c..0000000 --- a/test/expected_output.files +++ /dev/null @@ -1,47 +0,0 @@ -results/homo_sapiens/chrY/chr_size.txt -results/homo_sapiens/chrY/exons.bed -results/homo_sapiens/chrY/exons.gtf -results/homo_sapiens/chrY/gene_annotations.filtered.gtf -results/homo_sapiens/chrY/genome_index_segemehl.idx -results/homo_sapiens/chrY/genome.processed.fa -results/homo_sapiens/chrY/genome.processed.fa.fai -results/homo_sapiens/chrY/headerOfCollapsedFasta.sam -results/homo_sapiens/chrY/isomirs_annotation.bed -results/homo_sapiens/chrY/mirna_chr_mapped.gff3 -results/homo_sapiens/chrY/mirna_filtered.bed -results/homo_sapiens/chrY/mirna_filtered.gff3 -results/homo_sapiens/chrY/mirna_mature_filtered.bed -results/homo_sapiens/chrY/transcriptome.fa -results/homo_sapiens/chrY/transcriptome_idtrim.fa -results/homo_sapiens/chrY/transcriptome_index_segemehl.idx -results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf -results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa -results/homo_sapiens/chrY/raw/mirna.gff3 
-results/homo_sapiens/chrY/UCSC2ensembl.txt -results/test_lib/catMappings.sam -results/test_lib/collapsed.fasta -results/test_lib/concatenated_header_catMappings.sam -results/test_lib/convertedSortedMappings_test_lib.bam -results/test_lib/convertedSortedMappings_test_lib.bam.bai -results/test_lib/cut.fasta -results/test_lib/fa/reads.fa -results/test_lib/filtered_for_oligomap.fasta -results/test_lib/formatted.fasta -results/test_lib/GenomeMappings.sam -results/test_lib/header_sorted_catMappings.sam -results/test_lib/nhfiltered_GenomeMappings.sam -results/test_lib/nhfiltered_TranscriptomeMappings.sam -results/test_lib/noheader_GenomeMappings.sam -results/test_lib/noheader_TranscriptomeMappings.sam -results/test_lib/oligoGenome_converted.sam -results/test_lib/oligoGenome_map.fa -results/test_lib/oligoGenome_report.txt -results/test_lib/oligoGenome_sorted.fa -results/test_lib/oligoTranscriptome_converted.sam -results/test_lib/oligoTranscriptome_map.fa -results/test_lib/oligoTranscriptome_report.txt -results/test_lib/oligoTranscriptome_sorted.fa -results/test_lib/segemehlGenome_map.sam -results/test_lib/segemehlTranscriptome_map.sam -results/test_lib/TranscriptomeMappings.sam -results/test_lib/TransToGen.sam diff --git a/test/expected_output.md5 b/test/expected_output.md5 index d176435..0d7c5c3 100644 --- a/test/expected_output.md5 +++ b/test/expected_output.md5 @@ -1,47 +1,74 @@ -1e6a0b3d0e678014f87afdd80f4025b9 results/homo_sapiens/chrY/chr_size.txt -51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/chrY/exons.bed +f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/chrY/genome.processed.fa.fai +ce880d47bf97bb4c43589e51568d02d6 results/homo_sapiens/chrY/iso_anno_5p-1_3p-1.bed +52114defcb2905049d1182d8e9458739 results/homo_sapiens/chrY/iso_anno_5p1_3p-1.bed +a49b7296e91bf33ac560cb4dcaaf28f2 results/homo_sapiens/chrY/iso_anno_5p-1_3p0.bed +583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/chrY/genome.processed.fa +a12a5577947ff6a3d6cd6c205a149b0b 
results/homo_sapiens/chrY/iso_anno_rename_5p-1_3p-1.bed 6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/chrY/exons.gtf 0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/chrY/gene_annotations.filtered.gtf -11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/chrY/genome_index_segemehl.idx -583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/chrY/genome.processed.fa -f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/chrY/genome.processed.fa.fai 40054d82cc01b4b44dbe476bdb50141c results/homo_sapiens/chrY/headerOfCollapsedFasta.sam -909a2fc878c5ac0437344e4f5c6e58e3 results/homo_sapiens/chrY/isomirs_annotation.bed +67e880cfae3cdfa5a17b5161106c7a05 results/homo_sapiens/chrY/iso_anno_rename_5p0_3p-1.bed +548d8315be9dcfa394bfcb928e5a131c results/homo_sapiens/chrY/iso_anno_rename_5p-1_3p0.bed +221977ef10a502e764d9ff9a9e4d96cf results/homo_sapiens/chrY/iso_anno_rename_5p-1_3p1.bed +fc1068050ba1e7fa03e94087d9bd6978 results/homo_sapiens/chrY/iso_anno_5p0_3p0.bed +c27229d158fdf70eb6f10313e988e9f7 results/homo_sapiens/chrY/iso_anno_5p0_3p-1.bed +4fd9274d50a8dd7ce1533b03f2e538c0 results/homo_sapiens/chrY/iso_anno_5p1_3p0.bed +dd6bc7e94c04f7a35c068335c9055b00 results/homo_sapiens/chrY/iso_anno_rename_5p0_3p1.bed +bf1e37165b908729327599801ff5147b results/homo_sapiens/chrY/transcriptome_idtrim.fa ba7404239073e3b67204af1803729884 results/homo_sapiens/chrY/mirna_chr_mapped.gff3 -a923f50eea2708cd889886ae5179ee18 results/homo_sapiens/chrY/mirna_filtered.bed +11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/chrY/genome_index_segemehl.idx +5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/chrY/transcriptome.fa 91e1facd80f93ef61f242050dd7d03c3 results/homo_sapiens/chrY/mirna_filtered.gff3 e7e85f57e0476d1805c1cb64131dd75c results/homo_sapiens/chrY/mirna_mature_filtered.bed -d5eaafa9aec63e3fab632fc49392b54b results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf +8c6e976e926877f77946e9216db33a00 results/homo_sapiens/chrY/iso_anno_rename_5p0_3p0.bed 
+a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/chrY/transcriptome_index_segemehl.idx +801dcc664f889e9148555dc85542402f results/homo_sapiens/chrY/iso_anno_rename_5p1_3p-1.bed eb44404d89516497e6480d4dd33f2381 results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa 6bc49275f74ed1b43d80cf7598d387b9 results/homo_sapiens/chrY/raw/mirna.gff3 -5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/chrY/transcriptome.fa -bf1e37165b908729327599801ff5147b results/homo_sapiens/chrY/transcriptome_idtrim.fa -a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/chrY/transcriptome_index_segemehl.idx +d5eaafa9aec63e3fab632fc49392b54b results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf +8976aba06b980b721dbb4a9a4fd3509e results/homo_sapiens/chrY/iso_anno_5p-1_3p1.bed +79adc4c7382cf39b5f366e00a8696453 results/homo_sapiens/chrY/iso_anno_concat.bed +909a2fc878c5ac0437344e4f5c6e58e3 results/homo_sapiens/chrY/isomirs_annotation.bed d2095c371c9b8b2c7cacd1024abf2d18 results/homo_sapiens/chrY/UCSC2ensembl.txt -db60b643dd35fe014a452a30ce748f84 results/test_lib/catMappings.sam +b23e51a933a2c453bd265ba60ef5442c results/homo_sapiens/chrY/iso_anno_rename_5p1_3p1.bed +51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/chrY/exons.bed +06bd13dd6df8113cd2d42269f57a1c56 results/homo_sapiens/chrY/iso_anno_5p0_3p1.bed +a987168575a5e777cff7ca6b883a04eb results/homo_sapiens/chrY/iso_anno_rename_5p1_3p0.bed +1e6a0b3d0e678014f87afdd80f4025b9 results/homo_sapiens/chrY/chr_size.txt +c8a30f530ba38f67bd980eb5e39f541d results/homo_sapiens/chrY/iso_anno_5p1_3p1.bed +a923f50eea2708cd889886ae5179ee18 results/homo_sapiens/chrY/mirna_filtered.bed +043e8c04602f052f9f8f7d36f2990a4f results/TABLES/counts.miRNA_primary_transcript.tab +946eea1b37f3d13c335e8b7845e7d340 results/TABLES/counts.miRNA.tab +68b329da9893e34099c7d8ad5cb9c940 results/TABLES/counts.isomirs.tab +fa804cf65856c26aa7ad0bfe76fa689e results/TABLES/miRNA_counts_test_lib +06c39aa920f33540739099371baac36c 
results/TABLES/miRNA_primary_transcript_counts_test_lib +d41d8cd98f00b204e9800998ecf8427e results/test_lib/noheader_TranscriptomeMappings.sam +0c4c280b79c35fda5558161e63bd2ae3 results/test_lib/intersect_mirna.bed da07cdd64fddbc1d018c92c7b8b3c9bd results/test_lib/collapsed.fasta -a8579cb9828810a81a9be000a6c0c38d results/test_lib/concatenated_header_catMappings.sam +1af83e4998536f0bb07af3785c22e455 results/test_lib/oligoGenome_converted.sam +cf92ecdb9bc4ad395a2d4d1cde8e85c2 results/test_lib/oligoGenome_report.txt +d7fb4b61c4e21aa59d709cd63aa8a34b results/test_lib/oligoGenome_map.fa +6f36e04dc0cf4ce4a0115445ac133a86 results/test_lib/fa/reads.fa +cdf5453be4761745cf086f48df202b30 results/test_lib/sorted.alignments.bed12 +c0daa909634f9611954188928adf87cb results/test_lib/cut.fasta +6b9e79f12cb9e7d38827d396034ac62e results/test_lib/TranscriptomeMappings.sam +c994ca3e27f45cf0d8260dc4faf5d3fa results/test_lib/nhfiltered_GenomeMappings.sam +81d670b806fbf429e1df7b31721dcb9c results/test_lib/oligoGenome_sorted.fa +f4d86fc90874aeeed5d4bff4eacd6bb3 results/test_lib/header_sorted_catMappings.sam +d41d8cd98f00b204e9800998ecf8427e results/test_lib/oligoTranscriptome_sorted.fa 244208bcf475ce5eac0940fc15e477fd results/test_lib/convertedSortedMappings_test_lib.bam +15dd5f2caa3797dc71da1f6d355d1a73 results/test_lib/alignments.bed12 +2909ec89b63190055195f8052561073f results/test_lib/oligoTranscriptome_report.txt +d41d8cd98f00b204e9800998ecf8427e results/test_lib/oligoTranscriptome_converted.sam +db60b643dd35fe014a452a30ce748f84 results/test_lib/catMappings.sam +6b9e79f12cb9e7d38827d396034ac62e results/test_lib/nhfiltered_TranscriptomeMappings.sam a1a1afd1e5ed8e4cc81d380c8456777c results/test_lib/convertedSortedMappings_test_lib.bam.bai -c0daa909634f9611954188928adf87cb results/test_lib/cut.fasta -6f36e04dc0cf4ce4a0115445ac133a86 results/test_lib/fa/reads.fa a8239c8468e0f1a32eedf1a1f3d4b572 results/test_lib/filtered_for_oligomap.fasta +78f3db3abce3bd901e01965da7a674a6 
results/test_lib/segemehlGenome_map.sam 6c03db8848d24a36ad31879cadec7582 results/test_lib/formatted.fasta c994ca3e27f45cf0d8260dc4faf5d3fa results/test_lib/GenomeMappings.sam -f4d86fc90874aeeed5d4bff4eacd6bb3 results/test_lib/header_sorted_catMappings.sam -c994ca3e27f45cf0d8260dc4faf5d3fa results/test_lib/nhfiltered_GenomeMappings.sam -6b9e79f12cb9e7d38827d396034ac62e results/test_lib/nhfiltered_TranscriptomeMappings.sam -db60b643dd35fe014a452a30ce748f84 results/test_lib/noheader_GenomeMappings.sam -d41d8cd98f00b204e9800998ecf8427e results/test_lib/noheader_TranscriptomeMappings.sam -1af83e4998536f0bb07af3785c22e455 results/test_lib/oligoGenome_converted.sam -d7fb4b61c4e21aa59d709cd63aa8a34b results/test_lib/oligoGenome_map.fa -cf92ecdb9bc4ad395a2d4d1cde8e85c2 results/test_lib/oligoGenome_report.txt -81d670b806fbf429e1df7b31721dcb9c results/test_lib/oligoGenome_sorted.fa -d41d8cd98f00b204e9800998ecf8427e results/test_lib/oligoTranscriptome_converted.sam +a8579cb9828810a81a9be000a6c0c38d results/test_lib/concatenated_header_catMappings.sam d41d8cd98f00b204e9800998ecf8427e results/test_lib/oligoTranscriptome_map.fa -2909ec89b63190055195f8052561073f results/test_lib/oligoTranscriptome_report.txt -d41d8cd98f00b204e9800998ecf8427e results/test_lib/oligoTranscriptome_sorted.fa -78f3db3abce3bd901e01965da7a674a6 results/test_lib/segemehlGenome_map.sam 6b9e79f12cb9e7d38827d396034ac62e results/test_lib/segemehlTranscriptome_map.sam -6b9e79f12cb9e7d38827d396034ac62e results/test_lib/TranscriptomeMappings.sam d41d8cd98f00b204e9800998ecf8427e results/test_lib/TransToGen.sam +db60b643dd35fe014a452a30ce748f84 results/test_lib/noheader_GenomeMappings.sam diff --git a/test/test_cleanup.sh b/test/test_cleanup.sh index 5f2f6d7..dd631ac 100755 --- a/test/test_cleanup.sh +++ b/test/test_cleanup.sh @@ -13,6 +13,8 @@ set -eo pipefail # ensures that script exits at first command that exits with n set -u # ensures that script exits when unset variables are used set -x # facilitates 
debugging by printing out executed commands user_dir=$PWD +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +cd $script_dir # Remove all generated by the test runs rm -rf .snakemake/ @@ -21,3 +23,4 @@ rm -rf logs/ rm -rf results/ rm -rf snakemake_report_*.html rm -rf wget-log* +rm -rf .wget-hsts diff --git a/test/test_dag.sh b/test/test_dag.sh index 2b52bb5..f472b15 100755 --- a/test/test_dag.sh +++ b/test/test_dag.sh @@ -35,3 +35,13 @@ snakemake \ --dryrun \ --verbose \ | dot -Tsvg > "../images/workflow_dag_map.svg" + +# Run test: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + --dag \ + --printshellcmds \ + --dryrun \ + --verbose \ + | dot -Tsvg > "../images/workflow_dag_quantify.svg" diff --git a/test/test_rule_graph.sh b/test/test_rule_graph.sh index 65ca37b..3b0b235 100755 --- a/test/test_rule_graph.sh +++ b/test/test_rule_graph.sh @@ -35,3 +35,13 @@ snakemake \ --dryrun \ --verbose \ | dot -Tsvg > "../images/rule_graph_map.svg" + +# Run test: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + --rulegraph \ + --printshellcmds \ + --dryrun \ + --verbose \ + | dot -Tsvg > "../images/rule_graph_quantify.svg" diff --git a/test/test_workflow_local.sh b/test/test_workflow_local.sh index 8925277..e59a3f4 100755 --- a/test/test_workflow_local.sh +++ b/test/test_workflow_local.sh @@ -38,6 +38,17 @@ snakemake \ --rerun-incomplete \ --verbose +# Run test: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + --use-singularity \ + --singularity-args "--bind ${PWD}/../" \ + --cores=4 \ + --printshellcmds \ + --rerun-incomplete \ + --verbose + # Snakemake report: prepare workflow snakemake \ --snakefile="../workflow/prepare/Snakefile" \ @@ -50,6 +61,12 @@ snakemake \ --configfile="config_map.yaml" \ --report="snakemake_report_map.html" +# 
Snakemake report: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + --report="snakemake_report_quantify.html" + # Check md5 sum of some output files find results/ -type f -name \*\.gz -exec gunzip '{}' \; find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; @@ -57,5 +74,4 @@ md5sum --check "expected_output.md5" # Generate checksum files # (run only when using new test data and after verifying results!) -# find results/ -type f > expected_output.files; -# md5sum $(cat expected_output.files) > expected_output.md5 +# md5sum $(find results/ -type f) > expected_output.md5 diff --git a/test/test_workflow_slurm.sh b/test/test_workflow_slurm.sh index c72b678..5ae9432 100755 --- a/test/test_workflow_slurm.sh +++ b/test/test_workflow_slurm.sh @@ -23,7 +23,7 @@ mkdir -p results/{homo_sapiens/chrY,results/test_lib} snakemake \ --snakefile="../workflow/prepare/Snakefile" \ --configfile="config_prepare.yaml" \ - --cluster-config="cluster_prepare.json" \ + --cluster-config="../RUNS/JOB/prepare/cluster.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ --mem={cluster.mem} \ @@ -45,7 +45,29 @@ snakemake \ snakemake \ --snakefile="../workflow/map/Snakefile" \ --configfile="config_map.yaml" \ - --cluster-config="cluster_map.json" \ + --cluster-config="../RUNS/JOB/map/cluster.json" \ + --cluster "sbatch \ + --cpus-per-task={cluster.threads} \ + --mem={cluster.mem} \ + --qos={cluster.queue} \ + --time={cluster.time} \ + --export=JOB_NAME={rule} \ + -o {params.cluster_log} \ + -p scicore \ + --open-mode=append" \ + --jobscript="../jobscript.sh" \ + --use-singularity \ + --singularity-args="--no-home --bind ${PWD}/../" \ + --cores=256 \ + --printshellcmds \ + --rerun-incomplete \ + --verbose + +# Run test: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + 
--cluster-config="../RUNS/JOB/quantify/cluster.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ --mem={cluster.mem} \ @@ -75,6 +97,12 @@ snakemake \ --configfile="config_map.yaml" \ --report="snakemake_report_map.html" +# Snakemake report: quantify workflow +snakemake \ + --snakefile="../workflow/quantify/Snakefile" \ + --configfile="config_quantify.yaml" \ + --report="snakemake_report_quantify.html" + # Check md5 sum of some output files find results/ -type f -name \*\.gz -exec gunzip '{}' \; find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \; @@ -82,5 +110,4 @@ md5sum --check "expected_output.md5" # Generate checksum files # (run only when using new test data and after verifying results!) -# find results/ -type f > expected_output.files; -# md5sum $(cat expected_output.files) > expected_output.md5 +# md5sum $(find results/ -type f) > expected_output.md5 diff --git a/workflow/map/Snakefile b/workflow/map/Snakefile index dbb10e2..aace1f1 100644 --- a/workflow/map/Snakefile +++ b/workflow/map/Snakefile @@ -1,647 +1,1007 @@ -################################################################################# +############################################################################### # (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel # (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu # # Workflow to map small RNA-seq reads (e.g. from miRNA sequencing libraries). 
-################################################################################# +############################################################################### import os -localrules: finish -################################################################################# +# Rules that require internet connection for downloading files are included +# in the localrules +localrules: + finish, + + +############################################################################### ### Finish rule -################################################################################# +############################################################################### + rule finish: input: - maps = expand( + maps=expand( os.path.join( config["output_dir"], "{sample}", - "convertedSortedMappings_{sample}.bam.bai" + "convertedSortedMappings_{sample}.bam.bai", ), - sample=config["sample"] - ) + sample=config["sample"], + ), + -################################################################################# +############################################################################### ### Uncompress fastq files -################################################################################# +############################################################################### + rule uncompress_zipped_files: - input: - reads = os.path.join(config["input_dir"], "{sample}.{format}.gz") - output: - reads = os.path.join(config["output_dir"], "{sample}", "{format}", "reads.{format}") - params: - cluster_log = os.path.join(config["cluster_log"], "uncompress_zipped_files_{sample}_{format}.log") - log: - os.path.join(config["local_log"], "uncompress_zipped_files_{sample}_{format}.log") - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(zcat {input.reads} > {output.reads}) &> {log}" - -################################################################################# + input: + reads=os.path.join(config["input_dir"], "{sample}.{format}.gz"), + output: + reads=os.path.join( + 
config["output_dir"], "{sample}", "{format}", "reads.{format}" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "uncompress_zipped_files_{sample}_{format}.log", + ), + log: + os.path.join( + config["local_log"], + "uncompress_zipped_files_{sample}_{format}.log", + ), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(zcat {input.reads} > {output.reads}) &> {log}" + + +############################################################################### ### Quality filter -################################################################################# +############################################################################### + rule fastq_quality_filter: - input: - reads = os.path.join(config["output_dir"], "{sample}", "fastq", "reads.fastq") - output: - reads = os.path.join(config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq") - params: - cluster_log = os.path.join(config["cluster_log"], "fastq_quality_filter_{sample}.log"), - p = config["p_value"], - q = config["q_value"] - log: - os.path.join(config["local_log"], "fastq_quality_filter_{sample}.log") - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastq_quality_filter -v -q {params.q} -p {params.p} -i {input.reads} > {output.reads}) &> {log}" - -################################################################################# -### Convert fastq to fasta -################################################################################# + input: + reads=os.path.join( + config["output_dir"], "{sample}", "fastq", "reads.fastq" + ), + output: + reads=os.path.join( + config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "fastq_quality_filter_{sample}.log" + ), + p=config["p_value"], + q=config["q_value"], + log: + os.path.join(config["local_log"], "fastq_quality_filter_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fastq_quality_filter \ + -v \ + -q 
{params.q} \ + -p {params.p} \ + -i {input.reads} \ + > {output.reads} \ + ) &> {log}" + + +############################################################################### +### Convert fastq to fasta +############################################################################### + rule fastq_to_fasta: - input: - reads = os.path.join(config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq") - output: - reads = os.path.join(config["output_dir"], "{sample}", "fastq", "reads.fa") - params: - cluster_log = os.path.join(config["cluster_log"], "fastq_to_fasta_{sample}.log") - log: - os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log") - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastq_to_fasta -r -n -i {input.reads} > {output.reads}) &> {log}" - -################################################################################# -### Format fasta file -################################################################################# + input: + reads=os.path.join( + config["output_dir"], "{sample}", "fastq", "filtered_reads.fastq" + ), + output: + reads=os.path.join( + config["output_dir"], "{sample}", "fastq", "reads.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "fastq_to_fasta_{sample}.log" + ), + log: + os.path.join(config["local_log"], "fastq_to_fasta_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fastq_to_fasta -r -n -i {input.reads} > {output.reads}) &> {log}" + + +############################################################################### +### Format fasta file +############################################################################### + rule fasta_formatter: - input: - reads = lambda wildcards: os.path.join(config["output_dir"], wildcards.sample, config[wildcards.sample]['format'], "reads.fa" ) - output: - reads = os.path.join(config["output_dir"], "{sample}", "formatted.fasta") - params: - cluster_log = os.path.join(config["cluster_log"], 
"fasta_formatter_{sample}.log") - log: - os.path.join(config["local_log"], "fasta_formatter_{sample}.log") - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fasta_formatter -w 0 -i {input.reads} > {output.reads}) &> {log}" - -################################################################################# + input: + reads=lambda wildcards: os.path.join( + config["output_dir"], + wildcards.sample, + config[wildcards.sample]["format"], + "reads.fa", + ), + output: + reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "fasta_formatter_{sample}.log" + ), + log: + os.path.join(config["local_log"], "fasta_formatter_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fasta_formatter -w 0 -i {input.reads} > {output.reads}) &> {log}" + + +############################################################################### ### Remove adapters -################################################################################# +############################################################################### + rule cutadapt: - input: - reads = os.path.join(config["output_dir"], "{sample}", "formatted.fasta") - output: - reads = os.path.join(config["output_dir"], "{sample}", "cut.fasta") - params: - cluster_log = os.path.join(config["cluster_log"], "cutadapt_{sample}.log"), - adapter = lambda wildcards: config[ wildcards.sample ]['adapter'], - error_rate = config["error_rate"], - minimum_length = config["minimum_length"], - overlap = config["overlap"], - max_n = config["max_n"] - log: - os.path.join(config["local_log"],"cutadapt_{sample}.log") - resources: - threads = 8 - singularity: - "docker://zavolab/cutadapt:1.16" - shell: - "(cutadapt \ - -a {params.adapter} \ - --error-rate {params.error_rate} \ - --minimum-length {params.minimum_length} \ - --overlap {params.overlap} \ - --trim-n \ - --max-n {params.max_n} \ - --cores {resources.threads} \ - -o 
{output.reads} {input.reads}) &> {log}" - -################################################################################# + input: + reads=os.path.join(config["output_dir"], "{sample}", "formatted.fasta"), + output: + reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "cutadapt_{sample}.log" + ), + adapter=lambda wildcards: config[wildcards.sample]["adapter"], + error_rate=config["error_rate"], + minimum_length=config["minimum_length"], + overlap=config["overlap"], + max_n=config["max_n"], + log: + os.path.join(config["local_log"], "cutadapt_{sample}.log"), + resources: + threads=8, + singularity: + "docker://zavolab/cutadapt:1.16" + shell: + "(cutadapt \ + -a {params.adapter} \ + --error-rate {params.error_rate} \ + --minimum-length {params.minimum_length} \ + --overlap {params.overlap} \ + --trim-n \ + --max-n {params.max_n} \ + --cores {resources.threads} \ + -o {output.reads} {input.reads}) &> {log}" + + +############################################################################### ### Collapse identical reads -################################################################################# +############################################################################### + rule fastx_collapser: - input: - reads = os.path.join(config["output_dir"], "{sample}", "cut.fasta") - output: - reads = os.path.join(config["output_dir"], "{sample}", "collapsed.fasta") - params: - cluster_log = os.path.join(config["cluster_log"],"fastx_collapser_{sample}.log") - log: - os.path.join(config["local_log"], "fastx_collapser_{sample}.log") - singularity: - "docker://zavolab/fastx:0.0.14" - shell: - "(fastx_collapser -i {input.reads} > {output.reads}) &> {log}" - -################################################################################# + input: + reads=os.path.join(config["output_dir"], "{sample}", "cut.fasta"), + output: + reads=os.path.join(config["output_dir"], "{sample}", 
"collapsed.fasta"), + params: + cluster_log=os.path.join( + config["cluster_log"], "fastx_collapser_{sample}.log" + ), + log: + os.path.join(config["local_log"], "fastx_collapser_{sample}.log"), + singularity: + "docker://zavolab/fastx:0.0.14" + shell: + "(fastx_collapser -i {input.reads} > {output.reads}) &> {log}" + + +############################################################################### ### Segemehl genome mapping -################################################################################# +############################################################################### + rule mapping_genome_segemehl: - input: - reads = os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - genome = config["genome"], - genome_index_segemehl = config["genome_index_segemehl"] - output: - gmap = os.path.join(config["output_dir"], "{sample}", "segemehlGenome_map.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "mapping_genome_segemehl_{sample}.log") - log: - os.path.join(config["local_log"],"mapping_genome_segemehl_{sample}.log") - resources: - mem = 50, - time = 12, - threads = 8 - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "segemehl.x \ - -i {input.genome_index_segemehl} \ - -d {input.genome} \ - -t {threads} \ - -q {input.reads} \ - -outfile {output.gmap}" - -################################################################################# + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + genome=config["genome"], + genome_index_segemehl=config["genome_index_segemehl"], + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "segemehlGenome_map.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "mapping_genome_segemehl_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "mapping_genome_segemehl_{sample}.log" + ), + resources: + mem=50, + time=12, + threads=8, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x \ + 
-i {input.genome_index_segemehl} \ + -d {input.genome} \ + -t {threads} \ + -q {input.reads} \ + -outfile {output.gmap} \ + ) &> {log}" + + +############################################################################### ### Segemehl transcriptome mapping -################################################################################# +############################################################################### + rule mapping_transcriptome_segemehl: - input: - reads = os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - transcriptome = config["transcriptome"], - transcriptome_index_segemehl = config["transcriptome_index_segemehl"] - output: - tmap = os.path.join(config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "mapping_transcriptome_segemehl_{sample}.log") - log: - os.path.join(config["local_log"], "mapping_transcriptome_segemehl_{sample}.log") - resources: - mem = 10, - time = 12, - threads = 8 - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "segemehl.x \ - -i {input.transcriptome_index_segemehl} \ - -d {input.transcriptome} \ - -t {threads} \ - -q {input.reads} \ - -outfile {output.tmap}" - -################################################################################# + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + transcriptome=config["transcriptome"], + transcriptome_index_segemehl=config["transcriptome_index_segemehl"], + output: + tmap=os.path.join( + config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "mapping_transcriptome_segemehl_{sample}.log", + ), + log: + os.path.join( + config["local_log"], "mapping_transcriptome_segemehl_{sample}.log" + ), + resources: + mem=10, + time=12, + threads=8, + singularity: + "docker://zavolab/segemehl:0.2.0" + shell: + "(segemehl.x \ + -i {input.transcriptome_index_segemehl} \ + -d 
{input.transcriptome} \ + -t {threads} \ + -q {input.reads} \ + -outfile {output.tmap} \ + ) &> {log}" + + +############################################################################### ### Filter fasta for oligomap mapping -################################################################################# +############################################################################### + rule filter_fasta_for_oligomap: - input: - reads = os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), - script = os.path.join(config["scripts_dir"], "validation_fasta.py") - output: - reads = os.path.join(config["output_dir"], "{sample}", "filtered_for_oligomap.fasta") - params: - cluster_log = os.path.join(config["cluster_log"], "filter_fasta_for_oligomap_{sample}.log"), - max_length_reads = config["max_length_reads"], - log: - os.path.join(config["local_log"], "filter_fasta_for_oligomap_{sample}.log") - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} -r {params.max_length_reads} -i {input.reads} -o {output.reads}) &> {log}" - -################################################################################# -### Oligomap genome mapping -################################################################################# + input: + reads=os.path.join(config["output_dir"], "{sample}", "collapsed.fasta"), + script=os.path.join(config["scripts_dir"], "validation_fasta.py"), + output: + reads=os.path.join( + config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "filter_fasta_for_oligomap_{sample}.log" + ), + max_length_reads=config["max_length_reads"], + log: + os.path.join( + config["local_log"], "filter_fasta_for_oligomap_{sample}.log" + ), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + -r {params.max_length_reads} \ + -i {input.reads} \ + -o {output.reads} \ + ) &> {log}" + + 
+############################################################################### +### Oligomap genome mapping +############################################################################### + rule mapping_genome_oligomap: - input: - reads = os.path.join(config["output_dir"], "{sample}", "filtered_for_oligomap.fasta"), - target = config["genome"] - output: - gmap = os.path.join(config["output_dir"], "{sample}", "oligoGenome_map.fa"), - report = os.path.join(config["output_dir"], "{sample}", "oligoGenome_report.txt") - params: - cluster_log = os.path.join(config["cluster_log"], "mapping_genome_oligomap_{sample}.log") - log: - os.path.join(config["local_log"], "mapping_genome_oligomap_{sample}.log") - resources: - mem = 50, - time = 6, - threads = 8 - singularity: - "docker://zavolab/oligomap:1.0" - shell: - "oligomap {input.target} {input.reads} -r {output.report} > {output.gmap}" - -################################################################################# + input: + reads=os.path.join( + config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" + ), + target=config["genome"], + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_report.txt" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "mapping_genome_oligomap_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "mapping_genome_oligomap_{sample}.log" + ), + resources: + mem=50, + time=6, + threads=8, + singularity: + "docker://zavolab/oligomap:1.0" + shell: + "(oligomap \ + {input.target} \ + {input.reads} \ + -r {output.report} \ + > {output.gmap} \ + ) &> {log}" + + +############################################################################### ### Oligomap genome sorting -################################################################################# +############################################################################### + rule 
sort_genome_oligomap: - input: - tmap = os.path.join(config["output_dir"], "{sample}", "oligoGenome_map.fa"), - report = os.path.join(config["output_dir"], "{sample}", "oligoGenome_report.txt"), - script = os.path.join(config["scripts_dir"], "blocksort.sh") - output: - sort = os.path.join(config["output_dir"], "{sample}", "oligoGenome_sorted.fa") - params: - cluster_log = os.path.join(config["cluster_log"], "sorting_genome_oligomap_{sample}.log") - log: - os.path.join(config["local_log"], "sorting_genome_oligomap_{sample}.log") - resources: - threads = 8, - time = 6 - shell: - "(bash {input.script} {input.tmap} {resources.threads} {output.sort}) &> {log}" - -################################################################################# + input: + tmap=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_report.txt" + ), + script=os.path.join(config["scripts_dir"], "blocksort.sh"), + output: + sort=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_sorted.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "sorting_genome_oligomap_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "sorting_genome_oligomap_{sample}.log" + ), + resources: + threads=8, + time=6, + shell: + "(bash {input.script} \ + {input.tmap} \ + {resources.threads} \ + {output.sort} \ + ) &> {log}" + + +############################################################################### ### Oligomap genome mapping output to SAM -################################################################################# +############################################################################### + rule oligomap_genome_toSAM: - input: - report = os.path.join(config["output_dir"], "{sample}", "oligoGenome_report.txt"), - sort = os.path.join(config["output_dir"], "{sample}", "oligoGenome_sorted.fa"), - script = os.path.join(config["scripts_dir"], 
"oligomapOutputToSam_nhfiltered.py") - output: - gmap = os.path.join(config["output_dir"], "{sample}", "oligoGenome_converted.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "oligomap_genome_toSAM_{sample}.log"), - nh = config["nh"] - log: - os.path.join(config["local_log"], "oligomap_genome_toSAM_{sample}.log") - resources: - time = 1, - queue = 1 - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} -i {input.sort} -n {params.nh} > {output.gmap}) &> {log}" - -################################################################################# + input: + report=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_report.txt" + ), + sort=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_sorted.fa" + ), + script=os.path.join( + config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py" + ), + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_converted.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "oligomap_genome_toSAM_{sample}.log" + ), + nh=config["nh"], + log: + os.path.join(config["local_log"], "oligomap_genome_toSAM_{sample}.log"), + resources: + time=1, + queue=1, + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + -i {input.sort} \ + -n {params.nh} \ + > {output.gmap}) &> {log}" + + +############################################################################### ### Oligomap trancriptome mapping -################################################################################# +############################################################################### + rule mapping_transcriptome_oligomap: - input: - reads = os.path.join(config["output_dir"], "{sample}", "filtered_for_oligomap.fasta"), - target = config["transcriptome"] - output: - tmap = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_map.fa"), - report = os.path.join(config["output_dir"], "{sample}", 
"oligoTranscriptome_report.txt") - params: - cluster_log = os.path.join(config["cluster_log"], "mapping_transcriptome_oligomap_{sample}.log") - log: - os.path.join(config["local_log"], "mapping_transcriptome_oligomap_{sample}.log") - resources: - mem = 10, - time = 6, - threads = 8 - singularity: - "docker://zavolab/oligomap:1.0" - shell: - "oligomap {input.target} {input.reads} -s -r {output.report} > {output.tmap}" - -################################################################################# + input: + reads=os.path.join( + config["output_dir"], "{sample}", "filtered_for_oligomap.fasta" + ), + target=config["transcriptome"], + output: + tmap=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "mapping_transcriptome_oligomap_{sample}.log", + ), + log: + os.path.join( + config["local_log"], "mapping_transcriptome_oligomap_{sample}.log" + ), + resources: + mem=10, + time=6, + threads=8, + singularity: + "docker://zavolab/oligomap:1.0" + shell: + "(oligomap \ + {input.target} \ + {input.reads} \ + -s \ + -r {output.report} \ + > {output.tmap} \ + ) &> {log}" + + +############################################################################### ### Oligomap trancriptome sorting -################################################################################# +############################################################################### + rule sort_transcriptome_oligomap: - input: - tmap = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_map.fa"), - report = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_report.txt"), - script = os.path.join(config["scripts_dir"], "blocksort.sh") - output: - sort = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa") - params: - cluster_log = 
os.path.join(config["cluster_log"], "sorting_transcriptome_oligomap_{sample}.log") - log: - os.path.join(config["local_log"], "sorting_transcriptome_oligomap_{sample}.log") - resources: - threads = 8 - shell: - "(bash {input.script} {input.tmap} {resources.threads} {output.sort}) &> {log}" - -################################################################################# + input: + tmap=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_map.fa" + ), + report=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" + ), + script=os.path.join(config["scripts_dir"], "blocksort.sh"), + output: + sort=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], + "sorting_transcriptome_oligomap_{sample}.log", + ), + log: + os.path.join( + config["local_log"], "sorting_transcriptome_oligomap_{sample}.log" + ), + resources: + threads=8, + shell: + "(bash {input.script} \ + {input.tmap} \ + {resources.threads} \ + {output.sort} \ + ) &> {log}" + + +############################################################################### ### Oligomap transcriptome mapping ouput to SAM -################################################################################# +############################################################################### + rule oligomap_transcriptome_toSAM: - input: - report = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_report.txt"), - sort = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa"), - script = os.path.join(config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py") - output: - tmap = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_converted.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "oligomap_transcriptome_toSAM_{sample}.log"), - nh = config["nh"] - log: - os.path.join(config["local_log"], 
"oligomap_transcriptome_toSAM_{sample}.log") - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} -i {input.sort} -n {params.nh} > {output.tmap}) &> {log}" - -################################################################################# + input: + report=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_report.txt" + ), + sort=os.path.join( + config["output_dir"], "{sample}", "oligoTranscriptome_sorted.fa" + ), + script=os.path.join( + config["scripts_dir"], "oligomapOutputToSam_nhfiltered.py" + ), + output: + tmap=os.path.join( + config["output_dir"], + "{sample}", + "oligoTranscriptome_converted.sam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "oligomap_transcriptome_toSAM_{sample}.log" + ), + nh=config["nh"], + log: + os.path.join( + config["local_log"], "oligomap_transcriptome_toSAM_{sample}.log" + ), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + -i {input.sort} \ + -n {params.nh} \ + > {output.tmap} \ + ) &> {log}" + + +############################################################################### ### Merge genome mappings -################################################################################# +############################################################################### + rule merge_genome_maps: - input: - gmap1 = os.path.join(config["output_dir"], "{sample}", "segemehlGenome_map.sam"), - gmap2 = os.path.join(config["output_dir"], "{sample}", "oligoGenome_converted.sam") - output: - gmaps = os.path.join(config["output_dir"], "{sample}", "GenomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "merge_genome_maps_{sample}.log") - log: - os.path.join(config["local_log"], "merge_genome_maps_{sample}.log") - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.gmap1} {input.gmap2} > {output.gmaps}) &> {log}" - 
-################################################################################# + input: + gmap1=os.path.join( + config["output_dir"], "{sample}", "segemehlGenome_map.sam" + ), + gmap2=os.path.join( + config["output_dir"], "{sample}", "oligoGenome_converted.sam" + ), + output: + gmaps=os.path.join( + config["output_dir"], "{sample}", "GenomeMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "merge_genome_maps_{sample}.log" + ), + log: + os.path.join(config["local_log"], "merge_genome_maps_{sample}.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {input.gmap1} {input.gmap2} > {output.gmaps}) &> {log}" + + +############################################################################### ### Merge trancriptome mappings -################################################################################# +############################################################################### + rule merge_transcriptome_maps: - input: - tmap1 = os.path.join(config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam"), - tmap2 = os.path.join(config["output_dir"], "{sample}", "oligoTranscriptome_converted.sam") - output: - tmaps = os.path.join(config["output_dir"], "{sample}", "TranscriptomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "merge_transcriptome_maps_{sample}.log") - log: - os.path.join(config["local_log"], "merge_transcriptome_maps_{sample}.log") - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.tmap1} {input.tmap2} > {output.tmaps}) &> {log}" - -################################################################################# + input: + tmap1=os.path.join( + config["output_dir"], "{sample}", "segemehlTranscriptome_map.sam" + ), + tmap2=os.path.join( + config["output_dir"], + "{sample}", + "oligoTranscriptome_converted.sam", + ), + output: + tmaps=os.path.join( + config["output_dir"], "{sample}", "TranscriptomeMappings.sam" + ), + params: + 
cluster_log=os.path.join( + config["cluster_log"], "merge_transcriptome_maps_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "merge_transcriptome_maps_{sample}.log" + ), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {input.tmap1} {input.tmap2} > {output.tmaps}) &> {log}" + + +############################################################################### ### Filter NH genome -################################################################################# +############################################################################### + rule nh_filter_genome: - input: - gmaps = os.path.join(config["output_dir"], "{sample}", "GenomeMappings.sam"), - script = os.path.join(config["scripts_dir"], "nh_filter.py") - output: - gmaps = os.path.join(config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "nh_filter_genome_{sample}.log"), - nh = config["nh"] - log: - os.path.join(config["local_log"], "nh_filter_genome_{sample}.log") - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} {input.gmaps} {params.nh} {output.gmaps}) &> {log}" - -################################################################################# + input: + gmaps=os.path.join( + config["output_dir"], "{sample}", "GenomeMappings.sam" + ), + script=os.path.join(config["scripts_dir"], "nh_filter.py"), + output: + gmaps=os.path.join( + config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "nh_filter_genome_{sample}.log" + ), + nh=config["nh"], + log: + os.path.join(config["local_log"], "nh_filter_genome_{sample}.log"), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + {input.gmaps} \ + {params.nh} \ + {output.gmaps} \ + ) &> {log}" + + +############################################################################### ### Filter NH transcriptome 
-################################################################################# +############################################################################### + rule filter_nh_transcriptome: - input: - tmaps = os.path.join(config["output_dir"], "{sample}", "TranscriptomeMappings.sam"), - script = os.path.join(config["scripts_dir"], "nh_filter.py") - output: - tmaps = os.path.join(config["output_dir"], "{sample}", "nhfiltered_TranscriptomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "filter_nh_transcriptome_{sample}.log"), - nh = config["nh"] - log: - os.path.join(config["local_log"], "filter_nh_transcriptome_{sample}.log") - singularity: - "docker://zavolab/python:3.6.5" - shell: - "(python {input.script} {input.tmaps} {params.nh} {output.tmaps}) &> {log}" - -################################################################################# + input: + tmaps=os.path.join( + config["output_dir"], "{sample}", "TranscriptomeMappings.sam" + ), + script=os.path.join(config["scripts_dir"], "nh_filter.py"), + output: + tmaps=os.path.join( + config["output_dir"], + "{sample}", + "nhfiltered_TranscriptomeMappings.sam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "filter_nh_transcriptome_{sample}.log" + ), + nh=config["nh"], + log: + os.path.join( + config["local_log"], "filter_nh_transcriptome_{sample}.log" + ), + singularity: + "docker://zavolab/python:3.6.5" + shell: + "(python {input.script} \ + {input.tmaps} \ + {params.nh} \ + {output.tmaps} \ + ) &> {log}" + + +############################################################################### ### Remove header genome mappings -################################################################################# +############################################################################### + rule remove_headers_genome: - input: - gmap = os.path.join(config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam") - output: - gmap = 
os.path.join(config["output_dir"], "{sample}", "noheader_GenomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "remove_headers_genome_{sample}.log") - log: - os.path.join(config["local_log"], "remove_headers_genome_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "samtools view {input.gmap} > {output.gmap}" - -################################################################################# + input: + gmap=os.path.join( + config["output_dir"], "{sample}", "nhfiltered_GenomeMappings.sam" + ), + output: + gmap=os.path.join( + config["output_dir"], "{sample}", "noheader_GenomeMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "remove_headers_genome_{sample}.log" + ), + log: + os.path.join(config["local_log"], "remove_headers_genome_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "samtools view {input.gmap} > {output.gmap}" + + +############################################################################### ### Remove header transcriptome mappings -################################################################################# +############################################################################### + rule remove_headers_transcriptome: - input: - tmap = os.path.join(config["output_dir"], "{sample}", "nhfiltered_TranscriptomeMappings.sam") - output: - tmap = os.path.join(config["output_dir"], "{sample}", "noheader_TranscriptomeMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "remove_headers_transcriptome_{sample}.log") - log: - os.path.join(config["local_log"], "remove_headers_transcriptome_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "samtools view {input.tmap} > {output.tmap}" - -################################################################################# + input: + tmap=os.path.join( + config["output_dir"], + "{sample}", + "nhfiltered_TranscriptomeMappings.sam", + ), + output: + 
tmap=os.path.join( + config["output_dir"], + "{sample}", + "noheader_TranscriptomeMappings.sam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "remove_headers_transcriptome_{sample}.log" + ), + log: + os.path.join( + config["local_log"], "remove_headers_transcriptome_{sample}.log" + ), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "samtools view {input.tmap} > {output.tmap}" + + +############################################################################### ### Transcriptome to genome coordinates -################################################################################# +############################################################################### + rule trans_to_gen: - input: - tmap = os.path.join(config["output_dir"], "{sample}", "noheader_TranscriptomeMappings.sam"), - script = os.path.join(config["scripts_dir"], "sam_trx_to_sam_gen.pl"), - exons = config["exons"] - output: - genout = os.path.join(config["output_dir"], "{sample}", "TransToGen.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "trans_to_gen_{sample}.log") - log: - os.path.join(config["local_log"], "trans_to_gen_{sample}.log") - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} --in {input.tmap} --exons {input.exons} --out {output.genout}) &> {log}" - -################################################################################# + input: + tmap=os.path.join( + config["output_dir"], + "{sample}", + "noheader_TranscriptomeMappings.sam", + ), + script=os.path.join(config["scripts_dir"], "sam_trx_to_sam_gen.pl"), + exons=config["exons"], + output: + genout=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"), + params: + cluster_log=os.path.join( + config["cluster_log"], "trans_to_gen_{sample}.log" + ), + log: + os.path.join(config["local_log"], "trans_to_gen_{sample}.log"), + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} \ + --in {input.tmap} \ + --exons 
{input.exons} \ + --out {output.genout} \ + ) &> {log}" + + +############################################################################### ### Concatenate genome and trancriptome mappings -################################################################################# +############################################################################### + rule cat_mapping: - input: - gmap1 = os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"), - gmap2 = os.path.join(config["output_dir"], "{sample}", "noheader_GenomeMappings.sam") - output: - catmaps = os.path.join(config["output_dir"], "{sample}", "catMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "cat_mapping_{sample}.log") - log: - os.path.join(config["local_log"], "cat_mapping_{sample}.log") - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.gmap1} {input.gmap2} > {output.catmaps}) &> {log}" - -################################################################################# + input: + gmap1=os.path.join(config["output_dir"], "{sample}", "TransToGen.sam"), + gmap2=os.path.join( + config["output_dir"], "{sample}", "noheader_GenomeMappings.sam" + ), + output: + catmaps=os.path.join( + config["output_dir"], "{sample}", "catMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "cat_mapping_{sample}.log" + ), + log: + os.path.join(config["local_log"], "cat_mapping_{sample}.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {input.gmap1} {input.gmap2} > {output.catmaps}) &> {log}" + + +############################################################################### ### Add header -################################################################################# +############################################################################### + rule add_header: - input: - header = config["header_of_collapsed_fasta"], - catmaps = os.path.join(config["output_dir"], "{sample}", "catMappings.sam") - output: - 
concatenate = os.path.join(config["output_dir"], "{sample}", "concatenated_header_catMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "add_header_{sample}.log") - log: - os.path.join(config["local_log"], "add_header_{sample}.log") - singularity: - "docker://zavolab/ubuntu:18.04" - shell: - "(cat {input.header} {input.catmaps} > {output.concatenate}) &> {log}" - -################################################################################# + input: + header=config["header_of_collapsed_fasta"], + catmaps=os.path.join( + config["output_dir"], "{sample}", "catMappings.sam" + ), + output: + concatenate=os.path.join( + config["output_dir"], + "{sample}", + "concatenated_header_catMappings.sam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "add_header_{sample}.log" + ), + log: + os.path.join(config["local_log"], "add_header_{sample}.log"), + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cat {input.header} {input.catmaps} > {output.concatenate}) &> {log}" + + +############################################################################### ### Sort mapped file by IDs -################################################################################# +############################################################################### + rule sort_id: - input: - concatenate = os.path.join(config["output_dir"], "{sample}", "concatenated_header_catMappings.sam") - output: - sort = os.path.join(config["output_dir"], "{sample}", "header_sorted_catMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "sort_id_{sample}.log") - log: - os.path.join(config["local_log"], "sort_id_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort -n -o {output.sort} {input.concatenate}) &> {log}" - -################################################################################# + input: + concatenate=os.path.join( + config["output_dir"], + "{sample}", + 
"concatenated_header_catMappings.sam", + ), + output: + sort=os.path.join( + config["output_dir"], "{sample}", "header_sorted_catMappings.sam" + ), + params: + cluster_log=os.path.join(config["cluster_log"], "sort_id_{sample}.log"), + log: + os.path.join(config["local_log"], "sort_id_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools sort -n -o {output.sort} {input.concatenate}) &> {log}" + + +############################################################################### ### Remove inferior mappings (keeping multimappers) -################################################################################# +############################################################################### + rule remove_inferiors: - input: - sort = os.path.join(config["output_dir"], "{sample}", "header_sorted_catMappings.sam"), - script = os.path.join(config["scripts_dir"], "sam_remove_duplicates_inferior_alignments_multimappers.1_5.pl") - output: - remove_inf = os.path.join(config["output_dir"], "{sample}", "removeInferiors.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "remove_inferiors_{sample}.log") - log: - os.path.join(config["local_log"], "remove_inferiors_{sample}.log") - resources: - mem = 15, - threads = 4 - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} --print-header --keep-mm --in {input.sort} --out {output.remove_inf}) &> {log}" - -################################################################################# + input: + sort=os.path.join( + config["output_dir"], "{sample}", "header_sorted_catMappings.sam" + ), + script=os.path.join( + config["scripts_dir"], + "sam_remove_duplicates_inferior_alignments_multimappers.1_5.pl", + ), + output: + remove_inf=os.path.join( + config["output_dir"], "{sample}", "removeInferiors.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "remove_inferiors_{sample}.log" + ), + log: + os.path.join(config["local_log"], 
"remove_inferiors_{sample}.log"), + resources: + mem=15, + threads=4, + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} \ + --print-header \ + --keep-mm \ + --in {input.sort} \ + --out {output.remove_inf} \ + ) &> {log}" + + +############################################################################### ### Uncollapse reads -################################################################################# +############################################################################### + rule uncollapse_reads: - input: - maps = os.path.join(config["output_dir"], "{sample}", "removeInferiors.sam"), - script = os.path.join(config["scripts_dir"], "sam_uncollapse.pl") - output: - maps = os.path.join(config["output_dir"], "{sample}", "uncollapsedMappings.sam") - params: - cluster_log = os.path.join(config["cluster_log"], "uncollapse_reads_{sample}.log") - log: - os.path.join(config["local_log"], "uncollapse_reads_{sample}.log") - singularity: - "docker://zavolab/perl:5.28" - shell: - "(perl {input.script} --suffix --in {input.maps} --out {output.maps}) &> {log}" - -################################################################################# -### Convert SAM to BAM -################################################################################# + input: + maps=os.path.join( + config["output_dir"], "{sample}", "removeInferiors.sam" + ), + script=os.path.join(config["scripts_dir"], "sam_uncollapse.pl"), + output: + maps=os.path.join( + config["output_dir"], "{sample}", "uncollapsedMappings.sam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "uncollapse_reads_{sample}.log" + ), + log: + os.path.join(config["local_log"], "uncollapse_reads_{sample}.log"), + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} \ + --suffix \ + --in {input.maps} \ + --out {output.maps} \ + ) &> {log}" + + +############################################################################### +### Convert SAM to BAM 
+############################################################################### + rule convert_to_bam: - input: - maps = os.path.join(config["output_dir"], "{sample}", "uncollapsedMappings.sam") - output: - maps = os.path.join(config["output_dir"], "{sample}", "mappingsConverted.bam") - params: - cluster_log = os.path.join(config["cluster_log"], "convert_to_bam_{sample}.log") - log: - os.path.join(config["local_log"], "convert_to_bam_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools view -b {input.maps} > {output.maps}) &> {log}" - -################################################################################# + input: + maps=os.path.join( + config["output_dir"], "{sample}", "uncollapsedMappings.sam" + ), + output: + maps=os.path.join( + config["output_dir"], "{sample}", "mappingsConverted.bam" + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "convert_to_bam_{sample}.log" + ), + log: + os.path.join(config["local_log"], "convert_to_bam_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools view -b {input.maps} > {output.maps}) &> {log}" + + +############################################################################### ### Sort by coordinate position -################################################################################# +############################################################################### + rule sort_by_position: - input: - maps = os.path.join(config["output_dir"], "{sample}", "mappingsConverted.bam") - output: - maps = os.path.join(config["output_dir"], "{sample}", "convertedSortedMappings_{sample}.bam") - params: - cluster_log = os.path.join(config["cluster_log"], "sort_by_position_{sample}.log") - log: - os.path.join(config["local_log"], "sort_by_position_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort {input.maps} > {output.maps}) &> {log}" - 
-################################################################################# -### Create bam index -################################################################################# + input: + maps=os.path.join( + config["output_dir"], "{sample}", "mappingsConverted.bam" + ), + output: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "sort_by_position_{sample}.log" + ), + log: + os.path.join(config["local_log"], "sort_by_position_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools sort {input.maps} > {output.maps}) &> {log}" + + +############################################################################### +### Create bam index +############################################################################### + rule index_bam: - input: - maps = os.path.join(config["output_dir"], "{sample}", "convertedSortedMappings_{sample}.bam") - output: - maps = os.path.join(config["output_dir"], "{sample}", "convertedSortedMappings_{sample}.bam.bai") - params: - cluster_log = os.path.join(config["cluster_log"], "index_bam_{sample}.log") - log: - os.path.join(config["local_log"], "index_bam_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools index -b {input.maps} > {output.maps}) &> {log}" + input: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam", + ), + output: + maps=os.path.join( + config["output_dir"], + "{sample}", + "convertedSortedMappings_{sample}.bam.bai", + ), + params: + cluster_log=os.path.join( + config["cluster_log"], "index_bam_{sample}.log" + ), + log: + os.path.join(config["local_log"], "index_bam_{sample}.log"), + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools index -b {input.maps} > {output.maps}) &> {log}" diff --git a/workflow/prepare/Snakefile b/workflow/prepare/Snakefile index 9e49339..3574e0a 
100644 --- a/workflow/prepare/Snakefile +++ b/workflow/prepare/Snakefile @@ -1,660 +1,730 @@ -################################################################################ +############################################################################### # (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel # (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu # # Snakemake workflow to download and prepare the necessary files for # smallRNA-seq related workflows. -################################################################################ +############################################################################### import os + # Rules that require internet connection for downloading files are included # in the localrules -localrules: finish, genome_process, filter_anno_gtf, mirna_anno, dict_chr +localrules: + finish, + genome_process, + filter_anno_gtf, + mirna_anno, + dict_chr, -################################################################################ + +############################################################################### ### Finish rule -################################################################################ +############################################################################### + rule finish: input: - idx_transcriptome = expand( - os.path.join( - config["output_dir"], - "{organism}", - "transcriptome_index_segemehl.idx"), - organism=config["organism"]), - idx_genome = expand( + idx_transcriptome=expand( os.path.join( config["output_dir"], - "{organism}", - "genome_index_segemehl.idx"), - organism=config["organism"]), - exons = expand( + "{organism}", + "transcriptome_index_segemehl.idx", + ), + organism=config["organism"], + ), + idx_genome=expand( os.path.join( config["output_dir"], - "{organism}", - "exons.bed"), - organism=config["organism"]), - header = expand( + "{organism}", + "genome_index_segemehl.idx", + ), + organism=config["organism"], + ), + exons=expand( + 
os.path.join(config["output_dir"], "{organism}", "exons.bed"), + organism=config["organism"], + ), + header=expand( os.path.join( config["output_dir"], - "{organism}", - "headerOfCollapsedFasta.sam"), - organism=config["organism"]), - mirnafilt = expand( + "{organism}", + "headerOfCollapsedFasta.sam", + ), + organism=config["organism"], + ), + mirnafilt=expand( os.path.join( - config["output_dir"], - "{organism}", - "mirna_filtered.bed"), - organism=config["organism"]), - isomirs = expand( + config["output_dir"], "{organism}", "mirna_filtered.bed" + ), + organism=config["organism"], + ), + isomirs=expand( os.path.join( - config["output_dir"], - "{organism}", - "isomirs_annotation.bed"), - organism=config["organism"]) + config["output_dir"], "{organism}", "isomirs_annotation.bed" + ), + organism=config["organism"], + ), -################################################################################ + +############################################################################### ### Download and process genome IDs -################################################################################ +############################################################################### + rule genome_process: input: - script = os.path.join(config["scripts_dir"],"genome_process.sh") + script=os.path.join(config["scripts_dir"], "genome_process.sh"), output: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" - ) + ), params: - url = lambda wildcards: config[ wildcards.organism ]["genome_url"], - dir_out = os.path.join(config["output_dir"], "{organism}") + url=lambda wildcards: config[wildcards.organism]["genome_url"], + dir_out=os.path.join(config["output_dir"], "{organism}"), log: - os.path.join(config["local_log"], "{organism}","genome_process.log") + os.path.join(config["local_log"], "{organism}", "genome_process.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(bash {input.script} {params.dir_out} {log} 
{params.url})" -################################################################################ + +############################################################################### ### Download and filter gtf by transcript_level -################################################################################ +############################################################################### + rule filter_anno_gtf: input: - script = os.path.join(config["scripts_dir"],"filter_anno_gtf.sh"), + script=os.path.join(config["scripts_dir"], "filter_anno_gtf.sh"), output: - gtf = os.path.join( - config["output_dir"], "{organism}","gene_annotations.filtered.gtf" - ) + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", + ), params: - url = lambda wildcards: config[ wildcards.organism ]['gtf_url'], - dir_out = os.path.join(config["output_dir"], "{organism}") + url=lambda wildcards: config[wildcards.organism]["gtf_url"], + dir_out=os.path.join(config["output_dir"], "{organism}"), log: - os.path.join(config["local_log"], "{organism}","filter_anno_gtf.log") + os.path.join(config["local_log"], "{organism}", "filter_anno_gtf.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(bash {input.script} {params.dir_out} {log} {params.url}) &> {log}" -################################################################################ + +############################################################################### ### Extract transcriptome sequences in FASTA from genome. 
-################################################################################ +############################################################################### + rule extract_transcriptome_seqs: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" ), - gtf = os.path.join( - config["output_dir"], "{organism}","gene_annotations.filtered.gtf" - ) + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", + ), output: - fasta = os.path.join( - config["output_dir"], "{organism}","transcriptome.fa" - ) + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome.fa" + ), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","extract_transcriptome_seqs.log" - ) + cluster_log=os.path.join( + config["cluster_log"], + "{organism}", + "extract_transcriptome_seqs.log", + ), log: os.path.join( - config["local_log"], "{organism}","extract_transcriptome_seqs.log" - ) + config["local_log"], + "{organism}", + "extract_transcriptome_seqs.log", + ), singularity: "docker://zavolab/cufflinks:2.2.1" shell: "(gffread -w {output.fasta} -g {input.genome} {input.gtf}) &> {log}" + ############################################################################### -## Trim transcript IDs from FASTA file +### Trim transcript IDs from FASTA file ############################################################################### + rule trim_fasta: input: - fasta = os.path.join( - config["output_dir"], "{organism}","transcriptome.fa" + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome.fa" ), - script = os.path.join(config["scripts_dir"], "validation_fasta.py") + script=os.path.join(config["scripts_dir"], "validation_fasta.py"), output: - fasta = os.path.join( - config["output_dir"], "{organism}","transcriptome_idtrim.fa" - ) + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome_idtrim.fa" + ), params: - cluster_log = 
os.path.join( - config["cluster_log"], "{organism}","trim_fasta.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "trim_fasta.log" + ), log: - os.path.join(config["local_log"], "{organism}","trim_fasta.log") + os.path.join(config["local_log"], "{organism}", "trim_fasta.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: """(awk \ - -F" " \ - "/^>/ {{print \$1; next}} 1" \ - {input.fasta} \ - > {output.fasta} \ + -F" " \ + "/^>/ {{print \$1; next}} 1" \ + {input.fasta} \ + > {output.fasta} \ ) &> {log}""" -################################################################################ + +############################################################################### ### Generate segemehl index for transcripts -################################################################################ +############################################################################### + rule generate_segemehl_index_transcriptome: input: - fasta = os.path.join( - config["output_dir"], "{organism}","transcriptome_idtrim.fa" - ) + fasta=os.path.join( + config["output_dir"], "{organism}", "transcriptome_idtrim.fa" + ), output: - idx = os.path.join( + idx=os.path.join( config["output_dir"], "{organism}", - "transcriptome_index_segemehl.idx" - ) + "transcriptome_index_segemehl.idx", + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", - "generate_segemehl_index_transcriptome.log" - ) + "generate_segemehl_index_transcriptome.log", + ), log: os.path.join( config["local_log"], "{organism}", - "generate_segemehl_index_transcriptome.log" - ) + "generate_segemehl_index_transcriptome.log", + ), resources: - mem = 10, - threads = 8, - time = 6 + mem=10, + threads=8, + time=6, singularity: "docker://zavolab/segemehl:0.2.0" shell: "(segemehl.x -x {output.idx} -d {input.fasta}) &> {log}" -################################################################################ + 
+############################################################################### ### Generate segemehl index for genome -################################################################################ +############################################################################### + rule generate_segemehl_index_genome: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" - ) + ), output: - idx = os.path.join - (config["output_dir"], "{organism}","genome_index_segemehl.idx" - ) + idx=os.path.join( + config["output_dir"], "{organism}", "genome_index_segemehl.idx" + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", - "generate_segemehl_index_genome.log" - ) + "generate_segemehl_index_genome.log", + ), log: os.path.join( config["local_log"], "{organism}", - "generate_segemehl_index_genome.log" - ) + "generate_segemehl_index_genome.log", + ), resources: - mem = 50, - threads = 8, - time = 6 + mem=50, + threads=8, + time=6, singularity: "docker://zavolab/segemehl:0.2.0" shell: "(segemehl.x -x {output.idx} -d {input.genome}) &> {log}" -################################################################################ + +############################################################################### ### GTF file of exons (genomic coordinates) -################################################################################ +############################################################################### + rule get_exons_gtf: input: - gtf = os.path.join( - config["output_dir"], "{organism}","gene_annotations.filtered.gtf" + gtf=os.path.join( + config["output_dir"], + "{organism}", + "gene_annotations.filtered.gtf", ), - script = os.path.join(config["scripts_dir"], "get_lines_w_pattern.sh") + script=os.path.join(config["scripts_dir"], "get_lines_w_pattern.sh"), output: - exons = os.path.join(config["output_dir"], "{organism}","exons.gtf") + 
exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","get_exons_gtf.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "get_exons_gtf.log" + ), log: - os.path.join(config["local_log"], "{organism}", "get_exons_gtf.log") + os.path.join(config["local_log"], "{organism}", "get_exons_gtf.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(bash \ - {input.script} \ - -f {input.gtf} \ - -c 3 \ - -p exon \ - -o {output.exons} \ + {input.script} \ + -f {input.gtf} \ + -c 3 \ + -p exon \ + -o {output.exons} \ ) &> {log}" -################################################################################ + +############################################################################### ### Convert GTF file of exons to BED file -################################################################################ +############################################################################### + rule gtftobed: input: - exons = os.path.join(config["output_dir"], "{organism}","exons.gtf"), - script = os.path.join(config["scripts_dir"], "gtf_exons_bed.1.1.2.R") + exons=os.path.join(config["output_dir"], "{organism}", "exons.gtf"), + script=os.path.join(config["scripts_dir"], "gtf_exons_bed.1.1.2.R"), output: - exons = os.path.join(config["output_dir"], "{organism}","exons.bed") + exons=os.path.join(config["output_dir"], "{organism}", "exons.bed"), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","gtftobed.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "gtftobed.log" + ), log: - os.path.join(config["local_log"], "{organism}","gtftobed.log") + os.path.join(config["local_log"], "{organism}", "gtftobed.log"), singularity: "docker://zavolab/r-zavolab:3.5.1" shell: "(Rscript \ - {input.script} \ - --gtf {input.exons} \ - -o {output.exons} \ + {input.script} \ + --gtf {input.exons} \ + -o {output.exons} \ ) &> 
{log}" -################################################################################ + +############################################################################### ### Create header for SAM file -################################################################################ +############################################################################### + rule create_header_genome: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" - ) + ), output: - header = os.path.join( - config["output_dir"], "{organism}","headerOfCollapsedFasta.sam" - ) + header=os.path.join( + config["output_dir"], "{organism}", "headerOfCollapsedFasta.sam" + ), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","create_header_genome.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "create_header_genome.log" + ), log: os.path.join( - config["local_log"], "{organism}","create_header_genome.log" - ) + config["local_log"], "{organism}", "create_header_genome.log" + ), singularity: "docker://zavolab/samtools:1.8" shell: "(samtools dict -o {output.header} --uri=NA {input.genome}) &> {log}" -################################################################################ + +############################################################################### ### Download miRNA annotation -################################################################################ +############################################################################### + rule mirna_anno: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" - ) + ), output: - anno = os.path.join( - config["output_dir"], "{organism}","raw", "mirna.gff3" - ) + anno=os.path.join( + config["output_dir"], "{organism}", "raw", "mirna.gff3" + ), params: - anno = lambda wildcards: config[ wildcards.organism ]["mirna_url"], - cluster_log = os.path.join( - 
config["cluster_log"], "{organism}","mirna_anno.log" + anno=lambda wildcards: config[wildcards.organism]["mirna_url"], + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "mirna_anno.log" ), log: - os.path.join(config["local_log"], "{organism}","mirna_anno.log") + os.path.join(config["local_log"], "{organism}", "mirna_anno.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(wget {params.anno} -O {output.anno}) &> {log}" -################################################################################ + +############################################################################### ### Download dictionary mapping chr -################################################################################ +############################################################################### + rule dict_chr: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" - ) + ), output: - map_chr = os.path.join( + map_chr=os.path.join( config["output_dir"], "{organism}", "UCSC2ensembl.txt" - ) + ), params: - map_chr = lambda wildcards: config[wildcards.organism]["map_chr_url"], - cluster_log = os.path.join( - config["cluster_log"], "{organism}","dict_chr.log" + map_chr=lambda wildcards: config[wildcards.organism]["map_chr_url"], + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "dict_chr.log" ), log: - os.path.join(config["local_log"], "{organism}","dict_chr.log") + os.path.join(config["local_log"], "{organism}", "dict_chr.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(wget {params.map_chr} -O {output.map_chr}) &> {log}" -################################################################################ + +############################################################################### ### Mapping chromosomes names, UCSC <-> ENSEMBL -################################################################################ 
+############################################################################### + rule map_chr_names: input: - anno = os.path.join( - config["output_dir"], "{organism}","raw", "mirna.gff3" + anno=os.path.join( + config["output_dir"], "{organism}", "raw", "mirna.gff3" ), - script = os.path.join(config["scripts_dir"], "map_chromosomes.pl"), - map_chr = os.path.join( + script=os.path.join(config["scripts_dir"], "map_chromosomes.pl"), + map_chr=os.path.join( config["output_dir"], "{organism}", "UCSC2ensembl.txt" - ) + ), output: - gff = os.path.join( + gff=os.path.join( config["output_dir"], "{organism}", "mirna_chr_mapped.gff3" - ) + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", "map_chr_names.log" ), - column = lambda wildcards: config[ wildcards.organism ]["column"], - delimiter = lambda wildcards: config[ wildcards.organism ]["delimiter"] + column=lambda wildcards: config[wildcards.organism]["column"], + delimiter=lambda wildcards: config[wildcards.organism]["delimiter"], log: - os.path.join(config["local_log"], "{organism}","map_chr_names.log") + os.path.join(config["local_log"], "{organism}", "map_chr_names.log"), singularity: "docker://zavolab/perl:5.28" shell: "(perl {input.script} \ - {input.anno} \ - {params.column} \ - {params.delimiter} \ - {input.map_chr} \ - {output.gff} \ + {input.anno} \ + {params.column} \ + {params.delimiter} \ + {input.map_chr} \ + {output.gff} \ ) &> {log}" -################################################################################ + +############################################################################### ### Filtering _1 miR IDs -################################################################################ +############################################################################### + rule filter_mir_1_anno: input: - gff = os.path.join( + gff=os.path.join( config["output_dir"], "{organism}", "mirna_chr_mapped.gff3" - ) + ), output: - gff = 
os.path.join( + gff=os.path.join( config["output_dir"], "{organism}", "mirna_filtered.gff3" - ) + ), params: - script = os.path.join(config["scripts_dir"], "filter_mir_1_anno.sh"), - cluster_log = os.path.join( - config["cluster_log"], "{organism}","filter_mir_1_anno.log" + script=os.path.join(config["scripts_dir"], "filter_mir_1_anno.sh"), + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "filter_mir_1_anno.log" ), log: - os.path.join(config["local_log"], "{organism}", "filter_mir_1_anno.log") + os.path.join( + config["local_log"], "{organism}", "filter_mir_1_anno.log" + ), singularity: - "docker://zavolab/ubuntu:18.04" + "docker://zavolab/ubuntu:18.04" shell: "(bash {params.script} -f {input.gff} -o {output.gff}) &> {log}" -################################################################################ + +############################################################################### ### GFF to BED (improve intersect memory efficient allowing to use -sorted) -################################################################################ +############################################################################### + rule gfftobed: input: - gff = os.path.join( + gff=os.path.join( config["output_dir"], "{organism}", "mirna_filtered.gff3" - ) + ), output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "mirna_filtered.bed" - ) + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", "gfftobed.log" ), - out_dir = os.path.join(config["output_dir"]) + out_dir=os.path.join(config["output_dir"]), log: - os.path.join(config["local_log"], "{organism}", "gfftobed.log") + os.path.join(config["local_log"], "{organism}", "gfftobed.log"), singularity: "docker://zavolab/bedops:2.4.35" shell: "(convert2bed -i gff < {input.gff} \ - --sort-tmpdir={params.out_dir} \ - > {output.bed} \ + --sort-tmpdir={params.out_dir} \ + > {output.bed} \ ) &> {log}" 
-################################################################################ + +############################################################################### ### Index genome fasta file -################################################################################ +############################################################################### + rule create_index_fasta: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa" ), output: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa.fai" ), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","create_index_fasta.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "create_index_fasta.log" + ), log: - os.path.join(config["local_log"], "{organism}","create_index_fasta.log") + os.path.join( + config["local_log"], "{organism}", "create_index_fasta.log" + ), singularity: "docker://zavolab/samtools:1.8" shell: "(samtools faidx {input.genome}) &> {log}" -################################################################################ + +############################################################################### ### Extract chromosome length -################################################################################ +############################################################################### + rule extract_chr_len: input: - genome = os.path.join( + genome=os.path.join( config["output_dir"], "{organism}", "genome.processed.fa.fai" - ) + ), output: - chrsize = os.path.join( + chrsize=os.path.join( config["output_dir"], "{organism}", "chr_size.txt" - ) + ), params: - cluster_log = os.path.join( - config["cluster_log"], "{organism}","extract_chr_len.log" - ) + cluster_log=os.path.join( + config["cluster_log"], "{organism}", "extract_chr_len.log" + ), log: - os.path.join(config["local_log"], "{organism}","extract_chr_len.log") + 
os.path.join(config["local_log"], "{organism}", "extract_chr_len.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(cut -f1,2 {input.genome} > {output.chrsize}) &> {log}" -################################################################################ + +############################################################################### ### Extract mature miRNA -################################################################################ +############################################################################### + rule filter_mature_mirs: input: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "mirna_filtered.bed" - ) + ), output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "mirna_mature_filtered.bed" - ) + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", "filter_mature_mirs.log" ), - precursor = "miRNA_primary_transcript" + precursor="miRNA_primary_transcript", log: os.path.join( config["local_log"], "{organism}", "filter_mature_mirs.log" - ) + ), singularity: - "docker://zavolab/ubuntu:18.04", + "docker://zavolab/ubuntu:18.04" shell: "(grep -v {params.precursor} {input.bed} > {output.bed}) &> {log}" -################################################################################ + +############################################################################### ### Create isomirs annotation file from mature miRNA -################################################################################ +############################################################################### + rule iso_anno: input: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "mirna_mature_filtered.bed" ), - chrsize = os.path.join( + chrsize=os.path.join( config["output_dir"], "{organism}", "chr_size.txt" - ) + ), output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", - 
"iso_anno_5p{bp_5p}_3p{bp_3p}.bed" - ) + "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.log" + "iso_anno_5p{bp_5p}_3p{bp_3p}.log", ), - bp_5p = lambda wildcards: wildcards.bp_5p, - bp_3p = lambda wildcards: wildcards.bp_3p + bp_5p=lambda wildcards: wildcards.bp_5p, + bp_3p=lambda wildcards: wildcards.bp_3p, log: os.path.join( config["local_log"], "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.log" - ) + "iso_anno_5p{bp_5p}_3p{bp_3p}.log", + ), singularity: "docker://zavolab/bedtools:2.28.0" shell: "(bedtools slop \ - -i {input.bed} \ - -g {input.chrsize} \ - -l {params.bp_5p} \ - -r {params.bp_3p} \ - > {output.bed} \ + -i {input.bed} \ + -g {input.chrsize} \ + -l {params.bp_5p} \ + -r {params.bp_3p} \ + > {output.bed} \ ) &> {log}" -################################################################################ -### Change miRNA names to isomirs names -################################################################################ + +############################################################################### +### Change miRNA names to isomirs names +############################################################################### + rule iso_anno_rename: input: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", - "iso_anno_5p{bp_5p}_3p{bp_3p}.bed" - ) + "iso_anno_5p{bp_5p}_3p{bp_3p}.bed", + ), output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed" - ) + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log" + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", ), - bp_5p = lambda wildcards: wildcards.bp_5p, - bp_3p = lambda wildcards: wildcards.bp_3p + bp_5p=lambda wildcards: wildcards.bp_5p, + bp_3p=lambda 
wildcards: wildcards.bp_3p, log: os.path.join( config["local_log"], "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log" - ) + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log", + ), singularity: "docker://zavolab/ubuntu:18.04" shell: "(sed \ - 's/;Derives/_5p{params.bp_5p}_3p{params.bp_3p};Derives/' \ - {input.bed} \ - > {output.bed} \ + 's/;Derives/_5p{params.bp_5p}_3p{params.bp_3p};Derives/' \ + {input.bed} \ + > {output.bed} \ ) &> {log}" -################################################################################ + +############################################################################### ### Concatenate all isomirs annotation files -################################################################################ +############################################################################### + rule iso_anno_concat: input: - bed = lambda wildcards: expand(os.path.join( - config["output_dir"], - "{organism}", - "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed" + bed=lambda wildcards: expand( + os.path.join( + config["output_dir"], + "{organism}", + "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed", + ), + organism=config["organism"], + bp_3p=config["bp_3p"], + bp_5p=config["bp_5p"], ), - organism = config["organism"], - bp_3p = config['bp_3p'], - bp_5p = config['bp_5p']) output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "iso_anno_concat.bed" - ) + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", "iso_anno_concat.log" ), - prefix = os.path.join( + prefix=os.path.join( config["output_dir"], "{organism}", "iso_anno_rename" - ) + ), log: - os.path.join(config["local_log"], "{organism}", "iso_anno_concat.log") + os.path.join(config["local_log"], "{organism}", "iso_anno_concat.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: "(cat {params.prefix}* > {output.bed}) &> {log}" -################################################################################ + 
+############################################################################### ### Remove non changing isomirs (5p0_3p0) -################################################################################ +############################################################################### + rule iso_anno_final: input: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "iso_anno_concat.bed" - ) + ), output: - bed = os.path.join( + bed=os.path.join( config["output_dir"], "{organism}", "isomirs_annotation.bed" - ) + ), params: - cluster_log = os.path.join( + cluster_log=os.path.join( config["cluster_log"], "{organism}", "iso_anno_final.log" ), - pattern = "5p0_3p0" + pattern="5p0_3p0", log: - os.path.join(config["local_log"], "{organism}", "iso_anno_final.log") + os.path.join(config["local_log"], "{organism}", "iso_anno_final.log"), singularity: "docker://zavolab/ubuntu:18.04" shell: diff --git a/workflow/quantify/Snakefile b/workflow/quantify/Snakefile new file mode 100644 index 0000000..b30667d --- /dev/null +++ b/workflow/quantify/Snakefile @@ -0,0 +1,317 @@ +############################################################################### +# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel +# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu +# +# Pipeline to quantify miRNAs, including isomiRs, from miRNA-seq alignments. 
+###############################################################################
+
+import os
+
+
+# `finish` is the only local rule: it has no shell command and merely
+# collects the final targets, so it is executed on the submitting host
+# instead of being dispatched as a cluster job.
+localrules:
+    finish,
+
+
+###############################################################################
+### Finish rule
+###############################################################################
+
+
+# Target rule: requests one merged count table per entry of
+# config["mir_list"] (table1) plus the isomiR table (table2).
+# NOTE(review): "counts.isomirs.tab" is requested although the isomiR
+# intersection and counting rules in this file are commented out. The only
+# active rule whose output pattern matches it is `merge_tables` (with
+# mir="isomirs") -- confirm that this is intended.
+rule finish:
+    input:
+        table1=expand(
+            os.path.join(config["output_dir"], "TABLES", "counts.{mir}.tab"),
+            mir=config["mir_list"],
+        ),
+        table2=os.path.join(
+            config["output_dir"], "TABLES", "counts.isomirs.tab"
+        ),
+
+
+###############################################################################
+### BAM to BED
+###############################################################################
+
+
+# Convert the per-sample BAM file found under config["input_dir"] to BED12
+# with `bedtools bamtobed`. `-tag NH` makes bedtools write the numeric NH tag
+# into the BED score column instead of the mapping quality (per the bedtools
+# documentation). The BED records go to the output file; anything else the
+# command prints ends up in the log.
+rule bamtobed:
+    input:
+        alignment=os.path.join(
+            config["input_dir"],
+            "{sample}",
+            "convertedSortedMappings_{sample}.bam",
+        ),
+    output:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "alignments.bed12"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "bamtobed_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "bamtobed_{sample}.log"),
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+    shell:
+        "(bedtools bamtobed \
+        -bed12 \
+        -tag NH \
+        -i {input.alignment} \
+        > {output.alignment} \
+        ) &> {log}"
+
+
+###############################################################################
+### Sort alignments
+###############################################################################
+
+
+# Sort the BED12 alignments by column 1 (chromosome) and then numerically by
+# column 2 (start). The intersection rule passes `-sorted` to bedtools and
+# therefore needs position-sorted input.
+# NOTE(review): `threads` is declared here as a custom entry under
+# `resources:` rather than through the `threads:` directive, and `sort` is
+# not given `--parallel`; the value is only visible to whatever reads
+# {resources.threads} (e.g. a cluster submission command) -- confirm.
+# NOTE(review): the unit of `mem=4` is not visible in this file (presumably
+# GB) -- confirm against the cluster configuration.
+rule sort_alignments:
+    input:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "alignments.bed12"
+        ),
+    output:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "sorted.alignments.bed12"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "sortalignment_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "sortalignment_{sample}.log"),
+    resources:
+        mem=4,
+        threads=8,
+    singularity:
+        "docker://zavolab/ubuntu:18.04"
+    shell:
+        "(sort \
+        -k1,1 \
+        -k2,2n \
+        {input.alignment} \
+        > {output.alignment} \
+        ) &> {log}"
+
+
+###############################################################################
+### miRNAs intersection
+###############################################################################
+
+
+# Intersect the miRNA annotation (-a) with the sorted alignments (-b).
+# Option meanings, per the bedtools documentation:
+#   -wao     report each annotation entry with its overlapping alignments and
+#            the overlap size; entries without overlap are kept (overlap 0)
+#   -s       only count alignments on the same strand as the annotation
+#   -F 1     the whole alignment (B) must lie within the annotation feature
+#   -sorted  use the sweep algorithm, which needs position-sorted inputs
+# NOTE(review): config["mirnas_anno"] is assumed to be sorted the same way as
+# the alignments (sort -k1,1 -k2,2n) -- verify where that file is produced.
+rule intersect_mirna:
+    input:
+        alignment=os.path.join(
+            config["output_dir"], "{sample}", "sorted.alignments.bed12"
+        ),
+        mirna=config["mirnas_anno"],
+    output:
+        intersect=os.path.join(
+            config["output_dir"], "{sample}", "intersect_mirna.bed"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "intersection_mirna_{sample}.log"
+        ),
+    log:
+        os.path.join(config["local_log"], "intersection_mirna_{sample}.log"),
+    singularity:
+        "docker://zavolab/bedtools:2.27.0"
+    shell:
+        "(bedtools intersect \
+        -wao \
+        -s \
+        -F 1 \
+        -sorted \
+        -b {input.alignment} \
+        -a {input.mirna} \
+        > {output.intersect} \
+        ) &> {log}"
+
+
+###############################################################################
+### isomiRs intersection
+###############################################################################
+
+
+# NOTE: the isomiR intersection is currently disabled; the rule is kept below
+# as commented-out code. It mirrors `intersect_mirna` but reads
+# config["isomirs_anno"] and writes "intersect_isomirs.bed".
+# rule intersect_isomirs:
+#     input:
+#         alignment=os.path.join(
+#             config["output_dir"], "{sample}", "sorted.alignments.bed12"
+#         ),
+#         isomirs=config["isomirs_anno"],
+#     output:
+#         intersect=os.path.join(
+#             config["output_dir"], "{sample}", "intersect_isomirs.bed"
+#         ),
+#     params:
+#         cluster_log=os.path.join(
+#             config["cluster_log"], "intersection_isomirs_{sample}.log"
+#         ),
+#     log:
+#         os.path.join(config["local_log"], "intersection_isomirs_{sample}.log"),
+#     singularity:
+#         "docker://zavolab/bedtools:2.27.0"
+#     shell:
+#         "(bedtools intersect \
+#         -wao \
+#         -s \
+#         -F 1 \
+#         -sorted \
+#         -b {input.alignment} \
+#         -a {input.isomirs} \
+#         > {output.intersect} \
+#         ) &> {log}"
+
+
+###############################################################################
+### miRNAs counting table - miRNA
+###############################################################################
+
+
+# Build the per-sample count table for features of type "miRNA" by running
+# mirna_quantification.py on the miRNA intersection file (--uniq=miRNA).
+# The script is listed as an input so that it is tracked as a dependency.
+# `params.prefix` is the same path as `output.table`: the script only receives
+# the path via -p and is expected to create exactly that file.
+# NOTE(review): how the script uses the -p value is not visible here --
+# verify against scripts/mirna_quantification.py.
+rule quant_mirna:
+    input:
+        intersect=os.path.join(
+            config["output_dir"], "{sample}", "intersect_mirna.bed"
+        ),
+        script=os.path.join(config["scripts_dir"], "mirna_quantification.py"),
+    output:
+        table=os.path.join(
+            config["output_dir"], "TABLES", "miRNA_counts_{sample}"
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "quant_mirna_miRNA_{sample}.log"
+        ),
+        prefix=os.path.join(
+            config["output_dir"], "TABLES", "miRNA_counts_{sample}"
+        ),
+    log:
+        os.path.join(config["local_log"], "quant_mirna_miRNA_{sample}.log"),
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python \
+        {input.script} \
+        -i {input.intersect} \
+        --uniq=miRNA \
+        -p={params.prefix} \
+        ) &> {log}"
+
+
+###############################################################################
+### miRNAs counting table - miRNA_primary
+###############################################################################
+
+
+# Same as `quant_mirna`, but for features of type "miRNA_primary_transcript":
+# it reads the same intersection file and differs only in the --uniq value
+# and in the output/log file names.
+# NOTE(review): as above, `params.prefix` equals `output.table`; the script is
+# expected to write exactly that path -- verify against the script.
+rule quant_mirna_pri:
+    input:
+        intersect=os.path.join(
+            config["output_dir"], "{sample}", "intersect_mirna.bed"
+        ),
+        script=os.path.join(config["scripts_dir"], "mirna_quantification.py"),
+    output:
+        table=os.path.join(
+            config["output_dir"],
+            "TABLES",
+            "miRNA_primary_transcript_counts_{sample}",
+        ),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"],
+            "quant_mirna_miRNA_primary_transcript_{sample}.log",
+        ),
+        prefix=os.path.join(
+            config["output_dir"],
+            "TABLES",
+            "miRNA_primary_transcript_counts_{sample}",
+        ),
+    log:
+        os.path.join(
+            config["local_log"],
+            "quant_mirna_miRNA_primary_transcript_{sample}.log",
+        ),
+    singularity:
+        "docker://zavolab/python:3.6.5"
+    shell:
+        "(python \
+        {input.script} \
+        -i {input.intersect} \
+        --uniq=miRNA_primary_transcript \
+        -p={params.prefix} \
+        ) &> {log}"
+
+
+###############################################################################
+### isomiRs counting table
+###############################################################################
+
+
+# NOTE: isomiR counting is currently disabled; the rule is kept below as
+# commented-out code. It would read "intersect_isomirs.bed" and write
+# "isomirs_counts_{sample}".
+# rule quant_isomirs:
+#     input:
+#         intersect=os.path.join(
+#             config["output_dir"], "{sample}", "intersect_isomirs.bed"
+#         ),
+#         script=os.path.join(config["scripts_dir"], "mirna_quantification.py"),
+#     output:
+#         table=os.path.join(
+#             config["output_dir"], "TABLES", "isomirs_counts_{sample}"
+#         ),
+#     params:
+#         cluster_log=os.path.join(
+#             config["cluster_log"], "quant_isomirs_{sample}.log"
+#         ),
+#         prefix=os.path.join(
+#             config["output_dir"], "TABLES", "isomirs_counts_{sample}"
+#         ),
+#     log:
+#         os.path.join(config["local_log"], "quant_isomirs_{sample}.log"),
+#     singularity:
+#         "docker://zavolab/python:3.6.5"
+#     shell:
+#         "(python \
+#         {input.script} \
+#         -i {input.intersect} \
+#         --uniq=miRNA \
+#         -p={params.prefix} \
+#         ) &> {log}"
+
+
+###############################################################################
+### Merge counting tables for all samples by mature/primary/isomirs forms.
+###############################################################################
+
+
+# Merge the per-sample count tables of one miRNA type ({mir}) into
+# "counts.{mir}.tab". merge_tables.R does not receive the input files
+# themselves: it is pointed at the TABLES directory (--input_dir) and at the
+# file-name prefix "{mir}_counts_" (--prefix).
+# NOTE(review): expand() fills `mir` from config["mir_list"], so the declared
+# inputs do not depend on the {mir} wildcard: every merge job waits for the
+# tables of ALL listed types and samples. If per-type inputs were intended,
+# the pattern would need "{{mir}}" -- confirm before changing, because
+# `finish` also requests mir="isomirs", for which no active rule produces
+# "isomirs_counts_*" files.
+rule merge_tables:
+    input:
+        table=expand(
+            os.path.join(
+                config["output_dir"], "TABLES", "{mir}_counts_{sample}"
+            ),
+            sample=config["sample"],
+            mir=config["mir_list"],
+        ),
+        script=os.path.join(config["scripts_dir"], "merge_tables.R"),
+    output:
+        table=os.path.join(config["output_dir"], "TABLES", "counts.{mir}.tab"),
+    params:
+        cluster_log=os.path.join(
+            config["cluster_log"], "merge_tables_{mir}.log"
+        ),
+        prefix="{mir}_counts_",
+        input_dir=os.path.join(config["output_dir"], "TABLES"),
+    log:
+        os.path.join(config["local_log"], "merge_tables_{mir}.log"),
+    singularity:
+        "docker://zavolab/r-tidyverse:3.5.3"
+    shell:
+        "(Rscript \
+        {input.script} \
+        --input_dir {params.input_dir} \
+        --output_file {output.table} \
+        --prefix {params.prefix} \
+        --verbose \
+        ) &> {log}"
-- 
GitLab