diff --git a/.gitignore b/.gitignore index 9b84e6d2dba3415c891f1fe29712f5d0ff6a315c..b7175b0eb9688a45b33806fddf06db984e9c12dc 100644 --- a/.gitignore +++ b/.gitignore @@ -212,4 +212,5 @@ tags .snakemake results logs -nohup.out \ No newline at end of file +nohup.out +.wget-hsts diff --git a/README.md b/README.md index 5f75c34108817e5c8779907fa31c772060e5de6e..c351b3416a1232933b7df6c6ba3fb4466d4f98b5 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ [Snakemake][snakemake] workflow to download and prepare the necessary files for smallRNA-seq related pipelines [mir-map][mir-map] and [mir-quant][mir-quant]. -The scheme below is a visual representation of an example run of the -workflow: +The scheme below is a visual representation of an example run of the workflow: > ![rule-graph-prep-anno][rule-graph-prep-anno] @@ -108,39 +107,24 @@ Now make a clean copy of the `JOB` directory and name it what you want, e.g., cp -r JOB MY_ANALYSIS ``` -Now traverse to the directory from where you will actually executw the pipeline +Now traverse to the directory from where you will actually execute the pipeline with: ```bash cd MY_ANALYSIS/prepare_annotation ``` -Before running the pipeline adjust the parameters in file `config.yaml`: +Before running the pipeline adjust the parameters in file +`config_prepare_annotation.yaml`: ```yaml ---- - ############################################################################## - ### Necessary inputs - ############################################################################## - organism: " " # name of the organism, e.g., "homo_sapiens" - genome_url: "ftp:// ..... " # FTP/HTTP URL to genome file in FASTA format - gtf_url: "ftp:// ..... 
" # FTP/HTTP URL to gene annotation file in GTF format - prefix_name: " " # name of the assembly/annotation version, e.g., "GRCh38.100" - - ############################################################################## - ### Directories - ############################################################################## - output_dir: "results" - scripts_dir: "../scripts" - local_log: "logs/local" - cluster_log: "logs/cluster" -... ``` > **Note:** We expect the genome and gene annotations to be formatted according > the style used by Ensembl. Other formats are very likely to lead to problems, > if not in this pipeline, then further down the road in the mapping or -> annotation pipelines. +> annotation pipelines. The miRNA annotation file is expected to originate from +> miRBase, or follow their exact layout. To start pipeline execution locally: diff --git a/workflow/prepare_annotation/cluster.json b/RUNS/JOB/prepare_annotation/cluster.json similarity index 100% rename from workflow/prepare_annotation/cluster.json rename to RUNS/JOB/prepare_annotation/cluster.json diff --git a/RUNS/JOB/prepare_annotation/config.yaml b/RUNS/JOB/prepare_annotation/config.yaml index a3ae6587435d9d5ca8a75d7b47d7e16ecda52d74..b6f688c2dd69f6027535d8483a19da0ef3739529 100644 --- a/RUNS/JOB/prepare_annotation/config.yaml +++ b/RUNS/JOB/prepare_annotation/config.yaml @@ -1,17 +1,35 @@ --- - ############################################################################## - ### Necessary inputs - ############################################################################## - organism: " " - genome_url: "ftp:// ..... " - gtf_url: "ftp:// ..... 
" - prefix_name: " " - - ############################################################################## - ### Directories - ############################################################################## - output_dir: "results" - scripts_dir: "../scripts" - local_log: "logs/local" - cluster_log: "logs/cluster" + +############################## GLOBAL PARAMETERS ############################## + +## Isomirs annotation file +## Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates. +bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts +bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts + +## Directories +output_dir: "results" +scripts_dir: "../../../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# List of "organism/prefix" identifiers +organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"] + +################### PARAMETERS SPECIFIC TO ORGANISM VERSION ################### + +org/pre: # One section for each list item in "organism"; names have to match precisely + + # URLs to genome, gene & miRNA annotations + genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style + gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style + mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style + + # Chromosome name mappings between UCSC <-> Ensembl + # Available at: https://github.com/dpryan79/ChromosomeMappings; e.g., `GRCh38_UCSC2ensembl.txt` + map_chr_url: # FTP/HTTP URL to mapping table + # Chromosome name mapping parameters: + column: 1 # Column number from input file where to change chromosome name + delimiter: "TAB" # Delimiter of the input file + ... 
diff --git a/RUNS/JOB/prepare_annotation/run_workflow_slurm.sh b/RUNS/JOB/prepare_annotation/run_workflow_slurm.sh index 6fba1b79adb18af44c4d15de819fd5574817b4d8..be575ff5fb4a4dec9c2aa5b74593db63ccf4b444 100755 --- a/RUNS/JOB/prepare_annotation/run_workflow_slurm.sh +++ b/RUNS/JOB/prepare_annotation/run_workflow_slurm.sh @@ -27,7 +27,7 @@ cd $script_dir snakemake \ --snakefile="../../../workflow/prepare_annotation/Snakefile" \ --configfile="config.yaml" \ - --cluster-config="../../../workflow/prepare_annotation/cluster.json" \ + --cluster-config="cluster.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ --mem={cluster.mem} \ diff --git a/environment.root.yml b/environment.root.yml index 50e1d633b1e2465bc95fed5a2745e6fcd73cce4c..b961a0d33881ab213688136aef2b77d65b03dc11 100644 --- a/environment.root.yml +++ b/environment.root.yml @@ -5,9 +5,7 @@ channels: - conda-forge dependencies: - graphviz=2.40.1 - - pip=20.0.2 - python=3.7.4 - singularity=3.5.2 - - snakemake==6.10.0 + - snakemake=6.10.0 - unzip=6.0 - - wget==1.20.1 diff --git a/environment.yml b/environment.yml index da6868d8f8f91cc728906e06e1c98df8230ceb52..533927f89a047d59fccaf8c706eb2965f5d55a59 100644 --- a/environment.yml +++ b/environment.yml @@ -5,8 +5,6 @@ channels: - conda-forge dependencies: - graphviz=2.40.1 - - pip=20.0.2 - python=3.7.4 - - snakemake==6.10.0 + - snakemake=6.10.0 - unzip=6.0 - - wget==1.20.1 diff --git a/images/rule_graph_prepare_annotation.svg b/images/rule_graph_prepare_annotation.svg index e4f5562006d7e2448a5a944790c21369ca3aacba..1b6f8cd0f55a2305390426294365e330b16db309 100644 --- a/images/rule_graph_prepare_annotation.svg +++ b/images/rule_graph_prepare_annotation.svg @@ -4,142 +4,310 @@ <!-- Generated by graphviz version 2.40.1 (20161225.0304) --> <!-- Title: snakemake_dag Pages: 1 --> -<svg width="596pt" height="332pt" - viewBox="0.00 0.00 595.50 332.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> -<g id="graph0" class="graph" 
transform="scale(1 1) rotate(0) translate(4 328)"> +<svg width="733pt" height="764pt" + viewBox="0.00 0.00 732.50 764.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 760)"> <title>snakemake_dag</title> -<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-328 591.5,-328 591.5,4 -4,4"/> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-760 728.5,-760 728.5,4 -4,4"/> <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M300.5,-36C300.5,-36 270.5,-36 270.5,-36 264.5,-36 258.5,-30 258.5,-24 258.5,-24 258.5,-12 258.5,-12 258.5,-6 264.5,0 270.5,0 270.5,0 300.5,0 300.5,0 306.5,0 312.5,-6 312.5,-12 312.5,-12 312.5,-24 312.5,-24 312.5,-30 306.5,-36 300.5,-36"/> -<text text-anchor="middle" x="285.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> +<path fill="none" stroke="#d88556" stroke-width="2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> +<text text-anchor="middle" x="342.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#56c1d8" stroke-width="2" d="M495,-108C495,-108 300,-108 300,-108 294,-108 288,-102 288,-96 288,-96 288,-84 288,-84 288,-78 294,-72 300,-72 300,-72 495,-72 495,-72 501,-72 507,-78 507,-84 507,-84 507,-96 507,-96 507,-102 501,-108 495,-108"/> -<text text-anchor="middle" x="397.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> +<path fill="none" stroke="#56d8c9" stroke-width="2" d="M362,-108C362,-108 167,-108 167,-108 161,-108 155,-102 155,-96 155,-96 155,-84 155,-84 
155,-78 161,-72 167,-72 167,-72 362,-72 362,-72 368,-72 374,-78 374,-84 374,-84 374,-96 374,-96 374,-102 368,-108 362,-108"/> +<text text-anchor="middle" x="264.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> </g> <!-- 1->0 --> -<g id="edge2" class="edge"> +<g id="edge1" class="edge"> <title>1->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M369.2377,-71.8314C354.7198,-62.4984 336.8745,-51.0264 321.3932,-41.0742"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="322.8987,-37.8812 312.5942,-35.4177 319.1134,-43.7695 322.8987,-37.8812"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M284.1827,-71.8314C293.6051,-63.1337 305.0401,-52.5783 315.2796,-43.1265"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="317.8495,-45.5175 322.8236,-36.1628 313.1016,-40.3738 317.8495,-45.5175"/> </g> <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M419,-180C419,-180 376,-180 376,-180 370,-180 364,-174 364,-168 364,-168 364,-156 364,-156 364,-150 370,-144 376,-144 376,-144 419,-144 419,-144 425,-144 431,-150 431,-156 431,-156 431,-168 431,-168 431,-174 425,-180 419,-180"/> -<text text-anchor="middle" x="397.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> +<path fill="none" stroke="#56d882" stroke-width="2" d="M289,-180C289,-180 246,-180 246,-180 240,-180 234,-174 234,-168 234,-168 234,-156 234,-156 234,-150 240,-144 246,-144 246,-144 289,-144 289,-144 295,-144 301,-150 301,-156 301,-156 301,-168 301,-168 301,-174 295,-180 289,-180"/> +<text text-anchor="middle" x="267.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> </g> <!-- 2->1 --> -<g id="edge5" class="edge"> +<g id="edge7" class="edge"> <title>2->1</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M397.5,-143.8314C397.5,-136.131 397.5,-126.9743 
397.5,-118.4166"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="401.0001,-118.4132 397.5,-108.4133 394.0001,-118.4133 401.0001,-118.4132"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M266.743,-143.8314C266.4221,-136.131 266.0406,-126.9743 265.684,-118.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="269.1806,-118.2589 265.2672,-108.4133 262.1867,-118.5503 269.1806,-118.2589"/> </g> <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M461.5,-252C461.5,-252 333.5,-252 333.5,-252 327.5,-252 321.5,-246 321.5,-240 321.5,-240 321.5,-228 321.5,-228 321.5,-222 327.5,-216 333.5,-216 333.5,-216 461.5,-216 461.5,-216 467.5,-216 473.5,-222 473.5,-228 473.5,-228 473.5,-240 473.5,-240 473.5,-246 467.5,-252 461.5,-252"/> -<text text-anchor="middle" x="397.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> +<path fill="none" stroke="#d85656" stroke-width="2" d="M334.5,-252C334.5,-252 206.5,-252 206.5,-252 200.5,-252 194.5,-246 194.5,-240 194.5,-240 194.5,-228 194.5,-228 194.5,-222 200.5,-216 206.5,-216 206.5,-216 334.5,-216 334.5,-216 340.5,-216 346.5,-222 346.5,-228 346.5,-228 346.5,-240 346.5,-240 346.5,-246 340.5,-252 334.5,-252"/> +<text text-anchor="middle" x="270.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> </g> <!-- 3->2 --> -<g id="edge6" class="edge"> +<g id="edge8" class="edge"> <title>3->2</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M397.5,-215.8314C397.5,-208.131 397.5,-198.9743 397.5,-190.4166"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="401.0001,-190.4132 397.5,-180.4133 394.0001,-190.4133 401.0001,-190.4132"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M269.743,-215.8314C269.4221,-208.131 269.0406,-198.9743 268.684,-190.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" 
points="272.1806,-190.2589 268.2672,-180.4133 265.1867,-190.5503 272.1806,-190.2589"/> </g> <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M212,-324C212,-324 137,-324 137,-324 131,-324 125,-318 125,-312 125,-312 125,-300 125,-300 125,-294 131,-288 137,-288 137,-288 212,-288 212,-288 218,-288 224,-294 224,-300 224,-300 224,-312 224,-312 224,-318 218,-324 212,-324"/> -<text text-anchor="middle" x="174.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">genome_process</text> +<path fill="none" stroke="#88d856" stroke-width="2" d="M346,-756C346,-756 271,-756 271,-756 265,-756 259,-750 259,-744 259,-744 259,-732 259,-732 259,-726 265,-720 271,-720 271,-720 346,-720 346,-720 352,-720 358,-726 358,-732 358,-732 358,-744 358,-744 358,-750 352,-756 346,-756"/> +<text text-anchor="middle" x="308.5" y="-735.5" font-family="sans" font-size="10.00" fill="#000000">genome_process</text> </g> <!-- 4->3 --> -<g id="edge7" class="edge"> +<g id="edge9" class="edge"> <title>4->3</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M224.268,-289.9314C255.9297,-279.7088 297.2888,-266.3552 331.6541,-255.2597"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="333.1696,-258.4483 341.6104,-252.0451 331.0187,-251.7869 333.1696,-258.4483"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M301.7136,-719.6497C292.3203,-692.5619 276.5,-640.0977 276.5,-594 276.5,-594 276.5,-594 276.5,-378 276.5,-337.8412 274.1417,-291.4372 272.3821,-262.4031"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="275.8599,-261.9376 271.7432,-252.1753 268.8736,-262.3741 275.8599,-261.9376"/> </g> <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M258,-108C258,-108 91,-108 91,-108 85,-108 79,-102 79,-96 79,-96 79,-84 79,-84 79,-78 85,-72 91,-72 91,-72 258,-72 258,-72 264,-72 270,-78 270,-84 270,-84 270,-96 270,-96 270,-102 
264,-108 258,-108"/> -<text text-anchor="middle" x="174.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> +<path fill="none" stroke="#d86e56" stroke-width="2" d="M179,-540C179,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 179,-504 179,-504 185,-504 191,-510 191,-516 191,-516 191,-528 191,-528 191,-534 185,-540 179,-540"/> +<text text-anchor="middle" x="95.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> </g> <!-- 4->6 --> -<g id="edge9" class="edge"> +<g id="edge11" class="edge"> <title>4->6</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M174.5,-287.9555C174.5,-250.3938 174.5,-163.5541 174.5,-118.4103"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="178.0001,-118.145 174.5,-108.1451 171.0001,-118.1451 178.0001,-118.145"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M266.7238,-719.8513C248.5524,-710.7235 227.7488,-698.5026 211.5,-684 166.6798,-643.9963 129.285,-583.7121 109.8685,-549.0753"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="112.7713,-547.0911 104.8776,-540.0242 106.6414,-550.4712 112.7713,-547.0911"/> </g> <!-- 9 --> <g id="node10" class="node"> <title>9</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M119,-180C119,-180 12,-180 12,-180 6,-180 0,-174 0,-168 0,-168 0,-156 0,-156 0,-150 6,-144 12,-144 12,-144 119,-144 119,-144 125,-144 131,-150 131,-156 131,-156 131,-168 131,-168 131,-174 125,-180 119,-180"/> -<text text-anchor="middle" x="65.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> +<path fill="none" stroke="#70d856" stroke-width="2" d="M236,-468C236,-468 129,-468 129,-468 123,-468 117,-462 117,-456 117,-456 117,-444 117,-444 117,-438 123,-432 129,-432 129,-432 236,-432 236,-432 242,-432 248,-438 248,-444 248,-444 248,-456 248,-456 248,-462 242,-468 236,-468"/> 
+<text text-anchor="middle" x="182.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> </g> <!-- 4->9 --> -<g id="edge12" class="edge"> +<g id="edge14" class="edge"> <title>4->9</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M160.6951,-287.7623C141.6122,-262.5518 107.0641,-216.9103 85.2526,-188.0952"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="88.0189,-185.9506 79.1928,-180.0896 82.4376,-190.1754 88.0189,-185.9506"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M289.9935,-719.8356C280.7284,-709.9335 269.9328,-697.0574 262.5,-684 251.9809,-665.5208 209.8347,-535.5126 191.31,-477.6548"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="194.6195,-476.5127 188.2407,-468.0533 187.9518,-478.6442 194.6195,-476.5127"/> +</g> +<!-- 13 --> +<g id="node14" class="node"> +<title>13</title> +<path fill="none" stroke="#56b1d8" stroke-width="2" d="M366.5,-684C366.5,-684 316.5,-684 316.5,-684 310.5,-684 304.5,-678 304.5,-672 304.5,-672 304.5,-660 304.5,-660 304.5,-654 310.5,-648 316.5,-648 316.5,-648 366.5,-648 366.5,-648 372.5,-648 378.5,-654 378.5,-660 378.5,-660 378.5,-672 378.5,-672 378.5,-678 372.5,-684 366.5,-684"/> +<text text-anchor="middle" x="341.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">mirna_anno</text> +</g> +<!-- 4->13 --> +<g id="edge19" class="edge"> +<title>4->13</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M316.8273,-719.8314C320.473,-711.8771 324.8309,-702.369 328.8627,-693.5723"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="332.0757,-694.9622 333.0606,-684.4133 325.7123,-692.0456 332.0757,-694.9622"/> +</g> +<!-- 14 --> +<g id="node15" class="node"> +<title>14</title> +<path fill="none" stroke="#56c9d8" stroke-width="2" d="M440,-684C440,-684 409,-684 409,-684 403,-684 397,-678 397,-672 397,-672 397,-660 397,-660 397,-654 403,-648 409,-648 409,-648 440,-648 440,-648 446,-648 452,-654 
452,-660 452,-660 452,-672 452,-672 452,-678 446,-684 440,-684"/> +<text text-anchor="middle" x="424.5" y="-663.5" font-family="sans" font-size="10.00" fill="#000000">dict_chr</text> +</g> +<!-- 4->14 --> +<g id="edge20" class="edge"> +<title>4->14</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M337.7717,-719.8314C353.0619,-710.3409 371.9156,-698.6386 388.1347,-688.5716"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="390.1842,-691.4189 396.8349,-683.1715 386.4927,-685.4714 390.1842,-691.4189"/> +</g> +<!-- 21 --> +<g id="node22" class="node"> +<title>21</title> +<path fill="none" stroke="#5682d8" stroke-width="2" d="M592.5,-612C592.5,-612 506.5,-612 506.5,-612 500.5,-612 494.5,-606 494.5,-600 494.5,-600 494.5,-588 494.5,-588 494.5,-582 500.5,-576 506.5,-576 506.5,-576 592.5,-576 592.5,-576 598.5,-576 604.5,-582 604.5,-588 604.5,-588 604.5,-600 604.5,-600 604.5,-606 598.5,-612 592.5,-612"/> +<text text-anchor="middle" x="549.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">create_index_fasta</text> +</g> +<!-- 4->21 --> +<g id="edge28" class="edge"> +<title>4->21</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M358.2792,-726.4177C389.2668,-717.7739 429.0656,-703.979 460.5,-684 487.558,-666.8025 512.682,-639.9749 529.2879,-620.0797"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="532.0321,-622.2527 535.6493,-612.2945 526.6116,-617.8235 532.0321,-622.2527"/> </g> <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#56d88a" stroke-width="2" d="M502,-324C502,-324 439,-324 439,-324 433,-324 427,-318 427,-312 427,-312 427,-300 427,-300 427,-294 433,-288 439,-288 439,-288 502,-288 502,-288 508,-288 514,-294 514,-300 514,-300 514,-312 514,-312 514,-318 508,-324 502,-324"/> -<text text-anchor="middle" x="470.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> +<path fill="none" stroke="#59d856" stroke-width="2" 
d="M409,-324C409,-324 346,-324 346,-324 340,-324 334,-318 334,-312 334,-312 334,-300 334,-300 334,-294 340,-288 346,-288 346,-288 409,-288 409,-288 415,-288 421,-294 421,-300 421,-300 421,-312 421,-312 421,-318 415,-324 409,-324"/> +<text text-anchor="middle" x="377.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> </g> <!-- 5->3 --> -<g id="edge8" class="edge"> +<g id="edge10" class="edge"> <title>5->3</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M452.079,-287.8314C443.3471,-279.219 432.7683,-268.7851 423.2577,-259.4048"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="425.4925,-256.6931 415.9151,-252.1628 420.577,-261.6769 425.4925,-256.6931"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M350.4994,-287.8314C337.0669,-278.7927 320.6527,-267.7476 306.1961,-258.0198"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="307.7425,-254.8418 297.492,-252.1628 303.8346,-260.6494 307.7425,-254.8418"/> </g> <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#bed856" stroke-width="2" d="M575.5,-252C575.5,-252 513.5,-252 513.5,-252 507.5,-252 501.5,-246 501.5,-240 501.5,-240 501.5,-228 501.5,-228 501.5,-222 507.5,-216 513.5,-216 513.5,-216 575.5,-216 575.5,-216 581.5,-216 587.5,-222 587.5,-228 587.5,-228 587.5,-240 587.5,-240 587.5,-246 581.5,-252 575.5,-252"/> -<text text-anchor="middle" x="544.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> +<path fill="none" stroke="#ced856" stroke-width="2" d="M438.5,-252C438.5,-252 376.5,-252 376.5,-252 370.5,-252 364.5,-246 364.5,-240 364.5,-240 364.5,-228 364.5,-228 364.5,-222 370.5,-216 376.5,-216 376.5,-216 438.5,-216 438.5,-216 444.5,-216 450.5,-222 450.5,-228 450.5,-228 450.5,-240 450.5,-240 450.5,-246 444.5,-252 438.5,-252"/> +<text text-anchor="middle" x="407.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> </g> <!-- 
5->8 --> -<g id="edge11" class="edge"> +<g id="edge13" class="edge"> <title>5->8</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M489.1733,-287.8314C498.0249,-279.219 508.7486,-268.7851 518.3895,-259.4048"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="521.1061,-261.645 525.8326,-252.1628 516.2246,-256.6279 521.1061,-261.645"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M385.0703,-287.8314C388.3493,-279.9617 392.262,-270.5712 395.8945,-261.8533"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="399.2124,-262.9902 399.8278,-252.4133 392.7508,-260.2979 399.2124,-262.9902"/> </g> <!-- 6->0 --> <g id="edge3" class="edge"> <title>6->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M202.51,-71.8314C216.7909,-62.5681 234.3205,-51.1975 249.5834,-41.2972"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="251.7815,-44.0434 258.2664,-35.665 247.9721,-38.1706 251.7815,-44.0434"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M94.0496,-503.8284C91.9993,-476.5296 88.5,-423.3034 88.5,-378 88.5,-378 88.5,-378 88.5,-162 88.5,-119.2408 90.4989,-100.3597 122.5,-72 149.3909,-48.169 249.9753,-30.8466 305.3538,-22.8826"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="306.0511,-26.319 315.466,-21.4601 305.0759,-19.3873 306.0511,-26.319"/> </g> <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#88d856" stroke-width="2" d="M571.5,-108C571.5,-108 537.5,-108 537.5,-108 531.5,-108 525.5,-102 525.5,-96 525.5,-96 525.5,-84 525.5,-84 525.5,-78 531.5,-72 537.5,-72 537.5,-72 571.5,-72 571.5,-72 577.5,-72 583.5,-78 583.5,-84 583.5,-84 583.5,-96 583.5,-96 583.5,-102 577.5,-108 571.5,-108"/> -<text text-anchor="middle" x="554.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> +<path fill="none" stroke="#566bd8" stroke-width="2" d="M438.5,-108C438.5,-108 404.5,-108 404.5,-108 398.5,-108 392.5,-102 
392.5,-96 392.5,-96 392.5,-84 392.5,-84 392.5,-78 398.5,-72 404.5,-72 404.5,-72 438.5,-72 438.5,-72 444.5,-72 450.5,-78 450.5,-84 450.5,-84 450.5,-96 450.5,-96 450.5,-102 444.5,-108 438.5,-108"/> +<text text-anchor="middle" x="421.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> </g> <!-- 7->0 --> -<g id="edge1" class="edge"> +<g id="edge6" class="edge"> <title>7->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M525.3009,-75.4138C522.3621,-74.1746 519.3945,-73.0109 516.5,-72 449.8064,-48.7071 368.7812,-32.3854 322.571,-24.1501"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="323.1591,-20.6999 312.705,-22.4208 321.9506,-27.5948 323.1591,-20.6999"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M401.565,-71.8314C392.0217,-63.1337 380.4401,-52.5783 370.0694,-43.1265"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="372.1772,-40.3121 362.4287,-36.1628 367.462,-45.4857 372.1772,-40.3121"/> </g> <!-- 8->7 --> -<g id="edge10" class="edge"> +<g id="edge12" class="edge"> <title>8->7</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M545.7665,-215.7623C547.4721,-191.201 550.5245,-147.2474 552.531,-118.3541"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="556.0425,-118.3081 553.2438,-108.0896 549.0593,-117.8231 556.0425,-118.3081"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M409.2731,-215.7623C411.661,-191.201 415.9343,-147.2474 418.7434,-118.3541"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="422.2571,-118.3814 419.7413,-108.0896 415.29,-117.704 422.2571,-118.3814"/> </g> <!-- 9->0 --> -<g id="edge4" class="edge"> +<g id="edge2" class="edge"> <title>9->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M60.544,-143.9048C56.2668,-123.9783 53.0715,-92.1044 69.5,-72 91.8461,-44.6539 192.3357,-28.8343 248.0323,-22.0186"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" 
points="248.6915,-25.465 258.21,-20.8123 247.8676,-18.5137 248.6915,-25.465"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M170.3488,-431.7857C153.8788,-405.2865 126.5,-354.0148 126.5,-306 126.5,-306 126.5,-306 126.5,-162 126.5,-121.1184 117.4647,-101.7545 145.5,-72 167.2281,-48.9395 254.6042,-31.7622 305.3969,-23.4905"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="306.0144,-26.9364 315.3404,-21.9087 304.9147,-20.0233 306.0144,-26.9364"/> +</g> +<!-- 10 --> +<g id="node11" class="node"> +<title>10</title> +<path fill="none" stroke="#56d89a" stroke-width="2" d="M495,-468C495,-468 462,-468 462,-468 456,-468 450,-462 450,-456 450,-456 450,-444 450,-444 450,-438 456,-432 462,-432 462,-432 495,-432 495,-432 501,-432 507,-438 507,-444 507,-444 507,-456 507,-456 507,-462 501,-468 495,-468"/> +<text text-anchor="middle" x="478.5" y="-447.5" font-family="sans" font-size="10.00" fill="#000000">gfftobed</text> +</g> +<!-- 10->0 --> +<g id="edge5" class="edge"> +<title>10->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M478.5,-431.8146C478.5,-404.4983 478.5,-351.25 478.5,-306 478.5,-306 478.5,-306 478.5,-162 478.5,-121.1184 484.9901,-103.9619 459.5,-72 439.6741,-47.1403 405.7974,-33.2394 379.5835,-25.7916"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="380.1623,-22.3246 369.6002,-23.1563 378.3757,-29.0927 380.1623,-22.3246"/> +</g> +<!-- 19 --> +<g id="node20" class="node"> +<title>19</title> +<path fill="none" stroke="#9fd856" stroke-width="2" d="M602,-396C602,-396 519,-396 519,-396 513,-396 507,-390 507,-384 507,-384 507,-372 507,-372 507,-366 513,-360 519,-360 519,-360 602,-360 602,-360 608,-360 614,-366 614,-372 614,-372 614,-384 614,-384 614,-390 608,-396 602,-396"/> +<text text-anchor="middle" x="560.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">filter_mature_mirs</text> +</g> +<!-- 10->19 --> +<g id="edge26" class="edge"> +<title>10->19</title> +<path fill="none" 
stroke="#c0c0c0" stroke-width="2" d="M499.1921,-431.8314C509.1948,-423.0485 521.355,-412.3712 532.1999,-402.8489"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="534.6094,-405.3909 539.8146,-396.1628 529.9908,-400.1308 534.6094,-405.3909"/> +</g> +<!-- 11 --> +<g id="node12" class="node"> +<title>11</title> +<path fill="none" stroke="#56d86b" stroke-width="2" d="M489,-540C489,-540 412,-540 412,-540 406,-540 400,-534 400,-528 400,-528 400,-516 400,-516 400,-510 406,-504 412,-504 412,-504 489,-504 489,-504 495,-504 501,-510 501,-516 501,-516 501,-528 501,-528 501,-534 495,-540 489,-540"/> +<text text-anchor="middle" x="450.5" y="-519.5" font-family="sans" font-size="10.00" fill="#000000">filter_mir_1_anno</text> +</g> +<!-- 11->10 --> +<g id="edge15" class="edge"> +<title>11->10</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M457.5656,-503.8314C460.626,-495.9617 464.2779,-486.5712 467.6682,-477.8533"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="470.9768,-479.0019 471.3393,-468.4133 464.4527,-476.4647 470.9768,-479.0019"/> +</g> +<!-- 12 --> +<g id="node13" class="node"> +<title>12</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" d="M461,-612C461,-612 388,-612 388,-612 382,-612 376,-606 376,-600 376,-600 376,-588 376,-588 376,-582 382,-576 388,-576 388,-576 461,-576 461,-576 467,-576 473,-582 473,-588 473,-588 473,-600 473,-600 473,-606 467,-612 461,-612"/> +<text text-anchor="middle" x="424.5" y="-591.5" font-family="sans" font-size="10.00" fill="#000000">map_chr_names</text> +</g> +<!-- 12->11 --> +<g id="edge16" class="edge"> +<title>12->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M431.0609,-575.8314C433.9027,-567.9617 437.2937,-558.5712 440.4419,-549.8533"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="443.7462,-551.0076 443.8508,-540.4133 437.1623,-548.63 443.7462,-551.0076"/> +</g> +<!-- 13->12 --> +<g id="edge18" class="edge"> 
+<title>13->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M362.4444,-647.8314C372.5691,-639.0485 384.8777,-628.3712 395.8548,-618.8489"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="398.3019,-621.3595 403.5623,-612.1628 393.7149,-616.0718 398.3019,-621.3595"/> +</g> +<!-- 14->12 --> +<g id="edge17" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M424.5,-647.8314C424.5,-640.131 424.5,-630.9743 424.5,-622.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="428.0001,-622.4132 424.5,-612.4133 421.0001,-622.4133 428.0001,-622.4132"/> +</g> +<!-- 15 --> +<g id="node16" class="node"> +<title>15</title> +<path fill="none" stroke="#569ad8" stroke-width="2" d="M584.5,-108C584.5,-108 522.5,-108 522.5,-108 516.5,-108 510.5,-102 510.5,-96 510.5,-96 510.5,-84 510.5,-84 510.5,-78 516.5,-72 522.5,-72 522.5,-72 584.5,-72 584.5,-72 590.5,-72 596.5,-78 596.5,-84 596.5,-84 596.5,-96 596.5,-96 596.5,-102 590.5,-108 584.5,-108"/> +<text text-anchor="middle" x="553.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_final</text> +</g> +<!-- 15->0 --> +<g id="edge4" class="edge"> +<title>15->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M510.2918,-75.256C471.7195,-62.0939 415.8391,-43.0257 379.3404,-30.5711"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="380.1849,-27.1612 369.5904,-27.2441 377.9242,-33.7861 380.1849,-27.1612"/> +</g> +<!-- 16 --> +<g id="node17" class="node"> +<title>16</title> +<path fill="none" stroke="#b6d856" stroke-width="2" d="M594.5,-180C594.5,-180 520.5,-180 520.5,-180 514.5,-180 508.5,-174 508.5,-168 508.5,-168 508.5,-156 508.5,-156 508.5,-150 514.5,-144 520.5,-144 520.5,-144 594.5,-144 594.5,-144 600.5,-144 606.5,-150 606.5,-156 606.5,-156 606.5,-168 606.5,-168 606.5,-174 600.5,-180 594.5,-180"/> +<text text-anchor="middle" x="557.5" y="-159.5" font-family="sans" font-size="10.00" 
fill="#000000">iso_anno_concat</text> +</g> +<!-- 16->15 --> +<g id="edge21" class="edge"> +<title>16->15</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M556.4906,-143.8314C556.0628,-136.131 555.5541,-126.9743 555.0787,-118.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="558.5724,-118.2037 554.523,-108.4133 551.5831,-118.592 558.5724,-118.2037"/> +</g> +<!-- 17 --> +<g id="node18" class="node"> +<title>17</title> +<path fill="none" stroke="#d89c56" stroke-width="2" d="M599,-252C599,-252 520,-252 520,-252 514,-252 508,-246 508,-240 508,-240 508,-228 508,-228 508,-222 514,-216 520,-216 520,-216 599,-216 599,-216 605,-216 611,-222 611,-228 611,-228 611,-240 611,-240 611,-246 605,-252 599,-252"/> +<text text-anchor="middle" x="559.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 17->16 --> +<g id="edge22" class="edge"> +<title>17->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M558.9953,-215.8314C558.7814,-208.131 558.5271,-198.9743 558.2894,-190.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="561.7879,-190.3122 558.0115,-180.4133 554.7906,-190.5066 561.7879,-190.3122"/> +</g> +<!-- 18 --> +<g id="node19" class="node"> +<title>18</title> +<path fill="none" stroke="#d8cb56" stroke-width="2" d="M578.5,-324C578.5,-324 542.5,-324 542.5,-324 536.5,-324 530.5,-318 530.5,-312 530.5,-312 530.5,-300 530.5,-300 530.5,-294 536.5,-288 542.5,-288 542.5,-288 578.5,-288 578.5,-288 584.5,-288 590.5,-294 590.5,-300 590.5,-300 590.5,-312 590.5,-312 590.5,-318 584.5,-324 578.5,-324"/> +<text text-anchor="middle" x="560.5" y="-303.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +</g> +<!-- 18->17 --> +<g id="edge23" class="edge"> +<title>18->17</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M560.2477,-287.8314C560.1407,-280.131 560.0135,-270.9743 559.8947,-262.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" 
stroke-width="2" points="563.3944,-262.3637 559.7557,-252.4133 556.3951,-262.4609 563.3944,-262.3637"/> +</g> +<!-- 19->18 --> +<g id="edge24" class="edge"> +<title>19->18</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M560.5,-359.8314C560.5,-352.131 560.5,-342.9743 560.5,-334.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="564.0001,-334.4132 560.5,-324.4133 557.0001,-334.4133 564.0001,-334.4132"/> +</g> +<!-- 20 --> +<g id="node21" class="node"> +<title>20</title> +<path fill="none" stroke="#d8b456" stroke-width="2" d="M712.5,-396C712.5,-396 644.5,-396 644.5,-396 638.5,-396 632.5,-390 632.5,-384 632.5,-384 632.5,-372 632.5,-372 632.5,-366 638.5,-360 644.5,-360 644.5,-360 712.5,-360 712.5,-360 718.5,-360 724.5,-366 724.5,-372 724.5,-372 724.5,-384 724.5,-384 724.5,-390 718.5,-396 712.5,-396"/> +<text text-anchor="middle" x="678.5" y="-375.5" font-family="sans" font-size="10.00" fill="#000000">extract_chr_len</text> +</g> +<!-- 20->18 --> +<g id="edge25" class="edge"> +<title>20->18</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M648.7236,-359.8314C633.6307,-350.6221 615.1243,-339.3301 598.9657,-329.4706"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="600.6263,-326.3838 590.2669,-324.1628 596.9802,-332.3593 600.6263,-326.3838"/> +</g> +<!-- 21->20 --> +<g id="edge27" class="edge"> +<title>21->20</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M560.2766,-575.9555C582.9464,-537.9967 635.6716,-449.7127 662.3793,-404.9928"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="665.5408,-406.5251 667.6634,-396.1451 659.531,-402.9359 665.5408,-406.5251"/> </g> </g> </svg> diff --git a/images/workflow_dag_prepare_annotation.svg b/images/workflow_dag_prepare_annotation.svg index 3fd406e237fe85a9f888d44e51633eabcd34bf88..4e105ff88d9cf1dce95b8b644b7143ed20e5e75f 100644 --- a/images/workflow_dag_prepare_annotation.svg +++ b/images/workflow_dag_prepare_annotation.svg 
@@ -4,146 +4,618 @@ <!-- Generated by graphviz version 2.40.1 (20161225.0304) --> <!-- Title: snakemake_dag Pages: 1 --> -<svg width="596pt" height="337pt" - viewBox="0.00 0.00 595.50 337.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> -<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 333)"> +<svg width="1507pt" height="769pt" + viewBox="0.00 0.00 1507.00 769.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 765)"> <title>snakemake_dag</title> -<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-333 591.5,-333 591.5,4 -4,4"/> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-765 1503,-765 1503,4 -4,4"/> <!-- 0 --> <g id="node1" class="node"> <title>0</title> -<path fill="none" stroke="#d88d56" stroke-width="2" d="M300.5,-36C300.5,-36 270.5,-36 270.5,-36 264.5,-36 258.5,-30 258.5,-24 258.5,-24 258.5,-12 258.5,-12 258.5,-6 264.5,0 270.5,0 270.5,0 300.5,0 300.5,0 306.5,0 312.5,-6 312.5,-12 312.5,-12 312.5,-24 312.5,-24 312.5,-30 306.5,-36 300.5,-36"/> -<text text-anchor="middle" x="285.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> +<path fill="none" stroke="#d86e56" stroke-width="2" stroke-dasharray="5,2" d="M357.5,-36C357.5,-36 327.5,-36 327.5,-36 321.5,-36 315.5,-30 315.5,-24 315.5,-24 315.5,-12 315.5,-12 315.5,-6 321.5,0 327.5,0 327.5,0 357.5,0 357.5,0 363.5,0 369.5,-6 369.5,-12 369.5,-12 369.5,-24 369.5,-24 369.5,-30 363.5,-36 357.5,-36"/> +<text text-anchor="middle" x="342.5" y="-15.5" font-family="sans" font-size="10.00" fill="#000000">finish</text> </g> <!-- 1 --> <g id="node2" class="node"> <title>1</title> -<path fill="none" stroke="#56c1d8" stroke-width="2" d="M495,-108C495,-108 300,-108 300,-108 294,-108 288,-102 288,-96 288,-96 288,-84 288,-84 288,-78 294,-72 300,-72 300,-72 495,-72 495,-72 501,-72 507,-78 507,-84 507,-84 507,-96 507,-96 
507,-102 501,-108 495,-108"/> -<text text-anchor="middle" x="397.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> +<path fill="none" stroke="#b6d856" stroke-width="2" stroke-dasharray="5,2" d="M207,-108C207,-108 12,-108 12,-108 6,-108 0,-102 0,-96 0,-96 0,-84 0,-84 0,-78 6,-72 12,-72 12,-72 207,-72 207,-72 213,-72 219,-78 219,-84 219,-84 219,-96 219,-96 219,-102 213,-108 207,-108"/> +<text text-anchor="middle" x="109.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_transcriptome</text> </g> <!-- 1->0 --> <g id="edge1" class="edge"> <title>1->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M369.2377,-71.8314C354.7198,-62.4984 336.8745,-51.0264 321.3932,-41.0742"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="322.8987,-37.8812 312.5942,-35.4177 319.1134,-43.7695 322.8987,-37.8812"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M167.995,-71.9243C211.1214,-58.5977 268.4857,-40.8714 305.4568,-29.4468"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="306.7942,-32.6969 315.315,-26.4005 304.7274,-26.009 306.7942,-32.6969"/> </g> <!-- 2 --> <g id="node3" class="node"> <title>2</title> -<path fill="none" stroke="#d85656" stroke-width="2" d="M419,-180C419,-180 376,-180 376,-180 370,-180 364,-174 364,-168 364,-168 364,-156 364,-156 364,-150 370,-144 376,-144 376,-144 419,-144 419,-144 425,-144 431,-150 431,-156 431,-156 431,-168 431,-168 431,-174 425,-180 419,-180"/> -<text text-anchor="middle" x="397.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> +<path fill="none" stroke="#566bd8" stroke-width="2" stroke-dasharray="5,2" d="M131,-180C131,-180 88,-180 88,-180 82,-180 76,-174 76,-168 76,-168 76,-156 76,-156 76,-150 82,-144 88,-144 88,-144 131,-144 131,-144 137,-144 143,-150 143,-156 143,-156 143,-168 143,-168 143,-174 137,-180 131,-180"/> +<text text-anchor="middle" 
x="109.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">trim_fasta</text> </g> <!-- 2->1 --> -<g id="edge5" class="edge"> +<g id="edge7" class="edge"> <title>2->1</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M397.5,-143.8314C397.5,-136.131 397.5,-126.9743 397.5,-118.4166"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="401.0001,-118.4132 397.5,-108.4133 394.0001,-118.4133 401.0001,-118.4132"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M109.5,-143.8314C109.5,-136.131 109.5,-126.9743 109.5,-118.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="113.0001,-118.4132 109.5,-108.4133 106.0001,-118.4133 113.0001,-118.4132"/> </g> <!-- 3 --> <g id="node4" class="node"> <title>3</title> -<path fill="none" stroke="#bed856" stroke-width="2" d="M461.5,-252C461.5,-252 333.5,-252 333.5,-252 327.5,-252 321.5,-246 321.5,-240 321.5,-240 321.5,-228 321.5,-228 321.5,-222 327.5,-216 333.5,-216 333.5,-216 461.5,-216 461.5,-216 467.5,-216 473.5,-222 473.5,-228 473.5,-228 473.5,-240 473.5,-240 473.5,-246 467.5,-252 461.5,-252"/> -<text text-anchor="middle" x="397.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> +<path fill="none" stroke="#9fd856" stroke-width="2" stroke-dasharray="5,2" d="M173.5,-252C173.5,-252 45.5,-252 45.5,-252 39.5,-252 33.5,-246 33.5,-240 33.5,-240 33.5,-228 33.5,-228 33.5,-222 39.5,-216 45.5,-216 45.5,-216 173.5,-216 173.5,-216 179.5,-216 185.5,-222 185.5,-228 185.5,-228 185.5,-240 185.5,-240 185.5,-246 179.5,-252 173.5,-252"/> +<text text-anchor="middle" x="109.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">extract_transcriptome_seqs</text> </g> <!-- 3->2 --> -<g id="edge6" class="edge"> +<g id="edge8" class="edge"> <title>3->2</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M397.5,-215.8314C397.5,-208.131 397.5,-198.9743 397.5,-190.4166"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" 
stroke-width="2" points="401.0001,-190.4132 397.5,-180.4133 394.0001,-190.4133 401.0001,-190.4132"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M109.5,-215.8314C109.5,-208.131 109.5,-198.9743 109.5,-190.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="113.0001,-190.4132 109.5,-180.4133 106.0001,-190.4133 113.0001,-190.4132"/> </g> <!-- 4 --> <g id="node5" class="node"> <title>4</title> -<path fill="none" stroke="#d8bc56" stroke-width="2" d="M244,-329C244,-329 105,-329 105,-329 99,-329 93,-323 93,-317 93,-317 93,-300 93,-300 93,-294 99,-288 105,-288 105,-288 244,-288 244,-288 250,-288 256,-294 256,-300 256,-300 256,-317 256,-317 256,-323 250,-329 244,-329"/> -<text text-anchor="middle" x="174.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">genome_process</text> -<text text-anchor="middle" x="174.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens</text> -<text text-anchor="middle" x="174.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">prefix_name: GRCh38.98_chrY</text> +<path fill="none" stroke="#d88556" stroke-width="2" stroke-dasharray="5,2" d="M541,-761C541,-761 402,-761 402,-761 396,-761 390,-755 390,-749 390,-749 390,-737 390,-737 390,-731 396,-725 402,-725 402,-725 541,-725 541,-725 547,-725 553,-731 553,-737 553,-737 553,-749 553,-749 553,-755 547,-761 541,-761"/> +<text text-anchor="middle" x="471.5" y="-746" font-family="sans" font-size="10.00" fill="#000000">genome_process</text> +<text text-anchor="middle" x="471.5" y="-735" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens/chrY</text> </g> <!-- 4->3 --> -<g id="edge7" class="edge"> +<g id="edge9" class="edge"> <title>4->3</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M236.026,-287.9453C266.388,-277.802 302.9835,-265.5761 333.6536,-255.3298"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="335.0122,-258.5662 343.3879,-252.0778 
332.7941,-251.9269 335.0122,-258.5662"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M389.8289,-728.047C281.4131,-705.8262 103.5,-660.0594 103.5,-599 103.5,-599 103.5,-599 103.5,-383 103.5,-341.0355 105.9055,-292.4767 107.6703,-262.4985"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="111.1744,-262.5347 108.2858,-252.3412 104.1872,-262.1112 111.1744,-262.5347"/> </g> <!-- 6 --> <g id="node7" class="node"> <title>6</title> -<path fill="none" stroke="#568ad8" stroke-width="2" d="M258,-108C258,-108 91,-108 91,-108 85,-108 79,-102 79,-96 79,-96 79,-84 79,-84 79,-78 85,-72 91,-72 91,-72 258,-72 258,-72 264,-72 270,-78 270,-84 270,-84 270,-96 270,-96 270,-102 264,-108 258,-108"/> -<text text-anchor="middle" x="174.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> +<path fill="none" stroke="#56c9d8" stroke-width="2" stroke-dasharray="5,2" d="M399,-617C399,-617 232,-617 232,-617 226,-617 220,-611 220,-605 220,-605 220,-593 220,-593 220,-587 226,-581 232,-581 232,-581 399,-581 399,-581 405,-581 411,-587 411,-593 411,-593 411,-605 411,-605 411,-611 405,-617 399,-617"/> +<text text-anchor="middle" x="315.5" y="-596.5" font-family="sans" font-size="10.00" fill="#000000">generate_segemehl_index_genome</text> </g> <!-- 4->6 --> -<g id="edge9" class="edge"> +<g id="edge11" class="edge"> <title>4->6</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M174.5,-287.9441C174.5,-248.6867 174.5,-162.9857 174.5,-118.331"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="178.0001,-118.1716 174.5,-108.1716 171.0001,-118.1716 178.0001,-118.1716"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M451.7425,-724.7623C424.0795,-699.2272 373.7092,-652.7316 342.5731,-623.9906"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="344.8191,-621.3006 335.0971,-617.0896 340.0712,-626.4443 344.8191,-621.3006"/> </g> <!-- 9 --> <g id="node10" class="node"> 
<title>9</title> -<path fill="none" stroke="#59d856" stroke-width="2" d="M119,-180C119,-180 12,-180 12,-180 6,-180 0,-174 0,-168 0,-168 0,-156 0,-156 0,-150 6,-144 12,-144 12,-144 119,-144 119,-144 125,-144 131,-150 131,-156 131,-156 131,-168 131,-168 131,-174 125,-180 119,-180"/> -<text text-anchor="middle" x="65.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> +<path fill="none" stroke="#59d856" stroke-width="2" stroke-dasharray="5,2" d="M486,-545C486,-545 379,-545 379,-545 373,-545 367,-539 367,-533 367,-533 367,-521 367,-521 367,-515 373,-509 379,-509 379,-509 486,-509 486,-509 492,-509 498,-515 498,-521 498,-521 498,-533 498,-533 498,-539 492,-545 486,-545"/> +<text text-anchor="middle" x="432.5" y="-524.5" font-family="sans" font-size="10.00" fill="#000000">create_header_genome</text> </g> <!-- 4->9 --> -<g id="edge12" class="edge"> +<g id="edge14" class="edge"> <title>4->9</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M159.1141,-287.8208C139.7559,-261.8027 106.4824,-217.0819 85.2602,-188.5585"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="87.867,-186.1987 79.0897,-180.265 82.251,-190.3773 87.867,-186.1987"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M466.6938,-724.5836C464.0728,-714.1809 460.8863,-700.9133 458.5,-689 449.1509,-642.3262 440.9286,-587.5572 436.3457,-555.1498"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="439.7861,-554.4795 434.9347,-545.0607 432.8536,-555.4491 439.7861,-554.4795"/> +</g> +<!-- 13 --> +<g id="node14" class="node"> +<title>13</title> +<path fill="none" stroke="#56d86b" stroke-width="2" stroke-dasharray="5,2" d="M529.5,-689C529.5,-689 479.5,-689 479.5,-689 473.5,-689 467.5,-683 467.5,-677 467.5,-677 467.5,-665 467.5,-665 467.5,-659 473.5,-653 479.5,-653 479.5,-653 529.5,-653 529.5,-653 535.5,-653 541.5,-659 541.5,-665 541.5,-665 541.5,-677 541.5,-677 541.5,-683 535.5,-689 529.5,-689"/> +<text 
text-anchor="middle" x="504.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">mirna_anno</text> +</g> +<!-- 4->13 --> +<g id="edge19" class="edge"> +<title>4->13</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M479.8273,-724.8314C483.473,-716.8771 487.8309,-707.369 491.8627,-698.5723"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="495.0757,-699.9622 496.0606,-689.4133 488.7123,-697.0456 495.0757,-699.9622"/> +</g> +<!-- 14 --> +<g id="node15" class="node"> +<title>14</title> +<path fill="none" stroke="#d8cb56" stroke-width="2" stroke-dasharray="5,2" d="M603,-689C603,-689 572,-689 572,-689 566,-689 560,-683 560,-677 560,-677 560,-665 560,-665 560,-659 566,-653 572,-653 572,-653 603,-653 603,-653 609,-653 615,-659 615,-665 615,-665 615,-677 615,-677 615,-683 609,-689 603,-689"/> +<text text-anchor="middle" x="587.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">dict_chr</text> +</g> +<!-- 4->14 --> +<g id="edge20" class="edge"> +<title>4->14</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M500.7717,-724.8314C516.0619,-715.3409 534.9156,-703.6386 551.1347,-693.5716"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="553.1842,-696.4189 559.8349,-688.1715 549.4927,-690.4714 553.1842,-696.4189"/> +</g> +<!-- 21 --> +<g id="node22" class="node"> +<title>21</title> +<path fill="none" stroke="#d8b456" stroke-width="2" stroke-dasharray="5,2" d="M858.5,-689C858.5,-689 772.5,-689 772.5,-689 766.5,-689 760.5,-683 760.5,-677 760.5,-677 760.5,-665 760.5,-665 760.5,-659 766.5,-653 772.5,-653 772.5,-653 858.5,-653 858.5,-653 864.5,-653 870.5,-659 870.5,-665 870.5,-665 870.5,-677 870.5,-677 870.5,-683 864.5,-689 858.5,-689"/> +<text text-anchor="middle" x="815.5" y="-668.5" font-family="sans" font-size="10.00" fill="#000000">create_index_fasta</text> +</g> +<!-- 4->21 --> +<g id="edge36" class="edge"> +<title>4->21</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" 
d="M553.0232,-725.937C613.3753,-713.3052 694.5038,-696.3248 750.5129,-684.6019"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="751.2491,-688.0238 760.3199,-682.5493 749.815,-681.1723 751.2491,-688.0238"/> </g> <!-- 5 --> <g id="node6" class="node"> <title>5</title> -<path fill="none" stroke="#88d856" stroke-width="2" d="M540,-329C540,-329 401,-329 401,-329 395,-329 389,-323 389,-317 389,-317 389,-300 389,-300 389,-294 395,-288 401,-288 401,-288 540,-288 540,-288 546,-288 552,-294 552,-300 552,-300 552,-317 552,-317 552,-323 546,-329 540,-329"/> -<text text-anchor="middle" x="470.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> -<text text-anchor="middle" x="470.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens</text> -<text text-anchor="middle" x="470.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">prefix_name: GRCh38.98_chrY</text> +<path fill="none" stroke="#70d856" stroke-width="2" stroke-dasharray="5,2" d="M283,-326.5C283,-326.5 144,-326.5 144,-326.5 138,-326.5 132,-320.5 132,-314.5 132,-314.5 132,-302.5 132,-302.5 132,-296.5 138,-290.5 144,-290.5 144,-290.5 283,-290.5 283,-290.5 289,-290.5 295,-296.5 295,-302.5 295,-302.5 295,-314.5 295,-314.5 295,-320.5 289,-326.5 283,-326.5"/> +<text text-anchor="middle" x="213.5" y="-311.5" font-family="sans" font-size="10.00" fill="#000000">filter_anno_gtf</text> +<text text-anchor="middle" x="213.5" y="-300.5" font-family="sans" font-size="10.00" fill="#000000">organism: homo_sapiens/chrY</text> </g> <!-- 5->3 --> -<g id="edge8" class="edge"> +<g id="edge10" class="edge"> <title>5->3</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M450.1654,-287.7476C441.5104,-278.9147 431.3356,-268.5308 422.2486,-259.2571"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="424.7428,-256.8017 415.2441,-252.1087 419.743,-261.7009 424.7428,-256.8017"/> +<path fill="none" stroke="#c0c0c0" 
stroke-width="2" d="M188.3245,-290.4656C174.7563,-280.7461 157.8327,-268.6229 143.1487,-258.1041"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="144.9718,-255.1047 134.8041,-252.1265 140.8953,-260.7953 144.9718,-255.1047"/> </g> <!-- 8 --> <g id="node9" class="node"> <title>8</title> -<path fill="none" stroke="#56d8c1" stroke-width="2" d="M575.5,-252C575.5,-252 513.5,-252 513.5,-252 507.5,-252 501.5,-246 501.5,-240 501.5,-240 501.5,-228 501.5,-228 501.5,-222 507.5,-216 513.5,-216 513.5,-216 575.5,-216 575.5,-216 581.5,-216 587.5,-222 587.5,-228 587.5,-228 587.5,-240 587.5,-240 587.5,-246 581.5,-252 575.5,-252"/> -<text text-anchor="middle" x="544.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> +<path fill="none" stroke="#56d89a" stroke-width="2" stroke-dasharray="5,2" d="M280.5,-252C280.5,-252 218.5,-252 218.5,-252 212.5,-252 206.5,-246 206.5,-240 206.5,-240 206.5,-228 206.5,-228 206.5,-222 212.5,-216 218.5,-216 218.5,-216 280.5,-216 280.5,-216 286.5,-216 292.5,-222 292.5,-228 292.5,-228 292.5,-240 292.5,-240 292.5,-246 286.5,-252 280.5,-252"/> +<text text-anchor="middle" x="249.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">get_exons_gtf</text> </g> <!-- 5->8 --> -<g id="edge11" class="edge"> +<g id="edge13" class="edge"> <title>5->8</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M491.1131,-287.7476C499.8868,-278.9147 510.2009,-268.5308 519.4124,-259.2571"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="521.9488,-261.6701 526.5129,-252.1087 516.9824,-256.737 521.9488,-261.6701"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M222.2146,-290.4656C226.4285,-281.7453 231.5772,-271.0901 236.2623,-261.3948"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="239.5413,-262.6532 240.7409,-252.1265 233.2386,-259.6076 239.5413,-262.6532"/> </g> <!-- 6->0 --> <g id="edge2" class="edge"> <title>6->0</title> -<path fill="none" 
stroke="#c0c0c0" stroke-width="2" d="M202.51,-71.8314C216.7909,-62.5681 234.3205,-51.1975 249.5834,-41.2972"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="251.7815,-44.0434 258.2664,-35.665 247.9721,-38.1706 251.7815,-44.0434"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M317.1576,-580.8326C319.5009,-553.5392 323.5,-500.3198 323.5,-455 323.5,-455 323.5,-455 323.5,-162 323.5,-121.5282 330.968,-75.2037 336.54,-46.2725"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="340.0483,-46.5746 338.5632,-36.0844 333.1823,-45.2111 340.0483,-46.5746"/> </g> <!-- 7 --> <g id="node8" class="node"> <title>7</title> -<path fill="none" stroke="#56d88a" stroke-width="2" d="M571.5,-108C571.5,-108 537.5,-108 537.5,-108 531.5,-108 525.5,-102 525.5,-96 525.5,-96 525.5,-84 525.5,-84 525.5,-78 531.5,-72 537.5,-72 537.5,-72 571.5,-72 571.5,-72 577.5,-72 583.5,-78 583.5,-84 583.5,-84 583.5,-96 583.5,-96 583.5,-102 577.5,-108 571.5,-108"/> -<text text-anchor="middle" x="554.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> +<path fill="none" stroke="#d89c56" stroke-width="2" stroke-dasharray="5,2" d="M283.5,-108C283.5,-108 249.5,-108 249.5,-108 243.5,-108 237.5,-102 237.5,-96 237.5,-96 237.5,-84 237.5,-84 237.5,-78 243.5,-72 249.5,-72 249.5,-72 283.5,-72 283.5,-72 289.5,-72 295.5,-78 295.5,-84 295.5,-84 295.5,-96 295.5,-96 295.5,-102 289.5,-108 283.5,-108"/> +<text text-anchor="middle" x="266.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">gtftobed</text> </g> <!-- 7->0 --> <g id="edge3" class="edge"> <title>7->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M525.3009,-75.4138C522.3621,-74.1746 519.3945,-73.0109 516.5,-72 449.8064,-48.7071 368.7812,-32.3854 322.571,-24.1501"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="323.1591,-20.6999 312.705,-22.4208 321.9506,-27.5948 323.1591,-20.6999"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" 
d="M285.678,-71.8314C294.8588,-63.1337 306.0006,-52.5783 315.9776,-43.1265"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="318.4757,-45.5811 323.3281,-36.1628 313.6615,-40.4995 318.4757,-45.5811"/> </g> <!-- 8->7 --> -<g id="edge10" class="edge"> +<g id="edge12" class="edge"> <title>8->7</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M545.7665,-215.7623C547.4721,-191.201 550.5245,-147.2474 552.531,-118.3541"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="556.0425,-118.3081 553.2438,-108.0896 549.0593,-117.8231 556.0425,-118.3081"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M251.6531,-215.7623C254.5527,-191.201 259.7416,-147.2474 263.1526,-118.3541"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="266.6678,-118.431 264.3644,-108.0896 259.7161,-117.6103 266.6678,-118.431"/> </g> <!-- 9->0 --> <g id="edge4" class="edge"> <title>9->0</title> -<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M60.544,-143.9048C56.2668,-123.9783 53.0715,-92.1044 69.5,-72 91.8461,-44.6539 192.3357,-28.8343 248.0323,-22.0186"/> -<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="248.6915,-25.465 258.21,-20.8123 247.8676,-18.5137 248.6915,-25.465"/> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M416.7428,-508.8597C395.8256,-482.8524 361.5,-432.6151 361.5,-383 361.5,-383 361.5,-383 361.5,-162 361.5,-121.5282 354.032,-75.2037 348.46,-46.2725"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="351.8177,-45.2111 346.4368,-36.0844 344.9517,-46.5746 351.8177,-45.2111"/> +</g> +<!-- 10 --> +<g id="node11" class="node"> +<title>10</title> +<path fill="none" stroke="#d85656" stroke-width="2" stroke-dasharray="5,2" d="M590,-473C590,-473 557,-473 557,-473 551,-473 545,-467 545,-461 545,-461 545,-449 545,-449 545,-443 551,-437 557,-437 557,-437 590,-437 590,-437 596,-437 602,-443 602,-449 602,-449 602,-461 602,-461 602,-467 596,-473 590,-473"/> +<text 
text-anchor="middle" x="573.5" y="-452.5" font-family="sans" font-size="10.00" fill="#000000">gfftobed</text> +</g> +<!-- 10->0 --> +<g id="edge5" class="edge"> +<title>10->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M544.7674,-445.7697C495.3466,-427.8859 399.5,-383.5304 399.5,-308.5 399.5,-308.5 399.5,-308.5 399.5,-162 399.5,-120.6022 391.7954,-110.0557 375.5,-72 371.5617,-62.8027 366.2497,-53.3159 361.0396,-44.9093"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="363.8442,-42.7994 355.4958,-36.276 357.954,-46.5817 363.8442,-42.7994"/> +</g> +<!-- 19 --> +<g id="node20" class="node"> +<title>19</title> +<path fill="none" stroke="#56b1d8" stroke-width="2" stroke-dasharray="5,2" d="M864,-401C864,-401 781,-401 781,-401 775,-401 769,-395 769,-389 769,-389 769,-377 769,-377 769,-371 775,-365 781,-365 781,-365 864,-365 864,-365 870,-365 876,-371 876,-377 876,-377 876,-389 876,-389 876,-395 870,-401 864,-401"/> +<text text-anchor="middle" x="822.5" y="-380.5" font-family="sans" font-size="10.00" fill="#000000">filter_mature_mirs</text> +</g> +<!-- 10->19 --> +<g id="edge34" class="edge"> +<title>10->19</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M602.0635,-446.7407C640.027,-435.7633 708.1876,-416.0542 758.6993,-401.4484"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="759.918,-404.7395 768.5522,-398.5994 757.9735,-398.0149 759.918,-404.7395"/> +</g> +<!-- 11 --> +<g id="node12" class="node"> +<title>11</title> +<path fill="none" stroke="#569ad8" stroke-width="2" stroke-dasharray="5,2" d="M612,-545C612,-545 535,-545 535,-545 529,-545 523,-539 523,-533 523,-533 523,-521 523,-521 523,-515 529,-509 535,-509 535,-509 612,-509 612,-509 618,-509 624,-515 624,-521 624,-521 624,-533 624,-533 624,-539 618,-545 612,-545"/> +<text text-anchor="middle" x="573.5" y="-524.5" font-family="sans" font-size="10.00" fill="#000000">filter_mir_1_anno</text> +</g> +<!-- 11->10 --> +<g id="edge15" class="edge"> 
+<title>11->10</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M573.5,-508.8314C573.5,-501.131 573.5,-491.9743 573.5,-483.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="577.0001,-483.4132 573.5,-473.4133 570.0001,-483.4133 577.0001,-483.4132"/> +</g> +<!-- 12 --> +<g id="node13" class="node"> +<title>12</title> +<path fill="none" stroke="#ced856" stroke-width="2" stroke-dasharray="5,2" d="M610,-617C610,-617 537,-617 537,-617 531,-617 525,-611 525,-605 525,-605 525,-593 525,-593 525,-587 531,-581 537,-581 537,-581 610,-581 610,-581 616,-581 622,-587 622,-593 622,-593 622,-605 622,-605 622,-611 616,-617 610,-617"/> +<text text-anchor="middle" x="573.5" y="-596.5" font-family="sans" font-size="10.00" fill="#000000">map_chr_names</text> +</g> +<!-- 12->11 --> +<g id="edge16" class="edge"> +<title>12->11</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M573.5,-580.8314C573.5,-573.131 573.5,-563.9743 573.5,-555.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="577.0001,-555.4132 573.5,-545.4133 570.0001,-555.4133 577.0001,-555.4132"/> +</g> +<!-- 13->12 --> +<g id="edge17" class="edge"> +<title>13->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M521.9116,-652.8314C530.021,-644.3694 539.8156,-634.1489 548.682,-624.8971"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="551.4618,-627.0549 555.854,-617.4133 546.4079,-622.2115 551.4618,-627.0549"/> +</g> +<!-- 14->12 --> +<g id="edge18" class="edge"> +<title>14->12</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M583.9672,-652.8314C582.4699,-645.131 580.6895,-635.9743 579.0255,-627.4166"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="582.4248,-626.5614 577.0804,-617.4133 575.5535,-627.8975 582.4248,-626.5614"/> +</g> +<!-- 15 --> +<g id="node16" class="node"> +<title>15</title> +<path fill="none" stroke="#5682d8" stroke-width="2" stroke-dasharray="5,2" 
d="M747.5,-108C747.5,-108 685.5,-108 685.5,-108 679.5,-108 673.5,-102 673.5,-96 673.5,-96 673.5,-84 673.5,-84 673.5,-78 679.5,-72 685.5,-72 685.5,-72 747.5,-72 747.5,-72 753.5,-72 759.5,-78 759.5,-84 759.5,-84 759.5,-96 759.5,-96 759.5,-102 753.5,-108 747.5,-108"/> +<text text-anchor="middle" x="716.5" y="-87.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_final</text> +</g> +<!-- 15->0 --> +<g id="edge6" class="edge"> +<title>15->0</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M673.2321,-81.6703C599.1603,-67.4105 449.3253,-38.5653 379.6658,-25.1549"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="380.0743,-21.6694 369.5929,-23.2158 378.7509,-28.5431 380.0743,-21.6694"/> +</g> +<!-- 16 --> +<g id="node17" class="node"> +<title>16</title> +<path fill="none" stroke="#88d856" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-180C939.5,-180 865.5,-180 865.5,-180 859.5,-180 853.5,-174 853.5,-168 853.5,-168 853.5,-156 853.5,-156 853.5,-150 859.5,-144 865.5,-144 865.5,-144 939.5,-144 939.5,-144 945.5,-144 951.5,-150 951.5,-156 951.5,-156 951.5,-168 951.5,-168 951.5,-174 945.5,-180 939.5,-180"/> +<text text-anchor="middle" x="902.5" y="-159.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_concat</text> +</g> +<!-- 16->15 --> +<g id="edge21" class="edge"> +<title>16->15</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M855.5643,-143.8314C829.1867,-133.6207 796.1948,-120.8496 768.9662,-110.3095"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="770.1849,-107.0282 759.5958,-106.6822 767.6579,-113.5562 770.1849,-107.0282"/> +</g> +<!-- 17 --> +<g id="node18" class="node"> +<title>17</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M1245,-252C1245,-252 1166,-252 1166,-252 1160,-252 1154,-246 1154,-240 1154,-240 1154,-228 1154,-228 1154,-222 1160,-216 1166,-216 1166,-216 1245,-216 1245,-216 1251,-216 1257,-222 1257,-228 1257,-228 
1257,-240 1257,-240 1257,-246 1251,-252 1245,-252"/> +<text text-anchor="middle" x="1205.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 17->16 --> +<g id="edge22" class="edge"> +<title>17->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1153.8566,-218.2469C1151.0353,-217.4676 1148.2361,-216.7134 1145.5,-216 1083.0986,-199.7293 1010.8817,-184.0796 961.4936,-173.8551"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="962.1224,-170.4112 951.6217,-171.8207 960.7094,-177.2671 962.1224,-170.4112"/> +</g> +<!-- 18 --> +<g id="node19" class="node"> +<title>18</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M1097.5,-329C1097.5,-329 1059.5,-329 1059.5,-329 1053.5,-329 1047.5,-323 1047.5,-317 1047.5,-317 1047.5,-300 1047.5,-300 1047.5,-294 1053.5,-288 1059.5,-288 1059.5,-288 1097.5,-288 1097.5,-288 1103.5,-288 1109.5,-294 1109.5,-300 1109.5,-300 1109.5,-317 1109.5,-317 1109.5,-323 1103.5,-329 1097.5,-329"/> +<text text-anchor="middle" x="1078.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="1078.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> +<text text-anchor="middle" x="1078.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> +</g> +<!-- 18->17 --> +<g id="edge31" class="edge"> +<title>18->17</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1109.5677,-290.2752C1126.6067,-280.28 1147.9043,-267.7864 1166.1013,-257.1118"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1167.8985,-260.1154 1174.753,-252.0366 1164.3566,-254.0775 1167.8985,-260.1154"/> +</g> +<!-- 19->18 --> +<g id="edge32" class="edge"> +<title>19->18</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M876.0691,-367.5085C879.256,-366.6447 882.4177,-365.8025 885.5,-365 950.0665,-348.1901 971.0565,-352.6418 
1037.6698,-328.8971"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1039.1864,-332.0696 1047.3813,-325.3544 1036.7874,-325.4935 1039.1864,-332.0696"/> +</g> +<!-- 23 --> +<g id="node24" class="node"> +<title>23</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M1177.5,-329C1177.5,-329 1139.5,-329 1139.5,-329 1133.5,-329 1127.5,-323 1127.5,-317 1127.5,-317 1127.5,-300 1127.5,-300 1127.5,-294 1133.5,-288 1139.5,-288 1139.5,-288 1177.5,-288 1177.5,-288 1183.5,-288 1189.5,-294 1189.5,-300 1189.5,-300 1189.5,-317 1189.5,-317 1189.5,-323 1183.5,-329 1177.5,-329"/> +<text text-anchor="middle" x="1158.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="1158.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> +<text text-anchor="middle" x="1158.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> +</g> +<!-- 19->23 --> +<g id="edge38" class="edge"> +<title>19->23</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M876.0004,-367.2218C879.2046,-366.4301 882.3888,-365.682 885.5,-365 984.7553,-343.2422 1016.8807,-360.2118 1117.8083,-328.6955"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1119.0119,-331.9852 1127.4661,-325.5994 1116.8749,-325.3194 1119.0119,-331.9852"/> +</g> +<!-- 25 --> +<g id="node26" class="node"> +<title>25</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M1257.5,-329C1257.5,-329 1219.5,-329 1219.5,-329 1213.5,-329 1207.5,-323 1207.5,-317 1207.5,-317 1207.5,-300 1207.5,-300 1207.5,-294 1213.5,-288 1219.5,-288 1219.5,-288 1257.5,-288 1257.5,-288 1263.5,-288 1269.5,-294 1269.5,-300 1269.5,-300 1269.5,-317 1269.5,-317 1269.5,-323 1263.5,-329 1257.5,-329"/> +<text text-anchor="middle" x="1238.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="1238.5" y="-306" 
font-family="sans" font-size="10.00" fill="#000000">bp_3p: -1</text> +<text text-anchor="middle" x="1238.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> +</g> +<!-- 19->25 --> +<g id="edge41" class="edge"> +<title>19->25</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M876.3144,-366.9972C879.4101,-366.2725 882.4878,-365.5994 885.5,-365 1019.6173,-338.3145 1062.4776,-368.1219 1197.1954,-328.8575"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1198.478,-332.1272 1207.0564,-325.9093 1196.4728,-325.4205 1198.478,-332.1272"/> +</g> +<!-- 27 --> +<g id="node28" class="node"> +<title>27</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M625.5,-329C625.5,-329 587.5,-329 587.5,-329 581.5,-329 575.5,-323 575.5,-317 575.5,-317 575.5,-300 575.5,-300 575.5,-294 581.5,-288 587.5,-288 587.5,-288 625.5,-288 625.5,-288 631.5,-288 637.5,-294 637.5,-300 637.5,-300 637.5,-317 637.5,-317 637.5,-323 631.5,-329 625.5,-329"/> +<text text-anchor="middle" x="606.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="606.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> +<text text-anchor="middle" x="606.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> +</g> +<!-- 19->27 --> +<g id="edge44" class="edge"> +<title>19->27</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M768.8973,-369.1985C735.8592,-360.0919 692.6315,-347.0993 647.1323,-328.9682"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="648.2813,-325.6575 637.6987,-325.1473 645.6534,-332.1455 648.2813,-325.6575"/> +</g> +<!-- 29 --> +<g id="node30" class="node"> +<title>29</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M703.5,-329C703.5,-329 667.5,-329 667.5,-329 661.5,-329 655.5,-323 655.5,-317 655.5,-317 655.5,-300 655.5,-300 655.5,-294 
661.5,-288 667.5,-288 667.5,-288 703.5,-288 703.5,-288 709.5,-288 715.5,-294 715.5,-300 715.5,-300 715.5,-317 715.5,-317 715.5,-323 709.5,-329 703.5,-329"/> +<text text-anchor="middle" x="685.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="685.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> +<text text-anchor="middle" x="685.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> +</g> +<!-- 19->29 --> +<g id="edge47" class="edge"> +<title>19->29</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M789.3361,-364.9656C769.833,-354.3599 745.0671,-340.8923 724.6141,-329.7701"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="726.0118,-326.5461 715.5546,-324.8436 722.6677,-332.6957 726.0118,-326.5461"/> +</g> +<!-- 31 --> +<g id="node32" class="node"> +<title>31</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M781.5,-329C781.5,-329 745.5,-329 745.5,-329 739.5,-329 733.5,-323 733.5,-317 733.5,-317 733.5,-300 733.5,-300 733.5,-294 739.5,-288 745.5,-288 745.5,-288 781.5,-288 781.5,-288 787.5,-288 793.5,-294 793.5,-300 793.5,-300 793.5,-317 793.5,-317 793.5,-323 787.5,-329 781.5,-329"/> +<text text-anchor="middle" x="763.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="763.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 0</text> +<text text-anchor="middle" x="763.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> +</g> +<!-- 19->31 --> +<g id="edge50" class="edge"> +<title>19->31</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M808.2177,-364.9656C801.6234,-356.6389 793.6321,-346.5481 786.2394,-337.2133"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="788.8992,-334.9343 779.947,-329.2679 783.4117,-339.2802 788.8992,-334.9343"/> +</g> +<!-- 
33 --> +<g id="node34" class="node"> +<title>33</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M861.5,-329C861.5,-329 823.5,-329 823.5,-329 817.5,-329 811.5,-323 811.5,-317 811.5,-317 811.5,-300 811.5,-300 811.5,-294 817.5,-288 823.5,-288 823.5,-288 861.5,-288 861.5,-288 867.5,-288 873.5,-294 873.5,-300 873.5,-300 873.5,-317 873.5,-317 873.5,-323 867.5,-329 861.5,-329"/> +<text text-anchor="middle" x="842.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="842.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> +<text text-anchor="middle" x="842.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: -1</text> +</g> +<!-- 19->33 --> +<g id="edge53" class="edge"> +<title>19->33</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M827.3414,-364.9656C829.4187,-357.2277 831.905,-347.9663 834.2574,-339.2038"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="837.7122,-339.8334 836.9247,-329.2679 830.9516,-338.0184 837.7122,-339.8334"/> +</g> +<!-- 35 --> +<g id="node36" class="node"> +<title>35</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M939.5,-329C939.5,-329 903.5,-329 903.5,-329 897.5,-329 891.5,-323 891.5,-317 891.5,-317 891.5,-300 891.5,-300 891.5,-294 897.5,-288 903.5,-288 903.5,-288 939.5,-288 939.5,-288 945.5,-288 951.5,-294 951.5,-300 951.5,-300 951.5,-317 951.5,-317 951.5,-323 945.5,-329 939.5,-329"/> +<text text-anchor="middle" x="921.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="921.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> +<text text-anchor="middle" x="921.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 0</text> +</g> +<!-- 19->35 --> +<g id="edge56" class="edge"> +<title>19->35</title> +<path fill="none" stroke="#c0c0c0" 
stroke-width="2" d="M846.4651,-364.9656C858.2008,-356.1343 872.5734,-345.3185 885.5877,-335.5249"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="888.0166,-338.0774 893.9024,-329.2679 883.8076,-332.4842 888.0166,-338.0774"/> +</g> +<!-- 37 --> +<g id="node38" class="node"> +<title>37</title> +<path fill="none" stroke="#56d8b1" stroke-width="2" stroke-dasharray="5,2" d="M1017.5,-329C1017.5,-329 981.5,-329 981.5,-329 975.5,-329 969.5,-323 969.5,-317 969.5,-317 969.5,-300 969.5,-300 969.5,-294 975.5,-288 981.5,-288 981.5,-288 1017.5,-288 1017.5,-288 1023.5,-288 1029.5,-294 1029.5,-300 1029.5,-300 1029.5,-317 1029.5,-317 1029.5,-323 1023.5,-329 1017.5,-329"/> +<text text-anchor="middle" x="999.5" y="-317" font-family="sans" font-size="10.00" fill="#000000">iso_anno</text> +<text text-anchor="middle" x="999.5" y="-306" font-family="sans" font-size="10.00" fill="#000000">bp_3p: 1</text> +<text text-anchor="middle" x="999.5" y="-295" font-family="sans" font-size="10.00" fill="#000000">bp_5p: 1</text> +</g> +<!-- 19->37 --> +<g id="edge59" class="edge"> +<title>19->37</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M872.5569,-364.9436C896.833,-355.8355 926.4897,-344.1949 960.2755,-328.9125"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="961.7911,-332.068 969.4309,-324.7275 958.881,-325.7016 961.7911,-332.068"/> +</g> +<!-- 20 --> +<g id="node21" class="node"> +<title>20</title> +<path fill="none" stroke="#56d8c9" stroke-width="2" stroke-dasharray="5,2" d="M974.5,-401C974.5,-401 906.5,-401 906.5,-401 900.5,-401 894.5,-395 894.5,-389 894.5,-389 894.5,-377 894.5,-377 894.5,-371 900.5,-365 906.5,-365 906.5,-365 974.5,-365 974.5,-365 980.5,-365 986.5,-371 986.5,-377 986.5,-377 986.5,-389 986.5,-389 986.5,-395 980.5,-401 974.5,-401"/> +<text text-anchor="middle" x="940.5" y="-380.5" font-family="sans" font-size="10.00" fill="#000000">extract_chr_len</text> +</g> +<!-- 20->18 --> +<g id="edge33" class="edge"> 
+<title>20->18</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M973.906,-364.9656C993.2747,-354.5093 1017.7964,-341.2711 1038.2273,-330.2414"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1040.1565,-333.1774 1047.2934,-325.347 1036.8311,-327.0177 1040.1565,-333.1774"/> +</g> +<!-- 20->23 --> +<g id="edge39" class="edge"> +<title>20->23</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M986.6376,-371.4431C1020.834,-362.3331 1068.5762,-348.4728 1117.7774,-328.9936"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1119.3363,-332.139 1127.3052,-325.1571 1116.7216,-325.6456 1119.3363,-332.139"/> +</g> +<!-- 20->25 --> +<g id="edge42" class="edge"> +<title>20->25</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M986.7141,-377.1305C1037.0889,-369.9013 1119.8819,-355.5828 1197.9187,-328.7648"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1199.1339,-332.0477 1207.4093,-325.432 1196.8145,-325.4431 1199.1339,-332.0477"/> +</g> +<!-- 20->27 --> +<g id="edge45" class="edge"> +<title>20->27</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M894.4458,-367.3101C891.4334,-366.4754 888.4327,-365.6956 885.5,-365 784.0417,-340.9342 750.6997,-360.6954 647.1056,-328.6565"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="648.1205,-325.3066 637.53,-325.6115 645.9991,-331.9775 648.1205,-325.3066"/> +</g> +<!-- 20->29 --> +<g id="edge48" class="edge"> +<title>20->29</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M894.3843,-367.5507C891.3869,-366.6571 888.4064,-365.7985 885.5,-365 817.8355,-346.4091 795.2636,-353.5816 725.4978,-328.9214"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="726.4011,-325.5259 715.807,-325.4021 724.0116,-332.1054 726.4011,-325.5259"/> +</g> +<!-- 20->31 --> +<g id="edge51" class="edge"> +<title>20->31</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" 
d="M894.2817,-366.3795C869.4433,-357.1154 838.2051,-344.9356 803.0776,-329.0697"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="804.1194,-325.6978 793.5691,-324.7275 801.2115,-332.0653 804.1194,-325.6978"/> +</g> +<!-- 20->33 --> +<g id="edge54" class="edge"> +<title>20->33</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M916.7769,-364.9656C905.1598,-356.1343 890.9324,-345.3185 878.0496,-335.5249"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="879.8978,-332.5335 869.8188,-329.2679 875.6615,-338.1061 879.8978,-332.5335"/> +</g> +<!-- 20->35 --> +<g id="edge57" class="edge"> +<title>20->35</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M935.9006,-364.9656C933.9272,-357.2277 931.5652,-347.9663 929.3305,-339.2038"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="932.6593,-338.0928 926.7965,-329.2679 925.8764,-339.8227 932.6593,-338.0928"/> +</g> +<!-- 20->37 --> +<g id="edge60" class="edge"> +<title>20->37</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M954.7823,-364.9656C961.3766,-356.6389 969.3679,-346.5481 976.7606,-337.2133"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="979.5883,-339.2802 983.053,-329.2679 974.1008,-334.9343 979.5883,-339.2802"/> +</g> +<!-- 21->20 --> +<g id="edge35" class="edge"> +<title>21->20</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M823.3193,-652.9843C844.54,-604.0919 903.3131,-468.6785 928.638,-410.33"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="931.9128,-411.5756 932.6836,-401.0089 925.4915,-408.7886 931.9128,-411.5756"/> +</g> +<!-- 22 --> +<g id="node23" class="node"> +<title>22</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M1366,-252C1366,-252 1287,-252 1287,-252 1281,-252 1275,-246 1275,-240 1275,-240 1275,-228 1275,-228 1275,-222 1281,-216 1287,-216 1287,-216 1366,-216 1366,-216 1372,-216 1378,-222 1378,-228 
1378,-228 1378,-240 1378,-240 1378,-246 1372,-252 1366,-252"/> +<text text-anchor="middle" x="1326.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 22->16 --> +<g id="edge23" class="edge"> +<title>22->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1274.9141,-218.0093C1272.0779,-217.2913 1269.2598,-216.6153 1266.5,-216 1160.4136,-192.3475 1034.566,-176.2829 962.0781,-168.1673"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="962.1033,-164.6488 951.779,-167.0276 961.3334,-171.6063 962.1033,-164.6488"/> +</g> +<!-- 23->22 --> +<g id="edge37" class="edge"> +<title>23->22</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1189.8339,-292.1629C1192.7499,-290.7269 1195.6712,-289.3193 1198.5,-288 1222.3119,-276.8941 1249.0366,-265.4617 1271.8831,-255.9918"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1273.5069,-259.1082 1281.4173,-252.0601 1270.8382,-252.6369 1273.5069,-259.1082"/> +</g> +<!-- 24 --> +<g id="node25" class="node"> +<title>24</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M1487,-252C1487,-252 1408,-252 1408,-252 1402,-252 1396,-246 1396,-240 1396,-240 1396,-228 1396,-228 1396,-222 1402,-216 1408,-216 1408,-216 1487,-216 1487,-216 1493,-216 1499,-222 1499,-228 1499,-228 1499,-240 1499,-240 1499,-246 1493,-252 1487,-252"/> +<text text-anchor="middle" x="1447.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 24->16 --> +<g id="edge24" class="edge"> +<title>24->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1395.9401,-217.8875C1393.0972,-217.2009 1390.2706,-216.565 1387.5,-216 1236.0278,-185.1105 1054.0874,-170.8211 962.0651,-165.1607"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="962.0256,-161.6521 951.8331,-164.5443 961.6045,-168.6394 962.0256,-161.6521"/> +</g> +<!-- 25->24 --> +<g id="edge40" 
class="edge"> +<title>25->24</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1269.7894,-297.3466C1300.7581,-286.3075 1348.9656,-269.1235 1387.3616,-255.4369"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1388.6512,-258.693 1396.8955,-252.0385 1386.3008,-252.0994 1388.6512,-258.693"/> +</g> +<!-- 26 --> +<g id="node27" class="node"> +<title>26</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M519,-252C519,-252 440,-252 440,-252 434,-252 428,-246 428,-240 428,-240 428,-228 428,-228 428,-222 434,-216 440,-216 440,-216 519,-216 519,-216 525,-216 531,-222 531,-228 531,-228 531,-240 531,-240 531,-246 525,-252 519,-252"/> +<text text-anchor="middle" x="479.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 26->16 --> +<g id="edge25" class="edge"> +<title>26->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M531.2867,-218.1865C534.393,-217.4037 537.4812,-216.6672 540.5,-216 645.9118,-192.7034 770.8468,-176.5451 842.9651,-168.311"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="843.6696,-171.7537 853.2136,-167.1537 842.884,-164.798 843.6696,-171.7537"/> +</g> +<!-- 27->26 --> +<g id="edge43" class="edge"> +<title>27->26</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M575.4323,-290.2752C558.3933,-280.28 537.0957,-267.7864 518.8987,-257.1118"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="520.6434,-254.0775 510.247,-252.0366 517.1015,-260.1154 520.6434,-254.0775"/> +</g> +<!-- 28 --> +<g id="node29" class="node"> +<title>28</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M640,-252C640,-252 561,-252 561,-252 555,-252 549,-246 549,-240 549,-240 549,-228 549,-228 549,-222 555,-216 561,-216 561,-216 640,-216 640,-216 646,-216 652,-222 652,-228 652,-228 652,-240 652,-240 652,-246 646,-252 640,-252"/> +<text text-anchor="middle" x="600.5" 
y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 28->16 --> +<g id="edge26" class="edge"> +<title>28->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M652.0176,-218.5362C655.2202,-217.6562 658.4004,-216.8036 661.5,-216 723.2401,-199.9928 794.6002,-184.3557 843.5551,-174.0633"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="844.2732,-177.4889 853.3436,-172.0137 842.8386,-170.6375 844.2732,-177.4889"/> +</g> +<!-- 29->28 --> +<g id="edge46" class="edge"> +<title>29->28</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M661.8228,-287.7476C651.5454,-278.7398 639.4277,-268.119 628.6898,-258.7076"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="630.9882,-256.0679 621.1609,-252.1087 626.3742,-261.3321 630.9882,-256.0679"/> +</g> +<!-- 30 --> +<g id="node31" class="node"> +<title>30</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M761,-252C761,-252 682,-252 682,-252 676,-252 670,-246 670,-240 670,-240 670,-228 670,-228 670,-222 676,-216 682,-216 682,-216 761,-216 761,-216 767,-216 773,-222 773,-228 773,-228 773,-240 773,-240 773,-246 767,-252 761,-252"/> +<text text-anchor="middle" x="721.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 30->16 --> +<g id="edge27" class="edge"> +<title>30->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M767.1739,-215.8314C791.5968,-206.1162 821.8466,-194.0831 847.5484,-183.8592"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="849.1563,-186.9864 857.1545,-180.038 846.569,-180.4821 849.1563,-186.9864"/> +</g> +<!-- 31->30 --> +<g id="edge49" class="edge"> +<title>31->30</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M751.8007,-287.7476C747.1168,-279.4394 741.6595,-269.7591 736.677,-260.9211"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" 
points="739.6688,-259.1009 731.7089,-252.1087 733.571,-262.5386 739.6688,-259.1009"/> +</g> +<!-- 32 --> +<g id="node33" class="node"> +<title>32</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M882,-252C882,-252 803,-252 803,-252 797,-252 791,-246 791,-240 791,-240 791,-228 791,-228 791,-222 797,-216 803,-216 803,-216 882,-216 882,-216 888,-216 894,-222 894,-228 894,-228 894,-240 894,-240 894,-246 888,-252 882,-252"/> +<text text-anchor="middle" x="842.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 32->16 --> +<g id="edge28" class="edge"> +<title>32->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M857.6405,-215.8314C864.6217,-207.454 873.0391,-197.3531 880.6876,-188.1749"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="883.4425,-190.3362 887.1556,-180.4133 878.0649,-185.8548 883.4425,-190.3362"/> +</g> +<!-- 33->32 --> +<g id="edge52" class="edge"> +<title>33->32</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M842.5,-287.7476C842.5,-279.8767 842.5,-270.7743 842.5,-262.3232"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="846.0001,-262.1086 842.5,-252.1087 839.0001,-262.1087 846.0001,-262.1086"/> +</g> +<!-- 34 --> +<g id="node35" class="node"> +<title>34</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M1003,-252C1003,-252 924,-252 924,-252 918,-252 912,-246 912,-240 912,-240 912,-228 912,-228 912,-222 918,-216 924,-216 924,-216 1003,-216 1003,-216 1009,-216 1015,-222 1015,-228 1015,-228 1015,-240 1015,-240 1015,-246 1009,-252 1003,-252"/> +<text text-anchor="middle" x="963.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 34->16 --> +<g id="edge29" class="edge"> +<title>34->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M948.1071,-215.8314C941.0096,-207.454 932.452,-197.3531 
924.676,-188.1749"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="927.2348,-185.7807 918.1001,-180.4133 921.8939,-190.3056 927.2348,-185.7807"/> +</g> +<!-- 35->34 --> +<g id="edge55" class="edge"> +<title>35->34</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M933.1993,-287.7476C937.8832,-279.4394 943.3405,-269.7591 948.323,-260.9211"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="951.429,-262.5386 953.2911,-252.1087 945.3312,-259.1009 951.429,-262.5386"/> +</g> +<!-- 36 --> +<g id="node37" class="node"> +<title>36</title> +<path fill="none" stroke="#56d882" stroke-width="2" stroke-dasharray="5,2" d="M1124,-252C1124,-252 1045,-252 1045,-252 1039,-252 1033,-246 1033,-240 1033,-240 1033,-228 1033,-228 1033,-222 1039,-216 1045,-216 1045,-216 1124,-216 1124,-216 1130,-216 1136,-222 1136,-228 1136,-228 1136,-240 1136,-240 1136,-246 1130,-252 1124,-252"/> +<text text-anchor="middle" x="1084.5" y="-231.5" font-family="sans" font-size="10.00" fill="#000000">iso_anno_rename</text> +</g> +<!-- 36->16 --> +<g id="edge30" class="edge"> +<title>36->16</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1038.5737,-215.8314C1013.9077,-206.0734 983.3309,-193.977 957.414,-183.7242"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="958.6824,-180.4621 948.096,-180.038 956.1073,-186.9713 958.6824,-180.4621"/> +</g> +<!-- 37->36 --> +<g id="edge58" class="edge"> +<title>37->36</title> +<path fill="none" stroke="#c0c0c0" stroke-width="2" d="M1023.1772,-287.7476C1033.4546,-278.7398 1045.5723,-268.119 1056.3102,-258.7076"/> +<polygon fill="#c0c0c0" stroke="#c0c0c0" stroke-width="2" points="1058.6258,-261.3321 1063.8391,-252.1087 1054.0118,-256.0679 1058.6258,-261.3321"/> </g> </g> </svg> diff --git a/scripts/filter_anno_gtf.sh b/scripts/filter_anno_gtf.sh index fbdd907ad0e417ab59c9470b66fc52ce33527479..df121c9588aafe69fd935cf17fb1b20c5f88212d 100755 --- a/scripts/filter_anno_gtf.sh +++ 
b/scripts/filter_anno_gtf.sh @@ -18,16 +18,16 @@ ### PARAMETERS ### #################### -# Prefix for filenames -fileNamePrefix="$1" # Modified by Iborra P -organism="$2" +output_dir="$1" +log_dir="$2" + # Paths (DO NOT CHANGE!) #Modified by Iborra P root="$PWD" -#root="$(cd "$(dirname "$0" )" && pwd)" -resDir="${root}/results/${organism}/${fileNamePrefix}" +resDir="${root}/${output_dir}" rawDir="${resDir}/raw" tmpDir="${root}/.tmp" -logDir="${root}/logs/local/${organism}/${fileNamePrefix}" +logDir="${root}/${log_dir}" + # URLs # ---- @@ -67,11 +67,10 @@ set -o pipefail mkdir --parents "$resDir" mkdir --parents "$rawDir" mkdir --parents "$tmpDir" -mkdir --parents "$logDir" # Create log file -logFile="${logDir}/$(basename $0 ".sh").log" -rm -f "$logFile"; touch "$logFile" +logFile="${logDir}" +rm -fr "$logFile"; touch "$logFile" >&2 echo "Log written to '$logFile'..." diff --git a/scripts/filter_mir_1_anno.sh b/scripts/filter_mir_1_anno.sh new file mode 100755 index 0000000000000000000000000000000000000000..891b4d2b97ee85606a9e8d4eee801af0e3b1348c --- /dev/null +++ b/scripts/filter_mir_1_anno.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Setting to strict mode +set -euo pipefail +IFS=$'\n\t' + +#### FUNCTIONS + +usage() +{ + echo "usage: filter_annotation.sh [[[-f file ] [-o outname]] | [-h]]" +} + + + +#### MAIN +# Test whether all required inputs are present +if [[ $1 == -h ]] || [[ $# != 4 ]] +then + usage + exit +fi + +# Get parameters +while [ $# -gt 0 ]; do + case $1 in + -f | --file ) shift + filename=$1 + ;; + -o | --out) shift + outname=$1 + ;; + -h | --help ) usage + exit + ;; + * ) usage + exit 1 + esac + shift +done + +printf "\nRemoves miRs with ID _1 in \"%s\" and write output to %s.\n" \ + ${filename}\ + ${outname} + +#Remove lines with _1 ID +awk -F ';' '{print $0}' ${filename} | awk -F '=' '{print $0}'| grep -v "_1" > ${outname} + +printf "\nDONE!\n" diff --git a/scripts/genome_process.sh b/scripts/genome_process.sh index 
806746fd821ad05208a0504c70266fa95088fd62..abc9d7fc1473c73bc993f4fa83eca93a624a9ecb 100755 --- a/scripts/genome_process.sh +++ b/scripts/genome_process.sh @@ -17,15 +17,16 @@ #################### # Prefix for filenames -fileNamePrefix="$1" -organism="$2" +output_dir="$1" +log_dir="$2" -# Paths (DO NOT CHANGE!) +# # Paths (DO NOT CHANGE!) root="$PWD" #root="$(cd "$(dirname "$0" )" && pwd)" -resDir="${root}/results/${organism}/${fileNamePrefix}/" + +resDir="${root}/${output_dir}" rawDir="${resDir}/raw" -logDir="${root}/logs/local/${organism}/${fileNamePrefix}/" +logDir="${root}/${log_dir}" # URLs # ------ @@ -48,8 +49,8 @@ mkdir --parents "$rawDir" mkdir --parents "$logDir" # Create log file -logFile="${logDir}/$(basename $0 ".sh").log" -rm -f "$logFile"; touch "$logFile" +logFile="${logDir}" +rm -fr "$logFile"; touch "$logFile" >&2 echo "Log written to '$logFile'..." ############## diff --git a/scripts/map_chromosomes.pl b/scripts/map_chromosomes.pl new file mode 100755 index 0000000000000000000000000000000000000000..8dddee9aa10bc5e0a4799382cbac511e5bf04763 --- /dev/null +++ b/scripts/map_chromosomes.pl @@ -0,0 +1,80 @@ +#! 
/usr/bin/perl -w + +# MAY 2019, Paula Iborra +# University of Basel + +use strict; +use warnings; +use Scalar::Util qw(looks_like_number); + +my @in = (); +my $column_delimiters_href_split = { + 'TAB' => q{\t}, + 'COMMA' => ",", + 'DASH' => "-", + 'UNDERSCORE' => "_", + 'PIPE' => q{\|}, + 'DOT' => q{\.}, + 'SPACE' => " " +}; + +my $column_delimiters_href_join = { + 'TAB' => qq{\t}, + 'COMMA' => ",", + 'DASH' => "-", + 'UNDERSCORE' => "_", + 'PIPE' => "|", + 'DOT' => ".", + 'SPACE' => " " +}; + +# a wrapper for converting between UCSC and ensembl chromosome representations from within galaxy +# convert_UCSC_ensembl.pl [input] [col] [delimiter] [genome] [out_file1] + +die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5; +die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0; +die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]}; + +# process input +my $input = $ARGV[0]; +$ARGV[1] =~ s/\s+//g; +my $col = --$ARGV[1]; +my $delim = $ARGV[2]; +my $map_file = $ARGV[3]; +my $output = $ARGV[4]; +my $delim_split = $column_delimiters_href_split->{$delim}; +my $delim_join = $column_delimiters_href_join->{$delim}; + +open (MAP, "<$map_file") or die "Cannot open map file $map_file:$!\n"; +my %chr_map; +while(my $line = <MAP>) { + chop $line; + next if grep /^#/, $line; + my @map = split /\t/, $line; + $map[1] = "remove" unless $#map; + $chr_map{$map[0]} = $map[1]; +} +close MAP; + +open (IN, "<$input") or die "Cannot open $input:$!\n"; +open (OUT, ">$output") or die "Cannot create $output:$!\n"; +while (my $line = <IN>) { + chop $line; + @in = split /$delim_split/, $line; + if(defined $in[$col] && defined $chr_map{$in[$col]}) { + $in[$col] = $chr_map{$in[$col]}; + if($in[$col] eq "remove") { + print "Removed line \"$line\" as chromosome does not have a proper mapping\n"; + } else { + print OUT join($delim_join, @in), "\n"; + } + } 
elsif(grep /^#/, $in[0]) { + print OUT join($delim_join, @in), "\n"; + } else { + print "Removed line \"$line\" as \"$in[$col]\" is not a valid chromosome name\n"; + } +} +close IN; +close OUT; + + diff --git a/scripts/validation_fasta.py b/scripts/validation_fasta.py new file mode 100755 index 0000000000000000000000000000000000000000..f39059e37e2f0c1c119ed7205742d368a6185cab --- /dev/null +++ b/scripts/validation_fasta.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +import sys +import re +import gzip +from argparse import ArgumentParser, RawTextHelpFormatter + + +### Created: Mar 5, 2019 +### Author: Paula Iborra +### Company: Zavolan Group, Biozentrum, University of Basel + + +### ARGUMENTS ### + +parser = ArgumentParser( + description="Script to filter FASTA files" + ) +parser.add_argument( + '-v','--version', + action='version', + version='%(prog)s 1.0', + help="Show program's version number and exit" + ) +parser.add_argument( + '--trim', + help="Character's used to trim the ID. Remove anything that follows the character's. Write \\ infront of \'.\' and \'-\'(i.e trim=\"$\\.\\-|_\"). Default: first white space", + type=str, + nargs='?', + default="" + ) +parser.add_argument( + '--idlist', + help="Generate text file with the sequences IDs. One ID per line." + ) +parser.add_argument( + '-f','--filter', + help="Input ID list. Filter IDs and sequences from FASTA file with the mode selected. 
Filter file must contain ONE ID per line", + ) +parser.add_argument( + '-m', '--mode', + help="Type of filtering fasta file: keep (k) or discard (d) IDs contained in the ID list file.", + choices=('k', 'd') + ) +parser.add_argument( + '-r','--remove', + help="Remove sequences from FASTA file longer than specified length.", + type=int + ) +parser.add_argument( + '-i','--input', + required=True, + help="Input FASTA file", + type=str + ) +parser.add_argument( + '-o','--output', + help="Output FASTA file" + ) + +args = parser.parse_args() + +if args.filter and not args.mode: + sys.exit("ERROR! Mode argument required when using filter option. (--mode, -m). See --help option.") + +### PARSE FASTA FILE ### + +class Seq: + def __init__(self): + self.id="" + def __init__(self): + self.seq="" + def __init__(self): + self.features="" + +# open files +if args.input.endswith('.gz'): + f = gzip.open(args.input, 'rt') +else: + f = open(args.input) + +record=[] #list of records +nrec=-1 +inseq=0 + +# parse fasta file +sys.stdout.write("Parsing FASTA file...") +for line in f: + if re.match(r'^>' , line): + nrec+=1 + record.append(Seq()) + + # define id of the record + if not args.trim: + mobj=re.match(r'^>(\S*)(.*)', line) + else: + mobj=re.match(r'^>([^%s]*)(.*)'%args.trim , line) + + # add id and features + if (mobj): + record[nrec].id=mobj.group(1) + record[nrec].features=mobj.group(2) + inseq=0 + else : + if inseq==0 : + inseq=1 + record[nrec].seq = line + else: + cstring=record[nrec].seq+line + record[nrec].seq = cstring +sys.stdout.write("DONE\n") + +## ID FILTER LIST ## + +if (args.filter): + sys.stdout.write("Filtering FASTA file...") + id_filter=[line.rstrip('\n') for line in open(args.filter)] + sys.stdout.write("DONE\n") + + +## OUTPUT FASTA FILE ## + +if (args.output): + sys.stdout.write("Writing FASTA file...") + with open(args.output, 'w') as output: + if (args.filter) and args.mode == 'k': + if (args.remove): + for x in range(0,nrec+1): + if record[x].id in id_filter 
and (len(record[x].seq)-1 <= args.remove): + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + else: + for x in range(0,nrec+1): + if record[x].id in id_filter: + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + elif (args.filter) and args.mode == 'd': + if (args.remove): + for x in range(0,nrec+1): + if record[x].id not in id_filter and (len(record[x].seq)-1 <= args.remove): + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + else: + for x in range(0,nrec+1): + if record[x].id not in id_filter: + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + else: + if (args.remove): + for x in range(0,nrec+1): + if (len(record[x].seq)-1 <= args.remove): + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + else: + for x in range(0,nrec+1): + output.write(">%s\n%s"%(record[x].id, record[x].seq)) + output.close() + sys.stdout.write("DONE\n") + + +## OUTPUT LIST IDs ## + +idlist=[] +if (args.idlist): + sys.stdout.write("Creating IDs list from FASTA file...") + fasta = open(args.output, 'r') + with open(args.idlist, 'w') as id_list: + for line in fasta: + if line.startswith('>'): + idlist.append(line[1:]) + idlist.sort() + id_list.write(''.join(idlist)) + id_list.close() + sys.stdout.write("DONE\n") + + diff --git a/test/cluster_prepare_annotation.jsob b/test/cluster_prepare_annotation.jsob new file mode 100644 index 0000000000000000000000000000000000000000..eb88fbf452a16061453aa957c9869e4c1814c358 --- /dev/null +++ b/test/cluster_prepare_annotation.jsob @@ -0,0 +1,23 @@ +{ + "__default__" : + { + "queue": "6hours", + "time": "05:00:00", + "threads": "1", + "mem": "4G" + }, + + "generate_segemehl_index_transcriptome": + { + "time": "{resources.time}:00:00", + "threads":"{resources.threads}", + "mem":"{resources.mem}G" + }, + + "generate_segemehl_index_genome": + { + "time": "{resources.time}:00:00", + "threads":"{resources.threads}", + "mem":"{resources.mem}G" + } +} diff --git a/test/config_prepare_annotation.yaml 
b/test/config_prepare_annotation.yaml index 7f487bc5f501b87cb0433b4cf669b12f0c23214e..6a7ada50c4f7ff99229da710d78e0e0ce5c9273b 100644 --- a/test/config_prepare_annotation.yaml +++ b/test/config_prepare_annotation.yaml @@ -1,17 +1,35 @@ --- - ############################################################################## - ### Necessary inputs - ############################################################################## - organism: "homo_sapiens" + +############################## GLOBAL PARAMETERS ############################## + +## Isomirs annotation file +## Number of base pairs to add/substract from 5' (start) and 3' (end) coordinates. +bp_5p: [-1,0,+1] +bp_3p: [-1,0,+1] + +## Directories +output_dir: "results" +scripts_dir: "../scripts" +local_log: "logs/local" +cluster_log: "logs/cluster" + +# List of "organism/prefix" +organism: ["homo_sapiens/chrY"] + +################### PARAMETERS SPECIFIC TO ORGANISM VERSION ################### + +homo_sapiens/chrY: + + # URLs to genome, gene & miRNA annotations genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz" gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz" - prefix_name: "GRCh38.98_chrY" - - ############################################################################## - ### Directories - ############################################################################## - output_dir: "results" - scripts_dir: "../scripts" - local_log: "logs/local" - cluster_log: "logs/cluster" + mirna_url: "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3" + + # Chromosome name mappings between UCSC <-> Ensembl + # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings + map_chr_url: "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt" + # Chromosome name mapping parameters: + column: 1 # Column number from input file where to change chromosome name + 
delimiter: "TAB" # Delimiter of the input file + ... diff --git a/test/expected_output.files b/test/expected_output.files index a8d99461bd457faff08d50f595ab4d9fd79791c3..ad5ad26212c2ffdc86b33ff08111d7a91008cfbe 100644 --- a/test/expected_output.files +++ b/test/expected_output.files @@ -1,9 +1,19 @@ -results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa -results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx -results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa -results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai -results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf -results/homo_sapiens/GRCh38.98_chrY/exons.gtf -results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa -results/homo_sapiens/GRCh38.98_chrY/exons.bed -results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx +results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa +results/homo_sapiens/chrY/genome.processed.fa +results/homo_sapiens/chrY/genome.processed.fa.fai +results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf +results/homo_sapiens/chrY/gene_annotations.filtered.gtf +results/homo_sapiens/chrY/transcriptome.fa +results/homo_sapiens/chrY/transcriptome_idtrim.fa +results/homo_sapiens/chrY/transcriptome_index_segemehl.idx +results/homo_sapiens/chrY/genome_index_segemehl.idx +results/homo_sapiens/chrY/exons.gtf +results/homo_sapiens/chrY/exons.bed +results/homo_sapiens/chrY/raw/mirna.gff3 +results/homo_sapiens/chrY/UCSC2ensembl.txt +results/homo_sapiens/chrY/mirna_chr_mapped.gff3 +results/homo_sapiens/chrY/mirna_filtered.gff3 +results/homo_sapiens/chrY/mirna_filtered.bed +results/homo_sapiens/chrY/chr_size.txt +results/homo_sapiens/chrY/mirna_mature_filtered.bed +results/homo_sapiens/chrY/isomirs_annotation.bed diff --git a/test/expected_output.md5 b/test/expected_output.md5 index 57dc755124001949c4accb0e5dd2c72c77366db2..57c9cf01d4f371ed0b912f22fa5f2dd3d92eeea1 100644 --- a/test/expected_output.md5 +++ b/test/expected_output.md5 @@ -1,9 
+1,19 @@ -583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa -a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx -bf1e37165b908729327599801ff5147b results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa -f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai -0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf -6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/GRCh38.98_chrY/exons.gtf -5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa -51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/GRCh38.98_chrY/exons.bed -11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx +eb44404d89516497e6480d4dd33f2381 results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa +583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/chrY/genome.processed.fa +f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/chrY/genome.processed.fa.fai +d5eaafa9aec63e3fab632fc49392b54b results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf +0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/chrY/gene_annotations.filtered.gtf +5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/chrY/transcriptome.fa +bf1e37165b908729327599801ff5147b results/homo_sapiens/chrY/transcriptome_idtrim.fa +a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/chrY/transcriptome_index_segemehl.idx +11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/chrY/genome_index_segemehl.idx +6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/chrY/exons.gtf +51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/chrY/exons.bed +6bc49275f74ed1b43d80cf7598d387b9 results/homo_sapiens/chrY/raw/mirna.gff3 +d2095c371c9b8b2c7cacd1024abf2d18 results/homo_sapiens/chrY/UCSC2ensembl.txt +ba7404239073e3b67204af1803729884 results/homo_sapiens/chrY/mirna_chr_mapped.gff3 
+91e1facd80f93ef61f242050dd7d03c3 results/homo_sapiens/chrY/mirna_filtered.gff3 +a923f50eea2708cd889886ae5179ee18 results/homo_sapiens/chrY/mirna_filtered.bed +1e6a0b3d0e678014f87afdd80f4025b9 results/homo_sapiens/chrY/chr_size.txt +e7e85f57e0476d1805c1cb64131dd75c results/homo_sapiens/chrY/mirna_mature_filtered.bed +909a2fc878c5ac0437344e4f5c6e58e3 results/homo_sapiens/chrY/isomirs_annotation.bed diff --git a/test/test_workflow_slurm.sh b/test/test_workflow_slurm.sh index 1944f7726feea2ec8108e649b4c2b6bdcf1885eb..905fa44a7dfae6c1cceddde4807c5a54d75b7c12 100755 --- a/test/test_workflow_slurm.sh +++ b/test/test_workflow_slurm.sh @@ -28,7 +28,7 @@ mkdir -p results/homo_sapiens/GRCh38.98_chrY snakemake \ --snakefile="../workflow/prepare_annotation/Snakefile" \ --configfile="config_prepare_annotation.yaml" \ - --cluster-config="../workflow/prepare_annotation/cluster.json" \ + --cluster-config="cluster_prepare_annotation.json" \ --cluster "sbatch \ --cpus-per-task={cluster.threads} \ --mem={cluster.mem} \ diff --git a/workflow/prepare_annotation/Snakefile b/workflow/prepare_annotation/Snakefile index c8428986024a39e6211facd46062d06836acb851..db9b71a80270d1fc9ef9a4a4a6ad8c23f61f51f1 100644 --- a/workflow/prepare_annotation/Snakefile +++ b/workflow/prepare_annotation/Snakefile @@ -1,4 +1,15 @@ -localrules: finish, genome_process, filter_anno_gtf +################################################################################# +# (c) 2020 Paula Iborra, Zavolan Lab, Biozentrum, University of Basel +# (@) paula.iborradetoledo@unibas.ch / paula.iborra@alumni.esci.upf.edu +# +# Pipeline to download and prepare the necessary files for smallRNA-seq related pipelines. 
+################################################################################# + +import os + +# Global config +# Rules that requires internet connection for downloading files should be included in the localrules +localrules: finish, genome_process, filter_anno_gtf, mirna_anno, dict_chr ################################################################################# ### Finish rule @@ -6,14 +17,42 @@ localrules: finish, genome_process, filter_anno_gtf rule finish: input: - idx_transcriptome = expand(os.path.join(config["output_dir"], "{organism}", "{prefix_name}","transcriptome_index_segemehl.idx"), - organism=config["organism"], prefix_name=config["prefix_name"]), - idx_genome = expand(os.path.join(config["output_dir"],"{organism}", "{prefix_name}", "genome_index_segemehl.idx"), - organism=config["organism"], prefix_name=config["prefix_name"]), - exons = expand(os.path.join(config["output_dir"],"{organism}", "{prefix_name}", "exons.bed"), - organism=config["organism"], prefix_name=config["prefix_name"]), - header = expand(os.path.join(config["output_dir"],"{organism}", "{prefix_name}","headerOfCollapsedFasta.sam"), - organism=config["organism"], prefix_name=config["prefix_name"]), + idx_transcriptome = expand( + os.path.join( + config["output_dir"], + "{organism}", + "transcriptome_index_segemehl.idx"), + organism=config["organism"]), + idx_genome = expand( + os.path.join( + config["output_dir"], + "{organism}", + "genome_index_segemehl.idx"), + organism=config["organism"]), + exons = expand( + os.path.join( + config["output_dir"], + "{organism}", + "exons.bed"), + organism=config["organism"]), + header = expand( + os.path.join( + config["output_dir"], + "{organism}", + "headerOfCollapsedFasta.sam"), + organism=config["organism"]), + mirnafilt = expand( + os.path.join( + config["output_dir"], + "{organism}", + "mirna_filtered.bed"), + organism=config["organism"]), + isomirs = expand( + os.path.join( + config["output_dir"], + "{organism}", + 
"isomirs_annotation.bed"), + organism=config["organism"]) ################################################################################# ### Download and process genome IDs @@ -23,19 +62,16 @@ rule genome_process: input: script = os.path.join(config["scripts_dir"],"genome_process.sh"), output: - genome = os.path.join(config["output_dir"],"{organism}","{prefix_name}", "genome.processed.fa") + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","genome_process.log"), - prefix = config["prefix_name"], - url = config["genome_url"], - organism=config["organism"] + url = lambda wildcards: config[ wildcards.organism ]["genome_url"], + dir_out = os.path.join(config["output_dir"],"{organism}") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","genome_process.log") + os.path.join(config["local_log"],"{organism}","genome_process.log") singularity: "docker://zavolab/ubuntu:18.04" shell: - "(bash {input.script} {params.prefix} {params.organism} {params.url}) &> {log}" - + "(bash {input.script} {params.dir_out} {log} {params.url})" ################################################################################# ### Download and filter gtf by transcript_level @@ -45,18 +81,16 @@ rule filter_anno_gtf: input: script = os.path.join(config["scripts_dir"],"filter_anno_gtf.sh"), output: - gtf = os.path.join(config["output_dir"],"{organism}","{prefix_name}","gene_annotations.filtered.gtf") + gtf = os.path.join(config["output_dir"],"{organism}","gene_annotations.filtered.gtf") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","download_filter_gtf.log"), - prefix = config["prefix_name"], - url = config["gtf_url"], - organism=config["organism"] + url = lambda wildcards: config[ wildcards.organism ]['gtf_url'], + dir_out = os.path.join(config["output_dir"],"{organism}") log: - 
os.path.join(config["local_log"],"{organism}","{prefix_name}","filter_anno_gtf.log") + os.path.join(config["local_log"],"{organism}","filter_anno_gtf.log") singularity: "docker://zavolab/ubuntu:18.04" shell: - "(bash {input.script} {params.prefix} {params.organism} {params.url}) &> {log}" + "(bash {input.script} {params.dir_out} {log} {params.url}) &> {log}" ################################################################################# ### Extract transcriptome sequences in FASTA from genome. @@ -64,52 +98,51 @@ rule filter_anno_gtf: rule extract_transcriptome_seqs: input: - genome = os.path.join(config["output_dir"],"{organism}","{prefix_name}", "genome.processed.fa"), - gtf = os.path.join(config["output_dir"],"{organism}","{prefix_name}","gene_annotations.filtered.gtf") + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa"), + gtf = os.path.join(config["output_dir"],"{organism}","gene_annotations.filtered.gtf") output: - fasta = os.path.join(config["output_dir"],"{organism}","{prefix_name}","transcriptome.fa") + fasta = os.path.join(config["output_dir"],"{organism}","transcriptome.fa") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","extract_transcriptome_seqs.log") + cluster_log = os.path.join(config["cluster_log"],"{organism}","extract_transcriptome_seqs.log") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","extract_transcriptome_seqs.log") + os.path.join(config["local_log"],"{organism}","extract_transcriptome_seqs.log") singularity: "docker://zavolab/cufflinks:2.2.1" shell: "(gffread -w {output.fasta} -g {input.genome} {input.gtf}) &> {log}" - ################################################################################ ## Trim transcript IDs from FASTA file ################################################################################ rule trim_fasta: input: - fasta = os.path.join(config["output_dir"], "{organism}","{prefix_name}","transcriptome.fa"), + fasta = 
os.path.join(config["output_dir"], "{organism}","transcriptome.fa"), + script = os.path.join(config["scripts_dir"], "validation_fasta.py") output: - fasta = os.path.join(config["output_dir"],"{organism}","{prefix_name}","transcriptome_idtrim.fa") + fasta = os.path.join(config["output_dir"],"{organism}","transcriptome_idtrim.fa") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","trim_fasta.log") + cluster_log = os.path.join(config["cluster_log"],"{organism}","trim_fasta.log") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","trim_fasta.log") + os.path.join(config["local_log"],"{organism}","trim_fasta.log") singularity: "docker://zavolab/ubuntu:18.04" shell: """(awk -F" " "/^>/ {{print \$1; next}} 1" {input.fasta} > {output.fasta}) &> {log}""" - ################################################################################# ### Generate segemehl index for transcripts ################################################################################# rule generate_segemehl_index_transcriptome: input: - fasta = os.path.join(config["output_dir"],"{organism}","{prefix_name}","transcriptome_idtrim.fa") + fasta = os.path.join(config["output_dir"],"{organism}","transcriptome_idtrim.fa") output: - idx = os.path.join(config["output_dir"],"{organism}","{prefix_name}","transcriptome_index_segemehl.idx") + idx = os.path.join(config["output_dir"],"{organism}","transcriptome_index_segemehl.idx") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","generate_segemehl_index_transcriptome.log"), + cluster_log = os.path.join(config["cluster_log"],"{organism}","generate_segemehl_index_transcriptome.log"), log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","generate_segemehl_index_transcriptome.log") + os.path.join(config["local_log"],"{organism}","generate_segemehl_index_transcriptome.log") resources: mem = 10, threads = 8, @@ -119,24 +152,21 @@ rule 
generate_segemehl_index_transcriptome: shell: "(segemehl.x -x {output.idx} -d {input.fasta}) &> {log}" - ################################################################################# ### Generate segemehl index for genome ################################################################################# rule generate_segemehl_index_genome: input: - #genome = config["genome"] - genome = os.path.join(config["output_dir"],"{organism}","{prefix_name}", "genome.processed.fa") - + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa") output: - idx = os.path.join(config["output_dir"],"{organism}","{prefix_name}","genome_index_segemehl.idx") + idx = os.path.join(config["output_dir"],"{organism}","genome_index_segemehl.idx") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","generate_segemehl_index_genome.log"), + cluster_log = os.path.join(config["cluster_log"],"{organism}","generate_segemehl_index_genome.log"), log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","generate_segemehl_index_genome.log") + os.path.join(config["local_log"],"{organism}","generate_segemehl_index_genome.log") resources: - mem = 60, + mem = 50, threads = 8, time = 6 singularity: @@ -144,62 +174,297 @@ rule generate_segemehl_index_genome: shell: "(segemehl.x -x {output.idx} -d {input.genome}) &> {log}" - ################################################################################# ### GTF file of exons (genomic coordinates) ################################################################################# rule get_exons_gtf: input: - gtf = os.path.join(config["output_dir"],"{organism}","{prefix_name}","gene_annotations.filtered.gtf"), + gtf = os.path.join(config["output_dir"],"{organism}","gene_annotations.filtered.gtf"), script = os.path.join(config["scripts_dir"], "get_lines_w_pattern.sh") output: - exons = os.path.join(config["output_dir"],"{organism}","{prefix_name}","exons.gtf") + exons = 
os.path.join(config["output_dir"],"{organism}","exons.gtf") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","get_exons_gtf.log") + cluster_log = os.path.join(config["cluster_log"],"{organism}","get_exons_gtf.log") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}", "get_exons_gtf.log") + os.path.join(config["local_log"],"{organism}", "get_exons_gtf.log") singularity: "docker://zavolab/ubuntu:18.04" shell: "(bash {input.script} -f {input.gtf} -c 3 -p exon -o {output.exons} ) &> {log}" - ################################################################################# ### Convert GTF file of exons to BED file ################################################################################# rule gtftobed: input: - exons = os.path.join(config["output_dir"],"{organism}","{prefix_name}","exons.gtf"), + exons = os.path.join(config["output_dir"],"{organism}","exons.gtf"), script = os.path.join(config["scripts_dir"], "gtf_exons_bed.1.1.2.R") output: - exons = os.path.join(config["output_dir"],"{organism}","{prefix_name}","exons.bed") + exons = os.path.join(config["output_dir"],"{organism}","exons.bed") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","gtftobed.log") + cluster_log = os.path.join(config["cluster_log"],"{organism}","gtftobed.log") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","gtftobed.log") + os.path.join(config["local_log"],"{organism}","gtftobed.log") singularity: "docker://zavolab/r-zavolab:3.5.1" shell: "(Rscript {input.script} --gtf {input.exons} -o {output.exons}) &> {log}" - ################################################################################# ### Create header for SAM file ################################################################################# rule create_header_genome: input: - #genome = config["genome"] - genome = os.path.join(config["output_dir"],"{organism}","{prefix_name}", "genome.processed.fa") + genome = 
os.path.join(config["output_dir"],"{organism}", "genome.processed.fa") output: - header = os.path.join(config["output_dir"],"{organism}","{prefix_name}","headerOfCollapsedFasta.sam") + header = os.path.join(config["output_dir"],"{organism}","headerOfCollapsedFasta.sam") params: - cluster_log = os.path.join(config["cluster_log"],"{organism}","{prefix_name}","create_header_genome.log") + cluster_log = os.path.join(config["cluster_log"],"{organism}","create_header_genome.log") log: - os.path.join(config["local_log"],"{organism}","{prefix_name}","create_header_genome.log") + os.path.join(config["local_log"],"{organism}","create_header_genome.log") singularity: "docker://zavolab/samtools:1.8" shell: "(samtools dict -o {output.header} {input.genome}) &> {log}" + +################################################################################# +### Download miRNA annotation +################################################################################# + +rule mirna_anno: + input: + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa") + output: + anno = os.path.join(config["output_dir"],"{organism}","raw", "mirna.gff3") + params: + anno = lambda wildcards: config[ wildcards.organism ]["mirna_url"], + cluster_log = os.path.join(config["cluster_log"],"{organism}","mirna_anno.log"), + log: + os.path.join(config["local_log"],"{organism}","mirna_anno.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(wget {params.anno} -O {output.anno}) &> {log}" + +################################################################################# +### Download dictionary mapping chr +################################################################################# + +rule dict_chr: + input: + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa") + output: + map_chr = os.path.join(config["output_dir"],"{organism}", "UCSC2ensembl.txt") + params: + map_chr = lambda wildcards: config[ wildcards.organism ]["map_chr_url"], + 
cluster_log = os.path.join(config["cluster_log"],"{organism}","dict_chr.log"), + log: + os.path.join(config["local_log"],"{organism}","dict_chr.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(wget {params.map_chr} -O {output.map_chr}) &> {log}" + +################################################################################# +### Mapping chromosomes names, UCSC <-> ENSEMBL +################################################################################# + +rule map_chr_names: + input: + anno = os.path.join(config["output_dir"],"{organism}","raw", "mirna.gff3"), + script = os.path.join(config["scripts_dir"], "map_chromosomes.pl"), + map_chr = os.path.join(config["output_dir"],"{organism}", "UCSC2ensembl.txt") + output: + gff = os.path.join(config["output_dir"],"{organism}", "mirna_chr_mapped.gff3") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "map_chr_names.log"), + column = lambda wildcards: config[ wildcards.organism ]["column"], + delimiter = lambda wildcards: config[ wildcards.organism ]["delimiter"] + log: + os.path.join(config["local_log"],"{organism}","map_chr_names.log") + singularity: + "docker://zavolab/perl:5.28" + shell: + "(perl {input.script} {input.anno} \ + {params.column} \ + {params.delimiter} \ + {input.map_chr} \ + {output.gff}) &> {log}" + +################################################################################# +### Filtering _1 miR IDs +################################################################################# + +rule filter_mir_1_anno: + input: + gff = os.path.join(config["output_dir"],"{organism}", "mirna_chr_mapped.gff3") + output: + gff = os.path.join(config["output_dir"],"{organism}", "mirna_filtered.gff3") + params: + script = os.path.join(config["scripts_dir"], "filter_mir_1_anno.sh"), + cluster_log = os.path.join(config["cluster_log"],"{organism}","filter_mir_1_anno.log"), + log: + os.path.join(config["local_log"],"{organism}", "filter_mir_1_anno.log") + singularity: 
+ "docker://zavolab/ubuntu:18.04" + shell: + "(bash {params.script} -f {input.gff} -o {output.gff}) &> {log}" + +################################################################################# +### GFF to BED (improve intersect memory efficient allowing to use -sorted) +################################################################################# + +rule gfftobed: + input: + gff = os.path.join(config["output_dir"],"{organism}", "mirna_filtered.gff3") + output: + bed= os.path.join(config["output_dir"],"{organism}", "mirna_filtered.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "gfftobed.log"), + out_dir= os.path.join(config["output_dir"]) + log: + os.path.join(config["local_log"],"{organism}", "gfftobed.log") + singularity: + "docker://zavolab/bedops:2.4.35" + shell: + "(convert2bed -i gff < {input.gff} --sort-tmpdir={params.out_dir} > {output.bed}) &> {log}" + +################################################################################# +### Index genome fasta file +################################################################################# + +rule create_index_fasta: + input: + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa"), + output: + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa.fai"), + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}","create_index_fasta.log") + log: + os.path.join(config["local_log"],"{organism}","create_index_fasta.log") + singularity: + "docker://zavolab/samtools:1.8" + shell: + "(samtools faidx {input.genome}) &> {log}" + +################################################################################# +### Extract chromosome length +################################################################################# + +rule extract_chr_len: + input: + genome = os.path.join(config["output_dir"],"{organism}", "genome.processed.fa.fai"), + output: + chrsize = os.path.join(config["output_dir"],"{organism}", 
"chr_size.txt"), + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}","extract_chr_len.log") + log: + os.path.join(config["local_log"],"{organism}","extract_chr_len.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "(cut -f1,2 {input.genome} > {output.chrsize}) &> {log}" + +################################################################################# +### Extract mature miRNA +################################################################################# + +rule filter_mature_mirs: + input: + bed= os.path.join(config["output_dir"],"{organism}", "mirna_filtered.bed"), + output: + bed= os.path.join(config["output_dir"],"{organism}", "mirna_mature_filtered.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "filter_mature_mirs.log"), + precursor="miRNA_primary_transcript" + log: + os.path.join(config["local_log"],"{organism}", "filter_mature_mirs.log") + singularity: + "docker://zavolab/ubuntu:18.04", + shell: + "(grep -v {params.precursor} {input.bed} > {output.bed}) &> {log}" + +################################################################################# +### Create isomirs annotation file from mature miRNA +################################################################################# + +rule iso_anno: + input: + bed= os.path.join(config["output_dir"],"{organism}", "mirna_mature_filtered.bed"), + chrsize = os.path.join(config["output_dir"],"{organism}", "chr_size.txt") + output: + bed= os.path.join(config["output_dir"],"{organism}", "iso_anno_5p{bp_5p}_3p{bp_3p}.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "iso_anno_5p{bp_5p}_3p{bp_3p}.log"), + bp_5p = lambda wildcards: wildcards.bp_5p, + bp_3p = lambda wildcards: wildcards.bp_3p + log: + os.path.join(config["local_log"],"{organism}", "iso_anno_5p{bp_5p}_3p{bp_3p}.log") + singularity: + "docker://zavolab/bedtools:2.28.0" + shell: + "(bedtools slop -i {input.bed} -g {input.chrsize} -l {params.bp_5p} 
-r {params.bp_3p} > {output.bed}) &> {log}" + +################################################################################# +### Change miRNA names to isomirs names +################################################################################# + +rule iso_anno_rename: + input: + bed= os.path.join(config["output_dir"],"{organism}", "iso_anno_5p{bp_5p}_3p{bp_3p}.bed") + output: + bed= os.path.join(config["output_dir"],"{organism}", "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log"), + bp_5p = lambda wildcards: wildcards.bp_5p, + bp_3p = lambda wildcards: wildcards.bp_3p + log: + os.path.join(config["local_log"],"{organism}", "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "( sed 's/;Derives/_5p{params.bp_5p}_3p{params.bp_3p};Derives/' {input.bed} > {output.bed}) &> {log}" + +################################################################################# +### Concatenate all isomirs annotation files +################################################################################# + +rule iso_anno_concat: + input: + bed = lambda wildcards: expand(os.path.join(config["output_dir"],"{organism}", "iso_anno_rename_5p{bp_5p}_3p{bp_3p}.bed"), + organism= config["organism"], + bp_3p= config['bp_3p'], + bp_5p= config['bp_5p']) + output: + bed= os.path.join(config["output_dir"],"{organism}", "iso_anno_concat.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "iso_anno_concat.log"), + prefix= os.path.join(config["output_dir"],"{organism}", "iso_anno_rename") + log: + os.path.join(config["local_log"],"{organism}", "iso_anno_concat.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "( cat {params.prefix}* > {output.bed}) &> {log}" + +################################################################################# +### Remove non changing isomirs (5p0_3p0) 
+################################################################################# + +rule iso_anno_final: + input: + bed= os.path.join(config["output_dir"],"{organism}", "iso_anno_concat.bed") + output: + bed= os.path.join(config["output_dir"],"{organism}", "isomirs_annotation.bed") + params: + cluster_log = os.path.join(config["cluster_log"],"{organism}", "iso_anno_final.log"), + pattern= "5p0_3p0" + log: + os.path.join(config["local_log"],"{organism}", "iso_anno_final.log") + singularity: + "docker://zavolab/ubuntu:18.04" + shell: + "( grep -v '{params.pattern}' {input.bed} > {output.bed}) &> {log}"