Commit 0a7c0761 authored by Alex Kanitz

feat: merge updated workflow with miR processing

parent d7abe262
1 merge request: !10 feat: merge updated workflow with miR processing
Pipeline #14416 passed
Showing with 1539 additions and 275 deletions
...@@ -212,4 +212,5 @@ tags
.snakemake
results
logs
nohup.out
\ No newline at end of file
+.wget-hsts
...@@ -3,8 +3,7 @@
[Snakemake][snakemake] workflow to download and prepare the necessary files for
smallRNA-seq related pipelines [mir-map][mir-map] and [mir-quant][mir-quant].
-The scheme below is a visual representation of an example run of the
-workflow:
+The scheme below is a visual representation of an example run of the workflow:
> ![rule-graph-prep-anno][rule-graph-prep-anno]
...@@ -108,39 +107,24 @@ Now make a clean copy of the `JOB` directory and name it what you want, e.g.,
cp -r JOB MY_ANALYSIS
```
-Now traverse to the directory from where you will actually executw the pipeline
+Now traverse to the directory from where you will actually execute the pipeline
with:
```bash
cd MY_ANALYSIS/prepare_annotation
```
-Before running the pipeline adjust the parameters in file `config.yaml`:
+Before running the pipeline adjust the parameters in file
+`config_prepare_annotation.yaml`:
```yaml
----
-##############################################################################
-### Necessary inputs
-##############################################################################
-organism: " " # name of the organism, e.g., "homo_sapiens"
-genome_url: "ftp:// ..... " # FTP/HTTP URL to genome file in FASTA format
-gtf_url: "ftp:// ..... " # FTP/HTTP URL to gene annotation file in GTF format
-prefix_name: " " # name of the assembly/annotation version, e.g., "GRCh38.100"
-##############################################################################
-### Directories
-##############################################################################
-output_dir: "results"
-scripts_dir: "../scripts"
-local_log: "logs/local"
-cluster_log: "logs/cluster"
-...
```
> **Note:** We expect the genome and gene annotations to be formatted according
> to the style used by Ensembl. Other formats are very likely to lead to problems,
> if not in this pipeline, then further down the road in the mapping or
-> annotation pipelines.
+> annotation pipelines. The miRNA annotation file is expected to originate from
+> miRBase, or follow their exact layout.
To start pipeline execution locally:
...
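As a quick sanity check of the note above, one might verify that a downloaded annotation uses Ensembl-style chromosome names (plain `1`, `2`, `X`, `Y`, `MT`, rather than UCSC-style `chr1`, `chrY`). A minimal sketch, assuming the gzipped GTF named by `gtf_url` has already been downloaded into the working directory:

```bash
# List the distinct chromosome/scaffold names used in the GTF.
# Ensembl-style annotations use plain names such as 1, 2, X, Y, MT;
# "chr"-prefixed names would indicate UCSC-style naming that needs remapping.
zcat Homo_sapiens.GRCh38.98.gtf.gz | grep -v '^#' | cut -f1 | sort -u
```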
---
-##############################################################################
-### Necessary inputs
-##############################################################################
-organism: " "
-genome_url: "ftp:// ..... "
-gtf_url: "ftp:// ..... "
-prefix_name: " "
-##############################################################################
-### Directories
-##############################################################################
-output_dir: "results"
-scripts_dir: "../scripts"
-local_log: "logs/local"
-cluster_log: "logs/cluster"
+############################## GLOBAL PARAMETERS ##############################
+## Isomirs annotation file
+## Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
+bp_5p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
+bp_3p: [0] # array of numbers, e.g., [-2,-1,0,+1], to include 2 upstream and 1 downstream nts
+## Directories
+output_dir: "results"
+scripts_dir: "../../../scripts"
+local_log: "logs/local"
+cluster_log: "logs/cluster"
+# List of "organism/prefix" identifiers
+organism: ["org/pre"] # e.g., ["homo_sapiens/GRCh38.100", "mus_musculus/GRCm37.98"]
+################### PARAMETERS SPECIFIC TO ORGANISM VERSION ###################
+org/pre: # One section for each list item in "organism"; names have to match precisely
+  # URLs to genome, gene & miRNA annotations
+  genome_url: # FTP/HTTP URL to gzipped genome in FASTA format, Ensembl style
+  gtf_url: # FTP/HTTP URL to gzipped gene annotations in GTF format, Ensembl style
+  mirna_url: # FTP/HTTP URL to unzipped microRNA annotations in GFF format, miRBase style
+  # Chromosome name mappings between UCSC <-> Ensembl
+  # Available at: https://github.com/dpryan79/ChromosomeMappings; e.g., `GRCh38_UCSC2ensembl.txt`
+  map_chr_url: # FTP/HTTP URL to mapping table
+  # Chromosome name mapping parameters:
+  column: 1 # Column number from input file where to change chromosome name
+  delimiter: "TAB" # Delimiter of the input file
...
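To illustrate what the `bp_5p`/`bp_3p` offsets describe, the sketch below shifts the start and end of a mature-miRNA interval in BED format by one example offset combination on the plus strand. The input coordinates, miRNA name, and file name are hypothetical, and the workflow's actual isomiR-annotation rule may implement the shifting differently:

```bash
# Hypothetical mature miRNA interval (BED: chrom, start, end, name, score, strand).
printf 'Y\t2609229\t2609250\thsa-miR-1234-5p\t0\t+\n' > mirna_mature_example.bed

# Apply one offset combination, e.g. bp_5p = -1 (1 nt upstream of the 5' end)
# and bp_3p = +1 (1 nt downstream of the 3' end), for a plus-strand entry.
awk -v OFS='\t' -v bp5=-1 -v bp3=1 \
    '{ $2 = $2 + bp5; $3 = $3 + bp3; print }' \
    mirna_mature_example.bed
```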
...@@ -27,7 +27,7 @@ cd $script_dir
snakemake \
    --snakefile="../../../workflow/prepare_annotation/Snakefile" \
    --configfile="config.yaml" \
-    --cluster-config="../../../workflow/prepare_annotation/cluster.json" \
+    --cluster-config="cluster.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
...
...@@ -5,9 +5,7 @@ channels:
  - conda-forge
dependencies:
  - graphviz=2.40.1
-  - pip=20.0.2
  - python=3.7.4
  - singularity=3.5.2
-  - snakemake==6.10.0
+  - snakemake=6.10.0
  - unzip=6.0
-  - wget==1.20.1
...@@ -5,8 +5,6 @@ channels:
  - conda-forge
dependencies:
  - graphviz=2.40.1
-  - pip=20.0.2
  - python=3.7.4
-  - snakemake==6.10.0
+  - snakemake=6.10.0
  - unzip=6.0
-  - wget==1.20.1
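For reference, a typical way to materialize one of these Conda environments would be something like the following; the file path and environment name are assumptions, since the repository's install instructions are not shown in this diff:

```bash
# Create and activate the environment from the (assumed) environment file path.
conda env create --file install/environment.yml --name prepare-annotation
conda activate prepare-annotation
```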
...@@ -18,16 +18,16 @@
### PARAMETERS ###
####################
-# Prefix for filenames
-fileNamePrefix="$1" # Modified by Iborra P
-organism="$2"
+output_dir="$1"
+log_dir="$2"
# Paths (DO NOT CHANGE!) #Modified by Iborra P
root="$PWD"
-#root="$(cd "$(dirname "$0" )" && pwd)"
-resDir="${root}/results/${organism}/${fileNamePrefix}"
+resDir="${root}/${output_dir}"
rawDir="${resDir}/raw"
tmpDir="${root}/.tmp"
-logDir="${root}/logs/local/${organism}/${fileNamePrefix}"
+logDir="${root}/${log_dir}"
# URLs
# ----
...@@ -67,11 +67,10 @@ set -o pipefail
mkdir --parents "$resDir"
mkdir --parents "$rawDir"
mkdir --parents "$tmpDir"
-mkdir --parents "$logDir"
# Create log file
-logFile="${logDir}/$(basename $0 ".sh").log"
-rm -f "$logFile"; touch "$logFile"
+logFile="${logDir}"
+rm -fr "$logFile"; touch "$logFile"
>&2 echo "Log written to '$logFile'..."
...
#!/bin/bash

# Setting to strict mode
set -euo pipefail
IFS=$'\n\t'

#### FUNCTIONS

usage()
{
    echo "usage: filter_annotation.sh [[[-f file ] [-o outname]] | [-h]]"
}

#### MAIN

# Test whether all required inputs are present
if [[ ${1:-} == -h ]] || [[ $# != 4 ]]
then
    usage
    exit
fi

# Get parameters
while [ $# -gt 0 ]; do
    case $1 in
        -f | --file )   shift
                        filename=$1
                        ;;
        -o | --out )    shift
                        outname=$1
                        ;;
        -h | --help )   usage
                        exit
                        ;;
        * )             usage
                        exit 1
    esac
    shift
done

printf "\nRemoves miRs with ID _1 in \"%s\" and writes output to %s.\n" \
    "${filename}" \
    "${outname}"

# Remove lines with _1 ID
awk -F ';' '{print $0}' "${filename}" | awk -F '=' '{print $0}' | grep -v "_1" > "${outname}"

printf "\nDONE!\n"
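A minimal usage sketch, with file names taken from the test outputs listed later in this commit; the exact invocation inside the corresponding workflow rule is not shown in this diff:

```bash
# Drop entries whose IDs carry the "_1" suffix from the chromosome-mapped GFF3
# and write the filtered annotation (file names follow the test outputs).
bash filter_annotation.sh -f mirna_chr_mapped.gff3 -o mirna_filtered.gff3
```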
...@@ -17,15 +17,16 @@
####################
# Prefix for filenames
-fileNamePrefix="$1"
-organism="$2"
+output_dir="$1"
+log_dir="$2"
# Paths (DO NOT CHANGE!)
root="$PWD"
#root="$(cd "$(dirname "$0" )" && pwd)"
-resDir="${root}/results/${organism}/${fileNamePrefix}/"
+resDir="${root}/${output_dir}"
rawDir="${resDir}/raw"
-logDir="${root}/logs/local/${organism}/${fileNamePrefix}/"
+logDir="${root}/${log_dir}"
# URLs
# ------
...@@ -48,8 +49,8 @@ mkdir --parents "$rawDir"
mkdir --parents "$logDir"
# Create log file
-logFile="${logDir}/$(basename $0 ".sh").log"
-rm -f "$logFile"; touch "$logFile"
+logFile="${logDir}"
+rm -fr "$logFile"; touch "$logFile"
>&2 echo "Log written to '$logFile'..."
##############
...
#! /usr/bin/perl -w

# MAY 2019, Paula Iborra
# University of Basel

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

my @in = ();

# Delimiters used for splitting input lines (regex form)
my $column_delimiters_href_split = {
    'TAB' => q{\t},
    'COMMA' => ",",
    'DASH' => "-",
    'UNDERSCORE' => "_",
    'PIPE' => q{\|},
    'DOT' => q{\.},
    'SPACE' => " "
};

# Delimiters used for re-joining output lines (literal form)
my $column_delimiters_href_join = {
    'TAB' => qq{\t},
    'COMMA' => ",",
    'DASH' => "-",
    'UNDERSCORE' => "_",
    'PIPE' => "|",
    'DOT' => ".",
    'SPACE' => " "
};

# a wrapper for converting between UCSC and ensembl chromosome representations from within galaxy
# convert_UCSC_ensembl.pl [input] [col] [delimiter] [genome] [out_file1]

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0;
die "Delimiter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my $input = $ARGV[0];
$ARGV[1] =~ s/\s+//g;
my $col = --$ARGV[1];
my $delim = $ARGV[2];
my $map_file = $ARGV[3];
my $output = $ARGV[4];
my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join = $column_delimiters_href_join->{$delim};

# read chromosome name mapping table (old name -> new name)
open (MAP, "<$map_file") or die "Cannot open map file $map_file:$!\n";
my %chr_map;
while(my $line = <MAP>) {
    chop $line;
    next if grep /^#/, $line;
    my @map = split /\t/, $line;
    # names without a counterpart in the target naming scheme are flagged for removal
    $map[1] = "remove" unless $#map;
    $chr_map{$map[0]} = $map[1];
}
close MAP;

open (IN, "<$input") or die "Cannot open $input:$!\n";
open (OUT, ">$output") or die "Cannot create $output:$!\n";
while (my $line = <IN>) {
    chop $line;
    @in = split /$delim_split/, $line;
    if(defined $in[$col] && defined $chr_map{$in[$col]}) {
        $in[$col] = $chr_map{$in[$col]};
        if($in[$col] eq "remove") {
            print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
        } else {
            print OUT join($delim_join, @in), "\n";
        }
    } elsif(grep /^#/, $in[0]) {
        # keep comment/header lines untouched
        print OUT join($delim_join, @in), "\n";
    } else {
        print "Removed line \"$line\" as \"$in[$col]\" is not a valid chromosome name\n";
    }
}
close IN;
close OUT;
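For orientation, this is how the converter might be invoked to remap the miRBase GFF3 to Ensembl chromosome names, matching the `column: 1` / `delimiter: "TAB"` settings in the config; the script's file name is a placeholder here, and the actual Snakemake rule may pass the arguments differently:

```bash
# Argument order per the script's usage message: [input] [col] [delimiter] [map] [out_file1].
# The script file name (map_chromosomes.pl) is a placeholder.
perl map_chromosomes.pl mirna.gff3 1 TAB GRCh38_UCSC2ensembl.txt mirna_chr_mapped.gff3
```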
#!/usr/bin/env python
import sys
import re
import gzip
from argparse import ArgumentParser, RawTextHelpFormatter

### Created: Mar 5, 2019
### Author: Paula Iborra
### Company: Zavolan Group, Biozentrum, University of Basel

### ARGUMENTS ###

parser = ArgumentParser(
    description="Script to filter FASTA files"
)
parser.add_argument(
    '-v', '--version',
    action='version',
    version='%(prog)s 1.0',
    help="Show program's version number and exit"
)
parser.add_argument(
    '--trim',
    help="Characters used to trim the ID. Remove anything that follows the characters. Write \\ in front of '.' and '-' (i.e. trim=\"$\\.\\-|_\"). Default: first white space",
    type=str,
    nargs='?',
    default=""
)
parser.add_argument(
    '--idlist',
    help="Generate text file with the sequence IDs. One ID per line."
)
parser.add_argument(
    '-f', '--filter',
    help="Input ID list. Filter IDs and sequences from FASTA file with the mode selected. Filter file must contain ONE ID per line",
)
parser.add_argument(
    '-m', '--mode',
    help="Type of filtering the FASTA file: keep (k) or discard (d) IDs contained in the ID list file.",
    choices=('k', 'd')
)
parser.add_argument(
    '-r', '--remove',
    help="Remove sequences from FASTA file longer than specified length.",
    type=int
)
parser.add_argument(
    '-i', '--input',
    required=True,
    help="Input FASTA file",
    type=str
)
parser.add_argument(
    '-o', '--output',
    help="Output FASTA file"
)
args = parser.parse_args()

if args.filter and not args.mode:
    sys.exit("ERROR! Mode argument required when using filter option. (--mode, -m). See --help option.")

### PARSE FASTA FILE ###

class Seq:
    def __init__(self):
        self.id = ""
        self.seq = ""
        self.features = ""

# open files
if args.input.endswith('.gz'):
    f = gzip.open(args.input, 'rt')
else:
    f = open(args.input)

record = []  # list of records
nrec = -1
inseq = 0

# parse fasta file
sys.stdout.write("Parsing FASTA file...")
for line in f:
    if re.match(r'^>', line):
        nrec += 1
        record.append(Seq())
        # define id of the record
        if not args.trim:
            mobj = re.match(r'^>(\S*)(.*)', line)
        else:
            mobj = re.match(r'^>([^%s]*)(.*)' % args.trim, line)
        # add id and features
        if (mobj):
            record[nrec].id = mobj.group(1)
            record[nrec].features = mobj.group(2)
        inseq = 0
    else:
        if inseq == 0:
            inseq = 1
            record[nrec].seq = line
        else:
            cstring = record[nrec].seq + line
            record[nrec].seq = cstring
sys.stdout.write("DONE\n")

## ID FILTER LIST ##
if (args.filter):
    sys.stdout.write("Filtering FASTA file...")
    id_filter = [line.rstrip('\n') for line in open(args.filter)]
    sys.stdout.write("DONE\n")

## OUTPUT FASTA FILE ##
if (args.output):
    sys.stdout.write("Writing FASTA file...")
    with open(args.output, 'w') as output:
        if (args.filter) and args.mode == 'k':
            if (args.remove):
                for x in range(0, nrec + 1):
                    if record[x].id in id_filter and (len(record[x].seq) - 1 <= args.remove):
                        output.write(">%s\n%s" % (record[x].id, record[x].seq))
            else:
                for x in range(0, nrec + 1):
                    if record[x].id in id_filter:
                        output.write(">%s\n%s" % (record[x].id, record[x].seq))
        elif (args.filter) and args.mode == 'd':
            if (args.remove):
                for x in range(0, nrec + 1):
                    if record[x].id not in id_filter and (len(record[x].seq) - 1 <= args.remove):
                        output.write(">%s\n%s" % (record[x].id, record[x].seq))
            else:
                for x in range(0, nrec + 1):
                    if record[x].id not in id_filter:
                        output.write(">%s\n%s" % (record[x].id, record[x].seq))
        else:
            if (args.remove):
                for x in range(0, nrec + 1):
                    if (len(record[x].seq) - 1 <= args.remove):
                        output.write(">%s\n%s" % (record[x].id, record[x].seq))
            else:
                for x in range(0, nrec + 1):
                    output.write(">%s\n%s" % (record[x].id, record[x].seq))
    sys.stdout.write("DONE\n")

## OUTPUT LIST IDs ##
idlist = []
if (args.idlist):
    sys.stdout.write("Creating IDs list from FASTA file...")
    fasta = open(args.output, 'r')
    with open(args.idlist, 'w') as id_list:
        for line in fasta:
            if line.startswith('>'):
                idlist.append(line[1:])
        idlist.sort()
        id_list.write(''.join(idlist))
    sys.stdout.write("DONE\n")
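As a usage sketch, the script could be used to trim transcript IDs at the first `.` and write the trimmed FASTA, in line with the `transcriptome_idtrim.fa` file listed among the test outputs; the script's file name is a placeholder and the exact options used by the workflow rule are not shown in this diff:

```bash
# Trim FASTA headers at the first '.' (escaped as required by the --trim help)
# and write the trimmed FASTA; the script name (filter_fasta.py) is a placeholder.
python filter_fasta.py --trim "\." -i transcriptome.fa -o transcriptome_idtrim.fa
```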
{
    "__default__" :
    {
        "queue": "6hours",
        "time": "05:00:00",
        "threads": "1",
        "mem": "4G"
    },
    "generate_segemehl_index_transcriptome":
    {
        "time": "{resources.time}:00:00",
        "threads": "{resources.threads}",
        "mem": "{resources.mem}G"
    },
    "generate_segemehl_index_genome":
    {
        "time": "{resources.time}:00:00",
        "threads": "{resources.threads}",
        "mem": "{resources.mem}G"
    }
}
---
-##############################################################################
-### Necessary inputs
-##############################################################################
-organism: "homo_sapiens"
-genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz"
-gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz"
-prefix_name: "GRCh38.98_chrY"
-##############################################################################
-### Directories
-##############################################################################
-output_dir: "results"
-scripts_dir: "../scripts"
-local_log: "logs/local"
-cluster_log: "logs/cluster"
+############################## GLOBAL PARAMETERS ##############################
+## Isomirs annotation file
+## Number of base pairs to add/subtract from 5' (start) and 3' (end) coordinates.
+bp_5p: [-1,0,+1]
+bp_3p: [-1,0,+1]
+## Directories
+output_dir: "results"
+scripts_dir: "../scripts"
+local_log: "logs/local"
+cluster_log: "logs/cluster"
+# List of "organism/prefix"
+organism: ["homo_sapiens/chrY"]
+################### PARAMETERS SPECIFIC TO ORGANISM VERSION ###################
+homo_sapiens/chrY:
+  # URLs to genome, gene & miRNA annotations
+  genome_url: "ftp://ftp.ensembl.org/pub/release-98/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz"
+  gtf_url: "ftp://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz"
+  mirna_url: "https://www.mirbase.org/ftp/CURRENT/genomes/hsa.gff3"
+  # Chromosome name mappings between UCSC <-> Ensembl
+  # Other organisms available at: https://github.com/dpryan79/ChromosomeMappings
+  map_chr_url: "https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh38_UCSC2ensembl.txt"
+  # Chromosome name mapping parameters:
+  column: 1 # Column number from input file where to change chromosome name
+  delimiter: "TAB" # Delimiter of the input file
...
-results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa
-results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx
-results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa
-results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai
-results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf
-results/homo_sapiens/GRCh38.98_chrY/exons.gtf
-results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa
-results/homo_sapiens/GRCh38.98_chrY/exons.bed
-results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx
+results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa
+results/homo_sapiens/chrY/genome.processed.fa
+results/homo_sapiens/chrY/genome.processed.fa.fai
+results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf
+results/homo_sapiens/chrY/gene_annotations.filtered.gtf
+results/homo_sapiens/chrY/transcriptome.fa
+results/homo_sapiens/chrY/transcriptome_idtrim.fa
+results/homo_sapiens/chrY/transcriptome_index_segemehl.idx
+results/homo_sapiens/chrY/genome_index_segemehl.idx
+results/homo_sapiens/chrY/exons.gtf
+results/homo_sapiens/chrY/exons.bed
+results/homo_sapiens/chrY/raw/mirna.gff3
+results/homo_sapiens/chrY/UCSC2ensembl.txt
+results/homo_sapiens/chrY/mirna_chr_mapped.gff3
+results/homo_sapiens/chrY/mirna_filtered.gff3
+results/homo_sapiens/chrY/mirna_filtered.bed
+results/homo_sapiens/chrY/chr_size.txt
+results/homo_sapiens/chrY/mirna_mature_filtered.bed
+results/homo_sapiens/chrY/isomirs_annotation.bed
-583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa
-a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/GRCh38.98_chrY/transcriptome_index_segemehl.idx
-bf1e37165b908729327599801ff5147b results/homo_sapiens/GRCh38.98_chrY/transcriptome_idtrim.fa
-f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/GRCh38.98_chrY/genome.processed.fa.fai
-0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/GRCh38.98_chrY/gene_annotations.filtered.gtf
-6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/GRCh38.98_chrY/exons.gtf
-5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/GRCh38.98_chrY/transcriptome.fa
-51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/GRCh38.98_chrY/exons.bed
-11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/GRCh38.98_chrY/genome_index_segemehl.idx
+eb44404d89516497e6480d4dd33f2381 results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa
+583f395125f769102ff08ff84b60e0d3 results/homo_sapiens/chrY/genome.processed.fa
+f37a213f94d11bf2260f50f2c9f199d2 results/homo_sapiens/chrY/genome.processed.fa.fai
+d5eaafa9aec63e3fab632fc49392b54b results/homo_sapiens/chrY/raw/Homo_sapiens.GRCh38.98.gtf
+0b3dfe8cf4d644637671572fca629f69 results/homo_sapiens/chrY/gene_annotations.filtered.gtf
+5ab1c2f39ab35fabc6673c73beb3097b results/homo_sapiens/chrY/transcriptome.fa
+bf1e37165b908729327599801ff5147b results/homo_sapiens/chrY/transcriptome_idtrim.fa
+a5a6fd2cab7d7919b80761fc25f2777a results/homo_sapiens/chrY/transcriptome_index_segemehl.idx
+11b0b7c50160aa8837dd92eda516c124 results/homo_sapiens/chrY/genome_index_segemehl.idx
+6fe52e2e126ef2e0c368fb1bf267f453 results/homo_sapiens/chrY/exons.gtf
+51ac61c61825929f8f05c4b4f821f04d results/homo_sapiens/chrY/exons.bed
+6bc49275f74ed1b43d80cf7598d387b9 results/homo_sapiens/chrY/raw/mirna.gff3
+d2095c371c9b8b2c7cacd1024abf2d18 results/homo_sapiens/chrY/UCSC2ensembl.txt
+ba7404239073e3b67204af1803729884 results/homo_sapiens/chrY/mirna_chr_mapped.gff3
+91e1facd80f93ef61f242050dd7d03c3 results/homo_sapiens/chrY/mirna_filtered.gff3
+a923f50eea2708cd889886ae5179ee18 results/homo_sapiens/chrY/mirna_filtered.bed
+1e6a0b3d0e678014f87afdd80f4025b9 results/homo_sapiens/chrY/chr_size.txt
+e7e85f57e0476d1805c1cb64131dd75c results/homo_sapiens/chrY/mirna_mature_filtered.bed
+909a2fc878c5ac0437344e4f5c6e58e3 results/homo_sapiens/chrY/isomirs_annotation.bed
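The MD5 list above lends itself to a standard checksum verification after a test run; a minimal sketch, assuming the list is stored in a file named expected_output.md5 (the actual file name and the test script's own verification logic are not shown in this diff):

```bash
# Verify all listed result files against their recorded checksums;
# the file name expected_output.md5 is an assumption.
md5sum --check expected_output.md5
```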
...@@ -28,7 +28,7 @@ mkdir -p results/homo_sapiens/GRCh38.98_chrY
snakemake \
    --snakefile="../workflow/prepare_annotation/Snakefile" \
    --configfile="config_prepare_annotation.yaml" \
-    --cluster-config="../workflow/prepare_annotation/cluster.json" \
+    --cluster-config="cluster_prepare_annotation.json" \
    --cluster "sbatch \
        --cpus-per-task={cluster.threads} \
        --mem={cluster.mem} \
...