From 6a49c7d57dad3225d231389d8aab2124d6644eff Mon Sep 17 00:00:00 2001
From: Iris Mestres <iris.mestrespascual@unibas.ch>
Date: Sat, 4 Mar 2023 14:00:03 +0100
Subject: [PATCH] refactor: use local annotation file

---
 scripts/filter_anno_gtf.sh | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/scripts/filter_anno_gtf.sh b/scripts/filter_anno_gtf.sh
index df121c9..6151a53 100755
--- a/scripts/filter_anno_gtf.sh
+++ b/scripts/filter_anno_gtf.sh
@@ -24,18 +24,16 @@ log_dir="$2"
 # Paths (DO NOT CHANGE!)  #Modified by Iborra P
 root="$PWD"
 resDir="${root}/${output_dir}"
-rawDir="${resDir}/raw"
+fileDir="${root}/test_files"
 tmpDir="${root}/.tmp"
 logDir="${root}/${log_dir}"
 
 
-# URLs
+# Local annotation file (path passed as the third argument)
 # ----
-# - All URLs variables represent Bash arrays, so that multiple URLs can be provided; in that case, 
-# files are concatenated after download
 # - It is assumed that the specified transcriptome files contain sequences for all transcripts in 
 # the (filtered) gene annotations
-geneAnnoURLs="$3"   #Modified by Iborra P
+geneAnnoFile="$3"  
 
 # Filters
 # -------
@@ -65,7 +63,6 @@ set -o pipefail
 
 # Create directories
 mkdir --parents "$resDir"
-mkdir --parents "$rawDir"
 mkdir --parents "$tmpDir"
 
 # Create log file
@@ -78,25 +75,14 @@ rm -fr "$logFile"; touch "$logFile"
 ###  MAIN  ###
 ##############
 
-## GET & FILTER GENE ANNOTATIONS
-
-# Get gene annotation files
-echo "Downloading gene annotations..." >> "$logFile"
-for url in "${geneAnnoURLs[@]}"; do
-    wget "$url" --output-document "${rawDir}/$(basename "$url")" &> /dev/null
-done
-
-# Concatenate gene annotation files
-echo "Concatenating gene annotation files..." >> "$logFile"
-geneAnno="${resDir}/gene_annotations.gtf.gz"
-for url in "${geneAnnoURLs[@]}"; do
-    cat "${rawDir}/$(basename "$url")" >> "$geneAnno"
-done
+## FILTER GENE ANNOTATIONS
 
 # Filter gene annotations
 geneAnnoFilt="${resDir}/gene_annotations.filtered.gtf.gz"
 geneAnnoOut="${resDir}/gene_annotations.filtered.gtf"
 geneAnnoFiltTmp="${tmpDir}/gene_annotations.filtered.gtf.gz.tmp"
+geneAnno="${resDir}/gene_annotations.gtf.gz"
+cp "$geneAnnoFile" "$geneAnno"
 cp "$geneAnno" "$geneAnnoFiltTmp"
 
     # Filter requested chromosomes
@@ -142,11 +128,12 @@ cp "$geneAnno" "$geneAnnoFiltTmp"
 
 rm "${resDir}/gene_annotations.filtered.gtf.gz"
 rm "${resDir}/gene_annotations.gtf.gz"
+
 #############
 ###  END  ###
 #############
 
-echo "Original data in: $rawDir" >> "$logFile"
+echo "Original data in: $fileDir" >> "$logFile"
 echo "Processed data in: $resDir" >> "$logFile"
 echo "Done. No errors." >> "$logFile"
->&2 echo "Done. No errors."
+>&2 echo "Done. No errors."
\ No newline at end of file
-- 
GitLab