Skip to content
Snippets Groups Projects
Commit 6a49c7d5 authored by Iris Mestres Pascual
Browse files

refactor: use local annotation file

parent 9a2662c5
No related branches found
No related tags found
1 merge request: !26 refactor: use local files for genome resources
...@@ -24,18 +24,16 @@ log_dir="$2" ...@@ -24,18 +24,16 @@ log_dir="$2"
# Paths (DO NOT CHANGE!) #Modified by Iborra P # Paths (DO NOT CHANGE!) #Modified by Iborra P
root="$PWD" root="$PWD"
resDir="${root}/${output_dir}" resDir="${root}/${output_dir}"
rawDir="${resDir}/raw" fileDir="${root}/test_files"
tmpDir="${root}/.tmp" tmpDir="${root}/.tmp"
logDir="${root}/${log_dir}" logDir="${root}/${log_dir}"
# URLs # Annotation file
# ---- # ----
# - All URLs variables represent Bash arrays, so that multiple URLs can be provided; in that case,
# files are concatenated after download
# - It is assumed that the specified transcriptome files contain sequences for all transcripts in # - It is assumed that the specified transcriptome files contain sequences for all transcripts in
# the (filtered) gene annotations # the (filtered) gene annotations
geneAnnoURLs="$3" #Modified by Iborra P geneAnnoFile="$3"
# Filters # Filters
# ------- # -------
...@@ -65,7 +63,6 @@ set -o pipefail ...@@ -65,7 +63,6 @@ set -o pipefail
# Create directories # Create directories
mkdir --parents "$resDir" mkdir --parents "$resDir"
mkdir --parents "$rawDir"
mkdir --parents "$tmpDir" mkdir --parents "$tmpDir"
# Create log file # Create log file
...@@ -78,25 +75,14 @@ rm -fr "$logFile"; touch "$logFile" ...@@ -78,25 +75,14 @@ rm -fr "$logFile"; touch "$logFile"
### MAIN ### ### MAIN ###
############## ##############
## GET & FILTER GENE ANNOTATIONS ## FILTER GENE ANNOTATIONS
# Get gene annotation files
echo "Downloading gene annotations..." >> "$logFile"
for url in "${geneAnnoURLs[@]}"; do
wget "$url" --output-document "${rawDir}/$(basename "$url")" &> /dev/null
done
# Concatenate gene annotation files
echo "Concatenating gene annotation files..." >> "$logFile"
geneAnno="${resDir}/gene_annotations.gtf.gz"
for url in "${geneAnnoURLs[@]}"; do
cat "${rawDir}/$(basename "$url")" >> "$geneAnno"
done
# Filter gene annotations # Filter gene annotations
geneAnnoFilt="${resDir}/gene_annotations.filtered.gtf.gz" geneAnnoFilt="${resDir}/gene_annotations.filtered.gtf.gz"
geneAnnoOut="${resDir}/gene_annotations.filtered.gtf" geneAnnoOut="${resDir}/gene_annotations.filtered.gtf"
geneAnnoFiltTmp="${tmpDir}/gene_annotations.filtered.gtf.gz.tmp" geneAnnoFiltTmp="${tmpDir}/gene_annotations.filtered.gtf.gz.tmp"
geneAnno="${resDir}/gene_annotations.gtf.gz"
cp "$geneAnnoFile" "$geneAnno"
cp "$geneAnno" "$geneAnnoFiltTmp" cp "$geneAnno" "$geneAnnoFiltTmp"
# Filter requested chromosomes # Filter requested chromosomes
...@@ -142,11 +128,12 @@ cp "$geneAnno" "$geneAnnoFiltTmp" ...@@ -142,11 +128,12 @@ cp "$geneAnno" "$geneAnnoFiltTmp"
rm "${resDir}/gene_annotations.filtered.gtf.gz" rm "${resDir}/gene_annotations.filtered.gtf.gz"
rm "${resDir}/gene_annotations.gtf.gz" rm "${resDir}/gene_annotations.gtf.gz"
############# #############
### END ### ### END ###
############# #############
echo "Original data in: $rawDir" >> "$logFile" echo "Original data in: $fileDir" >> "$logFile"
echo "Processed data in: $resDir" >> "$logFile" echo "Processed data in: $resDir" >> "$logFile"
echo "Done. No errors." >> "$logFile" echo "Done. No errors." >> "$logFile"
>&2 echo "Done. No errors." >&2 echo "Done. No errors."
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment