From 3e441ff696b67d619a09baf3c853ec6e68fb5576 Mon Sep 17 00:00:00 2001
From: "christoph.stritt@unibas.ch" <christoph.stritt@unibas.ch>
Date: Tue, 11 Jun 2024 14:58:59 +0200
Subject: [PATCH] Updated README and folder structure

---
 .gitignore                                    |  13 +-
 README.md                                     | 116 ++++++++++++++----
 README_old.md                                 |  44 +++++++
 assembly/README.md                            | 109 ----------------
 {assembly/cluster => cluster}/config.yaml     |   0
 {assembly/config => config}/config.yaml       |   0
 {assembly/config => config}/environment.yml   |   2 +-
 {assembly/config => config}/samples.tsv       |   0
 {assembly/container => container}/README.md   |   0
 .../container => container}/assemblySC.def    |   0
 .../conda-linux-64.lock                       |   0
 .../container => container}/conda-lock.yml    |   0
 .../container => container}/environment.yml   |   0
 notebooks/pangenome_graph.ipynb               |   0
 {assembly/resources => resources}/H37Rv.gbf   |   0
 ...ly_pipeline.py => run_assembly_pipeline.py |   0
 variantcalling/README.md                      |  36 ------
 variantcalling/config/biopython.yaml          |  40 ------
 variantcalling/config/config.yaml             |  11 --
 variantcalling/config/samples.txt             |  17 ---
 variantcalling/workflow/Snakefile             |  71 -----------
 .../workflow/scripts/combine_assemblies.py    |  26 ----
 {assembly/workflow => workflow}/Snakefile     |   0
 .../workflow => workflow}/rules/annotate.smk  |   0
 .../workflow => workflow}/rules/assemble.smk  |   0
 .../rules/circularize.smk                     |   0
 .../workflow => workflow}/rules/common.smk    |   0
 .../workflow => workflow}/rules/mapreads.smk  |   0
 .../workflow => workflow}/rules/readQC.smk    |   0
 .../workflow => workflow}/rules/summarize.smk |   0
 30 files changed, 142 insertions(+), 343 deletions(-)
 create mode 100755 README_old.md
 delete mode 100755 assembly/README.md
 rename {assembly/cluster => cluster}/config.yaml (100%)
 rename {assembly/config => config}/config.yaml (100%)
 rename {assembly/config => config}/environment.yml (86%)
 rename {assembly/config => config}/samples.tsv (100%)
 rename {assembly/container => container}/README.md (100%)
 rename {assembly/container => container}/assemblySC.def (100%)
 rename {assembly/container => container}/conda-linux-64.lock (100%)
 rename {assembly/container => container}/conda-lock.yml (100%)
 rename {assembly/container => container}/environment.yml (100%)
 create mode 100644 notebooks/pangenome_graph.ipynb
 rename {assembly/resources => resources}/H37Rv.gbf (100%)
 rename assembly/run_assembly_pipeline.py => run_assembly_pipeline.py (100%)
 delete mode 100755 variantcalling/README.md
 delete mode 100755 variantcalling/config/biopython.yaml
 delete mode 100755 variantcalling/config/config.yaml
 delete mode 100755 variantcalling/config/samples.txt
 delete mode 100755 variantcalling/workflow/Snakefile
 delete mode 100755 variantcalling/workflow/scripts/combine_assemblies.py
 rename {assembly/workflow => workflow}/Snakefile (100%)
 rename {assembly/workflow => workflow}/rules/annotate.smk (100%)
 rename {assembly/workflow => workflow}/rules/assemble.smk (100%)
 rename {assembly/workflow => workflow}/rules/circularize.smk (100%)
 rename {assembly/workflow => workflow}/rules/common.smk (100%)
 rename {assembly/workflow => workflow}/rules/mapreads.smk (100%)
 rename {assembly/workflow => workflow}/rules/readQC.smk (100%)
 rename {assembly/workflow => workflow}/rules/summarize.smk (100%)

diff --git a/.gitignore b/.gitignore
index 479190f..5f15816 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,8 @@
 pangenome
-assembly/container/assemblySC.sif
-assembly/.snakemake
-assembly/.cache
-assembly/rulegraph.pdf
-assembly/resources/bakta_db
+container/assemblySC.sif
+.snakemake
+.cache
+rulegraph.pdf
+resources/bakta_db
 facienda.md
-variantcalling/container/pggb_latest.sif
-variantcalling/.snakemake
+
diff --git a/README.md b/README.md
index 1efd40a..8c55244 100755
--- a/README.md
+++ b/README.md
@@ -1,44 +1,110 @@
-# Genome assembly and variant calling from PacBio HiFi reads
+# Genome assembly workflow
 
-This folder contains two Snakemake workflows:
-  - [*assembly*](assembly/README.md): from PacBio HiFi consensus reads to annotated genome assemblies
-  - [*variantcalling*](variantcalling/README.md): combine assemblies into a pangenome graph and call variants from the graph
+The genome assembly workflow includes the following tools/steps:
+  - [LongQC](https://doi.org/10.1534/g3.119.400864): Get some read summary statistics. The reads are not modified in any way before assembly.
+  - [Flye](https://doi.org/10.1038/s41587-019-0072-8): Assembly.
+  - [circlator](https://doi.org/10.1186/s13059-015-0849-0): Reorient the assembly such that it begins with dnaA.
+  - [bakta](https://doi.org/10.1099/mgen.0.000685): Annotate the reoriented assembly.
+  - [minimap2](https://doi.org/10.1093/bioinformatics/bty191): Map the long reads back against the assembly. The resulting alignments can be used to check for inconsistencies between reads and assemblies.  
 
-This is ongoing work, some things will change.
+All this software is ready-to-use in a container (see the .def and .yml files in the container folder). 
 
 
-## Requirements
-On the sciCORE cluster, the pipeline is installed in the GROUP folder (**/scicore/home/gagneux/GROUP/PacbioSnake**) and ready to run. 
+# Quick start
 
-In other contexts, four things need to be set up before the pipeline can be run: 
-  
-  1. Install Snakemake and Singularity
-  2. Build the singularity container for the assembly pipeline
-  3. Download the bakta database for genome annotation
-  4. Pull the singularity container for the variant calling pipeline
+## Set up conda environment with snakemake and singularity
+```
+conda env create -f config/environment.yml
+```
+## Run the pipeline
+It is assumed that the pipeline is run from within the PacbioSnake folder. If not, adapt paths accordingly.
 
-These steps are detailed below. 
+```
+# Connect to sciCORE through the terminal
 
+# Create a screen, named assembly, which allows you to run a job in the background.
+screen -R assembly
 
-### 1. Install Snakemake and Singularity
-As described on the [Snakemake](https://snakemake.readthedocs.io) and the [Singularity](https://docs.sylabs.io/guides/latest/user-guide/) sites. 
+# Load the conda environment
+conda activate PacbioSnake
 
+# Run the pipeline 
+./run_assembly_pipeline.py \
+  -s config/samples.tsv \
+  -o ~/assemblies \
+  -j 5 \
+  -t 2
 
-### 2. Build the singularity container for the assembly pipeline
-```
-cd assembly/container
-sudo singularity build assemblySC.sif assemblySC.def
+# Leave the screen while the pipeline is running: Ctrl + a + d
+# Re-attach the screen
+screen -r assembly
 ```
 
+# Some explanations
+The assembly pipeline can be run by executing the **run_assembly_pipeline.py** script. This is a wrapper around the snakemake command where some parameters and paths are hardwired to work in the sciCORE environment with minimal user input. 
+
+To see the arguments required for run_assembly_pipeline.py, type
+```
+./run_assembly_pipeline.py -h
+```
+
+Two arguments are required:
+ - **-s**: a tab-separated table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads in fastq format
+ - **-o**: path to the output directory
 
-### 3. Download the bakta database for genome annotation
-Light-weight (1.3 Gb) and full (33.1 Gb) databases for the *bakta* annotation tool can be downloaded from https://zenodo.org/records/7669534.
-The extracted folder should be located at assembly/resources/bakta_db/. Otherwise the path to the database can be modified in the assembly config file.
+ Optional arguments:
+ - **-n**: perform dry run (recommended), to see if all the paths work out
+ - **-j**: number of jobs to run in parallel (default = 4)
+ - **-t**: number of threads per job (default = 10)
 
+ 
+# Output
+For each sample defined in the samples table, a folder is generated in the output directory. It contains: 
 
-### 4. Pull the singularity container for the variant calling pipeline
 ```
-singularity pull docker://ghcr.io/pangenome/pggb:latest
+assembly.circularized.renamed.fasta
+bakta/
+circlator/
+flye/
+longqc/
+remapping/
+
 ```
 
+# Configuration
+
+
+Two big files required to run the pipeline are not in this repository but are available on sciCORE:
+
+## Singularity container with all required software 
+/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
+
+## Bakta database
+/scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
+
+
+
+## config.yaml
+In the file config/config.yaml some global parameters can be set:
+
+```yaml
+# REQUIRED
+samples: config/samples.tsv # Path to sample table, no header, tab-separated
+outdir: ./results # Path to output directory
+
+# OPTIONAL
+annotate: "Yes" # Annotate assembly with bakta Yes/No
+
+ref:
+  genome_size: 4.4m # 
+  gbf: resources/H37Rv.gbf # Used for bakta annotation step
+
+bakta_db: resources/bakta_db # Used for bakta annotation step
+container: containers/assemblySMK.sif # Singularity container containing all required software
+
+threads_per_job: 4 # Should match cpus-per-task in the snakemake command
+ 
+keep_intermediate: "Yes" # Not implemented yet...
+
+```
 
diff --git a/README_old.md b/README_old.md
new file mode 100755
index 0000000..1efd40a
--- /dev/null
+++ b/README_old.md
@@ -0,0 +1,44 @@
+# Genome assembly and variant calling from PacBio HiFi reads
+
+This folder contains two Snakemake workflows:
+  - [*assembly*](assembly/README.md): from PacBio HiFi consensus reads to annotated genome assemblies
+  - [*variantcalling*](variantcalling/README.md): combine assemblies into a pangenome graph and call variants from the graph
+
+This is ongoing work, some things will change.
+
+
+## Requirements
+On the sciCORE cluster, the pipeline is installed in the GROUP folder (**/scicore/home/gagneux/GROUP/PacbioSnake**) and ready to run. 
+
+In other contexts, four things need to be set up before the pipeline can be run: 
+  
+  1. Install Snakemake and Singularity
+  2. Build the singularity container for the assembly pipeline
+  3. Download the bakta database for genome annotation
+  4. Pull the singularity container for the variant calling pipeline
+
+These steps are detailed below. 
+
+
+### 1. Install Snakemake and Singularity
+As described on the [Snakemake](https://snakemake.readthedocs.io) and the [Singularity](https://docs.sylabs.io/guides/latest/user-guide/) sites. 
+
+
+### 2. Build the singularity container for the assembly pipeline
+```
+cd assembly/container
+sudo singularity build assemblySC.sif assemblySC.def
+```
+
+
+### 3. Download the bakta database for genome annotation
+Light-weight (1.3 Gb) and full (33.1 Gb) databases for the *bakta* annotation tool can be downloaded from https://zenodo.org/records/7669534.
+The extracted folder should be located at assembly/resources/bakta_db/. Otherwise the path to the database can be modified in the assembly config file.
+
+
+### 4. Pull the singularity container for the variant calling pipeline
+```
+singularity pull docker://ghcr.io/pangenome/pggb:latest
+```
+
+
diff --git a/assembly/README.md b/assembly/README.md
deleted file mode 100755
index 8c339e5..0000000
--- a/assembly/README.md
+++ /dev/null
@@ -1,109 +0,0 @@
-# Genome assembly workflow
-
-The genome assembly workflow includes the following tools/steps:
-  - [LongQC](https://doi.org/10.1534/g3.119.400864): Get some read summary statistics. The reads are not modified in any way before assembly, 
-  - [Flye](https://doi.org/10.1038/s41587-019-0072-8): Assembly.
-  - [circlator](https://doi.org/10.1186/s13059-015-0849-0): Reorient the assembly such that it begins with dnaA.
-  - [bakta](https://doi.org/10.1099/mgen.0.000685): Annotate the reoriented assembly.
-  - [minimap2](https://doi.org/10.1093/bioinformatics/bty191): Map the long reads back against the assembly. The resulting alignments can be used to check for inconsistencies between reads and assemblies.  
-
-
-# Run the pipeline on sciCORE
-The user needs to provide two things to run the workflow on her samples:
-- a config file with some global options for the analysis
-- a tab separate table, without header, that contains the sample names and the corresponding paths to the HiFi consensus reads. 
-
-## Create conda environment containing snakemake and singularity
-```
-conda env create -f environment.yml
-
-```
-
-
-## config.yml
-In the file config/config.yaml some global parameters can be set:
-
-```yaml
-# REQUIRED
-samples: config/samples.tsv # Path to sample table, no header, tab-separated
-outdir: ./results # Path to output directory
-
-# OPTIONAL
-annotate: "Yes" # Annotate assembly with bakta Yes/No
-
-ref:
-  genome_size: 4.4m # 
-  gbf: resources/H37Rv.gbf # Used for bakta annotation step
-
-bakta_db: resources/bakta_db # Used for bakta annotation step
-container: containers/assemblySMK.sif # Singularity container containing all reuquired software
-
-threads_per_job: 4 # Should match cpus-per-task in the snakemake command
- 
-keep_intermediate: "Yes" # Not implemented yet...
-
-```
-
-## samples.tsv
-This is a tab-separated table with no header and two colums (see example): the first containing the sample names, which will be used to name the assemblies; the second with the absolute paths to the fastq files. 
-
-
-
-## Snakemake dry run
-
-```
-snakemake -n --configfile /path/to/config.yml
-```
-
-## Run the workflow on sciCORE
-
-Important: singularity containers most be given access to the file locations through the --bind argument. E.g. if the long reads are on /scicore/home/jean-jacques/reads/, add this location in the snakemake command (see also the full command below): 
-
-```
---singularity-args "--bind /scicore/home/jean-jacques/reads/" 
-```
-
-It's convenient to run snakemake in a screen, so we can do other things on scicore while it's running and occasionally check the progress.
-
-
-```
-# Open screen 
-screen -R assembly
-
-# Load Snakemake module
-ml snakemake/6.6.1-foss-2021a 
-
-# Dry run 
-snakemake -n \
- --configfile /scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/config.yml
-
-
-# Real run 
-snakemake \
- --configfile /scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/config.yml \
- --jobs 4 \
- --keep-going \
- --latency-wait 60 \
- --use-singularity --singularity-args "--bind /scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux --bind /scicore/home/gagneux/stritt0001 --bind /scratch" \
- --cluster "sbatch --job-name=pbassembly --cpus-per-task=4 --mem-per-cpu=4G --time=06:00:00 --qos=6hours --output=/scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/pbassembly.o%j --error=/scicore/home/gagneux/stritt0001/TB/projects/pacbio_microscale/results/demo/pbassembly.e%j"
-
-
-# Leave the screen: CTRL+a+d
-
-# Return to the screen 
-screen -r assembly
-
-```
-
-# Output
-For each sample defined in the samples table, a folder is generated in the output directory. It contains: 
-
-```
-assembly.circularized.renamed.fasta
-bakta/
-circlator/
-flye/
-longqc/
-remapping/
-
-```
diff --git a/assembly/cluster/config.yaml b/cluster/config.yaml
similarity index 100%
rename from assembly/cluster/config.yaml
rename to cluster/config.yaml
diff --git a/assembly/config/config.yaml b/config/config.yaml
similarity index 100%
rename from assembly/config/config.yaml
rename to config/config.yaml
diff --git a/assembly/config/environment.yml b/config/environment.yml
similarity index 86%
rename from assembly/config/environment.yml
rename to config/environment.yml
index f5e4dfb..a6ca87a 100644
--- a/assembly/config/environment.yml
+++ b/config/environment.yml
@@ -1,4 +1,4 @@
-name: assemblySMK
+name: PacbioSnake
 channels:
   - conda-forge
   - bioconda
diff --git a/assembly/config/samples.tsv b/config/samples.tsv
similarity index 100%
rename from assembly/config/samples.tsv
rename to config/samples.tsv
diff --git a/assembly/container/README.md b/container/README.md
similarity index 100%
rename from assembly/container/README.md
rename to container/README.md
diff --git a/assembly/container/assemblySC.def b/container/assemblySC.def
similarity index 100%
rename from assembly/container/assemblySC.def
rename to container/assemblySC.def
diff --git a/assembly/container/conda-linux-64.lock b/container/conda-linux-64.lock
similarity index 100%
rename from assembly/container/conda-linux-64.lock
rename to container/conda-linux-64.lock
diff --git a/assembly/container/conda-lock.yml b/container/conda-lock.yml
similarity index 100%
rename from assembly/container/conda-lock.yml
rename to container/conda-lock.yml
diff --git a/assembly/container/environment.yml b/container/environment.yml
similarity index 100%
rename from assembly/container/environment.yml
rename to container/environment.yml
diff --git a/notebooks/pangenome_graph.ipynb b/notebooks/pangenome_graph.ipynb
new file mode 100644
index 0000000..e69de29
diff --git a/assembly/resources/H37Rv.gbf b/resources/H37Rv.gbf
similarity index 100%
rename from assembly/resources/H37Rv.gbf
rename to resources/H37Rv.gbf
diff --git a/assembly/run_assembly_pipeline.py b/run_assembly_pipeline.py
similarity index 100%
rename from assembly/run_assembly_pipeline.py
rename to run_assembly_pipeline.py
diff --git a/variantcalling/README.md b/variantcalling/README.md
deleted file mode 100755
index 1b8a2da..0000000
--- a/variantcalling/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# Variant calling workflow
-
-
-
-
-## container
-```
-singularity pull docker://ghcr.io/pangenome/pggb:latest
-```
-
-## assemblies.txt
-
-
-## config/config.yaml
-```yaml
-outdir: /home/cristobal/TB/projects/pacbio_microscale/results/variants/assembly
-samples: /home/cristobal/TB/projects/pacbio_microscale/results/variants/assembly/samples.tsv
-reference: N1426
-threads: 20
-```
-
-## Dry run
-```
-snakemake -n --configfile 
-```
-
-## Run
-```
-snakemake \
- --jobs 1 \
- --configfile ~/TB/projects/pacbio_microscale/results/variants/assembly/config.yml \
- --latency-wait 60 \
- --use-conda --use-envmodules \
- --use-singularity --singularity-args "--bind /scicore/home/gagneux/stritt0001 --bind /scratch" \
- --cluster "sbatch --job-name=pggb --cpus-per-task=20 --mem-per-cpu=1G --time=06:00:00 --qos=6hours --output=pggb.o%j --error=pggb.e%j"
-```
diff --git a/variantcalling/config/biopython.yaml b/variantcalling/config/biopython.yaml
deleted file mode 100755
index 9b7042f..0000000
--- a/variantcalling/config/biopython.yaml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: biopython
-channels:
-  - defaults
-  - conda-forge
-  - bioconda
-  - r
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - biopython=1.78=py311h5eee18b_0
-  - blas=1.0=mkl
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - intel-openmp=2023.1.0=hdb19cb5_46305
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - mkl=2023.1.0=h213fc3f_46343
-  - mkl-service=2.4.0=py311h5eee18b_1
-  - mkl_fft=1.3.8=py311h5eee18b_0
-  - mkl_random=1.2.4=py311hdb19cb5_0
-  - ncurses=6.4=h6a678d5_0
-  - numpy=1.26.0=py311h08b1b3b_0
-  - numpy-base=1.26.0=py311hf175353_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tbb=2021.8.0=hdb19cb5_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - zlib=1.2.13=h5eee18b_0
-prefix: /home/cristobal/anaconda3/envs/biopython
diff --git a/variantcalling/config/config.yaml b/variantcalling/config/config.yaml
deleted file mode 100755
index 6dd9d5b..0000000
--- a/variantcalling/config/config.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-
-reference: 
-
-threads: 4
-
-output_dir: ./results
-
-pggb:
-  p: 99
-  s: 5k
-
diff --git a/variantcalling/config/samples.txt b/variantcalling/config/samples.txt
deleted file mode 100755
index a1bce25..0000000
--- a/variantcalling/config/samples.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-PB000150
-PB000152
-PB000153
-PB000155 
-PB000177
-PB000178
-PB000180
-PB000182
-PB000183
-PB000184
-PB000186
-PB000187
-PB000189
-PB000190
-PB000191
-PB000192
-PB000194
\ No newline at end of file
diff --git a/variantcalling/workflow/Snakefile b/variantcalling/workflow/Snakefile
deleted file mode 100755
index d344417..0000000
--- a/variantcalling/workflow/Snakefile
+++ /dev/null
@@ -1,71 +0,0 @@
-
-configfile: "config/config.yml"
-
-import pandas as pd
-
-samples = pd.read_table(config["samples"], header=None )
-
-
-rule all:
-    input:
-        config["outdir"] + "/variants.vcf"
-
-rule combine_assemblies:
-    input: list(samples[1])
-    output: 
-        fasta = config["outdir"] + "/single_contig_assemblies.fasta",
-        discarded = config["outdir"] + "/discarded_assemblies.txt"
-    params:
-        outdir = config["outdir"]
-    conda: "../config/biopython.yaml"
-    shell:
-        """
-        python workflow/scripts/combine_assemblies.py {params.outdir} {input} 
-
-        """
-    
-
-rule pggb:
-    input: config["outdir"] + "/single_contig_assemblies.fasta"
-    output: config["outdir"] + "/graph.smooth.final.gfa"
-    threads: config["threads"]
-    params:
-        nr_strains = len(samples),
-        outdir = config["outdir"],
-        outgraph = config["outdir"] + "/*smooth.final.gfa"
-    singularity: "container/pggb_latest.sif"
-
-    shell:
-        """
-        bgzip {input}
-        samtools faidx {input}.gz
-
-        pggb -i {input}.gz \
-        -o {params.outdir} \
-        -t {threads} \
-        -n {params.nr_strains} \
-        -p 99 \
-        -s 5k
-
-        cp {params.outgraph} {output}
-
-    """
-
-rule call_variants:
-    input: config["outdir"] + "/graph.smooth.final.gfa"
-    output: config["outdir"] + "/variants.vcf"
-    threads: config["threads"]
-    params:
-        reference = config["reference"]
-    singularity: "container/pggb_latest.sif"
-
-    shell:
-        """
-        vg deconstruct {input} -d1 -e \
-          -p {params.reference} \
-          -t {threads} \
-          > {output}
-
-        """
-
-
diff --git a/variantcalling/workflow/scripts/combine_assemblies.py b/variantcalling/workflow/scripts/combine_assemblies.py
deleted file mode 100755
index d9b6ae1..0000000
--- a/variantcalling/workflow/scripts/combine_assemblies.py
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import re
-import sys
-from Bio import SeqIO
-
-
-outdir = sys.argv[1]
-assemblies = sys.argv[2:]
-
-fasta_handle = open(outdir + "/single_contig_assemblies.fasta", "w")
-discarded = open(outdir + "/discarded_assemblies.txt", "w")
-
-for ASSEMBLY in assemblies:
-                    
-    records = [rec for rec in SeqIO.parse(ASSEMBLY, "fasta")]
-    if len(records) > 1:
-        discarded.write(ASSEMBLY + '\n')
-
-    else:
-        rec = records[0]
-        SeqIO.write(rec, fasta_handle, "fasta")
-
-fasta_handle.close()
-discarded.close() 
\ No newline at end of file
diff --git a/assembly/workflow/Snakefile b/workflow/Snakefile
similarity index 100%
rename from assembly/workflow/Snakefile
rename to workflow/Snakefile
diff --git a/assembly/workflow/rules/annotate.smk b/workflow/rules/annotate.smk
similarity index 100%
rename from assembly/workflow/rules/annotate.smk
rename to workflow/rules/annotate.smk
diff --git a/assembly/workflow/rules/assemble.smk b/workflow/rules/assemble.smk
similarity index 100%
rename from assembly/workflow/rules/assemble.smk
rename to workflow/rules/assemble.smk
diff --git a/assembly/workflow/rules/circularize.smk b/workflow/rules/circularize.smk
similarity index 100%
rename from assembly/workflow/rules/circularize.smk
rename to workflow/rules/circularize.smk
diff --git a/assembly/workflow/rules/common.smk b/workflow/rules/common.smk
similarity index 100%
rename from assembly/workflow/rules/common.smk
rename to workflow/rules/common.smk
diff --git a/assembly/workflow/rules/mapreads.smk b/workflow/rules/mapreads.smk
similarity index 100%
rename from assembly/workflow/rules/mapreads.smk
rename to workflow/rules/mapreads.smk
diff --git a/assembly/workflow/rules/readQC.smk b/workflow/rules/readQC.smk
similarity index 100%
rename from assembly/workflow/rules/readQC.smk
rename to workflow/rules/readQC.smk
diff --git a/assembly/workflow/rules/summarize.smk b/workflow/rules/summarize.smk
similarity index 100%
rename from assembly/workflow/rules/summarize.smk
rename to workflow/rules/summarize.smk
-- 
GitLab