diff --git a/.gitignore b/.gitignore
index 5f15816652b9e312b96c65b2446663e812c5f890..a0a8984108bab4c902ecf3a73bfa0fd28693fce6 100755
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,4 @@ container/assemblySC.sif
 rulegraph.pdf
 resources/bakta_db
 facienda.md
-
+workshop/pggb_latest.sif
diff --git a/notebooks/create_pangenome_graph.ipynb b/notebooks/create_pangenome_graph.ipynb
deleted file mode 100644
index 8ec974a6a0b26207400cec6dec0ac44537da2cae..0000000000000000000000000000000000000000
--- a/notebooks/create_pangenome_graph.ipynb
+++ /dev/null
@@ -1,168 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Create a pangenome graph with pggb\n",
-    "\n",
-    "pggb requires a single gzipped and indexed fasta file containing all assemblies. "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Install biopython\n",
-    "\n",
-    "If not already present, create a conda environment with biopython. \n",
-    "\n",
-    "```\n",
-    "conda create -n bio biopython\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Then select the bio environent as the kernel for the Jupyter notebook."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Combine assemblies\n",
-    "\n",
-    "The script below creates a file 'assemblies_combined.fasta' in which all assemblies listed in 'paths_to_assemblies.txt' are combined.\n",
-    "\n",
-    "'paths_to_assemblies.txt' should be a text file with one path per line. \n",
-    "\n",
-    "If one_contig_only is set to True, only single-contig assemblies are included."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from Bio import SeqIO\n",
-    "\n",
-    "\n",
-    "assemblies = 'paths_to_assemblies.txt'\n",
-    "one_contig_only = False\n",
-    "\n",
-    "fasta_combined = []\n",
-    "\n",
-    "with open(assemblies) as f:\n",
-    "    \n",
-    "    for line in f:\n",
-    "        \n",
-    "        assembly_path = line.strip()\n",
-    "        strain = assembly_path.split('/')[-1].split('.')[0]\n",
-    "     \n",
-    "        records = [rec for rec in SeqIO.parse(assembly_path, \"fasta\")]\n",
-    "        \n",
-    "        if one_contig_only and len(records) > 1:\n",
-    "            print('Discarded multi-contig assembly for ' + strain)\n",
-    "            continue\n",
-    "        \n",
-    "        fasta_combined += records\n",
-    "\n",
-    "SeqIO.write(fasta_combined, 'assemblies_combined.fasta', 'fasta')\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Zip and index fasta"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "shellscript"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "%%bash\n",
-    "\n",
-    "gzip single_contig_assemblies.fasta\n",
-    "samtools faidx single_contig_assemblies.fasta.gz"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Run pggb\n",
-    "\n",
-    "Important paramters:\n",
-    "\n",
-    "    -i : path to combined assemblies\n",
-    "    -o : output directory name\n",
-    "    -n : Number of contigs included\n",
-    "    -p : Similarity of contigs\n",
-    "    -s : Maximum length of repeats in the genome"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Example slurm script:"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#!/bin/bash\n",
-    "\n",
-    "#SBATCH --cpus-per-task=20\n",
-    "#SBATCH --mem-per-cpu=4G\n",
-    "#SBATCH --time=24:00:00\n",
-    "#SBATCH --qos=1day\n",
-    "#SBATCH --output=stdout.%j\n",
-    "#SBATCH --error=stderr.%j\n",
-    "\n",
-    "singularity exec /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/pggb_latest.sif pggb \\\n",
-    "  -i assemblies_combined.fasta.gz \\\n",
-    "  -o ./pggb \\\n",
-    "  -m \\\n",
-    "  -t 20 \\\n",
-    "  -n 52 \\\n",
-    "  -p 99 \\\n",
-    "  -s 5k\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/workshop/A_create_graph.ipynb b/workshop/A_create_graph.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8d02ef63a5ee23d9a1069e20948989baf307e01c
--- /dev/null
+++ b/workshop/A_create_graph.ipynb
@@ -0,0 +1,1184 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create a pangenome graph with PGGB\n",
+    "\n",
+    "PGGB requires as input a single fasta file that contains the genome assemblies. \n",
+    "\n",
+    "The file **data/MTBC.L1.assemblies.fasta** contains all the contigs resulting from the assembly of the 19 L1 strains, while **data/assembly_summaries.tsv** contains assembly metadata.\n",
+    "\n",
+    "\n",
+    "Starting from these two files, we do the following:\n",
+    "  - load the metadata, select complete genomes\n",
+    "  - write all complete genomes to a fasta\n",
+    "\n",
+    "❓ What do we mean by 'complete'?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Inspect the Flye assembly summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sample</th>\n",
+       "      <th>#seq_name</th>\n",
+       "      <th>length</th>\n",
+       "      <th>cov.</th>\n",
+       "      <th>circ.</th>\n",
+       "      <th>repeat</th>\n",
+       "      <th>mult.</th>\n",
+       "      <th>alt_group</th>\n",
+       "      <th>graph_path</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>PB000195</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4444823</td>\n",
+       "      <td>114</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_10</td>\n",
+       "      <td>4421359</td>\n",
+       "      <td>133</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>10,41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_47</td>\n",
+       "      <td>13589</td>\n",
+       "      <td>32</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>1</td>\n",
+       "      <td>48</td>\n",
+       "      <td>*,47,*</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>10270</td>\n",
+       "      <td>45</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>*,1,*</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_42</td>\n",
+       "      <td>1662</td>\n",
+       "      <td>52</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>-41,42</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     sample  #seq_name   length  cov. circ. repeat  mult. alt_group graph_path\n",
+       "0  PB000195   contig_1  4444823   114     Y      N      1         *          1\n",
+       "1  PB000196  contig_10  4421359   133     N      N      1         *      10,41\n",
+       "2  PB000196  contig_47    13589    32     N      Y      1        48     *,47,*\n",
+       "3  PB000196   contig_1    10270    45     N      Y      1         6      *,1,*\n",
+       "4  PB000196  contig_42     1662    52     N      N      1         *     -41,42"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas\n",
+    "\n",
+    "md = pandas.read_table('data/assembly_summaries.tsv')  # tab-separated assembly summary table\n",
+    "md.head()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "How many sequences per strain?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "sample\n",
+       "PB000205    49\n",
+       "PB000202     7\n",
+       "PB000196     4\n",
+       "PB000220     2\n",
+       "PB000198     2\n",
+       "PB000207     1\n",
+       "PB000219     1\n",
+       "PB000211     1\n",
+       "PB000210     1\n",
+       "PB000209     1\n",
+       "PB000208     1\n",
+       "PB000195     1\n",
+       "PB000206     1\n",
+       "PB000203     1\n",
+       "PB000201     1\n",
+       "PB000200     1\n",
+       "PB000199     1\n",
+       "PB000197     1\n",
+       "PB000204     1\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "md.loc[:, 'sample'].value_counts()  # number of assembled sequences per sample"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Which sequences are circularized?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sample</th>\n",
+       "      <th>#seq_name</th>\n",
+       "      <th>length</th>\n",
+       "      <th>cov.</th>\n",
+       "      <th>circ.</th>\n",
+       "      <th>repeat</th>\n",
+       "      <th>mult.</th>\n",
+       "      <th>alt_group</th>\n",
+       "      <th>graph_path</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>PB000195</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4444823</td>\n",
+       "      <td>114</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>PB000197</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4410932</td>\n",
+       "      <td>129</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>PB000199</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4426248</td>\n",
+       "      <td>112</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>PB000200</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4428581</td>\n",
+       "      <td>84</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>PB000201</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4408479</td>\n",
+       "      <td>96</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>PB000203</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4434008</td>\n",
+       "      <td>47</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>PB000204</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4419776</td>\n",
+       "      <td>74</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>PB000205</td>\n",
+       "      <td>contig_50</td>\n",
+       "      <td>4420883</td>\n",
+       "      <td>37</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2</td>\n",
+       "      <td>*</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>69</th>\n",
+       "      <td>PB000206</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4440794</td>\n",
+       "      <td>119</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>PB000207</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4424460</td>\n",
+       "      <td>85</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>PB000208</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4419922</td>\n",
+       "      <td>119</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>PB000209</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4443791</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>PB000210</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4444571</td>\n",
+       "      <td>115</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>PB000211</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4441994</td>\n",
+       "      <td>120</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75</th>\n",
+       "      <td>PB000219</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4410932</td>\n",
+       "      <td>101</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      sample  #seq_name   length  cov. circ. repeat  mult. alt_group  \\\n",
+       "0   PB000195   contig_1  4444823   114     Y      N      1         *   \n",
+       "5   PB000197   contig_1  4410932   129     Y      N      1         *   \n",
+       "8   PB000199   contig_1  4426248   112     Y      N      1         *   \n",
+       "9   PB000200   contig_1  4428581    84     Y      N      1         *   \n",
+       "10  PB000201   contig_1  4408479    96     Y      N      1         *   \n",
+       "18  PB000203   contig_1  4434008    47     Y      N      1         *   \n",
+       "19  PB000204   contig_1  4419776    74     Y      N      1         *   \n",
+       "20  PB000205  contig_50  4420883    37     Y      Y      2         *   \n",
+       "69  PB000206   contig_1  4440794   119     Y      N      1         *   \n",
+       "70  PB000207   contig_1  4424460    85     Y      N      1         *   \n",
+       "71  PB000208   contig_1  4419922   119     Y      N      1         *   \n",
+       "72  PB000209   contig_1  4443791    80     Y      N      1         *   \n",
+       "73  PB000210   contig_1  4444571   115     Y      N      1         *   \n",
+       "74  PB000211   contig_1  4441994   120     Y      N      1         *   \n",
+       "75  PB000219   contig_1  4410932   101     Y      N      1         *   \n",
+       "\n",
+       "   graph_path  \n",
+       "0           1  \n",
+       "5           1  \n",
+       "8           1  \n",
+       "9           1  \n",
+       "10          1  \n",
+       "18          1  \n",
+       "19          1  \n",
+       "20         50  \n",
+       "69          1  \n",
+       "70          1  \n",
+       "71          1  \n",
+       "72          1  \n",
+       "73          1  \n",
+       "74          1  \n",
+       "75          1  "
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "md.loc[md['circ.'].eq('Y')]  # rows whose 'circ.' column is 'Y'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Sequences larger than 4 Mb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sample</th>\n",
+       "      <th>#seq_name</th>\n",
+       "      <th>length</th>\n",
+       "      <th>cov.</th>\n",
+       "      <th>circ.</th>\n",
+       "      <th>repeat</th>\n",
+       "      <th>mult.</th>\n",
+       "      <th>alt_group</th>\n",
+       "      <th>graph_path</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>PB000195</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4444823</td>\n",
+       "      <td>114</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_10</td>\n",
+       "      <td>4421359</td>\n",
+       "      <td>133</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>10,41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>PB000197</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4410932</td>\n",
+       "      <td>129</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>PB000198</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4332805</td>\n",
+       "      <td>86</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>2,1,2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>PB000199</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4426248</td>\n",
+       "      <td>112</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>PB000200</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4428581</td>\n",
+       "      <td>84</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>PB000201</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4408479</td>\n",
+       "      <td>96</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>PB000203</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4434008</td>\n",
+       "      <td>47</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>PB000204</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4419776</td>\n",
+       "      <td>74</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>PB000205</td>\n",
+       "      <td>contig_50</td>\n",
+       "      <td>4420883</td>\n",
+       "      <td>37</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2</td>\n",
+       "      <td>*</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>69</th>\n",
+       "      <td>PB000206</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4440794</td>\n",
+       "      <td>119</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>PB000207</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4424460</td>\n",
+       "      <td>85</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>PB000208</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4419922</td>\n",
+       "      <td>119</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>PB000209</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4443791</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>PB000210</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4444571</td>\n",
+       "      <td>115</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>PB000211</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4441994</td>\n",
+       "      <td>120</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75</th>\n",
+       "      <td>PB000219</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4410932</td>\n",
+       "      <td>101</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>76</th>\n",
+       "      <td>PB000220</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>4319282</td>\n",
+       "      <td>88</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>2,1,2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      sample  #seq_name   length  cov. circ. repeat  mult. alt_group  \\\n",
+       "0   PB000195   contig_1  4444823   114     Y      N      1         *   \n",
+       "1   PB000196  contig_10  4421359   133     N      N      1         *   \n",
+       "5   PB000197   contig_1  4410932   129     Y      N      1         *   \n",
+       "6   PB000198   contig_1  4332805    86     N      N      1         *   \n",
+       "8   PB000199   contig_1  4426248   112     Y      N      1         *   \n",
+       "9   PB000200   contig_1  4428581    84     Y      N      1         *   \n",
+       "10  PB000201   contig_1  4408479    96     Y      N      1         *   \n",
+       "18  PB000203   contig_1  4434008    47     Y      N      1         *   \n",
+       "19  PB000204   contig_1  4419776    74     Y      N      1         *   \n",
+       "20  PB000205  contig_50  4420883    37     Y      Y      2         *   \n",
+       "69  PB000206   contig_1  4440794   119     Y      N      1         *   \n",
+       "70  PB000207   contig_1  4424460    85     Y      N      1         *   \n",
+       "71  PB000208   contig_1  4419922   119     Y      N      1         *   \n",
+       "72  PB000209   contig_1  4443791    80     Y      N      1         *   \n",
+       "73  PB000210   contig_1  4444571   115     Y      N      1         *   \n",
+       "74  PB000211   contig_1  4441994   120     Y      N      1         *   \n",
+       "75  PB000219   contig_1  4410932   101     Y      N      1         *   \n",
+       "76  PB000220   contig_1  4319282    88     N      N      1         *   \n",
+       "\n",
+       "   graph_path  \n",
+       "0           1  \n",
+       "1       10,41  \n",
+       "5           1  \n",
+       "6       2,1,2  \n",
+       "8           1  \n",
+       "9           1  \n",
+       "10          1  \n",
+       "18          1  \n",
+       "19          1  \n",
+       "20         50  \n",
+       "69          1  \n",
+       "70          1  \n",
+       "71          1  \n",
+       "72          1  \n",
+       "73          1  \n",
+       "74          1  \n",
+       "75          1  \n",
+       "76      2,1,2  "
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "md.loc[md['length'].gt(4e6)]  # rows with length above 4,000,000"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For which samples do we lack a circular genome?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'PB000202', 'PB000220', 'PB000198', 'PB000196'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "samples_circ = md.loc[md['circ.'] == 'Y', 'sample']  # samples with at least one circularized sequence\n",
+    "samples_noncirc = md.loc[~md['sample'].isin(samples_circ), 'sample']  # ~ negates the mask; one entry per sequence, deduplicated by set() below\n",
+    "\n",
+    "print(set(samples_noncirc))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "❓ What is the matter with these genomes? \n",
+    "\n",
+    "❓ Should we include them in the graph?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sample</th>\n",
+       "      <th>#seq_name</th>\n",
+       "      <th>length</th>\n",
+       "      <th>cov.</th>\n",
+       "      <th>circ.</th>\n",
+       "      <th>repeat</th>\n",
+       "      <th>mult.</th>\n",
+       "      <th>alt_group</th>\n",
+       "      <th>graph_path</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_10</td>\n",
+       "      <td>4421359</td>\n",
+       "      <td>133</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>10,41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_47</td>\n",
+       "      <td>13589</td>\n",
+       "      <td>32</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>1</td>\n",
+       "      <td>48</td>\n",
+       "      <td>*,47,*</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_1</td>\n",
+       "      <td>10270</td>\n",
+       "      <td>45</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>*,1,*</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>PB000196</td>\n",
+       "      <td>contig_42</td>\n",
+       "      <td>1662</td>\n",
+       "      <td>52</td>\n",
+       "      <td>N</td>\n",
+       "      <td>N</td>\n",
+       "      <td>1</td>\n",
+       "      <td>*</td>\n",
+       "      <td>-41,42</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     sample  #seq_name   length  cov. circ. repeat  mult. alt_group graph_path\n",
+       "1  PB000196  contig_10  4421359   133     N      N      1         *      10,41\n",
+       "2  PB000196  contig_47    13589    32     N      Y      1        48     *,47,*\n",
+       "3  PB000196   contig_1    10270    45     N      Y      1         6      *,1,*\n",
+       "4  PB000196  contig_42     1662    52     N      N      1         *     -41,42"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "md.loc[md['sample'] == 'PB000196']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Combine complete genomes\n",
+    "\n",
+    "Let us be liberal and just use all sequences > 4 Mb. We can add a tag to the name of the 'unclean' ones.\n",
+    "\n",
+    "First a quick look at the fasta headers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">PB000195_1\n",
+      ">PB000196_1\n",
+      ">PB000196_2\n",
+      ">PB000196_3\n",
+      ">PB000197_1\n",
+      ">PB000198_1\n",
+      ">PB000198_2\n",
+      ">PB000199_1\n",
+      ">PB000200_1\n",
+      ">PB000201_1\n",
+      ">PB000202_1\n",
+      ">PB000202_2\n",
+      ">PB000202_3\n",
+      ">PB000202_4\n",
+      ">PB000202_5\n",
+      ">PB000202_6\n",
+      ">PB000202_7\n",
+      ">PB000203_1\n",
+      ">PB000204_1\n",
+      ">PB000205_1\n",
+      ">PB000205_2\n",
+      ">PB000205_3\n",
+      ">PB000205_4\n",
+      ">PB000205_5\n",
+      ">PB000205_6\n",
+      ">PB000205_7\n",
+      ">PB000205_8\n",
+      ">PB000205_9\n",
+      ">PB000205_10\n",
+      ">PB000205_11\n",
+      ">PB000205_12\n",
+      ">PB000205_13\n",
+      ">PB000205_14\n",
+      ">PB000205_15\n",
+      ">PB000205_16\n",
+      ">PB000205_17\n",
+      ">PB000205_18\n",
+      ">PB000205_19\n",
+      ">PB000205_20\n",
+      ">PB000205_21\n",
+      ">PB000205_22\n",
+      ">PB000205_23\n",
+      ">PB000205_24\n",
+      ">PB000205_25\n",
+      ">PB000205_26\n",
+      ">PB000205_27\n",
+      ">PB000205_28\n",
+      ">PB000205_29\n",
+      ">PB000205_30\n",
+      ">PB000205_31\n",
+      ">PB000205_32\n",
+      ">PB000205_33\n",
+      ">PB000205_34\n",
+      ">PB000205_35\n",
+      ">PB000205_36\n",
+      ">PB000205_37\n",
+      ">PB000205_38\n",
+      ">PB000205_39\n",
+      ">PB000205_40\n",
+      ">PB000205_41\n",
+      ">PB000205_42\n",
+      ">PB000205_43\n",
+      ">PB000205_44\n",
+      ">PB000205_45\n",
+      ">PB000205_46\n",
+      ">PB000205_47\n",
+      ">PB000205_48\n",
+      ">PB000205_49\n",
+      ">PB000206_1\n",
+      ">PB000207_1\n",
+      ">PB000208_1\n",
+      ">PB000209_1\n",
+      ">PB000210_1\n",
+      ">PB000211_1\n",
+      ">PB000219_1\n",
+      ">PB000220_1\n",
+      ">PB000220_2\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%bash\n",
+    "# List all fasta headers\n",
+    "grep '^>' data/MTBC.L1.assemblies.fasta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from Bio import SeqIO\n",
+    "\n",
+    "assemblies_all = 'data/MTBC.L1.assemblies.fasta'\n",
+    "assemblies_out = 'results/assemblies_4mb.fasta'\n",
+    "\n",
+    "# All sequences > 4Mb, indexed by sample (one row per sample expected)\n",
+    "md_4mb = md[md['length']>4e6]\n",
+    "md_4mb_by_sample = md_4mb.set_index('sample')\n",
+    "\n",
+    "# The with block closes the file, so every record is flushed to disk\n",
+    "with open(assemblies_out, 'w') as assemblies_pggb:\n",
+    "    for rec in SeqIO.parse(assemblies_all, \"fasta\"):\n",
+    "        if len(rec.seq) < 4e6:\n",
+    "            continue\n",
+    "\n",
+    "        # Fasta headers are <sample>_<n>; tag the 'unclean' assemblies\n",
+    "        strain = rec.id.split('_')[0]\n",
+    "        new_id = strain\n",
+    "        if md_4mb_by_sample.at[strain, 'circ.'] == 'N':\n",
+    "            new_id += '#circNo'\n",
+    "        if md_4mb_by_sample.at[strain, 'repeat'] == 'Y':\n",
+    "            new_id += '#repeatY'\n",
+    "        rec.id = new_id\n",
+    "        rec.description = ''\n",
+    "        SeqIO.write(rec, assemblies_pggb, 'fasta')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Run PGGB!\n",
+    "\n",
+    "\n",
+    "\n",
+    "Important parameters:\n",
+    "\n",
+    "    -i : path to combined assemblies\n",
+    "    -o : output directory name\n",
+    "    -n : Number of contigs included\n",
+    "    -p : Similarity of contigs\n",
+    "    -s : Maximum length of repeats in the genome"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create slurm script"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "cat > run_pggb.sh << 'EOF'  # quoted EOF: the job script is written verbatim\n",
+    "#!/bin/bash\n",
+    "\n",
+    "#SBATCH --cpus-per-task=20\n",
+    "#SBATCH --mem-per-cpu=1G\n",
+    "#SBATCH --time=06:00:00\n",
+    "#SBATCH --qos=6hours\n",
+    "#SBATCH --output=stdout.%j\n",
+    "#SBATCH --error=stderr.%j\n",
+    "\n",
+    "singularity exec /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/pggb_latest.sif pggb \\\n",
+    "  -i assemblies_combined.fasta.gz \\\n",
+    "  -o ./pggb \\\n",
+    "  -m \\\n",
+    "  -t 20 \\\n",
+    "  -n 52 \\\n",
+    "  -p 99 \\\n",
+    "  -s 5k\n",
+    "EOF\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Variant calling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "# Unfinished draft: REF is still empty and vg deconstruct gets no arguments\n",
+    "REF=\"\"\n",
+    "\n",
+    "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/pggb_latest.sif vg deconstruct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Visualization"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/workshop/B_call_variants.ipynb b/workshop/B_call_variants.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0476c68dc1acb47f4485a306959a85fc1c081c54
--- /dev/null
+++ b/workshop/B_call_variants.ipynb
@@ -0,0 +1,57 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "# Unfinished draft: REF is still empty and vg deconstruct gets no arguments\n",
+    "REF=\"\"\n",
+    "\n",
+    "/scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/pggb_latest.sif vg deconstruct"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "# Container and graph paths; set PGGB_SIF to use a different container image\n",
+    "SIF=\"${PGGB_SIF:-/home/cristobal/programs/pggb_latest.sif}\"\n",
+    "GFA=pggb/assemblies_combined.fasta.gz.d71a954.11fba48.a4da63a.smooth.final.gfa\n",
+    "\n",
+    "singularity exec \"$SIF\" vg deconstruct \"$GFA\" -d1 -e -p H37Rv -t 4 --all-snarls > variants.vcf\n",
+    "\n",
+    "# Count the VCF records (lines that do not start with '#')\n",
+    "grep -v -c '^#' variants.vcf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "https://github.com/pangenome/resolve-nested-genotypes"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/tree_from_graph.ipynb b/workshop/C_core_genome_alignment.ipynb
similarity index 100%
rename from notebooks/tree_from_graph.ipynb
rename to workshop/C_core_genome_alignment.ipynb
diff --git a/workshop/D_graph_annotation.ipynb b/workshop/D_graph_annotation.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/workshop/E_gene_conversion.ipynb b/workshop/E_gene_conversion.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/workshop/README.md b/workshop/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c96d3aa6369f17750d7525064a19ff653769de40
--- /dev/null
+++ b/workshop/README.md
@@ -0,0 +1,71 @@
+# Workshop: Long-read genome assembly and pangenome graph analysis
+Swiss TPH, 5.11.2024
+
+The aim of this workshop is to get familiar with the pangenome graph framework.
+
+It consists of a series of Jupyter notebooks in which we progress from creating a pangenome graph to calling variants, building a core genome alignment, and exploring structural variants and gene conversion.
+
+The notebooks cover the following topics:
+
+
+*  [Set up](#set-up)
+*  [The data](#the-data)
+*  [A) Create a graph and visualize it](#a-create-a-graph--visualize-it)
+*  [B) Call variants from a graph](#b-call-variants-from-a-graph)
+*  [C) Core genome alignment](#c-core-genome-alignment)
+*  [D) Graph annotation & liftovers](#d-graph-annotation--liftovers)
+*  [E) SV categorization](#e-categorize-svs-into-insertions-and-deletions)
+*  [F) Identify gene conversion events](#f-identify-gene-conversion-events)
+
+
+## Set up
+This workshop is based on Jupyter notebooks, from which we run Python and bash code. You can run the notebooks from the sciCORE open-on-demand platform (http://ood-ubuntu.scicore.unibas.ch/) or from an editor like Visual Studio Code. 
+
+We are going to use [PGGB](https://pggb.readthedocs.io/en/latest/) for creating and analyzing pangenome graphs, a tool set developed by the human pangenome consortium. See [Yang et al. 2023](https://doi.org/10.3389/fgene.2023.1225248) for an evaluation of PGGB with bacterial genomes. In addition, a few Python packages need to be installed, as explained below. Please try to do at least steps 1 and 2 below **before** the workshop; step 2 in particular might take some time.
+
+
+Step 1: clone the repository from gitlab.
+```
+git clone https://git.scicore.unibas.ch/TBRU/PacbioSnake
+```
+
+
+Step 2: Pull the PGGB container (2Gb, might take a while...)
+```
+cd PacbioSnake
+singularity pull docker://ghcr.io/pangenome/pggb:latest
+```
+
+
+Step 3: Install python packages (pandas and biopython). Ideally we create a new conda environment, which can then be selected as the kernel for running the notebooks.
+```
+conda create -n genomegraphs pandas biopython
+```
+
+
+## The data
+The data we will explore in this workshop are 19 genomes that have been assembled from PacBio HiFi reads by Venus for her PhD project. All should be *Mycobacterium tuberculosis* strains belonging to lineage 1, and we expect them to be fairly diverse because they have been selected from across the diverse L1 clade. These genomes have neither been published nor thoroughly analyzed, so genuine discoveries are possible during the workshop!
+
+
+## A. Create a graph & visualize it
+Given a set of assemblies and some assembly metadata, we select assemblies, rename them, and write them to a single fasta file. This is the only mandatory input for PGGB. We also explore the effect of two key parameters of PGGB, the minimum pairwise identity between seeds (-p) and the seed length (-s). 
+
+
+## B. Call variants from a graph
+In this part, we obtain variants in classic vcf format from the graph, using an arbitrary reference assembly. A summary is created of the indels and SVs, and the complication of nested SVs is explored. 
+
+
+## C. Core genome alignment
+To make sense of genetic variation we need a phylogenetic tree. Here we traverse the graph and extract SNPs in nodes that are shared by all assemblies (i.e. the core genome). These SNPs are used to create an alignment and to estimate a tree. 
+
+
+## D. Graph annotation & liftovers
+In this part we explore variants in genes and regions of interest, making use of the lift-over functionalities of PGGB. These allow us to translate positions in one genome to any other genome in the graph. 
+
+
+## E. Categorize SVs into insertions and deletions
+To tell whether a sequence missing in genome A reflects a deletion in an ancestor of A or an insertion elsewhere requires information about this sequence in more than two strains. Here we assess the frequency of the structural variant and its presence/absence in an outgroup strain in order to distinguish insertions from deletions.  
+
+
+## F. Identify gene conversion events
+Gene conversion occurs through recombination between close paralogs and can result in heavily mutated genes. Gene conversion in the MTBC affects the functionally interesting PE/PPE gene families. Furthermore, treating SNPs introduced through gene conversion as point mutations can bias various downstream analyses. Here we identify gene conversion events, which give themselves away as variant hotspots in single strains. 
diff --git a/workshop/data/MTBC.L1.assemblies.fasta b/workshop/data/MTBC.L1.assemblies.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..baeb7faa075416a09d948c0c3394716193722ba0
Binary files /dev/null and b/workshop/data/MTBC.L1.assemblies.fasta differ
diff --git a/workshop/data/assembly_summaries.tsv b/workshop/data/assembly_summaries.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..5469b0ac9275f8d65720eacbb4537f4933c14735
--- /dev/null
+++ b/workshop/data/assembly_summaries.tsv
@@ -0,0 +1,79 @@
+sample	#seq_name	length	cov.	circ.	repeat	mult.	alt_group	graph_path
+PB000195	contig_1	4444823	114	Y	N	1	*	1
+PB000196	contig_10	4421359	133	N	N	1	*	10,41
+PB000196	contig_47	13589	32	N	Y	1	48	*,47,*
+PB000196	contig_1	10270	45	N	Y	1	6	*,1,*
+PB000196	contig_42	1662	52	N	N	1	*	-41,42
+PB000197	contig_1	4410932	129	Y	N	1	*	1
+PB000198	contig_1	4332805	86	N	N	1	*	2,1,2
+PB000198	contig_2	110357	171	N	Y	2	*	2
+PB000199	contig_1	4426248	112	Y	N	1	*	1
+PB000200	contig_1	4428581	84	Y	N	1	*	1
+PB000201	contig_1	4408479	96	Y	N	1	*	1
+PB000202	contig_1	1530994	13	N	N	1	*	*,1,*
+PB000202	contig_6	1184280	13	N	N	1	*	*,6,*
+PB000202	contig_8	1077139	14	N	N	1	*	*,8,*
+PB000202	contig_9	434170	13	N	N	1	*	*,9,*
+PB000202	contig_7	100047	13	N	N	1	*	*,7,*
+PB000202	contig_10	66133	13	N	N	1	*	*,10,*
+PB000202	contig_5	33682	14	N	N	1	*	*,5,*
+PB000203	contig_1	4434008	47	Y	N	1	*	1
+PB000204	contig_1	4419776	74	Y	N	1	*	1
+PB000205	contig_50	4420883	37	Y	Y	2	*	50
+PB000205	contig_49	216232	6	N	Y	1	*	*,49,*
+PB000205	contig_53	163344	6	N	Y	1	*	*,53,*
+PB000205	contig_51	142109	6	N	Y	1	*	*,51,*
+PB000205	contig_9	131264	6	N	Y	1	*	*,9,*
+PB000205	contig_8	120360	6	N	Y	1	*	*,8,*
+PB000205	contig_37	117735	6	N	Y	1	*	*,37,*
+PB000205	contig_2	117435	7	N	Y	1	*	*,2,*
+PB000205	contig_33	104292	6	N	Y	1	*	*,33,*
+PB000205	contig_44	102971	6	N	Y	1	*	*,44,*
+PB000205	contig_31	93943	5	N	Y	1	*	*,31,*
+PB000205	contig_6	89619	5	N	Y	1	*	*,6,*
+PB000205	contig_5	87578	6	N	Y	1	*	*,5,*
+PB000205	contig_23	85152	5	N	Y	1	*	*,23,*
+PB000205	contig_13	84548	5	N	Y	1	*	*,13,*
+PB000205	contig_19	76959	5	N	Y	1	*	*,19,*
+PB000205	contig_54	74739	5	N	Y	1	*	*,54,*
+PB000205	contig_52	72581	5	N	Y	1	*	*,52,*
+PB000205	contig_21	69776	6	N	Y	1	*	*,21,*
+PB000205	contig_20	68663	5	N	Y	1	*	*,20,*
+PB000205	contig_22	68060	6	N	Y	1	*	*,22,*
+PB000205	contig_15	67068	6	N	Y	1	*	*,15,*
+PB000205	contig_25	66397	7	N	Y	1	*	*,25,*
+PB000205	contig_12	55613	7	N	Y	1	*	*,12,*
+PB000205	contig_18	52108	5	N	Y	1	*	*,18,*
+PB000205	contig_17	51926	6	N	Y	1	*	*,17,*
+PB000205	contig_1	47711	6	N	Y	1	*	*,1,*
+PB000205	contig_48	45758	6	N	Y	1	*	*,48,*
+PB000205	contig_42	45451	5	N	Y	1	*	*,42,*
+PB000205	contig_38	45013	6	N	Y	1	*	*,38,*
+PB000205	contig_43	41645	5	N	Y	1	*	*,43,*
+PB000205	contig_34	39550	6	N	Y	1	*	*,34,*
+PB000205	contig_26	38539	7	N	Y	1	*	*,26,*
+PB000205	contig_30	37006	7	N	Y	1	*	*,30,*
+PB000205	contig_47	35474	6	N	Y	1	*	*,47,*
+PB000205	contig_28	33249	7	N	Y	1	*	*,28,*
+PB000205	contig_32	33146	5	N	Y	1	*	*,32,*
+PB000205	contig_41	32796	5	N	Y	1	*	*,41,*
+PB000205	contig_3	31924	7	N	Y	1	*	*,3,*
+PB000205	contig_40	31087	6	N	Y	1	*	*,40,*
+PB000205	contig_16	28303	6	N	Y	1	*	*,16,*
+PB000205	contig_36	26152	6	N	Y	1	*	*,36,*
+PB000205	contig_46	24509	6	N	Y	1	*	*,46,*
+PB000205	contig_45	19766	6	N	Y	1	*	*,45,*
+PB000205	contig_14	15999	5	N	Y	1	*	*,14,*
+PB000205	contig_24	15160	8	N	Y	1	*	*,24,*
+PB000205	contig_29	13880	6	N	Y	1	*	*,29,*
+PB000205	contig_39	12605	8	N	Y	1	*	*,39,*
+PB000205	contig_4	11914	6	N	Y	1	*	*,4,*
+PB000206	contig_1	4440794	119	Y	N	1	*	1
+PB000207	contig_1	4424460	85	Y	N	1	*	1
+PB000208	contig_1	4419922	119	Y	N	1	*	1
+PB000209	contig_1	4443791	80	Y	N	1	*	1
+PB000210	contig_1	4444571	115	Y	N	1	*	1
+PB000211	contig_1	4441994	120	Y	N	1	*	1
+PB000219	contig_1	4410932	101	Y	N	1	*	1
+PB000220	contig_1	4319282	88	N	N	1	*	2,1,2
+PB000220	contig_2	110354	166	N	Y	2	*	2
diff --git a/workshop/data/read_summaries.tsv b/workshop/data/read_summaries.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..7249a255173ccbd030feda0e5c7d89230479d34b
--- /dev/null
+++ b/workshop/data/read_summaries.tsv
@@ -0,0 +1,20 @@
+sample	Yield	Q7 bases	Longest_read	Num_of_reads	Mean_read_length	N50_read_length	Mean_GC_content	SD_GC_content	Estimated non-sense read fraction	Mean_coverage	SD_coverage	Estimated crude Xome size	Num_of_trimmed_reads_5	Max_identity_adp5	Average_position_from_5_end
+PB000195	496838122	99.90%	22822	96525	5147.248091168091	6095.0	0.6551989316940308	0.02350557968020439	0.0	106.6074447950661	6.515689275641003	4660444 (e = 0.0%)	NA	NA	NA
+PB000196	577922899	99.90%	22590	111741	5171.986101788958	6024.0	0.6549097299575806	0.023503180593252182	0.0	121.68457644074489	8.002560154467622	4749352 (e = 0.0%)	NA	NA	NA
+PB000197	561009752	99.89%	38252	110879	5059.657392292499	5845.0	0.6560201644897461	0.02437005750834942	0.0	124.07422861763345	6.4293378693676155	4521565 (e = 0.0%)	1	0.8478260869565217	55.0
+PB000198	383992338	99.90%	38782	79144	4851.818684928738	5651.0	0.6548930406570435	0.025800971314311028	0.0	72.55553984530178	6.5554600268589365	5292391 (e = 0.0%)	NA	NA	NA
+PB000199	493951865	99.91%	20156	106489	4638.524777207035	5299.0	0.654647707939148	0.023217422887682915	0.0	93.66784531033443	9.19962202133822	5273441 (e = 0.0%)	NA	NA	NA
+PB000200	363727687	99.90%	22639	74718	4868.006196632672	5659.0	0.6544701457023621	0.024078119546175003	0.0	78.77961877044588	5.397476074822339	4617027 (e = 0.0%)	NA	NA	NA
+PB000201	419391636	99.90%	32959	89432	4689.503041416942	5340.0	0.6566223502159119	0.0249309204518795	0.0	93.7500460347495	6.375166080751846	4473508 (e = 0.0%)	NA	NA	NA
+PB000202	57934854	99.92%	22459	16180	3580.6461063040792	3953.0	0.659533679485321	0.027512168511748314	0.0002	14.039143522362133	1.8212913989188695	4125840 (e = 0.0%)	NA	NA	NA
+PB000203	206235793	99.90%	36492	44619	4622.151841143907	5304.0	0.6563654541969299	0.025541380047798157	0.0	46.51565870214725	3.522865285320088	4433685 (e = 0.0%)	NA	NA	NA
+PB000204	323460510	99.91%	22350	75398	4290.04098251943	4905.0	0.6549028158187866	0.024565229192376137	0.0	71.49859846690619	4.607852010001684	4524011 (e = 0.0%)	NA	NA	NA
+PB000205	184355678	99.90%	36335	42724	4315.037870985862	4882.0	0.6576676368713379	0.02602369524538517	0.0028	33.33687157434451	4.595978327212487	5514599 (e = 0.3%)	NA	NA	NA
+PB000206	515555342	99.90%	22956	98268	5246.421439329181	6221.0	0.6553107500076294	0.02373949997127056	0.0	111.32085766246837	6.980499520212127	4631255 (e = 0.0%)	NA	NA	NA
+PB000207	371607027	99.90%	29108	68207	5448.224185200933	6405.0	0.6548841595649719	0.023478785529732704	0.0	79.52935113256152	5.248578702547423	4672577 (e = 0.0%)	NA	NA	NA
+PB000208	517060722	99.90%	21247	99789	5181.540269969636	6096.0	0.6554038524627686	0.023686150088906288	0.0002	113.38246882208041	6.8755649188101495	4559411 (e = 0.0%)	NA	NA	NA
+PB000209	351043474	99.90%	23071	66955	5242.976237771638	6153.0	0.6550890803337097	0.023656221106648445	0.0002	75.2347457453207	5.193755501084815	4665042 (e = 0.0%)	NA	NA	NA
+PB000210	501693130	99.89%	23118	84353	5947.543418728439	6800.0	0.6554515957832336	0.022847512736916542	0.0	105.41636152332916	6.881157735196752	4759158 (e = 0.0%)	NA	NA	NA
+PB000211	529662039	99.89%	26563	89427	5922.842530779295	7181.0	0.6550024151802063	0.0231816116720438	0.0	112.72149266444393	7.720437008678516	4698855 (e = 0.0%)	NA	NA	NA
+PB000219	436604862	99.91%	21045	89326	4887.769092985245	5561.0	0.6548299193382263	0.02320525422692299	0.0	94.74941415110527	6.01532942153573	4607995 (e = 0.0%)	NA	NA	NA
+PB000220	393141481	99.90%	20088	81970	4796.162998658046	5586.0	0.6545571684837341	0.024594135582447052	0.0002	73.24541012773749	6.599511848402256	5366382 (e = 0.0%)	NA	NA	NA
diff --git a/workshop/pics/is6110_ppe.png b/workshop/pics/is6110_ppe.png
new file mode 100755
index 0000000000000000000000000000000000000000..f5048981f7c4c4638a1cbd141cc58cf4ffd6c968
Binary files /dev/null and b/workshop/pics/is6110_ppe.png differ
diff --git a/workshop/pics/pggb-flow-diagram.png b/workshop/pics/pggb-flow-diagram.png
new file mode 100644
index 0000000000000000000000000000000000000000..38c5e59fac9c6d7aa0eddb7f129dd0bc8630e96d
Binary files /dev/null and b/workshop/pics/pggb-flow-diagram.png differ