diff --git a/notebooks/CapeTown_Genomics_Tutorial_partII.ipynb b/notebooks/CapeTown_Genomics_Tutorial_partII.ipynb index c67c16f1b414831c2e5fe5ad4be54845e399a713..c7e25353a370357cb5f71a48c799ceda5e75dc06 100644 --- a/notebooks/CapeTown_Genomics_Tutorial_partII.ipynb +++ b/notebooks/CapeTown_Genomics_Tutorial_partII.ipynb @@ -253,7 +253,7 @@ }, "outputs": [], "source": [ - "! head ERR760779_paired.sam" + "! head $HOME/ERR760779_paired.sam" ] }, { @@ -271,7 +271,7 @@ }, "outputs": [], "source": [ - "! du -h ERR760779_paired.sam" + "! du -h $HOME/ERR760779_paired.sam" ] }, { @@ -282,7 +282,7 @@ }, "outputs": [], "source": [ - "! du -h ERR760779_1U.sam" + "! du -h $HOME/ERR760779_1U.sam" ] }, { @@ -293,7 +293,7 @@ }, "outputs": [], "source": [ - "! du -h ERR760779_2U.sam" + "! du -h $HOME/ERR760779_2U.sam" ] }, { @@ -351,7 +351,7 @@ }, "outputs": [], "source": [ - "! du -h ERR760779_paired.bam" + "! du -h $HOME/ERR760779_paired.bam" ] }, { @@ -462,7 +462,7 @@ }, "outputs": [], "source": [ - "! cat ERR760779.markduplicates.metrics" + "! cat $HOME/ERR760779.markduplicates.metrics" ] }, { @@ -512,7 +512,47 @@ "it is common to realign reads around small indels. 
Since, differences in resolving the indels may\n", "cause artificial SNPs in the downstream analysis.\n", "The\n", - "GATK software for instance offers the possibility to realign" + "GATK software for instance offers the possibility to realign reads around indels.\n", + "\n", + "\n", + "\n", + "Please run in the command line:\n", + " - sbatch ~/Workshop_SA/notebooks/slurm_scripts/launch_GATK.slurm\n", + "\n", + "\n", + "Then you have to index the newly produced BAM:\n", + " - sbatch ~/Workshop_SA/notebooks/slurm_scripts/launch_index2.slurm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#!/bin/bash \r\n", + "\r\n", + "#SBATCH --job-name=GATK\r\n", + "#SBATCH --cpus-per-task=1\r\n", + "#SBATCH --mem-per-cpu=4G\r\n", + "#SBATCH --time=6:00:00\r\n", + "#SBATCH --output=GATK.o\r\n", + "#SBATCH --error=GATK.e\r\n", + "\r\n", + "singularity exec container.img gatk-launch -T RealignerTargetCreator -nt 1 -R ~/Workshop_SA/notebooks/reference_genome/MTB_ancestor_reference.fasta -o ERR760779.intervals -I ERR760779.dedup.bam\r\n", + "\r\n", + "\r\n", + "singularity exec container.img gatk-launch --disable_bam_indexing -T IndelRealigner R ~/Workshop_SA/notebooks/reference_genome/MTB_ancestor_reference.fasta -targetIntervals ERR760779.intervals -I ERR760779.dedup.bam -o ERR760779.dedup.realigned.bam\r\n" + ] + } + ], + "source": [ + "! 
cat ~/Workshop_SA/notebooks/slurm_scripts/launch_GATK.slurm" ] }, { diff --git a/notebooks/CapeTown_Genomics_Tutorial_partIII.ipynb b/notebooks/CapeTown_Genomics_Tutorial_partIII.ipynb index 1bdd61f9db32a09774663509ffaa8c918e1a6137..0915b67c27576e6d8ff6d913efcf6f1a5fb66f3a 100644 --- a/notebooks/CapeTown_Genomics_Tutorial_partIII.ipynb +++ b/notebooks/CapeTown_Genomics_Tutorial_partIII.ipynb @@ -413,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -507,9 +507,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { - "collapsed": true + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/scicore/home/gagneux/loiseau\n" + ] + } + ], + "source": [ + "%cd loiseau/" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false }, "outputs": [], "source": [ @@ -537,14 +556,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { - "collapsed": true + "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['761110', 'A', 'T', 'Rv0667', 'D435V', 'RIF', 'Walker_resistant-resistant']\n", + "['4247431', 'G', 'C', 'Rv3795', 'M306I', 'EMB', 'Walker_resistant-resistant']\n", + "['4249583', 'G', 'A', 'Rv3795', 'D1024N', 'EMB', 'Boettger_DST']\n", + "['761100', 'C', 'A', 'Rv0667', 'Q432K', 'RIF', 'Walker_resistant-resistant']\n", + "['2155168', 'C', 'G', 'Rv1908c', 'S315T', 'INH', 'Walker_resistant-resistant']\n", + "['761155', 'C', 'T', 'Rv0667', 'S450L', 'RIF', 'Walker_resistant-resistant']\n", + "['761140', 'A', 'G', 'Rv0667', 'H445R', 'RIF', 'Walker_resistant-resistant']\n", + "['7581', 'G', 'A', 'Rv0006', 'D94N', 'FQ', 'Walker_resistant-resistant']\n", + "['7582', 'A', 'G', 'Rv0006', 'D94G', 'FQ', 'Walker_resistant-resistant']\n", + 
"['4247429', 'A', 'G', 'Rv3795', 'M306V', 'EMB', 'Walker_resistant-resistant']\n", + "['7570', 'C', 'T', 'Rv0006', 'A90V', 'FQ', 'Walker_resistant-resistant']\n", + "['1473246', 'A', 'G', 'rrs', 'A1401G', 'AK;CAP;KAN', 'Walker_resistant-resistant']\n", + "['1673432', 'T', 'C', 'Rv1483', 'T-8C', 'INH;ETH', 'Walker_resistant-resistant;Boettger_DST']\n", + "['761139', 'C', 'T', 'Rv0667', 'H445Y', 'RIF', 'Walker_resistant-resistant']\n", + "['1673425', 'C', 'T', 'Rv1483', 'C-15T', 'INH;ETH', 'Walker_resistant-resistant;Boettger_DST']\n", + "['4247730', 'G', 'C', 'Rv3795', 'G406A', 'EMB', 'Walker_resistant-resistant']\n" + ] + } + ], "source": [ "with open('all_DRM_in_Eldholm.txt','w') as DRM_Eldholm_output_file:\n", - " DRM_Eldholm_output_file.write('position\\tref\\talt\\t'+'\\t'.join(list_of_sample_names)+'\\n')\n", + " DRM_Eldholm_output_file.write('position\\tref\\talt\\tlocus\\tmutation\\t'+'\\t'.join(list_of_sample_names)+'\\n')\n", " for items in VCF_merged_dico.items():\n", " genomic_position = items[0]\n", " ann = items[1]\n", @@ -561,11 +603,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { - "collapsed": true + "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('D435V', 4, 0)\n", + "('M306I', 1, 0)\n", + "('D1024N', 1, 0)\n", + "('Q432K', 1, 0)\n", + "('S315T', 248, 0)\n", + "('S450L', 234, 1)\n", + "('H445R', 1, 0)\n", + "('D94N', 0, 1)\n", + "('D94G', 3, 0)\n", + "('M306V', 1, 0)\n", + "('A90V', 3, 0)\n", + "('A1401G', 230, 0)\n", + "('T-8C', 1, 0)\n", + "('H445Y', 3, 0)\n", + "('C-15T', 1, 0)\n", + "('G406A', 234, 0)\n" + ] + } + ], "source": [ "\n", "with open('all_DRM_in_Eldholm.txt','r') as ifile:\n", @@ -578,6 +643,15 @@ "\n", " " ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { @@ -585,18 +659,6 @@ "display_name": "Python 2", 
"language": "python", "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" } }, "nbformat": 4, diff --git a/notebooks/slurm_scripts/launch_bamqc.slurm b/notebooks/slurm_scripts/launch_bamqc.slurm index 434c173131b3401021f29a7f3ed72fff557628f8..42919a9555bcbb285326e96afd14f1849eb777a5 100644 --- a/notebooks/slurm_scripts/launch_bamqc.slurm +++ b/notebooks/slurm_scripts/launch_bamqc.slurm @@ -4,4 +4,4 @@ #SBATCH --cpus-per-task=1 #SBATCH --mem-per-cpu=4G -singularity exec /home/container.img qualimap bamqc -bam ERR760779.dedup.bam -sd -sdmode 1 -outdir . -outfile ERR760779_bamqc +singularity exec /home/container.img qualimap bamqc -bam ERR760779.dedup.realigned.bam -sd -sdmode 1 -outdir . -outfile ERR760779_bamqc diff --git a/notebooks/slurm_scripts/launch_mpileup.slurm b/notebooks/slurm_scripts/launch_mpileup.slurm index d737accc19399e483851c7fdad071fd3d8323e6a..aca1de106c3dfa65a4beb45dfcc0f0a65d481aa8 100644 --- a/notebooks/slurm_scripts/launch_mpileup.slurm +++ b/notebooks/slurm_scripts/launch_mpileup.slurm @@ -4,5 +4,5 @@ #SBATCH --cpus-per-task=1 #SBATCH --mem-per-cpu=4G -singularity exec /home/container.img samtools mpileup -ABQ0 -q 20 -f ~/Workshop_SA/notebooks/reference_genome/MTB_ancestor_reference.fasta ERR760779.dedup.bam > ERR760779.pileup +singularity exec /home/container.img samtools mpileup -ABQ0 -q 20 -f ~/Workshop_SA/notebooks/reference_genome/MTB_ancestor_reference.fasta ERR760779.dedup.realigned.bam > ERR760779.pileup