From fe336e5b7cb7d33d48bf695b63fadc7245dde257 Mon Sep 17 00:00:00 2001
From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch>
Date: Mon, 19 Nov 2018 14:23:26 +0100
Subject: [PATCH] Addition of script that plots the creates a read length
 histogram based on the mapped reads in the process data pipeline.

---
 snakemake/process_data/Snakefile              |  25 +++-
 .../process_data/scripts/plot_read_lengths.py | 110 ++++++++++++++++++
 2 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100755 snakemake/process_data/scripts/plot_read_lengths.py

diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile
index 3136843..1e96622 100644
--- a/snakemake/process_data/Snakefile
+++ b/snakemake/process_data/Snakefile
@@ -1,7 +1,7 @@
 configfile: "config.yaml"
 #from snakemake.utils import listfiles
 
-localrules: create_output_and_log_directories, remove_multimappers, finish
+localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, finish
 
 #################################################################################
 ### Finish rule
@@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, remove_multimappers, finish
 
 rule finish:
 	input:
-		sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"])
+		sam = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"])
 
 #################################################################################
 ### Create output and log directories
@@ -225,3 +225,24 @@ rule remove_multimappers:
 	threads:	1
 	shell:
 		"(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}"
+
+################################################################################
+### Read length histogram
+################################################################################
+
+rule read_length_histogram:
+	input:
+		sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
+	output:
+		read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf")
+	params:
+		dir = os.path.join(config["output_dir"], "{sample}/read_length")
+	log:
+		os.path.join(config["local_log"], "read_length_histogram_{sample}.log")
+	threads:	1
+	singularity:
+		"docker://zavolab/rcrunch_python:1.0"
+	shell:
+		"(python scripts/plot_read_lengths.py \
+		--sam {input.sam} \
+		--outdir {params.dir}) & > {log}"
diff --git a/snakemake/process_data/scripts/plot_read_lengths.py b/snakemake/process_data/scripts/plot_read_lengths.py
new file mode 100755
index 0000000..9bed2e1
--- /dev/null
+++ b/snakemake/process_data/scripts/plot_read_lengths.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+# _____________________________________________________________________________
+# -----------------------------------------------------------------------------
+# import needed (external) modules
+# -----------------------------------------------------------------------------
+
+from argparse import ArgumentParser, RawTextHelpFormatter
+import sys
+import os
+import matplotlib as mpl
+mpl.use('Agg')
+import matplotlib.pyplot as plt
+
+# _____________________________________________________________________________
+# -----------------------------------------------------------------------------
+# Main function
+# -----------------------------------------------------------------------------
+
+def main():
+    """ Main function """
+
+    __doc__ = "Plot length of reads"
+    __version__ = "0.1"
+
+    parser = ArgumentParser(
+        description=__doc__,
+        formatter_class=RawTextHelpFormatter
+    )
+
+    parser.add_argument(
+        "--sam",
+        dest="sam",
+        help="Input sam file",
+        required=True,
+        metavar="FILE"
+    )
+
+    parser.add_argument(
+        "--outdir",
+        dest="outdir",
+        help="Output directory",
+        required=True,
+        metavar="FILE"
+    )
+
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        dest="verbose",
+        default=False,
+        required=False,
+        help="Be verbose"
+    )
+
+    parser.add_argument(
+        '--version',
+        action='version',
+        version=__version__
+    )
+
+    # _________________________________________________________________________
+    # -------------------------------------------------------------------------
+    # get the arguments
+    # -------------------------------------------------------------------------
+    try:
+        options = parser.parse_args()
+    except Exception:
+        parser.print_help()
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    if options.verbose:
+        sys.stdout.write("Creating output directory: {} {}".format(options.outdir, os.linesep))
+
+    if not os.path.exists(options.outdir):
+        os.mkdir(options.outdir)
+
+    if options.verbose:
+        sys.stdout.write("Parsing {} {}".format(options.sam, os.linesep))
+
+    lengths = []
+    with open(options.sam) as sam:
+        for line in sam:
+            if line[0] != "@":
+                lengths.append(len(line.split("\t")[9]))
+
+    fig = plt.figure()
+    plt.hist(lengths, bins=30)
+    plt.xlabel('Length')
+    plt.ylabel('Number of cases')
+    # plt.axis([0, 20000, None, None])
+    plt.savefig(os.path.join(options.outdir, "read_length_histogram.pdf"))
+
+
+
+# _____________________________________________________________________________
+# -----------------------------------------------------------------------------
+# Call the Main function and catch Keyboard interrups
+# -----------------------------------------------------------------------------
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        sys.stderr.write("User interrupt!" + os.linesep)
+        sys.exit(0)
-- 
GitLab