From fe336e5b7cb7d33d48bf695b63fadc7245dde257 Mon Sep 17 00:00:00 2001 From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch> Date: Mon, 19 Nov 2018 14:23:26 +0100 Subject: [PATCH] Addition of script that plots the creates a read length histogram based on the mapped reads in the process data pipeline. --- snakemake/process_data/Snakefile | 25 +++- .../process_data/scripts/plot_read_lengths.py | 110 ++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) create mode 100755 snakemake/process_data/scripts/plot_read_lengths.py diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile index 3136843..1e96622 100644 --- a/snakemake/process_data/Snakefile +++ b/snakemake/process_data/Snakefile @@ -1,7 +1,7 @@ configfile: "config.yaml" #from snakemake.utils import listfiles -localrules: create_output_and_log_directories, remove_multimappers, finish +localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, finish ################################################################################# ### Finish rule @@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, remove_multimappers, finish rule finish: input: - sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"]) + sam = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]) ################################################################################# ### Create output and log directories @@ -225,3 +225,24 @@ rule remove_multimappers: threads: 1 shell: "(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}" + +################################################################################ +### Read length histogram +################################################################################ + +rule read_length_histogram: + input: + sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam") + output: + read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf") + params: + dir = os.path.join(config["output_dir"], "{sample}/read_length") + log: + os.path.join(config["local_log"], "read_length_histogram_{sample}.log") + threads: 1 + singularity: + "docker://zavolab/rcrunch_python:1.0" + shell: + "(python scripts/plot_read_lengths.py \ + --sam {input.sam} \ + --outdir {params.dir}) & > {log}" diff --git a/snakemake/process_data/scripts/plot_read_lengths.py b/snakemake/process_data/scripts/plot_read_lengths.py new file mode 100755 index 0000000..9bed2e1 --- /dev/null +++ b/snakemake/process_data/scripts/plot_read_lengths.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python + +# _____________________________________________________________________________ +# ----------------------------------------------------------------------------- +# import needed (external) modules +# ----------------------------------------------------------------------------- + +from argparse import ArgumentParser, RawTextHelpFormatter +import sys +import os +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt + +# _____________________________________________________________________________ +# ----------------------------------------------------------------------------- +# Main function +# ----------------------------------------------------------------------------- + +def main(): + """ Main function """ + + __doc__ = "Plot length of reads" + __version__ = "0.1" + + parser = ArgumentParser( + description=__doc__, + formatter_class=RawTextHelpFormatter + ) + + parser.add_argument( + "--sam", + dest="sam", + help="Input sam file", + required=True, + metavar="FILE" + ) + + parser.add_argument( + "--outdir", + dest="outdir", + help="Output directory", + required=True, + metavar="FILE" + ) + + parser.add_argument( + "--verbose", + action="store_true", + dest="verbose", + default=False, + required=False, + help="Be verbose" + ) + + parser.add_argument( + '--version', + action='version', + version=__version__ + ) + + # _________________________________________________________________________ + # ------------------------------------------------------------------------- + # get the arguments + # ------------------------------------------------------------------------- + try: + options = parser.parse_args() + except Exception: + parser.print_help() + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + if options.verbose: + sys.stdout.write("Creating output directory: {} {}".format(options.outdir, os.linesep)) + + if not os.path.exists(options.outdir): + os.mkdir(options.outdir) + + if options.verbose: + sys.stdout.write("Parsing {} {}".format(options.sam, os.linesep)) + + lengths = [] + with open(options.sam) as sam: + for line in sam: + if line[0] != "@": + lengths.append(len(line.split("\t")[9])) + + fig = plt.figure() + plt.hist(lengths, bins=30) + plt.xlabel('Length') + plt.ylabel('Number of cases') + # plt.axis([0, 20000, None, None]) + plt.savefig(os.path.join(options.outdir, "read_length_histogram.pdf")) + + + +# _____________________________________________________________________________ +# ----------------------------------------------------------------------------- +# Call the Main function and catch Keyboard interrups +# ----------------------------------------------------------------------------- + + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + sys.stderr.write("User interrupt!" + os.linesep) + sys.exit(0) -- GitLab