Skip to content
Snippets Groups Projects
Commit fe336e5b authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Addition of script that plots the creates a read length histogram based on the...

Addition of script that plots the creates a read length histogram based on the mapped reads in the process data pipeline.
parent d19f7b46
Branches
Tags
No related merge requests found
configfile: "config.yaml"
#from snakemake.utils import listfiles
localrules: create_output_and_log_directories, remove_multimappers, finish
localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, finish
#################################################################################
### Finish rule
......@@ -9,7 +9,7 @@ localrules: create_output_and_log_directories, remove_multimappers, finish
rule finish:
input:
sam = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam"), sample=config["sample"])
sam = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"])
#################################################################################
### Create output and log directories
......@@ -225,3 +225,24 @@ rule remove_multimappers:
threads: 1
shell:
"(grep -P \"^@|\tNH:i:1\t\" {input.sam} > {output.sam}) &> {log}"
################################################################################
### Read length histogram
################################################################################
rule read_length_histogram:
input:
sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
output:
read_length_plot = os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf")
params:
dir = os.path.join(config["output_dir"], "{sample}/read_length")
log:
os.path.join(config["local_log"], "read_length_histogram_{sample}.log")
threads: 1
singularity:
"docker://zavolab/rcrunch_python:1.0"
shell:
"(python scripts/plot_read_lengths.py \
--sam {input.sam} \
--outdir {params.dir}) & > {log}"
#!/usr/bin/env python
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# import needed (external) modules
# -----------------------------------------------------------------------------
from argparse import ArgumentParser, RawTextHelpFormatter
import sys
import os
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Main function
# -----------------------------------------------------------------------------
def main():
""" Main function """
__doc__ = "Plot length of reads"
__version__ = "0.1"
parser = ArgumentParser(
description=__doc__,
formatter_class=RawTextHelpFormatter
)
parser.add_argument(
"--sam",
dest="sam",
help="Input sam file",
required=True,
metavar="FILE"
)
parser.add_argument(
"--outdir",
dest="outdir",
help="Output directory",
required=True,
metavar="FILE"
)
parser.add_argument(
"--verbose",
action="store_true",
dest="verbose",
default=False,
required=False,
help="Be verbose"
)
parser.add_argument(
'--version',
action='version',
version=__version__
)
# _________________________________________________________________________
# -------------------------------------------------------------------------
# get the arguments
# -------------------------------------------------------------------------
try:
options = parser.parse_args()
except Exception:
parser.print_help()
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
if options.verbose:
sys.stdout.write("Creating output directory: {} {}".format(options.outdir, os.linesep))
if not os.path.exists(options.outdir):
os.mkdir(options.outdir)
if options.verbose:
sys.stdout.write("Parsing {} {}".format(options.sam, os.linesep))
lengths = []
with open(options.sam) as sam:
for line in sam:
if line[0] != "@":
lengths.append(len(line.split("\t")[9]))
fig = plt.figure()
plt.hist(lengths, bins=30)
plt.xlabel('Length')
plt.ylabel('Number of cases')
# plt.axis([0, 20000, None, None])
plt.savefig(os.path.join(options.outdir, "read_length_histogram.pdf"))
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Call the Main function and catch Keyboard interrups
# -----------------------------------------------------------------------------
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
sys.stderr.write("User interrupt!" + os.linesep)
sys.exit(0)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment