Commit 3f110ae0 authored by BIOPZ-Gypas Foivos

Addition of script that filters reads based on P-site offsets and read length and creates the A-site coverage.
parent fc115178
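The new filtering script consumes the alignment_offset.json written by the determine_p_site_offset rule: a JSON object mapping read length to P-site offset. A hypothetical example of its shape (values are illustrative, not taken from this commit):

{"28": 12, "29": 12, "30": 13}

For every forward-strand read whose length appears in that mapping, the script shifts the alignment start by offset + 3 nt (one codon downstream of the P-site) and collapses the record to the 3-nt A-site codon, producing the A-site profile BAM that the new bam_sort_and_index rule then sorts and indexes.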
configfile: "config.yaml"
#from snakemake.utils import listfiles
localrules: create_output_and_log_directories, remove_multimappers, read_length_histogram, count_reads, determine_p_site_offset, filter_reads_based_on_read_lengths_and_offsets, bam_sort_and_index, finish

################################################################################
### Finish rule
################################################################################
rule finish:
    input:
        p_site_offsets = expand(os.path.join(config["output_dir"], "{sample}/p_site_offsets/alignment_offset.json"), sample=config["sample"]),
        pdf = expand(os.path.join(config["output_dir"], "{sample}/read_length/read_length_histogram.pdf"), sample=config["sample"]),
        counts = expand(os.path.join(config["output_dir"], "{sample}/counts.tsv"), sample=config["sample"]),
        bai = expand(os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai"), sample=config["sample"])

################################################################################
### Create output and log directories
################################################################################
rule create_output_and_log_directories:
    output:
@@ -32,9 +31,9 @@ rule create_output_and_log_directories:
        mkdir -p {output.sample_dir}; \
        touch {output.flag};"

################################################################################
### Clipping reads
################################################################################
rule clip_reads:
    input:
@@ -63,9 +62,9 @@ rule clip_reads:
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

################################################################################
### Trimming reads
################################################################################
rule trim_reads:
    input:
@@ -93,9 +92,9 @@ rule trim_reads:
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

################################################################################
### Filtering reads
################################################################################
rule filter_reads:
    input:
@@ -123,9 +122,9 @@ rule filter_reads:
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

################################################################################
### Convert fastq to fasta
################################################################################
rule fastq_to_fasta:
    input:
@@ -149,9 +148,9 @@ rule fastq_to_fasta:
        -i <(zcat {input.reads}) \
        -o {output.reads}) &> {log}"

################################################################################
### Map reads to other genes (rRNA, tRNA, etc...)
################################################################################
rule map_to_other_genes:
    input:
@@ -181,9 +180,9 @@ rule map_to_other_genes:
        -o {output.sam} \
        -u {output.reads} ) &> {log}"

################################################################################
### Map reads to transcripts
################################################################################
rule map_to_transcripts:
    input:
@@ -322,5 +321,49 @@ rule determine_p_site_offset:
        --outdir {params.outdir}) &> {log}"

################################################################################
### Filter reads based on selected read lengths and offsets
### (Create A-site profile)
################################################################################
rule filter_reads_based_on_read_lengths_and_offsets:
    input:
        bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
        p_site_offsets = os.path.join(
            config["output_dir"],
            "{sample}/p_site_offsets/alignment_offset.json"
        )
    output:
        bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam")
    params:
        cluster_log = os.path.join(config["cluster_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
    log:
        os.path.join(config["local_log"], "filter_reads_based_on_read_lengths_and_offsets_{sample}.log")
    threads: 1
    singularity:
        "docker://fgypas/python_pysam:3.6.5_0.15.1"
    shell:
        "(python scripts/filter_reads_based_on_read_lengths_and_offsets.py \
        --bam {input.bam} \
        --p_site_offsets {input.p_site_offsets} \
        --bam_out {output.bam}) &> {log}"
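
For reference, the same call can be issued outside the workflow; the sample paths below are illustrative placeholders following the workflow's output naming, not part of this commit:

python scripts/filter_reads_based_on_read_lengths_and_offsets.py \
    --bam results/sample1/transcripts.mapped.unique.sorted.bam \
    --p_site_offsets results/sample1/p_site_offsets/alignment_offset.json \
    --bam_out results/sample1/transcripts.mapped.unique.a_site_profile.bam

The input BAM must be coordinate-sorted and indexed, since the script iterates over it with pysam's fetch().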

################################################################################
### Sort and index A-site profile BAM
################################################################################
rule bam_sort_and_index:
    input:
        bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.bam")
    output:
        bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam"),
        bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.a_site_profile.sorted.bam.bai")
    params:
        cluster_log = os.path.join(config["cluster_log"], "bam_sort_and_index_{sample}.log")
    log:
        os.path.join(config["local_log"], "bam_sort_and_index_{sample}.log")
    threads: 1
    singularity:
        "docker://zavolab/samtools:1.8"
    shell:
        "(samtools sort {input.bam} > {output.bam}; \
        samtools index {output.bam}) &> {log}"

scripts/determine_p_site_offset.py
@@ -195,7 +195,8 @@ def main():
    sys.stdout.write(str(alignment_offset) + os.linesep)
    with open(os.path.join(options.outdir, "alignment_offset.json"), 'w') as file:
        json.dump(alignment_offset, file)
# _____________________________________________________________________________
# -----------------------------------------------------------------------------

scripts/filter_reads_based_on_read_lengths_and_offsets.py (new file)
#!/usr/bin/env python
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# import needed (external) modules
# -----------------------------------------------------------------------------
from argparse import ArgumentParser, RawTextHelpFormatter
import itertools
import sys
import os
import pysam
import json
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Main function
# -----------------------------------------------------------------------------
def main():
    """ Main function """

    __doc__ = "Filter reads based on read lengths and P-site offsets (create A-site profile)"
    __version__ = "0.1"
    parser = ArgumentParser(
        description=__doc__,
        formatter_class=RawTextHelpFormatter
    )

    parser.add_argument(
        "--bam",
        dest="bam",
        help="Input bam file (sorted and indexed)",
        required=True,
        metavar="FILE"
    )

    parser.add_argument(
        "--p_site_offsets",
        dest="p_site_offsets",
        help="P-site offsets (dictionary of read length and offset)",
        required=True,
        metavar="FILE"
    )

    parser.add_argument(
        "--bam_out",
        dest="bam_out",
        help="Output bam file",
        required=True,
        metavar="FILE"
    )

    parser.add_argument(
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        required=False,
        help="Be verbose"
    )

    parser.add_argument(
        '--version',
        action='version',
        version=__version__
    )
    # _________________________________________________________________________
    # -------------------------------------------------------------------------
    # get the arguments
    # -------------------------------------------------------------------------
    try:
        options = parser.parse_args()
    except Exception:
        parser.print_help()
        sys.exit(1)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    if options.verbose:
        sys.stdout.write("Parsing {} {}".format(options.p_site_offsets, os.linesep))

    with open(options.p_site_offsets, 'r') as fp:
        p_site_offsets = json.load(fp)

    # cast dict to contain keys and values as ints
    p_site_offsets = {int(k): int(v) for k, v in p_site_offsets.items()}

    if options.verbose:
        sys.stdout.write("and parsing {} {}".format(options.bam, os.linesep))

    # Open alignment files
    bam = pysam.AlignmentFile(options.bam, "rb")
    bam_out = pysam.AlignmentFile(options.bam_out, "wb", header=bam.header)
    for read in bam.fetch():
        # keep only + strand
        if read.is_reverse:
            continue
        read_len = len(str(read.seq))
        if read_len in p_site_offsets:
            # the A-site codon starts 3 nt (one codon) downstream of the P-site offset
            a_site_start = p_site_offsets[read_len] + 3
            seq = read.seq
            read.reference_start += a_site_start
            # collapse the alignment to the 3-nt A-site codon (3M)
            read.cigar = [(0, 3)]
            read.seq = seq[a_site_start:a_site_start + 3]
            bam_out.write(read)

    # Close alignment files
    bam_out.close()
    bam.close()

# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Call the Main function and catch Keyboard interrupts
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("User interrupt!" + os.linesep)
        sys.exit(0)
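
The commit message mentions A-site coverage; below is a minimal sketch of how per-position coverage could be tallied from the sorted, indexed profile BAM with pysam. The script name and input path are assumptions based on the workflow's outputs, not part of this commit:

#!/usr/bin/env python
# Illustrative sketch: tally A-site codon starts per transcript position.
import sys
from collections import Counter

import pysam


def a_site_coverage(bam_path):
    """Return {transcript: Counter({position: count})} of A-site start positions."""
    coverage = {}
    with pysam.AlignmentFile(bam_path, "rb") as bam:
        for read in bam.fetch():
            counts = coverage.setdefault(read.reference_name, Counter())
            # each record was already collapsed to its 3-nt A-site codon,
            # so reference_start is the A-site position on the transcript
            counts[read.reference_start] += 1
    return coverage


if __name__ == '__main__':
    # e.g. transcripts.mapped.unique.a_site_profile.sorted.bam
    for transcript, counts in a_site_coverage(sys.argv[1]).items():
        for pos in sorted(counts):
            sys.stdout.write("{}\t{}\t{}\n".format(transcript, pos, counts[pos]))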