Newer
Older
BIOPZ-Gypas Foivos
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# import needed (external) modules
# -----------------------------------------------------------------------------
from argparse import ArgumentParser, RawTextHelpFormatter
import sys
import os
from Bio import SeqIO
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Main function
# -----------------------------------------------------------------------------
def main():
""" Main function """
__doc__ = "Find overepresented sequences from a SAM file."
__version__ = "0.1"
parser = ArgumentParser(
description=__doc__,
formatter_class=RawTextHelpFormatter
)
parser.add_argument(
"--fastq",
dest="fastq",
help="Input fastq file",
required=True,
metavar="FILE"
)
parser.add_argument(
"--oligos",
dest="oligos",
help="Input oligos file",
required=True,
metavar="FILE"
)
parser.add_argument(
"--out",
dest="out",
help="Output file",
required=True,
metavar="FILE"
)
parser.add_argument(
"--verbose",
action="store_true",
dest="verbose",
default=False,
required=False,
help="Be verbose"
)
parser.add_argument(
'--version',
action='version',
version=__version__
)
# _________________________________________________________________________
# -------------------------------------------------------------------------
# get the arguments
# -------------------------------------------------------------------------
try:
options = parser.parse_args()
except Exception:
parser.print_help()
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
if options.verbose:
sys.stdout.write("arsing {} {}".format(options.oligos, os.linesep))
oligos = {}
with open(options.oligos) as fp:
for line in fp:
oligo = line.strip()
if oligo not in oligos:
oligos[oligo] = 0
if options.verbose:
sys.stdout.write("Parsing {} {}".format(options.fastq, os.linesep))
for line in SeqIO.parse(options.fastq, "fastq"):
sequence = str(line.seq)
for oligo, number_of_oligos in oligos.items():
if oligo in sequence:
oligos[oligo] += 1
if options.verbose:
sys.stdout.write("Writing {} {}".format(options.out, os.linesep))
fh = open(options.out, "w")
for w in sorted(oligos, key=oligos.get, reverse=True):
fh.write(str(w) + "\t" + str(oligos[w]) + "\n")
fh.close()
# _____________________________________________________________________________
# -----------------------------------------------------------------------------
# Call the Main function and catch Keyboard interrups
# -----------------------------------------------------------------------------
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
sys.stderr.write("User interrupt!" + os.linesep)
sys.exit(0)