Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
A-seq2-processing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_public
A-seq2-processing
Commits
90c6ef59
Commit
90c6ef59
authored
6 years ago
by
BIOPZ-Schmidt Ralf
Browse files
Options
Downloads
Patches
Plain Diff
bug fix of merging samples after background filtering; bug fix in rs-bam2bed.py
parent
1342c0d7
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
Snakefile
+3
-30
3 additions, 30 deletions
Snakefile
scripts/merge-sample-bg-files.py
+39
-0
39 additions, 0 deletions
scripts/merge-sample-bg-files.py
scripts/rs-bam2bed.py
+4
-1
4 additions, 1 deletion
scripts/rs-bam2bed.py
with
46 additions
and
31 deletions
Snakefile
+
3
−
30
View file @
90c6ef59
...
@@ -788,42 +788,15 @@ rule create_noBG_3pSites_table:
...
@@ -788,42 +788,15 @@ rule create_noBG_3pSites_table:
filtered = expand( config["results_dir"] + "/filteredSites/{sample}.filtered.tsv", sample=config['samples']),
filtered = expand( config["results_dir"] + "/filteredSites/{sample}.filtered.tsv", sample=config['samples']),
raw_table = config["results_dir"] + "/3pSites.PAS.tsv.gz"
raw_table = config["results_dir"] + "/3pSites.PAS.tsv.gz"
output:
output:
temp(config["results_dir"] + "/3pSites.PAS.filtered.tsv.gz")
table_adjusted =
temp(config["results_dir"] + "/3pSites.PAS.filtered.tsv.gz")
params:
params:
cluster_log = config["log_dir"] + "/cluster_logs/create_noBG_3pSites_table.log"
cluster_log = config["log_dir"] + "/cluster_logs/create_noBG_3pSites_table.log"
resources:
resources:
mem = 10
mem = 10
log:
log:
config["log_dir"] + "/create_noBG_3pSites_table.log"
config["log_dir"] + "/create_noBG_3pSites_table.log"
run:
script:
import gzip
"scripts/merge-sample-bg-files.py"
sites = []
reads = {}
header_lines = []
with gzip.open(input.raw_table, "rt") as infile:
for line in infile:
if line.startswith("#"):
header_lines.append(line)
continue
F = line.rstrip().split("\t")
site_id = ":".join(F[0:3])
sites.append(site_id)
reads[site_id] = [F[0], F[1], F[2], F[-2], F[-1]]
for in_f in input.filtered:
with open(in_f, "r") as ifile:
for line in ifile:
line_list = line.rstrip().split("\t")
curr_id = ":".join(line_list[0:3])
reads[curr_id].insert(-2, line_list[3])
with gzip.open(output[0], "wt") as out_file:
for h in header_lines:
out_file.write("%s" % h)
for s in sites:
out_file.write("%s\n" % "\t".join( reads[s] ) )
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# delete 3' end sites without cutoff-corrected read support from any sample
# delete 3' end sites without cutoff-corrected read support from any sample
...
...
This diff is collapsed.
Click to expand it.
scripts/merge-sample-bg-files.py
0 → 100644
+
39
−
0
View file @
90c6ef59
"""Merge per-sample background-filtered read counts into the 3' end sites table.

Snakemake script (used by rule ``create_noBG_3pSites_table``): reads the raw,
gzipped 3pSites table (``snakemake.input.raw_table``), replaces each sample's
count column with the cutoff-corrected counts from the per-sample filtered
files (``snakemake.input.filtered``), and writes the merged table, gzipped,
to ``snakemake.output.table_adjusted``.
"""
import gzip

# Output-row column index for each sample's count, keyed by sample name.
# (Renamed from ``map`` to avoid shadowing the builtin.)
col_by_sample = {}
# Site ids in input order, so the output preserves the original row order.
sites = []
# Per-site output row: coords, one count slot per sample, two trailing columns.
reads = {}
header_lines = []

# Pass 1: read the original 3pSites table; collect the sample-to-column
# mapping from the header and initialise one output row per site.
with gzip.open(snakemake.input.raw_table, "rt") as infile:
    for line in infile:
        if line.startswith("#"):
            # Header lines look like "#<col>;<path/to/sample.ext...>":
            # record which row index belongs to which sample.
            header_fields = line.rstrip().split(";")
            col = int(header_fields[0].lstrip("#"))
            # Sample name = file basename up to the first dot.
            sample = header_fields[1].split("/")[-1].split(".")[0]
            col_by_sample[sample] = col
            header_lines.append(line)
            continue
        F = line.rstrip().split("\t")
        site_id = ":".join(F[0:3])
        sites.append(site_id)
        # For each site, store a list with one placeholder slot per sample
        # (filled in pass 2) to accommodate all samples, framed by the site
        # coordinates and the two trailing annotation columns.
        reads[site_id] = [F[0], F[1], F[2]] + [None] * len(col_by_sample) + [F[-2], F[-1]]

# Pass 2: fill each sample's cutoff-corrected count into its column.
for in_f in snakemake.input.filtered:
    # Sample id from the filename; must match a name seen in the header above.
    sample = in_f.rstrip().split("/")[-1].split(".")[0]
    with open(in_f, "r") as ifile:
        for line in ifile:
            line_list = line.rstrip().split("\t")
            curr_id = ":".join(line_list[0:3])
            reads[curr_id][col_by_sample[sample]] = line_list[3]

# Write headers unchanged, then one tab-separated row per site in input order.
# NOTE(review): a site absent from some sample's filtered file would leave a
# None slot and make "\t".join raise TypeError — this assumes every filtered
# file covers every site; verify that upstream rules guarantee this.
with gzip.open(snakemake.output.table_adjusted, "wt") as out_file:
    for h in header_lines:
        out_file.write("%s" % h)
    for s in sites:
        out_file.write("%s\n" % "\t".join(reads[s]))
This diff is collapsed.
Click to expand it.
scripts/rs-bam2bed.py
+
4
−
1
View file @
90c6ef59
...
@@ -163,6 +163,9 @@ def generate_bed_line(sam):
...
@@ -163,6 +163,9 @@ def generate_bed_line(sam):
number
=
int
(
exploded_MID
[
i
*
2
])
number
=
int
(
exploded_MID
[
i
*
2
])
type
=
exploded_MID
[
i
*
2
+
1
]
type
=
exploded_MID
[
i
*
2
+
1
]
# no need to process type "N" at this point
# because currently the genomic seq anyways
# only consists of "?"
if
type
==
"
M
"
:
if
type
==
"
M
"
:
# read and reference were aligned
# read and reference were aligned
for
_
in
itertools
.
repeat
(
None
,
number
):
for
_
in
itertools
.
repeat
(
None
,
number
):
...
@@ -196,7 +199,7 @@ def generate_bed_line(sam):
...
@@ -196,7 +199,7 @@ def generate_bed_line(sam):
if
c
==
''
:
if
c
==
''
:
continue
# Skip c and this iteration when this happens
continue
# Skip c and this iteration when this happens
# Skip the next G[pointer] that is not a
minus strand
# Skip the next G[pointer] that is not a
dash
if
c
.
isdigit
():
if
c
.
isdigit
():
# perfect match here
# perfect match here
value
=
int
(
c
)
value
=
int
(
c
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment