Skip to content
Snippets Groups Projects
Commit 501332df authored by Samuel Mondal's avatar Samuel Mondal
Browse files

New function that reads fasta file one line at a time rather than loading the whole file

parent 5ab12718
No related branches found
No related tags found
1 merge request: !37 "New function that reads fasta file one line at a time rather than loading the whole file"
def exon_concatenation(
def exon_concatenation_old(
filename: str
) -> list:
"""Concatenates all sequences in fasta file with the same transcript ID header and then outputs a list containing sequence headers (Transcript ID) and sequences that have been concatenated.
......@@ -27,3 +27,33 @@ def exon_concatenation(
to_write_to_file.append(annotation)
to_write_to_file.append(read)
return to_write_to_file
def exon_concatenation(
    post_bedtools_fasta: str
) -> list:
    """Concatenate exon sequences that share the same transcript ID.

    The file is consumed two lines at a time (a header line followed by its
    sequence line), so it is never loaded into memory as a whole.
    Consecutive records whose headers start with the same 16 characters are
    merged into a single sequence, in file order.

    Args:
        post_bedtools_fasta: The name of the fasta file obtained after
            bedtools has been run. Each record is expected to occupy
            exactly two lines: one header line and one sequence line.

    Returns:
        A list with transcript IDs (the first 16 characters of the header
        line) in even indices and the corresponding concatenated exons in
        odd indices. An empty file yields an empty list.
    """
    # Number of leading header characters used as the grouping key.
    # NOTE(review): presumably ">" plus a 15-character transcript ID -- confirm.
    id_length = 16

    fasta_format_list = []
    current_id = None  # ID of the transcript currently being accumulated
    exons = []         # sequence pieces collected for current_id

    with open(post_bedtools_fasta, 'r') as fa:
        # zip(fa, fa) draws two consecutive lines from the same iterator on
        # every step; a trailing header without a sequence line is ignored.
        for header_line, sequence_line in zip(fa, fa):
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            transcript_id = header_line.rstrip("\n")[:id_length]
            sequence = sequence_line.rstrip("\n")

            if transcript_id != current_id:
                # A new transcript starts: flush the previous one, if any.
                if current_id is not None:
                    fasta_format_list.append(current_id)
                    fasta_format_list.append("".join(exons))
                current_id = transcript_id
                exons = []
            exons.append(sequence)

    # Flush the last transcript; skipped when the file held no records.
    if current_id is not None:
        fasta_format_list.append(current_id)
        fasta_format_list.append("".join(exons))
    return fasta_format_list
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment