Skip to content
Snippets Groups Projects

New function that reads fasta file one line at a time rather than loading the whole file

Merged Samuel Mondal requested to merge new into master
1 file
+ 31
1
Compare changes
  • Side-by-side
  • Inline
def exon_concatenation(
def exon_concatenation_old(
filename: str
filename: str
) -> list:
) -> list:
"""Concatenates all sequences in fasta file with the same transcript ID header and then outputs a list containing sequence headers (Transcript ID) and sequences that have been concatenated.
"""Concatenates all sequences in fasta file with the same transcript ID header and then outputs a list containing sequence headers (Transcript ID) and sequences that have been concatenated.
@@ -27,3 +27,33 @@ def exon_concatenation(
@@ -27,3 +27,33 @@ def exon_concatenation(
to_write_to_file.append(annotation)
to_write_to_file.append(annotation)
to_write_to_file.append(read)
to_write_to_file.append(read)
return to_write_to_file
return to_write_to_file
 
 
def exon_concatenation(
    post_bedtools_fasta: str
) -> list:
    """Concatenate consecutive sequences that share a transcript ID.

    The file is streamed two lines at a time (one header line, one sequence
    line) instead of being loaded whole. Only adjacent records are merged:
    if the same transcript ID shows up again after a different one, it
    starts a new entry in the output.

    Args:
        post_bedtools_fasta: The name of the fasta file obtained after
            bedtools has been run. Every record must occupy exactly two
            lines: a header line followed by a single sequence line. A
            trailing unpaired line is ignored.

    Returns:
        A list with transcript ID in even indices and corresponding
        concatenated exons in odd indices. The transcript ID is the first
        16 characters of the header line. An empty file yields an empty
        list.
    """
    # Number of leading header characters that identify a transcript
    # (presumably ">" plus a 15-character transcript ID -- confirm against
    # the actual bedtools output).
    id_width = 16

    fasta_format_list = []
    current_id = None  # ID of the record being accumulated, None before the first one
    exon_parts = []    # sequence pieces collected for current_id

    with open(post_bedtools_fasta, 'r') as fa:
        # zip(fa, fa) draws two successive lines from the same file iterator
        # on every step, i.e. (header, sequence) pairs.
        for header_line, sequence_line in zip(fa, fa):
            transcript_id = header_line[:id_width]

            if transcript_id != current_id:
                # A new transcript starts: flush the finished one first.
                if current_id is not None:
                    fasta_format_list.append(current_id)
                    fasta_format_list.append("".join(exon_parts))
                current_id = transcript_id
                exon_parts = []

            # Strip only the line terminator so that a final line without a
            # trailing newline does not lose its last base.
            exon_parts.append(sequence_line.rstrip("\n"))

    # Flush the last transcript; skipped when no record was read at all.
    if current_id is not None:
        fasta_format_list.append(current_id)
        fasta_format_list.append("".join(exon_parts))

    return fasta_format_list
Loading