Skip to content
Snippets Groups Projects
Commit 88665b17 authored by Timon Baltisberger's avatar Timon Baltisberger
Browse files

fix: flake8 fixes

parent 968f449c
No related branches found
No related tags found
1 merge request: !15 add: generate transcript structure
Pipeline #13641 passed
"""Creates differently spliced transcripts."""
import random
import csv
import copy
class BuildTranscriptStructure:
"""Creates differently spliced transcripts.
Args:
......@@ -34,7 +35,6 @@ class BuildTranscriptStructure:
gtf_lines(list): List with all newly created gtf lines.
_transcripts_generated(bool): Indicates whether splicing was conducted or not yet.
"""
def __init__(self,
input_gene_count: str,
input_coordinates: str,
......@@ -110,7 +110,7 @@ class BuildTranscriptStructure:
self.gene_sequences_dict[gene_name] = gene_info
def _make_new_transcripts(self) -> None:
""" Generates the differently spliced transcripts."""
"""Generates the differently spliced transcripts."""
for gene in self.gene_count_dict:
# Computes the intron splicing for each transcript.
......@@ -136,7 +136,7 @@ class BuildTranscriptStructure:
self.gene_transcript_dict[gene] = transcript_numbers
def _make_gtf_info(self) -> None:
""" Writes the lines of the new gtf file for the differently spliced transcripts."""
"""Writes the lines of the new gtf file for the differently spliced transcripts."""
for gene in self.gene_transcript_dict: # Iterates over all genes required.
self.gtf_lines.append(self.gene_sequences_dict[gene]['gene_line']) # Add gene line to list.
sense = self.gene_sequences_dict[gene]['strand_sense']
......@@ -189,12 +189,9 @@ class BuildTranscriptStructure:
self.gtf_lines.extend(exon_lines)
def _sort_gtf_lines(self) -> None:
""" Sorts the gtf lines by the position of the genes (increasing) and returns it."""
"""Sorts the gtf lines by the position of the genes (increasing) and returns it."""
# Builds and uses a dictionary with the start of the gene as key, and all lines related to this gene as value:
# {start_gene(int): [[gene_line],[transcript_line],[exon_line1],[exon_line2],...]}
gene_lines_dict = {}
gene_start = 0 # Validation: This key should remain unused, as every gtf file starts with a gene.
for index, line in enumerate(self.gtf_lines):
......@@ -214,13 +211,11 @@ class BuildTranscriptStructure:
def write_csv(self,
csv_output: str
) -> None:
""" Writes a csv file containing the number of differently spliced transcripts.
"""Writes a csv file containing the number of differently spliced transcripts.
Args:
csv_output(str): Path and name of the output csv file: "transcript_ID", "gene_ID", count.
"""
with open(csv_output, 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(['Transcript_ID', 'Gene_ID', 'count'])
......@@ -231,8 +226,7 @@ class BuildTranscriptStructure:
def write_gtf(self,
gtf_output: str
) -> None:
""" Writes a gtf file with the information about the differently spliced transcripts.
"""Writes a gtf file with the information about the differently spliced transcripts.
Args:
gtf_output(str): Path and name of the output gtf file with the information of all relevant transcripts.
......@@ -242,7 +236,7 @@ class BuildTranscriptStructure:
def main():
""" Main Function."""
"""Main Function."""
# Inputs
# gene_count = 'gene_count/Rik_5.csv' # Strand with + sense
# gene_count = 'gene_count/Rp1_5.csv' # Strand with - sense.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment