diff --git a/transcript_structure/Generate_transcript_structure.py b/transcript_structure/Generate_transcript_structure.py index 31fc77a33ca0555051a45d1cf49edf2ca5fbf328..edbca34079e1ae23453ddc0c123bbc0355bf4216 100644 --- a/transcript_structure/Generate_transcript_structure.py +++ b/transcript_structure/Generate_transcript_structure.py @@ -1,10 +1,11 @@ +"""Creates differently spliced transcripts.""" + import random import csv import copy class BuildTranscriptStructure: - """Creates differently spliced transcripts. Args: @@ -34,7 +35,6 @@ class BuildTranscriptStructure: gtf_lines(list): List with all newly created gtf lines. _transcripts_generated(bool): Indicates whether splicing was conducted or not yet. """ - def __init__(self, input_gene_count: str, input_coordinates: str, @@ -110,7 +110,7 @@ class BuildTranscriptStructure: self.gene_sequences_dict[gene_name] = gene_info def _make_new_transcripts(self) -> None: - """ Generates the differently spliced transcripts.""" + """Generates the differently spliced transcripts.""" for gene in self.gene_count_dict: # Computes the intron splicing for each transcript. @@ -136,7 +136,7 @@ class BuildTranscriptStructure: self.gene_transcript_dict[gene] = transcript_numbers def _make_gtf_info(self) -> None: - """ Writes the lines of the new gtf file for the differently spliced transcripts.""" + """Writes the lines of the new gtf file for the differently spliced transcripts.""" for gene in self.gene_transcript_dict: # Iterates over all genes required. self.gtf_lines.append(self.gene_sequences_dict[gene]['gene_line']) # Add gene line to list. sense = self.gene_sequences_dict[gene]['strand_sense'] @@ -189,12 +189,9 @@ class BuildTranscriptStructure: self.gtf_lines.extend(exon_lines) def _sort_gtf_lines(self) -> None: - - """ Sorts the gtf lines by the position of the genes (increasing) and returns it.""" - + """Sorts the gtf lines by the position of the genes (increasing) and returns it.""" # Builds and uses a dictionary with the start of the gene as key, and all lines related to this gene as value: # {start_gene(int): [[gene_line],[transcript_line],[exon_line1],[exon_line2],...]} - gene_lines_dict = {} gene_start = 0 # Validation: This key should remain unused, as every gtf file starts with a gene. for index, line in enumerate(self.gtf_lines): @@ -214,13 +211,11 @@ class BuildTranscriptStructure: def write_csv(self, csv_output: str ) -> None: - - """ Writes a csv file containing the number of differently spliced transcripts. + """Writes a csv file containing the number of differently spliced transcripts. Args: csv_output(str): Path and name of the output cvs file: "transcript_ID", "gene_ID", count. """ - with open(csv_output, 'w', newline='') as file: writer = csv.writer(file) writer.writerow(['Transcript_ID', 'Gene_ID', 'count']) @@ -231,8 +226,7 @@ class BuildTranscriptStructure: def write_gtf(self, gtf_output: str ) -> None: - - """ Writes a gtf file with the information about the differently spliced transcripts. + """Writes a gtf file with the information about the differently spliced transcripts. Args: gtf_output(str): Path and name of the output gtf file with the information of all relevant transcripts. @@ -242,7 +236,7 @@ class BuildTranscriptStructure: def main(): - """ Main Function.""" + """Main Function.""" # Inputs # gene_count = 'gene_count/Rik_5.csv' # Strand with + sense # gene_count = 'gene_count/Rp1_5.csv' # Strand with - sense.