Skip to content
Snippets Groups Projects
Commit 4ff2475d authored by Hugo Gillet's avatar Hugo Gillet
Browse files

Update match_reprtranscript_expressionlevel.py

parent 34841fad
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import json
import re
......@@ -6,7 +5,7 @@ import representative as repr
import os
def dict_reprTrans_to_df(dict_reprTrans: dict):
def dict_reprTrans_to_df(dict_reprTrans: dict[str,str])-> pd.Dataframe:
"""Convert a dictionary of genes and their representative transcript into a dataframe
......@@ -17,11 +16,18 @@ def dict_reprTrans_to_df(dict_reprTrans: dict):
Pandas dataframe having Gene and transcript as columns
Raises:
/!\ None, I wasn't able to make a TypeError with dict
: Only dict made of key string and value string is allowed
Only dict are allowed
Key should be strings
Value should be strings
"""
pass
if not type(dict_reprTrans) is dict :
raise TypeError("Only dict are allowed")
if type(list(dict_reprTrans.keys())[0]) is not str :
raise TypeError("Key should be strings")
if type(list(dict_reprTrans.values())[0]) is not str :
raise TypeError("Values should be strings")
df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"])
df_reprTrans = df_reprTrans.reset_index(level=0)
......@@ -30,7 +36,7 @@ def dict_reprTrans_to_df(dict_reprTrans: dict):
return df_reprTrans
def txt_to_dict(dict_txt: str):
def txt_to_dict(dict_txt: str) -> dict:
"""Convert a txt file into a dictionary
Args:
......@@ -52,13 +58,14 @@ def txt_to_dict(dict_txt: str):
def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame:
def transcripts_by_gene_inDf(df_gtfSelection: pd.DataFrame) -> pd.DataFrame:
"""Convert multiindex dataframe from function into a simple dataframe
Args:
df_gtfSelection (str): Pandas multiindex dataframe having Gene,
df_gtfSelection : Pandas multiindex dataframe having Gene,
transcript as indexes and support level as columns.
Come from the function import_gtfSelection_to_df()
from representative.py script.
Returns:
df_gene (str): Pandas dataframe having Gene and
......@@ -75,7 +82,7 @@ def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame:
return df_gene
def tsv_or_csv_to_df(input_txt:str) :
def tsv_or_csv_to_df(input_txt:str) -> pd.DataFrame :
"""Convert tsv or csv file into a pandas dataframe
Args:
......@@ -95,12 +102,15 @@ def tsv_or_csv_to_df(input_txt:str) :
return df_input
def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.DataFrame :
"""Find matching transcripts bewteen the 2 args
def exprLevel_byGene(df_exprTrasncript:pd.DataFrame, df_output_gtf_selection:pd.DataFrame) -> pd.DataFrame :
"""find the gene of each transcipt given by the expression level csv/tsv file,
and sum the expression level of all transcripts from the same gene.
Args:
df_exprTranscript (str): pandas Dataframe containing transcript and their expression level
df_output_gtf_selection (str) : pandas Dataframe containing genes and transcripts
df_exprTranscript : pandas Dataframe containing transcript and their expression level,
generated by "tsv_or_csv_to_df" function
df_output_gtf_selection : pandas Dataframe containing genes and transcripts,
generated by "transcripts_by_gene_inDf" function
Returns:
Pandas dataframe having gene and sum of its transcript expression level
......@@ -113,14 +123,16 @@ def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.D
df_sum = df_merged.groupby("Gene").sum("Expression_level") # sum transcripts comming from the same gene
return df_sum
def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.DataFrame:
def match_byGene(df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene:pd.DataFrame) -> pd.DataFrame:
"""Find matching genes bewteen the 2 args
Args:
df_reprTranscript (str): pandas Dataframe containing genes
and their representative transcript
df_expressionLevel_byGene (str) : pandas Dataframe containing
genes and their expression level
df_reprTranscript : pandas Dataframe containing genes
and their representative transcript, generated by
"dict_reprTrans_to_df()"
df_expressionLevel_byGene : pandas Dataframe containing
genes and their expression level, generated by
"exprLevel_byGene()"
Returns:
Pandas dataframe having representative transcripts
......@@ -135,7 +147,7 @@ def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.Dat
df_clean = df_clean.loc[:, ["reprTrans","Expression_level"]]
return df_clean
def output_tsv(dataframe:str)-> pd.DataFrame :
def output_tsv(dataframe: pd.DataFrame)-> pd.DataFrame :
"""Convert pandas dataframe into a tsv file
Args:
......@@ -195,6 +207,6 @@ def match_reprTranscript_expressionLevel(exprTrans:str, dict_reprTrans:dict, int
#match_final = match_reprTranscript_expressionLevel(input_expr, dict_reprTrans, input_intermediate_file)
#print("this is the function :\n\n {}".format(match_final))
#if __name__ == "__main__":
# match_reprTranscript_expressionLevel()
if __name__ == "__main__":
match_reprTranscript_expressionLevel()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment