Skip to content
Snippets Groups Projects
Commit 4ff2475d authored by Hugo Gillet
Browse files

Update match_reprtranscript_expressionlevel.py

parent 34841fad
No related branches found
No related tags found
No related merge requests found
import pandas as pd import pandas as pd
import json import json
import re import re
...@@ -6,7 +5,7 @@ import representative as repr ...@@ -6,7 +5,7 @@ import representative as repr
import os import os
def dict_reprTrans_to_df(dict_reprTrans: dict): def dict_reprTrans_to_df(dict_reprTrans: dict[str,str])-> pd.Dataframe:
"""Convert a dictionary of genes and their representative transcript into a dataframe """Convert a dictionary of genes and their representative transcript into a dataframe
...@@ -17,11 +16,18 @@ def dict_reprTrans_to_df(dict_reprTrans: dict): ...@@ -17,11 +16,18 @@ def dict_reprTrans_to_df(dict_reprTrans: dict):
Pandas dataframe having Gene and transcript as columns Pandas dataframe having Gene and transcript as columns
Raises: Raises:
/!\ None, I wasn't able to make a TypeError with dict Only dict are allowed
: Only dict made of key string and value string is allowed Key should be strings
Value should be strings
""" """
pass pass
if not type(dict_reprTrans) is dict :
raise TypeError("Only dict are allowed")
if type(list(dict_reprTrans.keys())[0]) is not str :
raise TypeError("Key should be strings")
if type(list(dict_reprTrans.values())[0]) is not str :
raise TypeError("Values should be strings")
df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"]) df_reprTrans = pd.DataFrame.from_dict(dict_reprTrans, orient="index", columns=["reprTranscript"])
df_reprTrans = df_reprTrans.reset_index(level=0) df_reprTrans = df_reprTrans.reset_index(level=0)
...@@ -30,7 +36,7 @@ def dict_reprTrans_to_df(dict_reprTrans: dict): ...@@ -30,7 +36,7 @@ def dict_reprTrans_to_df(dict_reprTrans: dict):
return df_reprTrans return df_reprTrans
def txt_to_dict(dict_txt: str): def txt_to_dict(dict_txt: str) -> dict:
"""Convert a txt file into a dictionary """Convert a txt file into a dictionary
Args: Args:
...@@ -52,13 +58,14 @@ def txt_to_dict(dict_txt: str): ...@@ -52,13 +58,14 @@ def txt_to_dict(dict_txt: str):
def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame: def transcripts_by_gene_inDf(df_gtfSelection: pd.DataFrame) -> pd.DataFrame:
"""Convert multiindex dataframe from function into a simple dataframe """Convert multiindex dataframe from function into a simple dataframe
Args: Args:
df_gtfSelection (str): Pandas multiindex dataframe having Gene, df_gtfSelection : Pandas multiindex dataframe having Gene,
transcript as indexs and support level as columns. transcript as indexs and support level as columns.
Come from the function import_gtfSelection_to_df() Come from the function import_gtfSelection_to_df()
from representative.py script.
Returns: Returns:
df_gene (str): Pandas dataframe having Gene and df_gene (str): Pandas dataframe having Gene and
...@@ -75,7 +82,7 @@ def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame: ...@@ -75,7 +82,7 @@ def transcripts_by_gene_inDf(df_gtfSelection: str) -> pd.DataFrame:
return df_gene return df_gene
def tsv_or_csv_to_df(input_txt:str) : def tsv_or_csv_to_df(input_txt:str) -> pd.DataFrame :
"""Convert tsv or csv file into a pandas dataframe """Convert tsv or csv file into a pandas dataframe
Args: Args:
...@@ -95,12 +102,15 @@ def tsv_or_csv_to_df(input_txt:str) : ...@@ -95,12 +102,15 @@ def tsv_or_csv_to_df(input_txt:str) :
return df_input return df_input
def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.DataFrame : def exprLevel_byGene(df_exprTrasncript:pd.DataFrame, df_output_gtf_selection:pd.DataFrame) -> pd.DataFrame :
"""Find matching transcripts bewteen the 2 args """find the gene of each transcipt given by the expression level csv/tsv file,
and summ expression level of all transcipts from the same gene.
Args: Args:
df_exprTranscript (str): pandas Dataframe containing transcript and their expression level df_exprTranscript : pandas Dataframe containing transcript and their expression level,
df_output_gtf_selection (str) : pandas Dataframe containing genes and transcripts generated by "tsv_or_csv_to_df" function
df_output_gtf_selection : pandas Dataframe containing genes and transcripts,
generated by "transcripts_by_gene_inDf" function
Returns: Returns:
Pandas dataframe having gene and sum of its transcript expression level Pandas dataframe having gene and sum of its transcript expression level
...@@ -113,14 +123,16 @@ def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.D ...@@ -113,14 +123,16 @@ def exprLevel_byGene(df_exprTrasncript:str, df_output_gtf_selection:str) -> pd.D
df_sum = df_merged.groupby("Gene").sum("Expression_level") # sum transcripts comming from the same gene df_sum = df_merged.groupby("Gene").sum("Expression_level") # sum transcripts comming from the same gene
return df_sum return df_sum
def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.DataFrame: def match_byGene(df_reprTranscript: pd.DataFrame, df_expressionLevel_byGene:pd.DataFrame) -> pd.DataFrame:
"""Find matching genes bewteen the 2 args """Find matching genes bewteen the 2 args
Args: Args:
df_reprTranscript (str): pandas Dataframe containing genes df_reprTranscript : pandas Dataframe containing genes
and their representative transcript and their representative transcript, generated by
df_expressionLevel_byGene (str) : pandas Dataframe containing "dict_reprTrans_to_df()"
genes and their expression level df_expressionLevel_byGene : pandas Dataframe containing
genes and their expression level generated by
"transcript_by_gene_inDf()"
Returns: Returns:
Pandas dataframe having representative trasncripts Pandas dataframe having representative trasncripts
...@@ -135,7 +147,7 @@ def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.Dat ...@@ -135,7 +147,7 @@ def match_byGene(df_reprTranscript:str, df_expressionLevel_byGene:str) -> pd.Dat
df_clean = df_clean.loc[:, ["reprTrans","Expression_level"]] df_clean = df_clean.loc[:, ["reprTrans","Expression_level"]]
return df_clean return df_clean
def output_tsv(dataframe:str)-> pd.DataFrame : def output_tsv(dataframe: pd.DataFrame)-> pd.DataFrame :
"""Convert pandas dataframe into a tsv file """Convert pandas dataframe into a tsv file
Args: Args:
...@@ -195,6 +207,6 @@ def match_reprTranscript_expressionLevel(exprTrans:str, dict_reprTrans:dict, int ...@@ -195,6 +207,6 @@ def match_reprTranscript_expressionLevel(exprTrans:str, dict_reprTrans:dict, int
#match_final = match_reprTranscript_expressionLevel(input_expr, dict_reprTrans, input_intermediate_file) #match_final = match_reprTranscript_expressionLevel(input_expr, dict_reprTrans, input_intermediate_file)
#print("this is the function :\n\n {}".format(match_final)) #print("this is the function :\n\n {}".format(match_final))
#if __name__ == "__main__": if __name__ == "__main__":
# match_reprTranscript_expressionLevel() match_reprTranscript_expressionLevel()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment