"""Helper functions for tests: test-file path lookup and DataFrame checks."""
import os

import numpy as np
import pandas as pd

# Name of the sub-directory (next to this module) that holds test inputs.
_INPUT_DIR_NAME = "inputs"


def find_path(filename: str) -> str:
    """Build the path of a file located in the ``inputs`` directory.

    The path is assembled component by component with ``os.path.join`` so
    that the correct separator is used on every platform (a hard-coded
    backslash only works on Windows).

    Args:
        filename: name of the file inside the ``inputs`` directory.

    Returns:
        str path ``<directory of this module>/inputs/<filename>``.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, _INPUT_DIR_NAME, str(filename))


def find_output() -> str:
    """Build the path of the output file ``ReprTrans_ExpressionLevel.tsv``.

    Args:
        none

    Returns:
        str path of ``ReprTrans_ExpressionLevel.tsv`` in the directory of
        this module.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "ReprTrans_ExpressionLevel.tsv")


def find_path_intermediateFile() -> str:
    """Build the path to test_gencode.vM31.annotation_intermediat_file.txt.

    Args:
        none

    Returns:
        str path of ``test_gencode.vM31.annotation_intermediat_file.txt``
        inside the ``inputs`` directory next to this module.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    # Join the components separately instead of embedding a backslash so the
    # path is also valid on non-Windows systems.
    return os.path.join(
        module_dir,
        _INPUT_DIR_NAME,
        "test_gencode.vM31.annotation_intermediat_file.txt",
    )


def column_number(df: pd.DataFrame) -> int:
    """Return the number of columns of a dataframe.

    Args:
        df: Pandas dataframe

    Returns:
        int number of columns

    Raises:
        None
    """
    return len(df.columns)


def column_dType(df: pd.DataFrame) -> dict[str, np.dtype]:
    """Return the dtype of each column of a dataframe as a dict.

    Args:
        df: Pandas dataframe

    Returns:
        dict mapping each column label to its dtype

    Raises:
        None
    """
    return df.dtypes.to_dict()


def duplicated_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of a dataframe that duplicate an earlier row.

    Args:
        df: Pandas dataframe

    Returns:
        Pandas dataframe holding only the rows flagged by
        ``df.duplicated()``; it is empty when there is no duplicate.

    Raises:
        None
    """
    return df[df.duplicated()]


def duplicated_index(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of a dataframe whose index label already occurred.

    Args:
        df: Pandas dataframe

    Returns:
        Pandas dataframe holding only the rows flagged by
        ``df.index.duplicated()``; it is empty when the index is unique.

    Raises:
        None
    """
    return df[df.index.duplicated()]


def NA_value(df: pd.DataFrame) -> int:
    """Return the total number of NA values in a dataframe.

    Args:
        df: Pandas dataframe

    Returns:
        int count of NA cells over all columns

    Raises:
        None
    """
    # The double sum yields a NumPy scalar; convert it so the function really
    # returns the built-in int promised by the annotation.
    return int(df.isna().sum().sum())