"""Helpers for locating test fixture files and inspecting pandas DataFrames.

Extracted from the patch "Add new file" (commit b571e818), which creates
``test/Test_representative_and_match/test_Functions.py``.
"""
import os

import numpy as np
import pandas as pd


def _resolve(*parts: str) -> str:
    """Join ``parts`` onto the directory that contains this module."""
    return os.path.join(os.path.dirname(__file__), *parts)


def find_path(filename: str) -> str:
    """Build the path of a file located in the ``inputs`` directory.

    The path is only constructed; the file is not checked for existence.

    Args:
        filename: Name of the file, relative to the ``inputs`` directory
            that sits next to this module. It is converted with ``str()``.

    Returns:
        The path ``<module directory>/inputs/<filename>``.

    Raises:
        None
    """
    # Join the components instead of embedding a backslash, so the result is
    # a real sub-directory path on POSIX as well as on Windows.
    return _resolve("inputs", str(filename))


def find_output() -> str:
    """Build the path of the ``ReprTrans_ExpressionLevel.tsv`` output file.

    Args:
        None

    Returns:
        The path ``<module directory>/ReprTrans_ExpressionLevel.tsv``.

    Raises:
        None
    """
    return _resolve("ReprTrans_ExpressionLevel.tsv")


def find_path_intermediateFile() -> str:
    """Build the path of the gencode intermediate test file.

    Args:
        None

    Returns:
        The path of ``test_gencode.vM31.annotation_intermediat_file.txt``
        inside the ``inputs`` directory next to this module.

    Raises:
        None
    """
    # Same portability fix as in find_path: no hard-coded "\" separator.
    return _resolve(
        "inputs", "test_gencode.vM31.annotation_intermediat_file.txt"
    )


def column_number(df: pd.DataFrame) -> int:
    """Return the number of columns of a DataFrame.

    Args:
        df: Pandas dataframe.

    Returns:
        Number of columns in ``df``.

    Raises:
        None
    """
    return len(df.columns)


def column_dType(df: pd.DataFrame) -> dict[str, np.dtype]:
    """Return the dtype of each column of a DataFrame.

    Args:
        df: Pandas dataframe.

    Returns:
        Dict mapping each column label to the dtype of that column.

    Raises:
        None
    """
    return df.dtypes.to_dict()


def duplicated_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of a DataFrame that duplicate an earlier row.

    Args:
        df: Pandas dataframe.

    Returns:
        DataFrame holding the rows flagged by ``df.duplicated()``; the first
        occurrence of each row is not included. Empty if nothing repeats.

    Raises:
        None
    """
    return df[df.duplicated()]


def duplicated_index(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of a DataFrame whose index label already occurred.

    Args:
        df: Pandas dataframe.

    Returns:
        DataFrame holding the rows flagged by ``df.index.duplicated()``; the
        first occurrence of each label is not included.

    Raises:
        None
    """
    return df[df.index.duplicated()]


def NA_value(df: pd.DataFrame) -> int:
    """Return the total number of NA values in a DataFrame.

    Args:
        df: Pandas dataframe.

    Returns:
        Count of missing cells over all columns, as a built-in ``int``.

    Raises:
        None
    """
    # The double sum yields a NumPy scalar (and may be a float for an empty
    # frame); coerce it so the value matches the declared return type.
    return int(df.isna().sum().sum())