"""Tests for the ``representative`` module.

Each test loads the intermediate annotation file located by
``tFun.find_path_intermediateFile()`` and checks the objects that the
``representative`` module builds from it.

The module can be collected by pytest or executed directly as a script.
"""

import numpy as np
import pandas as pd
import pytest
import datatest as dt

# NOTE(review): the ``repr`` alias shadows the builtin ``repr`` inside this
# module. It is kept unchanged so existing references keep working.
import representative as repr
import test_Functions as tFun


# Gene -> representative transcript(s) expected for the intermediate file.
# Shared by the tests that compare a generated dict against this mapping.
EXPECTED_REPRESENTATIVES = {
    'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
    'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'],
    'ENSMUSG00000079415': ['ENSMUST00000112933.2'],
}


def test_import_gtfSelection_to_df():
    """
    Test if gencode.vM31.annotation_intermediat_file.txt
    is imported in the correct pandas df format

    The checks are: three columns, the expected dtype for each of the
    Gene / Transcript / Support_level columns, no duplicated rows, no NA
    values, and a TypeError when the path argument is not a str.

    Args:
        None

    Returns:
        Assert results

    Raises:
        None
    """
    path = tFun.find_path_intermediateFile()
    df = repr.import_gtfSelection_to_df(path)
    datatype = {
        'Gene': np.dtype('O'),
        'Transcript': np.dtype('O'),
        'Support_level': np.dtype('float64'),
    }
    assert tFun.column_number(df) == 3, "number of columns is not equal to 3"
    assert tFun.column_dType(df) == datatype, "at lease one column has the wrong datatype"
    assert tFun.duplicated_rows(df).empty, "at lease one row are duplicated "
    assert tFun.NA_value(df) == 0, "at lease one row contain NA values "
    with pytest.raises(TypeError, match=r"Only str path is allowed"):
        repr.import_gtfSelection_to_df(123)


def test_representative_transcript_inDict():
    """
    Test if df generated by "import_gtfSelection_to_df()" output
    a dict in the right format

    The DataFrame is built inside the test from the intermediate file, and
    the resulting dict is compared with EXPECTED_REPRESENTATIVES. Inputs
    that are not a pandas DataFrame must raise a TypeError.

    Args:
        None

    Returns:
        Assert results

    Raises:
        None
    """
    path = tFun.find_path_intermediateFile()
    df = repr.import_gtfSelection_to_df(path)
    dict_to_test = repr.representative_transcripts_inDict(df)
    assert dict_to_test == EXPECTED_REPRESENTATIVES

    # One representative of each rejected kind: int, str, list and dict.
    invalid_inputs = (
        123,
        "hello",
        ["hello", "world", 123],
        {"hello": "world", "bonjour": ["le monde", 123]},
    )
    for invalid_input in invalid_inputs:
        with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
            repr.representative_transcripts_inDict(invalid_input)


def test_find_repr_by_SupportLevel():
    """
    Test if the correct dict is generated from
    gencode.vM31.annotation_intermediat_file.txt

    Args:
        None

    Returns:
        Assert results

    Raises:
        None
    """
    path = tFun.find_path_intermediateFile()
    dict_to_test = repr.find_repr_by_SupportLevel(path)
    assert dict_to_test == EXPECTED_REPRESENTATIVES


if __name__ == "__main__":
    # Run the tests only when the file is executed directly. Calling them
    # at import time made pytest execute every test during collection and
    # then a second time as a collected test.
    test_representative_transcript_inDict()
    test_find_repr_by_SupportLevel()
    test_import_gtfSelection_to_df()
    print("test_representative is done ! No error was found")