"""Tests for the ``representative`` module.

Covers importing the intermediate GTF selection file into a DataFrame and
building the gene -> representative-transcript dictionary from it.

The tests are discovered and run by pytest; the module can also be executed
directly as a script, in which case the tests are called one after another.
"""
import pytest
import pandas as pd
import datatest as dt
# NOTE: this alias shadows the builtin ``repr()`` inside this module; it is
# kept as-is because every test below refers to the module by this name.
import representative as repr
import numpy as np
import test_Functions as tFun


# Expected gene -> transcript list mapping for the intermediate test file.
# Shared by the tests that check the dictionary output, so both compare
# against exactly the same reference.
_EXPECTED_REPRESENTATIVES = {
    'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
    'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'],
    'ENSMUSG00000079415': ['ENSMUST00000112933.2'],
}


def test_import_gtfSelection_to_df():
    """Check the DataFrame produced by ``import_gtfSelection_to_df``.

    The intermediate file located by ``tFun.find_path_intermediateFile()``
    is imported and the result is checked with the ``test_Functions``
    helpers: three columns, the expected dtype per column, no duplicated
    rows and no NA values. A non-string path must raise ``TypeError``.
    """
    path = tFun.find_path_intermediateFile()
    df = repr.import_gtfSelection_to_df(path)
    expected_dtypes = {
        'Gene': np.dtype('O'),
        'Transcript': np.dtype('O'),
        'Support_level': np.dtype('float64'),
    }

    assert tFun.column_number(df) == 3, "number of columns is not equal to 3"
    assert tFun.column_dType(df) == expected_dtypes, (
        "at least one column has the wrong datatype"
    )
    assert tFun.duplicated_rows(df).empty, "at least one row is duplicated"
    assert tFun.NA_value(df) == 0, "at least one row contains NA values"

    with pytest.raises(TypeError, match=r"Only str path is allowed"):
        repr.import_gtfSelection_to_df(123)


def test_representative_transcript_inDict():
    """Check the dictionary built by ``representative_transcripts_inDict``.

    The DataFrame returned by ``import_gtfSelection_to_df`` for the
    intermediate file must be converted into the expected
    gene -> transcripts dictionary. Any argument that is not a pandas
    DataFrame must raise ``TypeError``.
    """
    path = tFun.find_path_intermediateFile()
    df = repr.import_gtfSelection_to_df(path)

    assert repr.representative_transcripts_inDict(df) == _EXPECTED_REPRESENTATIVES

    # One representative value per rejected kind: int, str, list and dict.
    invalid_inputs = (
        123,
        "hello",
        ["hello", "world", 123],
        {"hello": "world", "bonjour": ["le monde", 123]},
    )
    for invalid in invalid_inputs:
        with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
            repr.representative_transcripts_inDict(invalid)


def test_find_repr_by_SupportLevel():
    """Check the dictionary returned by ``find_repr_by_SupportLevel``.

    Called with the path of the intermediate file, the function must return
    the expected gene -> transcripts dictionary.
    """
    path = tFun.find_path_intermediateFile()

    assert repr.find_repr_by_SupportLevel(path) == _EXPECTED_REPRESENTATIVES


if __name__ == "__main__":
    # Script mode only. Guarded so that importing the module (which pytest
    # does during collection) does not execute the tests a second time.
    test_representative_transcript_inDict()
    test_find_repr_by_SupportLevel()
    test_import_gtfSelection_to_df()
    print("test_representative is done ! No error was found")