import pytest
import pandas as pd
import datatest as dt
import representative as repr
import numpy as np 
import test_Functions as tFun


def test_import_gtfSelection_to_df():
    """
    Check the DataFrame returned by repr.import_gtfSelection_to_df()
    for the intermediate file located by tFun.find_path_intermediateFile(),
    then check that a non-str path is rejected.

    Checks performed, in order:
        - tFun.column_number() reports 3 columns
        - tFun.column_dType() equals the expected Gene / Transcript /
          Support_level dtype mapping
        - tFun.duplicated_rows() is empty
        - tFun.NA_value() equals 0
        - passing the int 123 raises TypeError matching
          "Only str path is allowed"

        Args:
            None

        Returns:
            None

        Raises:
            AssertionError: when one of the checks above fails
    """
    # Expected dtype of every column, keyed by column name.
    expected_dtypes = {
        'Gene': np.dtype('O'),
        'Transcript': np.dtype('O'),
        'Support_level': np.dtype('float64'),
    }
    imported = repr.import_gtfSelection_to_df(tFun.find_path_intermediateFile())

    assert tFun.column_number(imported) == 3, "number of columns is not equal to 3"
    assert tFun.column_dType(imported) == expected_dtypes, "at lease one column has the wrong datatype"
    assert tFun.duplicated_rows(imported).empty, "at lease one row are duplicated "
    assert tFun.NA_value(imported) == 0, "at lease one row contain NA values "

    # A non-str path must be refused with the documented message.
    with pytest.raises(TypeError, match=r"Only str path is allowed"):
        repr.import_gtfSelection_to_df(123)


def test_representative_transcript_inDict():
    """
    Check that repr.representative_transcripts_inDict() turns the
    DataFrame produced by repr.import_gtfSelection_to_df() into the
    expected gene -> list-of-transcripts dict, and that inputs which
    are not a DataFrame raise TypeError.

        Args:
            None (the DataFrame is built inside the test from the file
            located by tFun.find_path_intermediateFile())

        Returns:
            None

        Raises:
            AssertionError: when the produced dict differs from the
            expected one
    """
    source_df = repr.import_gtfSelection_to_df(tFun.find_path_intermediateFile())
    produced = repr.representative_transcripts_inDict(source_df)
    expected = {
        'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
        'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'],
        'ENSMUSG00000079415': ['ENSMUST00000112933.2'],
    }
    assert produced == expected

    # One sample per rejected type (int, str, list, dict); each must be
    # refused with the same error message.
    invalid_inputs = (
        123,
        "hello",
        ["hello", "world", 123],
        {"hello": "world", "bonjour": ["le monde", 123]},
    )
    for bad_input in invalid_inputs:
        with pytest.raises(TypeError, match=r"Only pandas DataFrame is allowed"):
            repr.representative_transcripts_inDict(bad_input)

def test_find_repr_by_SupportLevel():
    """
    Check that repr.find_repr_by_SupportLevel(), given the path returned
    by tFun.find_path_intermediateFile(), produces the expected
    gene -> list-of-transcripts dict.

        Args:
            None

        Returns:
            None

        Raises:
            AssertionError: when the produced dict differs from the
            expected one
    """
    produced = repr.find_repr_by_SupportLevel(tFun.find_path_intermediateFile())
    expected = {
        'ENSMUSG00000024691': ['ENSMUST00000025595.5'],
        'ENSMUSG00000063683': ['ENSMUST00000044976.12', 'ENSMUST00000119960.2'],
        'ENSMUSG00000079415': ['ENSMUST00000112933.2'],
    }
    assert produced == expected

# Run the tests directly only when this file is executed as a script.
# Without the guard, merely importing the module (which pytest does during
# collection) would execute every test a second time and turn a failing
# assertion into a collection error instead of a reported test failure.
if __name__ == "__main__":
    test_representative_transcript_inDict()
    test_find_repr_by_SupportLevel()
    test_import_gtfSelection_to_df()
    print("test_representative is done ! No error was found")