Skip to content
Snippets Groups Projects
Commit 1569975c authored by Mate Balajti
Browse files

feat: add and update tests, ci, env

parent 5a912247
No related branches found
No related tags found
1 merge request: !7 feat: add tests
...@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage. ...@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage.
- pip install -r requirements.txt - pip install -r requirements.txt
- pip install -r requirements_dev.txt - pip install -r requirements_dev.txt
- pip install -e . - pip install -e .
# - flake8 --docstring-convention google transcript_sampler/ tests/ - flake8 --docstring-convention google transcript_sampler/ tests/
# - pylint transcript_sampler/ tests/ - pylint transcript_sampler/ tests/
# - mypy transcript_sampler/ - mypy transcript_sampler/ tests/
\ No newline at end of file \ No newline at end of file
name: scrna-seq-sim
channels:
- defaults
- bioconda
- conda-forge
dependencies:
- argparse
- biopython>=1.78
- black
- coverage
- flake8
- flake8-docstrings
- gtfparse
- polars==0.16.17
- mypy
- numpy>=1.23.3
- pylint
- pytest
- nextflow
- pandas>=1.4.4
- pip>=20.2.3
- python>=3.6, <=3.10
- pip:
- -e .
argparse argparse
biopython biopython
gtfparse gtfparse
numpy >= 1.23.3 numpy>=1.23.3
pandas >= 1.4.4 pandas>=1.4.4
\ No newline at end of file \ No newline at end of file
"""Set up project.""" """Set up project."""
from pathlib import Path from pathlib import Path
from setuptools import setup, find_packages from setuptools import setup, find_packages # type: ignore
project_root_dir = Path(__file__).parent.resolve() project_root_dir = Path(__file__).parent.resolve()
with open(project_root_dir / "requirements.txt", with open(project_root_dir / "requirements.txt",
......
"""Initialize tests."""
File moved
File moved
"""Tests functions.""" """Tests functions."""
import os import os
import pandas as pd import pandas as pd # type: ignore
import numpy as np import numpy as np
...@@ -36,7 +36,7 @@ def find_output(): ...@@ -36,7 +36,7 @@ def find_output():
None None
""" """
absolute_path = os.path.dirname(__file__) absolute_path = os.path.dirname(__file__)
test_file = "ReprTrans_ExpressionLevel.tsv" test_file = "inputs/test_ref_output.tsv"
full_path = os.path.join(absolute_path, test_file) full_path = os.path.join(absolute_path, test_file)
return full_path return full_path
......
"""Tests for match representative transcript with expression level.""" """Tests for match representative transcript with expression level."""
import pytest import pytest
import pandas as pd import pandas as pd # type: ignore
import numpy as np import numpy as np
from pandas.testing import assert_frame_equal from pandas.testing import assert_frame_equal # type: ignore
import tests.test_functions as tFun from transcript_sampler.match_reptrans_explvl import (
from transcript_sampler.match_reptrans_explvl import \
MatchReptransExplvl as match MatchReptransExplvl as match
)
import tests.test_functions as tFun
class TestMatchReptrans: class TestMatchReptrans:
"""Tests for match_reptrans_explvl.py.""" """Tests for match_reptrans_explvl.py."""
# def test_gtf_to_df(self):
# TO DO
def test_dict_repr_trans_to_df(self): def test_dict_repr_trans_to_df(self):
"""Test dict_repr_trans_to_df() function. """Test dict_repr_trans_to_df() function.
...@@ -44,7 +42,7 @@ class TestMatchReptrans: ...@@ -44,7 +42,7 @@ class TestMatchReptrans:
assert tFun.duplicated_rows(data_frame).empty, \ assert tFun.duplicated_rows(data_frame).empty, \
"at least one row is duplicated" "at least one row is duplicated"
assert tFun.na_value(data_frame) == 0, \ assert tFun.na_value(data_frame) == 0, \
"at least one row contain NA values" "at least one row contains NA values"
def test_tsv_or_csv_to_df(self): def test_tsv_or_csv_to_df(self):
"""Test tsv_or_csv_to_df() function. """Test tsv_or_csv_to_df() function.
...@@ -65,9 +63,9 @@ class TestMatchReptrans: ...@@ -65,9 +63,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_tsv) == datatype, \ assert tFun.column_d_type(df_tsv) == datatype, \
"at least one column has the wrong datatype" "at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_tsv).empty, \ assert tFun.duplicated_rows(df_tsv).empty, \
"at least one row are duplicated " "at least one row is duplicated"
assert tFun.na_value(df_tsv) == 0, \ assert tFun.na_value(df_tsv) == 0, \
"at least one row contain NA values" "at least one row contains NA values"
assert assert_frame_equal(df_tsv, df_csv) is None, \ assert assert_frame_equal(df_tsv, df_csv) is None, \
"csv and tsv import doesn't match" "csv and tsv import doesn't match"
...@@ -75,7 +73,7 @@ class TestMatchReptrans: ...@@ -75,7 +73,7 @@ class TestMatchReptrans:
"""Test expr_level_by_gene() function. """Test expr_level_by_gene() function.
This function test if the function expr_level_by_gene can find This function test if the function expr_level_by_gene can find
the gene of each transcipt given by the expression level csv/tsv the gene of each transcript given by the expression level csv/tsv
file and sum their expression level file and sum their expression level
""" """
path_tsv = tFun.find_path(r"test_gene_exprL") path_tsv = tFun.find_path(r"test_gene_exprL")
...@@ -104,9 +102,9 @@ class TestMatchReptrans: ...@@ -104,9 +102,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_exp_lvl) == datatype, \ assert tFun.column_d_type(df_exp_lvl) == datatype, \
"at least one column has the wrong datatype" "at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_exp_lvl).empty, \ assert tFun.duplicated_rows(df_exp_lvl).empty, \
"at least one row are duplicated " "at least one row is duplicated"
assert tFun.na_value(df_exp_lvl) == 0, \ assert tFun.na_value(df_exp_lvl) == 0, \
"at least one row contain NA values " "at least one row contains NA values"
assert tFun.duplicated_index(df_exp_lvl).empty, \ assert tFun.duplicated_index(df_exp_lvl).empty, \
"at least one index element is duplicated" "at least one index element is duplicated"
...@@ -151,9 +149,9 @@ class TestMatchReptrans: ...@@ -151,9 +149,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_match) == datatype, \ assert tFun.column_d_type(df_match) == datatype, \
"at least one column has the wrong datatype" "at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_match).empty, \ assert tFun.duplicated_rows(df_match).empty, \
"at least one row are duplicated " "at least one row is duplicated"
assert tFun.na_value(df_match) == 0, \ assert tFun.na_value(df_match) == 0, \
"at least one row contain NA values " "at least one row contains NA values"
assert tFun.duplicated_index(df_match).empty, \ assert tFun.duplicated_index(df_match).empty, \
"at least one index element is duplicated" "at least one index element is duplicated"
...@@ -164,104 +162,37 @@ class TestMatchReptrans: ...@@ -164,104 +162,37 @@ class TestMatchReptrans:
function match_repr_transcript_expression_level(). function match_repr_transcript_expression_level().
""" """
input_path = tFun.find_path("test_gene_exprL") input_path = tFun.find_path("test_gene_exprL")
intermediate_path = tFun.find_path_intermediate_file() gtf_file = tFun.find_path("test.gtf")
dict_repr_test = { dict_repr_test = {
'ENSMUSG00000079415': 'ENSMUST00000112933', 'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595", "ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"} "ENSMUSG00000063683": "ENSMUST00000119960"}
match.match_repr_transcript_expression_level( # Create an instance of MatchReptransExplvl
self, match_instance = match()
exprTrans=input_path,
dict_reprTrans=dict_repr_test, df_result = match_instance.match_repr_transcript_expression_level(
gtf_file=intermediate_path expr_trans=input_path,
) dict_repr_trans=dict_repr_test,
gtf_file=gtf_file
)
ref_path = tFun.find_path("test_ref_output.tsv") ref_path = tFun.find_path("test_ref_output.tsv")
output_path = tFun.find_output() output_path = tFun.find_output()
with open(ref_path, 'r', encoding="utf-8") as t1,\ with open(
open(output_path, 'r', encoding="utf-8") as t2,\ ref_path, 'r', encoding="utf-8"
open(input_path, 'r', encoding="utf-8") as t3: ) as test_file_1, open(
fileRef = t1.readlines() output_path, 'r', encoding="utf-8"
fileOutput = t2.readlines() ) as test_file_2:
fileInput = t3.readlines() file_ref = test_file_1.readlines()
file_output = test_file_2.readlines()
assert ( assert (
sorted(fileRef) == sorted(fileOutput) sorted(file_ref) == sorted(file_output)
), "the output does't match the expected tsv file" ), "the output doesn't match the expected tsv file"
assert ( assert (
sorted(fileRef) != sorted(fileInput) sorted(file_ref) != sorted(
), "the output does't match the expected tsv file" df_result.to_csv(index=False).splitlines()
def test_txt_to_dict(self):
"""This function tests if txt is convertod to dict"""
path = tFun.find_path("test_dict_repr_trans.txt")
dico = match.txt_to_dict(path)
dict_test = {'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"}
assert dico == dict_test
def test_transcripts_by_gene_inDf():
"""
This function test if a dataframe generated from
the intermediate file is converted in another
dataframe without the support level column.
"""
path = tFun.find_path_intermediate_file()
df = repr.import_gtfSelection_to_df(path)
df_gene = match.transcripts_by_gene_inDf(df)
datatype = {'Gene': np.dtype('O'), 'Transcript': np.dtype('O')}
assert tFun.column_number(df_gene) == (
2, "number of columns is not equal to 2")
assert tFun.column_d_type(df_gene) == (
datatype, "at least one column has the wrong datatype")
assert tFun.duplicated_rows(df_gene).empty, \
"at least one row are duplicated"
assert tFun.na_value(df_gene) == 0, \
"at least one row contain NA values"
def test_output_tsv():
"""Test if a tsv file is generated from a df in the right format."""
dict_repr_test = {
'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"}
df_dict_repr_trans = match.dict_repr_trans_to_df(dict_repr_test)
path_tsv = tFun.find_path(r"test_gene_exprL")
df_tsv_exp_lvl = match.tsv_or_csv_to_df(path_tsv)
path_intermediate = tFun.find_path_intermediate_file()
df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
df_exp_lvl = match.expr_level_by_gene(
df_tsv_exp_lvl, df_gene_transcript
) )
), "the output doesn't match the expected tsv file"
df_match = match.match_by_gene(df_dict_repr_trans, df_exp_lvl)
match.output_tsv(df_match)
ref_path = tFun.find_path("test_ref_output.tsv")
output_path = tFun.find_output()
with open(ref_path, 'r', encoding="utf-8") as t1, open(output_path, 'r') as t2:
fileRef = t1.readlines()
fileOutput = t2.readlines()
assert (
sorted(fileRef) == sorted(fileOutput)
), "the output does't match the expected tsv file"
# test_dict_repr_trans_to_df()
# test_txt_to_dict()
# test_transcripts_by_gene_inDf()
# test_tsv_or_csv_to_df()
# test_expr_level_by_gene()
# test_match_by_gene()
# test_output_tsv()
# test_match_repr_transcript_expression_level()
# print("test_match is done ! No error was found")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment