Skip to content
Snippets Groups Projects
Commit 1569975c authored by Mate Balajti
Browse files

feat: add and update tests, ci, env

parent 5a912247
No related branches found
No related tags found
1 merge request: !7 feat: add tests
......@@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage.
- pip install -r requirements.txt
- pip install -r requirements_dev.txt
- pip install -e .
# - flake8 --docstring-convention google transcript_sampler/ tests/
# - pylint transcript_sampler/ tests/
# - mypy transcript_sampler/
\ No newline at end of file
- flake8 --docstring-convention google transcript_sampler/ tests/
- pylint transcript_sampler/ tests/
- mypy transcript_sampler/ tests/
\ No newline at end of file
name: scrna-seq-sim
channels:
- defaults
- bioconda
- conda-forge
dependencies:
- argparse
- biopython>=1.78
- black
- coverage
- flake8
- flake8-docstrings
- gtfparse
- polars==0.16.17
- mypy
- numpy>=1.23.3
- pylint
- pytest
- nextflow
- pandas>=1.4.4
- pip>=20.2.3
- python>=3.6, <=3.10
- pip:
- -e .
argparse
biopython
gtfparse
numpy >= 1.23.3
pandas >= 1.4.4
\ No newline at end of file
numpy>=1.23.3
pandas>=1.4.4
\ No newline at end of file
"""Set up project."""
from pathlib import Path
from setuptools import setup, find_packages
from setuptools import setup, find_packages # type: ignore
project_root_dir = Path(__file__).parent.resolve()
with open(project_root_dir / "requirements.txt",
......
"""Initialize tests."""
File moved
File moved
"""Tests functions."""
import os
import pandas as pd
import pandas as pd # type: ignore
import numpy as np
......@@ -36,7 +36,7 @@ def find_output():
None
"""
absolute_path = os.path.dirname(__file__)
test_file = "ReprTrans_ExpressionLevel.tsv"
test_file = "inputs/test_ref_output.tsv"
full_path = os.path.join(absolute_path, test_file)
return full_path
......
"""Tests for match representative transcript with expression level."""
import pytest
import pandas as pd
import pandas as pd # type: ignore
import numpy as np
from pandas.testing import assert_frame_equal
import tests.test_functions as tFun
from transcript_sampler.match_reptrans_explvl import \
from pandas.testing import assert_frame_equal # type: ignore
from transcript_sampler.match_reptrans_explvl import (
MatchReptransExplvl as match
)
import tests.test_functions as tFun
class TestMatchReptrans:
"""Tests for match_reptrans_explvl.py."""
# def test_gtf_to_df(self):
# TO DO
def test_dict_repr_trans_to_df(self):
"""Test dict_repr_trans_to_df() function.
......@@ -44,7 +42,7 @@ class TestMatchReptrans:
assert tFun.duplicated_rows(data_frame).empty, \
"at least one row is duplicated"
assert tFun.na_value(data_frame) == 0, \
"at least one row contain NA values"
"at least one row contains NA values"
def test_tsv_or_csv_to_df(self):
"""Test tsv_or_csv_to_df() function.
......@@ -65,9 +63,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_tsv) == datatype, \
"at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_tsv).empty, \
"at least one row are duplicated "
"at least one row is duplicated"
assert tFun.na_value(df_tsv) == 0, \
"at least one row contain NA values"
"at least one row contains NA values"
assert assert_frame_equal(df_tsv, df_csv) is None, \
"csv and tsv import doesn't match"
......@@ -75,7 +73,7 @@ class TestMatchReptrans:
"""Test expr_level_by_gene() function.
This function test if the function expr_level_by_gene can find
the gene of each transcipt given by the expression level csv/tsv
the gene of each transcript given by the expression level csv/tsv
file and sum their expression level
"""
path_tsv = tFun.find_path(r"test_gene_exprL")
......@@ -104,9 +102,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_exp_lvl) == datatype, \
"at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_exp_lvl).empty, \
"at least one row are duplicated "
"at least one row is duplicated"
assert tFun.na_value(df_exp_lvl) == 0, \
"at least one row contain NA values "
"at least one row contains NA values"
assert tFun.duplicated_index(df_exp_lvl).empty, \
"at least one index element is duplicated"
......@@ -151,9 +149,9 @@ class TestMatchReptrans:
assert tFun.column_d_type(df_match) == datatype, \
"at least one column has the wrong datatype"
assert tFun.duplicated_rows(df_match).empty, \
"at least one row are duplicated "
"at least one row is duplicated"
assert tFun.na_value(df_match) == 0, \
"at least one row contain NA values "
"at least one row contains NA values"
assert tFun.duplicated_index(df_match).empty, \
"at least one index element is duplicated"
......@@ -164,104 +162,37 @@ class TestMatchReptrans:
function match_repr_transcript_expression_level().
"""
input_path = tFun.find_path("test_gene_exprL")
intermediate_path = tFun.find_path_intermediate_file()
gtf_file = tFun.find_path("test.gtf")
dict_repr_test = {
'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"}
match.match_repr_transcript_expression_level(
self,
exprTrans=input_path,
dict_reprTrans=dict_repr_test,
gtf_file=intermediate_path
)
# Create an instance of MatchReptransExplvl
match_instance = match()
df_result = match_instance.match_repr_transcript_expression_level(
expr_trans=input_path,
dict_repr_trans=dict_repr_test,
gtf_file=gtf_file
)
ref_path = tFun.find_path("test_ref_output.tsv")
output_path = tFun.find_output()
with open(ref_path, 'r', encoding="utf-8") as t1,\
open(output_path, 'r', encoding="utf-8") as t2,\
open(input_path, 'r', encoding="utf-8") as t3:
fileRef = t1.readlines()
fileOutput = t2.readlines()
fileInput = t3.readlines()
with open(
ref_path, 'r', encoding="utf-8"
) as test_file_1, open(
output_path, 'r', encoding="utf-8"
) as test_file_2:
file_ref = test_file_1.readlines()
file_output = test_file_2.readlines()
assert (
sorted(fileRef) == sorted(fileOutput)
), "the output does't match the expected tsv file"
sorted(file_ref) == sorted(file_output)
), "the output doesn't match the expected tsv file"
assert (
sorted(fileRef) != sorted(fileInput)
), "the output does't match the expected tsv file"
def test_txt_to_dict(self):
"""This function tests if txt is convertod to dict"""
path = tFun.find_path("test_dict_repr_trans.txt")
dico = match.txt_to_dict(path)
dict_test = {'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"}
assert dico == dict_test
def test_transcripts_by_gene_inDf():
"""
This function test if a dataframe generated from
the intermediate file is converted in another
dataframe without the support level column.
"""
path = tFun.find_path_intermediate_file()
df = repr.import_gtfSelection_to_df(path)
df_gene = match.transcripts_by_gene_inDf(df)
datatype = {'Gene': np.dtype('O'), 'Transcript': np.dtype('O')}
assert tFun.column_number(df_gene) == (
2, "number of columns is not equal to 2")
assert tFun.column_d_type(df_gene) == (
datatype, "at least one column has the wrong datatype")
assert tFun.duplicated_rows(df_gene).empty, \
"at least one row are duplicated"
assert tFun.na_value(df_gene) == 0, \
"at least one row contain NA values"
def test_output_tsv():
"""Test if a tsv file is generated from a df in the right format."""
dict_repr_test = {
'ENSMUSG00000079415': 'ENSMUST00000112933',
"ENSMUSG00000024691": "ENSMUST00000025595",
"ENSMUSG00000063683": "ENSMUST00000119960"}
df_dict_repr_trans = match.dict_repr_trans_to_df(dict_repr_test)
path_tsv = tFun.find_path(r"test_gene_exprL")
df_tsv_exp_lvl = match.tsv_or_csv_to_df(path_tsv)
path_intermediate = tFun.find_path_intermediate_file()
df_intermediate = repr.import_gtfSelection_to_df(path_intermediate)
df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate)
df_exp_lvl = match.expr_level_by_gene(
df_tsv_exp_lvl, df_gene_transcript
sorted(file_ref) != sorted(
df_result.to_csv(index=False).splitlines()
)
df_match = match.match_by_gene(df_dict_repr_trans, df_exp_lvl)
match.output_tsv(df_match)
ref_path = tFun.find_path("test_ref_output.tsv")
output_path = tFun.find_output()
with open(ref_path, 'r', encoding="utf-8") as t1, open(output_path, 'r') as t2:
fileRef = t1.readlines()
fileOutput = t2.readlines()
assert (
sorted(fileRef) == sorted(fileOutput)
), "the output does't match the expected tsv file"
# test_dict_repr_trans_to_df()
# test_txt_to_dict()
# test_transcripts_by_gene_inDf()
# test_tsv_or_csv_to_df()
# test_expr_level_by_gene()
# test_match_by_gene()
# test_output_tsv()
# test_match_repr_transcript_expression_level()
# print("test_match is done ! No error was found")
), "the output doesn't match the expected tsv file"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment