diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c4402dd0525a2dbd2516704c8e9c45c42887ff56..e34cd6f9e5329d2cd11cb04cf36917d369201567 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,6 +29,6 @@ lint-test-job: # This job also runs in the test stage. - pip install -r requirements.txt - pip install -r requirements_dev.txt - pip install -e . - # - flake8 --docstring-convention google transcript_sampler/ tests/ - # - pylint transcript_sampler/ tests/ - # - mypy transcript_sampler/ \ No newline at end of file + - flake8 --docstring-convention google transcript_sampler/ tests/ + - pylint transcript_sampler/ tests/ + - mypy transcript_sampler/ tests/ \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..bb7989a0686411bbbfd16d711e406e44132357a3 --- /dev/null +++ b/environment.yml @@ -0,0 +1,24 @@ +name: scrna-seq-sim +channels: + - defaults + - bioconda + - conda-forge +dependencies: + - argparse + - biopython>=1.78 + - black + - coverage + - flake8 + - flake8-docstrings + - gtfparse + - polars==0.16.17 + - mypy + - numpy>=1.23.3 + - pylint + - pytest + - nextflow + - pandas>=1.4.4 + - pip>=20.2.3 + - python>=3.6, <=3.10 + - pip: + - -e . diff --git a/requirements.txt b/requirements.txt index 98d4d622197cba5a0b39ff5ffc87237dea9045f5..782fb4a06387e54280e2bfd3c0045e87a6665a83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ argparse biopython gtfparse -numpy >= 1.23.3 -pandas >= 1.4.4 \ No newline at end of file +numpy>=1.23.3 +pandas>=1.4.4 \ No newline at end of file diff --git a/setup.py b/setup.py index 9542e8a5d2e5e83f2e3f1ce44a26a9bfc796731e..721659b0ed83e01955c19efb8e10a6eb03e0e502 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ """Set up project.""" from pathlib import Path -from setuptools import setup, find_packages +from setuptools import setup, find_packages # type: ignore project_root_dir = Path(__file__).parent.resolve() with open(project_root_dir / "requirements.txt", diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b3d9b62f79cdffdee3c6c65a87102862285a170c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Initialize tests.""" diff --git a/tests/input_files/expression.csv b/tests/inputs/expression.csv similarity index 100% rename from tests/input_files/expression.csv rename to tests/inputs/expression.csv diff --git a/tests/input_files/test.gtf b/tests/inputs/test.gtf similarity index 100% rename from tests/input_files/test.gtf rename to tests/inputs/test.gtf diff --git a/tests/test_functions.py b/tests/test_functions.py index a0f2f1ad3d9630cc613012398e9aa045d13fb2a6..a81b0754d1d7ca809642da2a5477d0afcdf00149 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1,6 +1,6 @@ """Tests functions.""" import os -import pandas as pd +import pandas as pd # type: ignore import numpy as np @@ -36,7 +36,7 @@ def find_output(): None """ absolute_path = os.path.dirname(__file__) - test_file = "ReprTrans_ExpressionLevel.tsv" + test_file = "inputs/test_ref_output.tsv" full_path = os.path.join(absolute_path, test_file) return full_path diff --git a/tests/test_match_reptrans_explvl.py b/tests/test_match_reptrans_explvl.py index abef23b98d6aae2bf75b1aaf7ba5ddca4c2d96ca..495155d25e5fbd9f3e63cb477f2f5ba4218cd31d 100644 --- a/tests/test_match_reptrans_explvl.py +++ b/tests/test_match_reptrans_explvl.py @@ -1,19 +1,17 @@ """Tests for match representative transcript with expression level.""" import pytest -import pandas as pd +import pandas as pd # type: ignore import numpy as np -from pandas.testing import assert_frame_equal -import tests.test_functions as tFun -from transcript_sampler.match_reptrans_explvl import \ +from pandas.testing import assert_frame_equal # type: ignore +from transcript_sampler.match_reptrans_explvl import ( MatchReptransExplvl as match +) +import tests.test_functions as tFun class TestMatchReptrans: """Tests for match_reptrans_explvl.py.""" - # def test_gtf_to_df(self): - # TO DO - def test_dict_repr_trans_to_df(self): """Test dict_repr_trans_to_df() function. @@ -44,7 +42,7 @@ class TestMatchReptrans: assert tFun.duplicated_rows(data_frame).empty, \ "at least one row is duplicated" assert tFun.na_value(data_frame) == 0, \ - "at least one row contain NA values" + "at least one row contains NA values" def test_tsv_or_csv_to_df(self): """Test tsv_or_csv_to_df() function. @@ -65,9 +63,9 @@ class TestMatchReptrans: assert tFun.column_d_type(df_tsv) == datatype, \ "at least one column has the wrong datatype" assert tFun.duplicated_rows(df_tsv).empty, \ - "at least one row are duplicated " + "at least one row is duplicated" assert tFun.na_value(df_tsv) == 0, \ - "at least one row contain NA values" + "at least one row contains NA values" assert assert_frame_equal(df_tsv, df_csv) is None, \ "csv and tsv import doesn't match" @@ -75,7 +73,7 @@ class TestMatchReptrans: """Test expr_level_by_gene() function. This function test if the function expr_level_by_gene can find - the gene of each transcipt given by the expression level csv/tsv + the gene of each transcript given by the expression level csv/tsv file and sum their expression level """ path_tsv = tFun.find_path(r"test_gene_exprL") @@ -104,9 +102,9 @@ class TestMatchReptrans: assert tFun.column_d_type(df_exp_lvl) == datatype, \ "at least one column has the wrong datatype" assert tFun.duplicated_rows(df_exp_lvl).empty, \ - "at least one row are duplicated " + "at least one row is duplicated" assert tFun.na_value(df_exp_lvl) == 0, \ - "at least one row contain NA values " + "at least one row contains NA values" assert tFun.duplicated_index(df_exp_lvl).empty, \ "at least one index element is duplicated" @@ -151,9 +149,9 @@ class TestMatchReptrans: assert tFun.column_d_type(df_match) == datatype, \ "at least one column has the wrong datatype" assert tFun.duplicated_rows(df_match).empty, \ - "at least one row are duplicated " + "at least one row is duplicated" assert tFun.na_value(df_match) == 0, \ - "at least one row contain NA values " + "at least one row contains NA values" assert tFun.duplicated_index(df_match).empty, \ "at least one index element is duplicated" @@ -164,104 +162,37 @@ class TestMatchReptrans: function match_repr_transcript_expression_level(). """ input_path = tFun.find_path("test_gene_exprL") - intermediate_path = tFun.find_path_intermediate_file() + gtf_file = tFun.find_path("test.gtf") dict_repr_test = { 'ENSMUSG00000079415': 'ENSMUST00000112933', "ENSMUSG00000024691": "ENSMUST00000025595", "ENSMUSG00000063683": "ENSMUST00000119960"} - match.match_repr_transcript_expression_level( - self, - exprTrans=input_path, - dict_reprTrans=dict_repr_test, - gtf_file=intermediate_path - ) + # Create an instance of MatchReptransExplvl + match_instance = match() + + df_result = match_instance.match_repr_transcript_expression_level( + expr_trans=input_path, + dict_repr_trans=dict_repr_test, + gtf_file=gtf_file + ) ref_path = tFun.find_path("test_ref_output.tsv") output_path = tFun.find_output() - with open(ref_path, 'r', encoding="utf-8") as t1,\ - open(output_path, 'r', encoding="utf-8") as t2,\ - open(input_path, 'r', encoding="utf-8") as t3: - fileRef = t1.readlines() - fileOutput = t2.readlines() - fileInput = t3.readlines() + with open( + ref_path, 'r', encoding="utf-8" + ) as test_file_1, open( + output_path, 'r', encoding="utf-8" + ) as test_file_2: + file_ref = test_file_1.readlines() + file_output = test_file_2.readlines() assert ( - sorted(fileRef) == sorted(fileOutput) - ), "the output does't match the expected tsv file" + sorted(file_ref) == sorted(file_output) + ), "the output doesn't match the expected tsv file" assert ( - sorted(fileRef) != sorted(fileInput) - ), "the output does't match the expected tsv file" - - def test_txt_to_dict(self): - """This function tests if txt is convertod to dict""" - path = tFun.find_path("test_dict_repr_trans.txt") - dico = match.txt_to_dict(path) - dict_test = {'ENSMUSG00000079415': 'ENSMUST00000112933', - "ENSMUSG00000024691": "ENSMUST00000025595", - "ENSMUSG00000063683": "ENSMUST00000119960"} - assert dico == dict_test - - def test_transcripts_by_gene_inDf(): - """ - This function test if a dataframe generated from - the intermediate file is converted in another - dataframe without the support level column. - """ - path = tFun.find_path_intermediate_file() - df = repr.import_gtfSelection_to_df(path) - df_gene = match.transcripts_by_gene_inDf(df) - datatype = {'Gene': np.dtype('O'), 'Transcript': np.dtype('O')} - assert tFun.column_number(df_gene) == ( - 2, "number of columns is not equal to 2") - assert tFun.column_d_type(df_gene) == ( - datatype, "at least one column has the wrong datatype") - assert tFun.duplicated_rows(df_gene).empty, \ - "at least one row are duplicated" - assert tFun.na_value(df_gene) == 0, \ - "at least one row contain NA values" - - def test_output_tsv(): - """Test if a tsv file is generated from a df in the right format.""" - dict_repr_test = { - 'ENSMUSG00000079415': 'ENSMUST00000112933', - "ENSMUSG00000024691": "ENSMUST00000025595", - "ENSMUSG00000063683": "ENSMUST00000119960"} - df_dict_repr_trans = match.dict_repr_trans_to_df(dict_repr_test) - - path_tsv = tFun.find_path(r"test_gene_exprL") - df_tsv_exp_lvl = match.tsv_or_csv_to_df(path_tsv) - path_intermediate = tFun.find_path_intermediate_file() - df_intermediate = repr.import_gtfSelection_to_df(path_intermediate) - df_gene_transcript = match.transcripts_by_gene_inDf(df_intermediate) - - df_exp_lvl = match.expr_level_by_gene( - df_tsv_exp_lvl, df_gene_transcript + sorted(file_ref) != sorted( + df_result.to_csv(index=False).splitlines() ) - - df_match = match.match_by_gene(df_dict_repr_trans, df_exp_lvl) - - match.output_tsv(df_match) - - ref_path = tFun.find_path("test_ref_output.tsv") - output_path = tFun.find_output() - - with open(ref_path, 'r', encoding="utf-8") as t1, open(output_path, 'r') as t2: - fileRef = t1.readlines() - fileOutput = t2.readlines() - - assert ( - sorted(fileRef) == sorted(fileOutput) - ), "the output does't match the expected tsv file" - -# test_dict_repr_trans_to_df() -# test_txt_to_dict() -# test_transcripts_by_gene_inDf() -# test_tsv_or_csv_to_df() -# test_expr_level_by_gene() -# test_match_by_gene() -# test_output_tsv() -# test_match_repr_transcript_expression_level() - -# print("test_match is done ! No error was found") + ), "the output doesn't match the expected tsv file"