Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import pandas as pd
import numpy as np
import os
def find_path(filename:str)->str:
    """Build the path of a file inside the ``inputs`` folder next to this module.

    The path components are combined with ``os.path.join`` so that the
    separator matches the running platform (a hard-coded backslash only
    works on Windows).

    Args:
        filename: name of the file; it is converted with ``str()`` before use.

    Returns:
        str path ``<directory of this module>/inputs/<filename>``.
        The file is not checked for existence.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "inputs", str(filename))
def find_output():
    """Build the path of the output file ``ReprTrans_ExpressionLevel.tsv``.

    Args:
        none

    Returns:
        str path of ``ReprTrans_ExpressionLevel.tsv`` located in the
        directory of this module. The file is not checked for existence.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "ReprTrans_ExpressionLevel.tsv")
def find_path_intermediateFile()->str:
    """Build the path to test_gencode.vM31.annotation_intermediat_file.txt.

    The path components are combined with ``os.path.join`` so that the
    separator matches the running platform (a hard-coded backslash only
    works on Windows).

    Args:
        none

    Returns:
        str path of ``inputs/test_gencode.vM31.annotation_intermediat_file.txt``
        relative to the directory of this module. The file is not checked
        for existence.

    Raises:
        None
    """
    module_dir = os.path.dirname(__file__)
    # The file name (including its spelling) is kept exactly as in the original.
    return os.path.join(
        module_dir,
        "inputs",
        "test_gencode.vM31.annotation_intermediat_file.txt",
    )
def column_number(df :pd.DataFrame)-> int:
    """Count the columns of a dataframe.

    Args:
        df: Pandas dataframe

    Returns:
        int number of columns of ``df``

    Raises:
        None
    """
    # shape is (n_rows, n_columns); only the second entry is needed.
    _, n_columns = df.shape
    return n_columns
def column_dType(df : pd.DataFrame) -> dict[str,np.dtype]:
    """Map every column label of a dataframe to the dtype of that column.

    Args:
        df: Pandas dataframe

    Returns:
        dict{column: dtype} built from ``df.dtypes``

    Raises:
        None
    """
    # df.dtypes is a Series indexed by column label; items() yields
    # (label, dtype) pairs in column order.
    return dict(df.dtypes.items())
def duplicated_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Select the rows of a dataframe that repeat an earlier row.

    Args:
        df: Pandas dataframe

    Returns:
        Pandas dataframe holding only the rows flagged by
        ``df.duplicated(keep="first")``; the first occurrence of each
        row is not included. Empty when there is no duplicate.

    Raises:
        None
    """
    is_repeat = df.duplicated(keep="first")
    return df.loc[is_repeat]
def duplicated_index(df: pd.DataFrame) -> pd.DataFrame:
    """Select the rows of a dataframe whose index label repeats an earlier one.

    Args:
        df: Pandas dataframe

    Returns:
        Pandas dataframe holding only the rows flagged by
        ``df.index.duplicated(keep="first")``; the first occurrence of
        each label is not included. Empty when every label is unique.

    Raises:
        None
    """
    label_seen_before = df.index.duplicated(keep="first")
    return df.loc[label_seen_before]
def NA_value(df: pd.DataFrame) -> int:
    """Count the NA values of a dataframe.

    Args:
        df: Pandas dataframe

    Returns:
        int total number of cells for which ``df.isna()`` is True

    Raises:
        None
    """
    missing_per_column = df.isna().sum()
    # The pandas reduction yields a NumPy scalar; convert it so the
    # function really returns the built-in int promised by its signature.
    return int(missing_per_column.sum())