Skip to content

Commit 387c177

Browse files
committed
finally done. All functions were separated. Ready to release 1.8.0.0
1 parent baf7827 commit 387c177

File tree

11 files changed

+1663
-945
lines changed

11 files changed

+1663
-945
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Currently, <b><u>KOREAN</u></b> and <b><u>ENGLISH</u></b> are supported.
1717
[English Documentation](https://cslee145.notion.site/60cbfcbc90614fe990e02ab8340630cc?v=4991650ae5ce4427a215d1043802f5c0&pvs=4)
1818

1919

20-
## Current Ver 1.7.2 :
20+
## Current Ver 1.8.0.0 :
2121
The source code is available in the [GitHub repository](https://github.com/ckdckd145/statmanager-kr)
2222
소스코드는 [깃헙 레포지토리](https://github.com/ckdckd145/statmanager-kr)에서 확인할 수 있습니다.
2323

statmanager/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ Currently, <b><u>KOREAN</u></b> and <b><u>ENGLISH</u></b> are supported.
1717
[English Documentation](https://cslee145.notion.site/60cbfcbc90614fe990e02ab8340630cc?v=4991650ae5ce4427a215d1043802f5c0&pvs=4)
1818

1919

20-
## Current Ver 1.7.2 :
20+
## Current Ver 1.8.0.0 :
2121
The source code is available in the [GitHub repository](https://github.com/ckdckd145/statmanager-kr)
2222
소스코드는 [깃헙 레포지토리](https://github.com/ckdckd145/statmanager-kr)에서 확인할 수 있습니다.
2323

2424
For updates, please see [the notice in the documentation](https://www.notion.so/cslee145/NOTICEs-4bb2177eeb0f412a81b8dbd3215058e6) or the [Github release](https://github.com/ckdckd145/statmanager-kr/releases).
2525
업데이트 내역은 정식 문서 내 [공지사항](https://www.notion.so/cslee145/NOTICEs-4bb2177eeb0f412a81b8dbd3215058e6) 혹은 [Github release](https://github.com/ckdckd145/statmanager-kr/releases)에서 확인하시기 바랍니다.
26-
26+
****
2727

2828
#
2929
* [Quick Start with sample jupyter notebook file](https://github.com/ckdckd145/statmanager-kr/blob/main/test.ipynb)

statmanager/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,5 @@
1818
from .posthoc_functions import *
1919
from .effectsize_functions import *
2020

21-
__version__ = '1.7.2.6'
21+
__version__ = '1.8.0.0'
2222
__all__ = ['Stat_Manager']

statmanager/ancova_functions.py

Lines changed: 269 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,282 @@
11
import pandas as pd
22
from .messages_for_reporting import *
3+
from .posthoc_functions import *
4+
35
from statsmodels.stats.anova import anova_lm
46
from statsmodels.formula.api import ols
57

68
# RULE : args for 'vars' --> [dv, [covar]]
79

8-
def oneway_ancova(df:pd.DataFrame, ):
9-
pass
10+
def oneway_ancova(df:pd.DataFrame, vars: list, lang_set, testname, posthoc, posthoc_method, group_vars = None, group_names = None):
    """Run a one-way ANCOVA and collect every report piece in display order.

    Parameters
    ----------
    df : pd.DataFrame
        Data holding the dependent variable, the covariates and the group column.
    vars : list
        ``[dv, [covar1, covar2, ...]]``.
    lang_set
        Key used to pick the language variant of the reporting messages.
    testname
        Printed as a title before the results.
    posthoc
        When truthy, the table returned by ``posthoc_between`` is appended.
    posthoc_method
        Forwarded to ``posthoc_between``.
    group_vars
        Name of the grouping column (the factor).
    group_names
        Levels to keep. ``None`` means every unique value of ``df[group_vars]``;
        otherwise ``df`` is filtered down to the listed levels.

    Returns
    -------
    list
        Messages (str) and tables in the order they are printed: descriptive
        statistics, model summary, ANCOVA table, coefficient table and, when
        requested, the post-hoc section.
    """
    result_for_save = []

    # `is None` instead of `== None`: an array passed as group_names would
    # otherwise be compared element-wise.
    if group_names is None:
        group_names = df[group_vars].unique()
    else:
        df = df[df[group_vars].isin(group_names)]

    dv = vars[0]
    covars = vars[1]
    describe_vars = [dv] + covars
    coef_columns = ['index', 'coef', 'std err', 't', 'p', '0.025', '0.975']

    formula_for_olsmodel = custom_join_for_ancova(vars = vars, group_vars = group_vars, method = 'oneway_ancova')
    olsmodel = ols(formula_for_olsmodel, data = df).fit()

    describe_df = df.groupby(group_vars)[describe_vars].agg(['count', 'mean', 'median', 'std']).rename(
        columns = {
            'count' : 'n',
            'std' : 'sd'
        }).T.round(2)
    describe_df.columns.name = None

    anova_table = anova_lm(olsmodel, typ=3)
    anova_table.rename(columns = {'PR(>F)' : 'p-value'}, inplace=True)
    anova_table['partial_eta_squared'] = anova_table['sum_sq'] / (anova_table['sum_sq'] + anova_table['sum_sq'].loc['Residual'])
    anova_table = anova_table.round(3)

    # First row of the summary table data is the header row, hence the [1:].
    raw_coef_table = pd.DataFrame(olsmodel.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')

    pair_coef_table = raw_coef_table.loc[['Intercept']]
    covar_coef_table = raw_coef_table.loc[covars]
    drop_col_for_coef_table = ['Intercept'] + covars

    # Refit once per reference level so that every pairwise contrast shows up.
    for n in range(len(group_names)):
        formula_for_coef = custom_join_for_ancova(vars = vars, group_vars = group_vars, method = 'oneway_ancova', purpose = 'coef', keys = n)
        model_for_coef = ols(formula_for_coef, data = df).fit()
        working_table_for_coef = pd.DataFrame(model_for_coef.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')
        working_table_for_coef = working_table_for_coef.drop(index = drop_col_for_coef_table)

        # Recover the level from the "[T.<level>]" suffix of each term name.
        # A plain substring test would mislabel levels that occur inside the
        # term text or inside another level, and fails for non-string levels.
        compared_levels = []
        for term in working_table_for_coef.index.to_list():
            level = term
            for candidate in group_names:
                if term.strip().endswith(f"[T.{candidate}]"):
                    level = candidate
                    break
            compared_levels.append(level)

        # The only level without its own row is the reference level.
        reference_col = list(set(group_names) - set(compared_levels))[0]

        working_table_for_coef.index = [f"{reference_col} - {level}" for level in compared_levels]
        pair_coef_table = pd.concat([pair_coef_table, working_table_for_coef])

    pair_coef_table['coef'] = pair_coef_table['coef'].astype('float')
    # Before merging, drop rows whose absolute coefficient repeats an earlier row.
    pair_coef_table = pair_coef_table.loc[~pair_coef_table['coef'].abs().duplicated(keep='first')]
    pair_coef_table = pd.concat([pair_coef_table, covar_coef_table])

    reporting_one = oneway_ancova_result_reporting(dv, group_vars, group_names, covars)[lang_set]
    reporting_two = ancova_model_result_reporting[lang_set]
    reporting_three = ancova_statistic_result_reporting[lang_set]
    reporting_four = ancova_coef_result_reporting[lang_set]
    reporting_five = ancova_coef_interpreting_message(covars)[lang_set]

    result_for_save.extend([
        reporting_one,
        describe_df,
        reporting_two,
        olsmodel.summary().tables[0],
        reporting_three,
        anova_table,
        reporting_four,
        reporting_five,
        pair_coef_table,
    ])

    if posthoc:
        posthoc_table = posthoc_between(df = df, vars = dv, group_vars = group_vars, group_names = group_names, parametric = True, posthoc_method = posthoc_method)
        reporting_posthoc = 'Posthoc: '
        warning = warning_message_for_ancova_posthoc[lang_set]
        result_for_save.extend([reporting_posthoc, warning, posthoc_table])

    print(testname)
    for item in result_for_save:
        if isinstance(item, str):
            print(item)
            continue
        # NOTE(review): `display` is not imported in the visible part of this
        # file; presumably the IPython/Jupyter builtin. Fall back to print when
        # it is unavailable or fails, without swallowing KeyboardInterrupt.
        try:
            display(item)
        except Exception:
            print(item)

    return result_for_save
113+
10114

11-
def rm_ancova():
12-
pass
115+
def rm_ancova(df:pd.DataFrame, vars: list, group_vars, lang_set, testname, posthoc, posthoc_method, group_names = None):
    """Run a repeated-measures ANCOVA on wide-format data and collect the report.

    The repeated columns are melted into a long frame with a ``time`` factor and
    a ``value`` column, the covariates are merged back by index, and an OLS
    model is fitted on the long frame.

    Parameters
    ----------
    df : pd.DataFrame
        Wide-format data. NOTE(review): the index name is used as the id column
        for melting/merging, so a named index appears to be required - confirm.
    vars : list
        ``[repeated1, repeated2, ..., [covar1, covar2, ...]]`` - the last item
        is the covariate list, everything before it is a repeated measure.
    group_vars
        Not used by this function.
    lang_set
        Key used to pick the language variant of the reporting messages.
    testname
        Printed as a title before the results.
    posthoc
        When truthy, the table returned by ``posthoc_within`` is appended.
    posthoc_method
        Forwarded to ``posthoc_within``.
    group_names
        Not used by this function.

    Returns
    -------
    list
        Messages (str) and tables in the order they are printed.
    """
    result_for_save = []

    index_col = df.index.name
    repeated_vars = vars[:-1]
    covars = vars[-1]
    vars_for_melting = [index_col] + covars
    describe_vars = ['value'] + covars
    coef_columns = ['index', 'coef', 'std err', 't', 'p', '0.025', '0.975']

    # Wide -> long, then bring the covariates back in by the id column.
    melted_df = df.reset_index().melt(id_vars = index_col, value_vars = repeated_vars, var_name = 'time')
    melted_df = melted_df.merge(df.reset_index()[vars_for_melting], on = index_col, how = 'left').set_index(index_col)

    formula_for_olsmodel = custom_join_for_ancova(vars = vars, method = 'rm_ancova')
    olsmodel = ols(formula_for_olsmodel, data = melted_df).fit()

    describe_df = melted_df.groupby('time')[describe_vars].agg(['count', 'mean', 'median', 'std']).rename(
        columns = {
            'count' : 'n',
            'std' : 'sd'
        }).T.round(2)
    describe_df.columns.name = None

    anova_table = anova_lm(olsmodel, typ=3)
    anova_table.rename(columns = {'PR(>F)' : 'p-value'}, inplace=True)
    anova_table['partial_eta_squared'] = anova_table['sum_sq'] / (anova_table['sum_sq'] + anova_table['sum_sq'].loc['Residual'])
    anova_table = anova_table.round(3)

    # First row of the summary table data is the header row, hence the [1:].
    raw_coef_table = pd.DataFrame(olsmodel.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')

    pair_coef_table = raw_coef_table.loc[['Intercept']]
    covar_coef_table = raw_coef_table.loc[covars]
    drop_col_for_coef_table = ['Intercept'] + covars

    # Computed once; it does not change while the contrasts are collected.
    time_levels = melted_df['time'].unique()

    # Refit once per reference level so that every pairwise contrast shows up.
    for n in range(len(time_levels)):
        formula_for_coef = custom_join_for_ancova(vars = vars, method = 'rm_ancova', purpose = 'coef', keys = n)
        model_for_coef = ols(formula_for_coef, data = melted_df).fit()
        working_table_for_coef = pd.DataFrame(model_for_coef.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')
        working_table_for_coef = working_table_for_coef.drop(index = drop_col_for_coef_table)

        # Recover the level from the "[T.<level>]" suffix of each term name.
        # A plain substring test would confuse e.g. "t1" with "t10", or match a
        # level name that happens to occur inside the term text.
        compared_levels = []
        for term in working_table_for_coef.index.to_list():
            level = term
            for candidate in time_levels:
                if term.strip().endswith(f"[T.{candidate}]"):
                    level = candidate
                    break
            compared_levels.append(level)

        # The only level without its own row is the reference level.
        reference_col = list(set(time_levels) - set(compared_levels))[0]

        working_table_for_coef.index = [f"{reference_col} - {level}" for level in compared_levels]
        pair_coef_table = pd.concat([pair_coef_table, working_table_for_coef])

    pair_coef_table['coef'] = pair_coef_table['coef'].astype('float')
    # Before merging, drop rows whose absolute coefficient repeats an earlier row.
    pair_coef_table = pair_coef_table.loc[~pair_coef_table['coef'].abs().duplicated(keep='first')]
    pair_coef_table = pd.concat([pair_coef_table, covar_coef_table])

    reporting_one = rm_ancova_result_reporting(repeated_vars, covars)[lang_set]
    reporting_two = ancova_model_result_reporting[lang_set]
    reporting_three = ancova_statistic_result_reporting[lang_set]
    reporting_four = ancova_coef_result_reporting[lang_set]
    reporting_five = ancova_coef_interpreting_message(covars)[lang_set]

    result_for_save.extend([
        reporting_one,
        describe_df,
        reporting_two,
        olsmodel.summary().tables[0],
        reporting_three,
        anova_table,
        reporting_four,
        reporting_five,
        pair_coef_table,
    ])

    if posthoc:
        posthoc_table = posthoc_within(df = df, vars = repeated_vars, parametric=True, posthoc_method=posthoc_method)
        reporting_posthoc = 'Posthoc: '
        warning = warning_message_for_ancova_posthoc[lang_set]
        result_for_save.extend([reporting_posthoc, warning, posthoc_table])

    print(testname)
    for item in result_for_save:
        if isinstance(item, str):
            print(item)
            continue
        # NOTE(review): `display` is not imported in the visible part of this
        # file; presumably the IPython/Jupyter builtin. Fall back to print when
        # it is unavailable or fails, without swallowing KeyboardInterrupt.
        try:
            display(item)
        except Exception:
            print(item)

    return result_for_save
215+
216+
217+
218+
def custom_join_for_ancova(vars = None, group_vars = None, method = None, purpose = 'normal', keys = None):
    """Build the model formula string for the ANCOVA variants.

    Parameters
    ----------
    vars : list
        ``'oneway_ancova'`` / ``'nway_ancova'``: ``[dv, [covar1, ...]]``.
        ``'rm_ancova'``: ``[repeated1, ..., [covar1, ...]]``; only the trailing
        covariate list is read, the dependent variable is always ``value`` and
        the factor is always ``time``.
    group_vars : str or list
        Factor column name (``'oneway_ancova'``) or list of factor column names
        (``'nway_ancova'``). Ignored for ``'rm_ancova'``.
    method : str
        One of ``'oneway_ancova'``, ``'rm_ancova'``, ``'nway_ancova'``,
        ``'nway_rm_ancova'``.
    purpose : str
        ``'normal'`` for the plain factor term, ``'coef'`` for a treatment
        contrast whose reference is ``keys``. Only consulted for the one-way
        and repeated-measures methods.
    keys
        Reference passed to ``Treatment(reference=...)`` when
        ``purpose == 'coef'``.

    Returns
    -------
    str or None
        The formula; ``None`` for ``'nway_rm_ancova'`` (not implemented yet)
        and for any unrecognised ``method``.

    Raises
    ------
    ValueError
        If ``purpose`` is neither ``'normal'`` nor ``'coef'`` for the one-way
        or repeated-measures methods.
    """
    if method in ('oneway_ancova', 'rm_ancova'):
        if method == 'oneway_ancova':
            dv, covars, iv = vars[0], vars[1], group_vars
        else:
            dv, covars, iv = 'value', vars[-1], 'time'

        if purpose == 'normal':
            factor_term = f"C({iv})"
        elif purpose == 'coef':
            factor_term = f"C({iv}, Treatment(reference={keys}))"
        else:
            # Previously this fell through to an unbound local variable.
            raise ValueError(f"purpose must be 'normal' or 'coef', got {purpose!r}")

        # Joining all terms at once avoids a dangling " + " when there are no covariates.
        return f"{dv} ~ " + ' + '.join([factor_term, *covars])

    if method == 'nway_ancova':
        # vars = ['dv', [covar1, covar2...] ]
        # group_vars = [group1, group2..]
        dv = vars[0]
        covars = vars[1]
        iv = group_vars  # a list

        main_effects = [f"C({var})" for var in iv]
        # Every unordered pair of factors, in the order the factors were given.
        interactions = [
            f"{first} * {second}"
            for position, first in enumerate(main_effects)
            for second in main_effects[position + 1:]
        ]

        return f"{dv} ~ " + ' + '.join([*main_effects, *interactions, *covars])

    # 'nway_rm_ancova' is reserved for future development; it and any unknown
    # method yield None.
    return None
15280

16281
# -- just for saving.. future development #
17282
# def nway_ancova():

0 commit comments

Comments
 (0)