|
1 | 1 | import pandas as pd
|
2 | 2 | from .messages_for_reporting import *
|
| 3 | +from .posthoc_functions import * |
| 4 | + |
3 | 5 | from statsmodels.stats.anova import anova_lm
|
4 | 6 | from statsmodels.formula.api import ols
|
5 | 7 |
|
6 | 8 | # RULE : args for 'vars' --> [dv, [covar]]
|
7 | 9 |
|
8 |
| -def oneway_ancova(df:pd.DataFrame, ): |
9 |
| - pass |
def oneway_ancova(df:pd.DataFrame, vars: list, lang_set, testname, posthoc, posthoc_method, group_vars = None, group_names = None):
    """Run a one-way ANCOVA through an OLS model, print the report and return it.

    Parameters
    ----------
    df : pd.DataFrame
        Data holding the dependent variable, the covariates and the group column.
    vars : list
        ``[dv, [covar, ...]]`` - dependent variable name followed by the list
        of covariate names.
    lang_set
        Key used to pick the language variant of every reporting message.
    testname
        Title printed before the report items.
    posthoc
        When truthy, the output of ``posthoc_between`` is appended to the report.
    posthoc_method
        Forwarded unchanged to ``posthoc_between``.
    group_vars
        Name of the grouping (factor) column.
    group_names
        Group levels to analyse. ``None`` uses every unique value found in
        ``df[group_vars]``; otherwise ``df`` is restricted to these levels.

    Returns
    -------
    list
        Report items in print order: intro message, descriptive statistics,
        model message, model summary header table, statistic message,
        type-3 ANOVA table (with partial eta squared), two coefficient
        messages, the coefficient table and - if requested - the post-hoc
        title, warning and table.

    Raises
    ------
    ValueError
        If the reference level of a re-fitted model cannot be identified.
    """
    coef_columns = ['index', 'coef', 'std err', 't', 'p', '0.025', '0.975']
    result_for_save = []

    # `is None` instead of `== None`: comparing an array-like with `==`
    # is evaluated element-wise and cannot be used as an `if` condition.
    if group_names is None:
        group_names = df[group_vars].unique()
    else:
        df = df[df[group_vars].isin(group_names)]

    dv = vars[0]
    covars = vars[1]
    describe_vars = [dv] + covars

    formula_for_olsmodel = custom_join_for_ancova(vars = vars, group_vars = group_vars, method = 'oneway_ancova')
    olsmodel = ols(formula_for_olsmodel, data = df).fit()
    model_summary = olsmodel.summary()

    # Per-group descriptive statistics, statistics as rows / groups as columns.
    describe_df = (
        df.groupby(group_vars)[describe_vars]
        .agg(['count', 'mean', 'median', 'std'])
        .rename(columns = {'count' : 'n', 'std' : 'sd'})
        .T.round(2)
    )
    describe_df.columns.name = None

    anova_table = anova_lm(olsmodel, typ=3)
    anova_table.rename(columns = {'PR(>F)' : 'p-value'}, inplace=True)
    # partial eta squared = SS_effect / (SS_effect + SS_residual)
    residual_ss = anova_table['sum_sq'].loc['Residual']
    anova_table['partial_eta_squared'] = anova_table['sum_sq'] / (anova_table['sum_sq'] + residual_ss)
    anova_table = anova_table.round(3)

    # First row of the summary table data is dropped by the [1:] slice
    # before the term names become the index.
    raw_coef_table = pd.DataFrame(model_summary.tables[1].data, columns = coef_columns)[1:].set_index('index')
    covar_coef_table = raw_coef_table.loc[covars]
    rows_to_drop = ['Intercept'] + covars

    # Coefficient labels carry the level as text, so map text back to the
    # original level objects (this also supports non-string levels).
    level_by_text = {str(name): name for name in group_names}

    pair_tables = [raw_coef_table.loc[['Intercept']]]
    seen_pairs = set()

    # Re-fit the model once per candidate reference level to collect every
    # pairwise group coefficient.
    for n in range(len(group_names)):
        formula_for_coef = custom_join_for_ancova(vars = vars, group_vars = group_vars, method = 'oneway_ancova', purpose = 'coef', keys = n)
        model_for_coef = ols(formula_for_coef, data = df).fit()
        working_table = pd.DataFrame(model_for_coef.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')
        working_table = working_table.drop(index = rows_to_drop)

        # Read the level from the trailing "[T.<level>]" part of each label
        # instead of substring-searching group names in the whole label
        # (which mis-matches names such as "a" inside "Treatment").
        contrasted = []
        for label in working_table.index:
            label_text = str(label).strip()
            _, marker, tail = label_text.partition('[T.')
            if not marker:
                level_text = label_text
            elif tail.endswith(']'):
                level_text = tail[:-1]
            else:
                level_text = tail
            contrasted.append(level_by_text.get(level_text, level_text))

        # The reference level is the group without a coefficient row.
        contrasted_set = set(contrasted)
        candidates = [name for name in group_names if name not in contrasted_set]
        if not candidates:
            raise ValueError(f"could not identify the reference level of '{group_vars}' for formula: {formula_for_coef}")
        reference_col = candidates[0]

        # Keep each unordered pair once (first occurrence). Comparing pairs
        # rather than |coef| keeps distinct pairs that share a coefficient.
        # NOTE(review): row labels read "reference - level"; confirm that
        # this matches the sign convention explained in the coefficient
        # interpretation message.
        pair_labels = []
        keep_positions = []
        for position, level in enumerate(contrasted):
            pair_labels.append(f"{reference_col} - {level}")
            pair_key = frozenset((reference_col, level))
            if pair_key not in seen_pairs:
                seen_pairs.add(pair_key)
                keep_positions.append(position)

        working_table.index = pair_labels
        pair_tables.append(working_table.iloc[keep_positions])

    pair_coef_table = pd.concat(pair_tables)
    pair_coef_table['coef'] = pair_coef_table['coef'].astype('float')
    pair_coef_table = pd.concat([pair_coef_table, covar_coef_table]) # covariate rows go last

    reporting_one = oneway_ancova_result_reporting(dv, group_vars, group_names, covars)[lang_set]
    reporting_two = ancova_model_result_reporting[lang_set]
    reporting_three = ancova_statistic_result_reporting[lang_set]
    reporting_four = ancova_coef_result_reporting[lang_set]
    reporting_five = ancova_coef_interpreting_message(covars)[lang_set]

    result_for_save.extend([
        reporting_one,
        describe_df,
        reporting_two,
        model_summary.tables[0],
        reporting_three,
        anova_table,
        reporting_four,
        reporting_five,
        pair_coef_table,
    ])

    if posthoc:
        posthoc_table = posthoc_between(df = df, vars = dv, group_vars = group_vars, group_names = group_names, parametric = True, posthoc_method = posthoc_method)
        reporting_posthoc = 'Posthoc: '
        warning = warning_message_for_ancova_posthoc[lang_set]
        result_for_save.extend([reporting_posthoc, warning, posthoc_table])

    print(testname)
    for item in result_for_save:
        if isinstance(item, str):
            print(item)
            continue
        # `display` is not defined in the visible part of this module; when
        # it is unavailable (NameError) or fails, fall back to plain print.
        try:
            display(item)
        except Exception:
            print(item)

    return result_for_save
| 113 | + |
10 | 114 |
|
11 |
| -def rm_ancova(): |
12 |
| - pass |
def rm_ancova(df:pd.DataFrame, vars: list, group_vars, lang_set, testname, posthoc, posthoc_method, group_names = None):
    """Run a repeated-measures style ANCOVA through an OLS model on melted data.

    The wide frame is melted so that every repeated measure becomes a level
    of a ``time`` column with its score in ``value``; the covariates are
    merged back per subject and ``value ~ C(time) + covariates`` is fitted.

    Parameters
    ----------
    df : pd.DataFrame
        Wide-format data, one row per subject, subject identifier in the index.
    vars : list
        ``[repeated_1, repeated_2, ..., [covar, ...]]`` - repeated-measure
        column names followed by the list of covariate names as last element.
    group_vars
        Accepted for signature compatibility; not used by this function.
    lang_set
        Key used to pick the language variant of every reporting message.
    testname
        Title printed before the report items.
    posthoc
        When truthy, the output of ``posthoc_within`` is appended to the report.
    posthoc_method
        Forwarded unchanged to ``posthoc_within``.
    group_names
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    list
        Report items in print order: intro message, descriptive statistics,
        model message, model summary header table, statistic message,
        type-3 ANOVA table (with partial eta squared), two coefficient
        messages, the coefficient table and - if requested - the post-hoc
        title, warning and table.

    Raises
    ------
    ValueError
        If the reference level of a re-fitted model cannot be identified.
    """
    coef_columns = ['index', 'coef', 'std err', 't', 'p', '0.025', '0.975']
    result_for_save = []

    repeated_vars = vars[:-1]
    covars = vars[-1]
    describe_vars = ['value'] + covars

    wide_df = df.reset_index()
    index_col = df.index.name
    if index_col is None:
        # An unnamed index has no usable name; reset_index() places the
        # former index in the first column, so take the name from there.
        index_col = wide_df.columns[0]

    # Long format: one row per (subject, repeated measure), then attach the
    # subject-level covariates again.
    melted_df = wide_df.melt(id_vars = index_col, value_vars = repeated_vars, var_name = 'time')
    melted_df = melted_df.merge(wide_df[[index_col] + covars], on = index_col, how = 'left').set_index(index_col)
    time_levels = melted_df['time'].unique()

    formula_for_olsmodel = custom_join_for_ancova(vars = vars, method = 'rm_ancova')
    olsmodel = ols(formula_for_olsmodel, data = melted_df).fit()
    model_summary = olsmodel.summary()

    # Per-time descriptive statistics, statistics as rows / levels as columns.
    describe_df = (
        melted_df.groupby('time')[describe_vars]
        .agg(['count', 'mean', 'median', 'std'])
        .rename(columns = {'count' : 'n', 'std' : 'sd'})
        .T.round(2)
    )
    describe_df.columns.name = None

    anova_table = anova_lm(olsmodel, typ=3)
    anova_table.rename(columns = {'PR(>F)' : 'p-value'}, inplace=True)
    # partial eta squared = SS_effect / (SS_effect + SS_residual)
    residual_ss = anova_table['sum_sq'].loc['Residual']
    anova_table['partial_eta_squared'] = anova_table['sum_sq'] / (anova_table['sum_sq'] + residual_ss)
    anova_table = anova_table.round(3)

    # First row of the summary table data is dropped by the [1:] slice
    # before the term names become the index.
    raw_coef_table = pd.DataFrame(model_summary.tables[1].data, columns = coef_columns)[1:].set_index('index')
    covar_coef_table = raw_coef_table.loc[covars]
    rows_to_drop = ['Intercept'] + covars

    level_by_text = {str(name): name for name in time_levels}

    pair_tables = [raw_coef_table.loc[['Intercept']]]
    seen_pairs = set()

    # Re-fit the model once per candidate reference level to collect every
    # pairwise time coefficient.
    for n in range(len(time_levels)):
        formula_for_coef = custom_join_for_ancova(vars = vars, method = 'rm_ancova', purpose = 'coef', keys = n)
        model_for_coef = ols(formula_for_coef, data = melted_df).fit()
        working_table = pd.DataFrame(model_for_coef.summary().tables[1].data, columns = coef_columns)[1:].set_index('index')
        working_table = working_table.drop(index = rows_to_drop)

        # Read the level from the trailing "[T.<level>]" part of each label
        # instead of substring-searching level names in the whole label
        # (which mis-matches e.g. "t1" inside "[T.t10]" or "time" itself).
        contrasted = []
        for label in working_table.index:
            label_text = str(label).strip()
            _, marker, tail = label_text.partition('[T.')
            if not marker:
                level_text = label_text
            elif tail.endswith(']'):
                level_text = tail[:-1]
            else:
                level_text = tail
            contrasted.append(level_by_text.get(level_text, level_text))

        # The reference level is the time level without a coefficient row.
        contrasted_set = set(contrasted)
        candidates = [name for name in time_levels if name not in contrasted_set]
        if not candidates:
            raise ValueError(f"could not identify the reference level of 'time' for formula: {formula_for_coef}")
        reference_col = candidates[0]

        # Keep each unordered pair once (first occurrence). Comparing pairs
        # rather than |coef| keeps distinct pairs that share a coefficient.
        # NOTE(review): row labels read "reference - level"; confirm that
        # this matches the sign convention explained in the coefficient
        # interpretation message.
        pair_labels = []
        keep_positions = []
        for position, level in enumerate(contrasted):
            pair_labels.append(f"{reference_col} - {level}")
            pair_key = frozenset((reference_col, level))
            if pair_key not in seen_pairs:
                seen_pairs.add(pair_key)
                keep_positions.append(position)

        working_table.index = pair_labels
        pair_tables.append(working_table.iloc[keep_positions])

    pair_coef_table = pd.concat(pair_tables)
    pair_coef_table['coef'] = pair_coef_table['coef'].astype('float')
    pair_coef_table = pd.concat([pair_coef_table, covar_coef_table]) # covariate rows go last

    reporting_one = rm_ancova_result_reporting(repeated_vars, covars)[lang_set]
    reporting_two = ancova_model_result_reporting[lang_set]
    reporting_three = ancova_statistic_result_reporting[lang_set]
    reporting_four = ancova_coef_result_reporting[lang_set]
    reporting_five = ancova_coef_interpreting_message(covars)[lang_set]

    result_for_save.extend([
        reporting_one,
        describe_df,
        reporting_two,
        model_summary.tables[0],
        reporting_three,
        anova_table,
        reporting_four,
        reporting_five,
        pair_coef_table,
    ])

    if posthoc:
        posthoc_table = posthoc_within(df = df, vars = repeated_vars, parametric=True, posthoc_method=posthoc_method)
        reporting_posthoc = 'Posthoc: '
        warning = warning_message_for_ancova_posthoc[lang_set]
        result_for_save.extend([reporting_posthoc, warning, posthoc_table])

    print(testname)
    for item in result_for_save:
        if isinstance(item, str):
            print(item)
            continue
        # `display` is not defined in the visible part of this module; when
        # it is unavailable (NameError) or fails, fall back to plain print.
        try:
            display(item)
        except Exception:
            print(item)

    return result_for_save
| 215 | + |
| 216 | + |
| 217 | + |
def custom_join_for_ancova(vars = None, group_vars = None, method = None, purpose = 'normal', keys = None):
    """Build the model formula string used by the ANCOVA functions.

    Parameters
    ----------
    vars : list
        ``[dv, [covar, ...]]`` for ``'oneway_ancova'`` and ``'nway_ancova'``;
        for ``'rm_ancova'`` only the last element (the covariate list) is
        read, the dependent variable is the fixed column name ``value``.
    group_vars : str or list
        Factor column name (``'oneway_ancova'``) or list of factor column
        names (``'nway_ancova'``). Ignored for ``'rm_ancova'``, whose factor
        is the fixed column name ``time``.
    method : str
        ``'oneway_ancova'``, ``'rm_ancova'``, ``'nway_ancova'`` or
        ``'nway_rm_ancova'`` (not implemented yet).
    purpose : str
        ``'normal'`` for the plain model, ``'coef'`` to wrap the factor in
        ``Treatment(reference=keys)``. Only used by the one-way and
        repeated-measures methods.
    keys
        Value inserted as the ``reference=`` argument when ``purpose='coef'``.

    Returns
    -------
    str or None
        The formula string; ``None`` for ``'nway_rm_ancova'`` and for any
        unrecognised ``method``.

    Raises
    ------
    ValueError
        If ``purpose`` is neither ``'normal'`` nor ``'coef'`` for a method
        that uses it.
    """
    from itertools import combinations

    def _as_name_list(names):
        # A single name given as a plain string must not be iterated
        # character by character; a missing value means "no terms".
        if names is None:
            return []
        if isinstance(names, str):
            return [names]
        return list(names)

    if method in ('oneway_ancova', 'rm_ancova'):
        if method == 'oneway_ancova':
            dv = vars[0]
            covars = _as_name_list(vars[1])
            iv = group_vars
        else:
            # Column names produced by the melt step of the repeated-measures model.
            dv = 'value'
            covars = _as_name_list(vars[-1])
            iv = 'time'

        if purpose == 'normal':
            factor_term = f"C({iv})"
        elif purpose == 'coef':
            factor_term = f"C({iv}, Treatment(reference={keys}))"
        else:
            raise ValueError(f"unknown purpose {purpose!r}; expected 'normal' or 'coef'")

        # Joining a term list avoids a dangling " + " when there is no covariate.
        return f"{dv} ~ {' + '.join([factor_term] + covars)}"

    if method == 'nway_ancova':
        # vars = ['dv', [covar1, covar2...] ]
        # group_vars = [group1, group2..]  (a list)
        dv = vars[0]
        covars = _as_name_list(vars[1])
        factor_terms = [f"C({name})" for name in _as_name_list(group_vars)]

        # Main effects first, then every two-way product in (i < j) order.
        interaction_terms = [f"{left} * {right}" for left, right in combinations(factor_terms, 2)]

        return f"{dv} ~ {' + '.join(factor_terms + interaction_terms + covars)}"

    # 'nway_rm_ancova' is not implemented yet; it and any unrecognised
    # method yield None, as before.
    return None
15 | 280 |
|
16 | 281 | # -- just for saving.. future development #
|
17 | 282 | # def nway_ancova():
|
|
0 commit comments