回答編集履歴

5

全面改訂(何度もすみません)

2019/09/15 03:04

投稿

nomuken
nomuken

スコア1627

test CHANGED
@@ -1,150 +1,16 @@
1
- 実際に演算ているのは
1
+ 内容を全面改訂ました。
2
-
3
- https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
4
-
5
- だと思うのでこれ読み解かないといけないと思います。
6
2
 
7
3
 
8
4
 
9
- ---
5
+ `print(pairwise_tukeyhsd(data_arr,ind_arr))`
6
+
7
+ を実行するとpairwise_tukeyhsd(data_arr,ind_arr)はTukeyHSDResultsインスタンスを返してきますがメソッド`__str__()`で文字列に変換されてしまいます。
10
8
 
11
9
 
12
10
 
13
- コピペ&可変で算出に関連るパラメタをデバッグ出力る版
11
+ 一旦、受けて、`vars()`で表示ればよいで
14
12
 
15
13
  ```Python
16
-
17
- from statsmodels.stats.multicomp import pairwise_tukeyhsd
18
-
19
- import numpy as np
20
-
21
- from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
22
-
23
- tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue, varcorrection_pairs_unbalanced, get_tukeyQcrit2)
24
-
25
-
26
-
27
- import copy
28
-
29
- import math
30
-
31
-
32
-
33
- import numpy as np
34
-
35
- from numpy.testing import assert_almost_equal, assert_equal
36
-
37
- from scipy import stats, interpolate
38
-
39
- from statsmodels.compat.python import lzip, lrange
40
-
41
- from statsmodels.iolib.table import SimpleTable
42
-
43
- #temporary circular import
44
-
45
- from statsmodels.stats.multitest import multipletests, _ecdf as ecdf, fdrcorrection as fdrcorrection0, fdrcorrection_twostage
46
-
47
- from statsmodels.graphics import utils
48
-
49
- from statsmodels.tools.sm_exceptions import ValueWarning
50
-
51
-
52
-
53
- class MultiComparison2(MultiComparison):
54
-
55
- def __init__(self, data, groups, group_order=None):
56
-
57
- super().__init__(data, groups, group_order)
58
-
59
-
60
-
61
- def tukeyhsd2(self, alpha=0.05):
62
-
63
-
64
-
65
- self.groupstats = GroupsStats(
66
-
67
- np.column_stack([self.data, self.groupintlab]),
68
-
69
- useranks=False)
70
-
71
-
72
-
73
- gmeans = self.groupstats.groupmean
74
-
75
- gnobs = self.groupstats.groupnobs
76
-
77
- # var_ = self.groupstats.groupvarwithin()
78
-
79
- # #possibly an error in varcorrection in this case
80
-
81
- var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
82
-
83
- # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
84
-
85
- # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
86
-
87
- res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
88
-
89
-
90
-
91
- resarr = np.array(lzip(self.groupsunique[res[0][0]],
92
-
93
- self.groupsunique[res[0][1]],
94
-
95
- np.round(res[2], 4),
96
-
97
- np.round(res[8], 4),
98
-
99
- np.round(res[4][:, 0], 4),
100
-
101
- np.round(res[4][:, 1], 4),
102
-
103
- res[1]),
104
-
105
- dtype=[('group1', object),
106
-
107
- ('group2', object),
108
-
109
- ('meandiff', float),
110
-
111
- ('p-adj', float),
112
-
113
- ('lower', float),
114
-
115
- ('upper', float),
116
-
117
- ('reject', np.bool8)])
118
-
119
- results_table = SimpleTable(resarr, headers=resarr.dtype.names)
120
-
121
- results_table.title = 'Multiple Comparison of Means - Tukey HSD, ' + \
122
-
123
- 'FWER=%4.2f' % alpha
124
-
125
- print(res)
126
-
127
- print("pvals is ", res[8])
128
-
129
- print("reject is ", res[1])
130
-
131
- print("std_pairs is ", res[3])
132
-
133
-
134
-
135
- st_range = np.abs(res[2]) / res[3]
136
-
137
- print("st_range is ", st_range)
138
-
139
- print("q_crit is ", res[5])
140
-
141
- print("st_range > q_crit is ", st_range > res[5])
142
-
143
- return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
144
-
145
- res[3], res[4], res[6], res[7], var_, res[8])
146
-
147
-
148
14
 
149
15
  def tukey_hsd( lst, ind, n ):
150
16
 
@@ -152,7 +18,9 @@
152
18
 
153
19
  ind_arr = np.repeat(ind, n)
154
20
 
155
- print(MultiComparison2(data_arr, ind_arr).tukeyhsd2(alpha=0.05))
21
+ res = pairwise_tukeyhsd(data_arr, ind_arr)
22
+
23
+ print(vars(res))
156
24
 
157
25
 
158
26
 
@@ -176,9 +44,9 @@
176
44
 
177
45
  ```result
178
46
 
179
- ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
47
+ {'_multicomp': <statsmodels.sandbox.stats.multicomp.MultiComparison object at 0x0000015F51131C88>, '_results_table': <class 'statsmodels.iolib.table.SimpleTable'>, 'q_crit': 4.046412438282385, 'reject': array([False, True, True, False, False, False]), 'meandiffs': array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), 'std_pairs': array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
180
48
 
181
- 1.31339255]), array([[-10.51452797, 0.11452797],
49
+ 1.31339255]), 'confint': array([[-10.51452797, 0.11452797],
182
50
 
183
51
  [-10.91452797, -0.28547203],
184
52
 
@@ -188,56 +56,16 @@
188
56
 
189
57
  [ -8.51452797, 2.11452797],
190
58
 
191
- [ -8.11452797, 2.51452797]]), 4.046412438282385, 16, array([False, True, True, False, False, False]), array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
59
+ [ -8.11452797, 2.51452797]]), 'df_total': 16, 'reject2': array([False, True, True, False, False, False]), 'variance': 8.625000000000002, 'pvalues': array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
192
60
 
193
- 0.45735327]))
61
+ 0.45735327]), 'data': array([15, 9, 18, 14, 18, 13, 8, 8, 12, 7, 10, 6, 11, 7, 12, 10, 7,
194
62
 
195
- pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]
63
+ 3, 5, 7]), 'groups': array(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C',
196
64
 
197
- reject is [False True True False False False]
198
-
199
- std_pairs is [1.31339255 1.31339255 1.31339255 1.31339255 1.31339255 1.31339255]
200
-
201
- st_range is [3.95921234 4.26376713 6.3956507 0.3045548 2.43643836 2.13188357]
65
+ 'C', 'C', 'D', 'D', 'D', 'D', 'D'], dtype='<U1'), 'groupsunique': array(['A', 'B', 'C', 'D'], dtype='<U1')}
202
-
203
- q_crit is 4.046412438282385
204
-
205
- st_range > q_crit is [False True True False False False]
206
-
207
- Multiple Comparison of Means - Tukey HSD, FWER=0.05
208
-
209
- =====================================================
210
-
211
- group1 group2 meandiff p-adj lower upper reject
212
-
213
- -----------------------------------------------------
214
-
215
- A B -5.2 0.0563 -10.5145 0.1145 False
216
-
217
- A C -5.6 0.0371 -10.9145 -0.2855 True
218
-
219
- A D -8.4 0.0018 -13.7145 -3.0855 True
220
-
221
- B C -0.4 0.9 -5.7145 4.9145 False
222
-
223
- B D -3.2 0.345 -8.5145 2.1145 False
224
-
225
- C D -2.8 0.4574 -8.1145 2.5145 False
226
-
227
- -----------------------------------------------------
228
66
 
229
67
  ```
230
68
 
69
+ `'pvalues': array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
231
70
 
232
-
233
- > Attributesに(pvaluesadjusted p-values from the HSD test)と書かれているので、
234
-
235
- > そのTukeyHSDResultsインスタンスのAttributesの中身を見る方法があればこの問題は解決すると思うのですが。
236
-
237
-
238
-
239
- TukeyHSDResults()の引数`pvalues`に渡しているのが`res[8]`でその値が
240
-
241
- `pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]`
242
-
243
- になりますが、期待した出力になっているでしょうか?
71
+ 0.45735327])`が所望のデータだと思います

4

いろいろ誤り修正

2019/09/15 03:04

投稿

nomuken
nomuken

スコア1627

test CHANGED
@@ -8,8 +8,6 @@
8
8
 
9
9
  ---
10
10
 
11
- 求め方じゃなくて値を確認したいんですかね。
12
-
13
11
 
14
12
 
15
13
  コピペ&可変で算出に関連するパラメタをデバッグ出力する版
@@ -22,7 +20,7 @@
22
20
 
23
21
  from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
24
22
 
25
- tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue)
23
+ tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue, varcorrection_pairs_unbalanced, get_tukeyQcrit2)
26
24
 
27
25
 
28
26
 
@@ -126,7 +124,15 @@
126
124
 
127
125
  print(res)
128
126
 
127
+ print("pvals is ", res[8])
128
+
129
+ print("reject is ", res[1])
130
+
131
+ print("std_pairs is ", res[3])
132
+
133
+
134
+
129
- st_range = np.abs(res[2])
135
+ st_range = np.abs(res[2]) / res[3]
130
136
 
131
137
  print("st_range is ", st_range)
132
138
 
@@ -134,16 +140,12 @@
134
140
 
135
141
  print("st_range > q_crit is ", st_range > res[5])
136
142
 
137
- print("reject is ", res[1])
138
-
139
143
  return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
140
144
 
141
145
  res[3], res[4], res[6], res[7], var_, res[8])
142
146
 
143
147
 
144
148
 
145
-
146
-
147
149
  def tukey_hsd( lst, ind, n ):
148
150
 
149
151
  data_arr = np.hstack( lst )
@@ -170,8 +172,6 @@
170
172
 
171
173
  ```
172
174
 
173
- meandiff列の絶対値がq_critを上回るとreject列がTrueになるはずだけどA-BがなぜFalseになるか不明。
174
-
175
175
 
176
176
 
177
177
  ```result
@@ -192,13 +192,17 @@
192
192
 
193
193
  0.45735327]))
194
194
 
195
+ pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]
196
+
197
+ reject is [False True True False False False]
198
+
199
+ std_pairs is [1.31339255 1.31339255 1.31339255 1.31339255 1.31339255 1.31339255]
200
+
195
- st_range is [5.2 5.6 8.4 0.4 3.2 2.8]
201
+ st_range is [3.95921234 4.26376713 6.3956507 0.3045548 2.43643836 2.13188357]
196
202
 
197
203
  q_crit is 4.046412438282385
198
204
 
199
- st_range > q_crit is [ True True True False False False]
205
+ st_range > q_crit is [False True True False False False]
200
-
201
- reject is [False True True False False False]
202
206
 
203
207
  Multiple Comparison of Means - Tukey HSD, FWER=0.05
204
208
 
@@ -223,3 +227,17 @@
223
227
  -----------------------------------------------------
224
228
 
225
229
  ```
230
+
231
+
232
+
233
+ > Attributesに(pvaluesadjusted p-values from the HSD test)と書かれているので、
234
+
235
+ > そのTukeyHSDResultsインスタンスのAttributesの中身を見る方法があればこの問題は解決すると思うのですが。
236
+
237
+
238
+
239
+ TukeyHSDResults()の引数`pvalues`に渡しているのが`res[8]`でその値が
240
+
241
+ `pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]`
242
+
243
+ になりますが、期待した出力になっているでしょうか?

3

誤記修正

2019/09/15 02:36

投稿

nomuken
nomuken

スコア1627

test CHANGED
@@ -170,7 +170,7 @@
170
170
 
171
171
  ```
172
172
 
173
- meandiff列の絶対値がq_critを上回るとFalseになるはずだけどA-BがなぜFalseになるか不明。
173
+ meandiff列の絶対値がq_critを上回るとreject列がTrueになるはずだけどA-BがなぜFalseになるか不明。
174
174
 
175
175
 
176
176
 

2

いろいろ更新

2019/09/14 11:32

投稿

nomuken
nomuken

スコア1627

test CHANGED
@@ -3,3 +3,223 @@
3
3
  https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
4
4
 
5
5
  だと思うのでこれ読み解かないといけないと思います。
6
+
7
+
8
+
9
+ ---
10
+
11
+ 求め方じゃなくて値を確認したいんですかね。
12
+
13
+
14
+
15
+ コピペ&可変で算出に関連するパラメタをデバッグ出力する版
16
+
17
+ ```Python
18
+
19
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
20
+
21
+ import numpy as np
22
+
23
+ from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
24
+
25
+ tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue)
26
+
27
+
28
+
29
+ import copy
30
+
31
+ import math
32
+
33
+
34
+
35
+ import numpy as np
36
+
37
+ from numpy.testing import assert_almost_equal, assert_equal
38
+
39
+ from scipy import stats, interpolate
40
+
41
+ from statsmodels.compat.python import lzip, lrange
42
+
43
+ from statsmodels.iolib.table import SimpleTable
44
+
45
+ #temporary circular import
46
+
47
+ from statsmodels.stats.multitest import multipletests, _ecdf as ecdf, fdrcorrection as fdrcorrection0, fdrcorrection_twostage
48
+
49
+ from statsmodels.graphics import utils
50
+
51
+ from statsmodels.tools.sm_exceptions import ValueWarning
52
+
53
+
54
+
55
+ class MultiComparison2(MultiComparison):
56
+
57
+ def __init__(self, data, groups, group_order=None):
58
+
59
+ super().__init__(data, groups, group_order)
60
+
61
+
62
+
63
+ def tukeyhsd2(self, alpha=0.05):
64
+
65
+
66
+
67
+ self.groupstats = GroupsStats(
68
+
69
+ np.column_stack([self.data, self.groupintlab]),
70
+
71
+ useranks=False)
72
+
73
+
74
+
75
+ gmeans = self.groupstats.groupmean
76
+
77
+ gnobs = self.groupstats.groupnobs
78
+
79
+ # var_ = self.groupstats.groupvarwithin()
80
+
81
+ # #possibly an error in varcorrection in this case
82
+
83
+ var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
84
+
85
+ # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
86
+
87
+ # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
88
+
89
+ res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
90
+
91
+
92
+
93
+ resarr = np.array(lzip(self.groupsunique[res[0][0]],
94
+
95
+ self.groupsunique[res[0][1]],
96
+
97
+ np.round(res[2], 4),
98
+
99
+ np.round(res[8], 4),
100
+
101
+ np.round(res[4][:, 0], 4),
102
+
103
+ np.round(res[4][:, 1], 4),
104
+
105
+ res[1]),
106
+
107
+ dtype=[('group1', object),
108
+
109
+ ('group2', object),
110
+
111
+ ('meandiff', float),
112
+
113
+ ('p-adj', float),
114
+
115
+ ('lower', float),
116
+
117
+ ('upper', float),
118
+
119
+ ('reject', np.bool8)])
120
+
121
+ results_table = SimpleTable(resarr, headers=resarr.dtype.names)
122
+
123
+ results_table.title = 'Multiple Comparison of Means - Tukey HSD, ' + \
124
+
125
+ 'FWER=%4.2f' % alpha
126
+
127
+ print(res)
128
+
129
+ st_range = np.abs(res[2])
130
+
131
+ print("st_range is ", st_range)
132
+
133
+ print("q_crit is ", res[5])
134
+
135
+ print("st_range > q_crit is ", st_range > res[5])
136
+
137
+ print("reject is ", res[1])
138
+
139
+ return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
140
+
141
+ res[3], res[4], res[6], res[7], var_, res[8])
142
+
143
+
144
+
145
+
146
+
147
+ def tukey_hsd( lst, ind, n ):
148
+
149
+ data_arr = np.hstack( lst )
150
+
151
+ ind_arr = np.repeat(ind, n)
152
+
153
+ print(MultiComparison2(data_arr, ind_arr).tukeyhsd2(alpha=0.05))
154
+
155
+
156
+
157
+
158
+
159
+ A = np.array([15,9,18,14,18])
160
+
161
+ B = np.array([13,8,8,12,7])
162
+
163
+ C = np.array([10,6,11,7,12])
164
+
165
+ D = np.array([10,7,3,5,7])
166
+
167
+
168
+
169
+ tukey_hsd( (A,B,C,D), list('ABCD') , 5)
170
+
171
+ ```
172
+
173
+ meandiff列の絶対値がq_critを上回るとFalseになるはずだけどA-BがなぜFalseになるか不明。
174
+
175
+
176
+
177
+ ```result
178
+
179
+ ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
180
+
181
+ 1.31339255]), array([[-10.51452797, 0.11452797],
182
+
183
+ [-10.91452797, -0.28547203],
184
+
185
+ [-13.71452797, -3.08547203],
186
+
187
+ [ -5.71452797, 4.91452797],
188
+
189
+ [ -8.51452797, 2.11452797],
190
+
191
+ [ -8.11452797, 2.51452797]]), 4.046412438282385, 16, array([False, True, True, False, False, False]), array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
192
+
193
+ 0.45735327]))
194
+
195
+ st_range is [5.2 5.6 8.4 0.4 3.2 2.8]
196
+
197
+ q_crit is 4.046412438282385
198
+
199
+ st_range > q_crit is [ True True True False False False]
200
+
201
+ reject is [False True True False False False]
202
+
203
+ Multiple Comparison of Means - Tukey HSD, FWER=0.05
204
+
205
+ =====================================================
206
+
207
+ group1 group2 meandiff p-adj lower upper reject
208
+
209
+ -----------------------------------------------------
210
+
211
+ A B -5.2 0.0563 -10.5145 0.1145 False
212
+
213
+ A C -5.6 0.0371 -10.9145 -0.2855 True
214
+
215
+ A D -8.4 0.0018 -13.7145 -3.0855 True
216
+
217
+ B C -0.4 0.9 -5.7145 4.9145 False
218
+
219
+ B D -3.2 0.345 -8.5145 2.1145 False
220
+
221
+ C D -2.8 0.4574 -8.1145 2.5145 False
222
+
223
+ -----------------------------------------------------
224
+
225
+ ```

1

勘違い修正

2019/09/14 11:23

投稿

nomuken
nomuken

スコア1627

test CHANGED
@@ -1 +1,5 @@
1
+ 実際に演算しているのは
2
+
3
+ https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
4
+
1
- `pairwise_tukeyhsd`関数の第三パラメタ`alpha`の値だと思います。省略する0.05にります。値を指定するこもできます。
5
+ だと思うのでこれ読み解かないといけ思います。