teratail header banner
teratail header banner
質問するログイン新規登録

回答編集履歴

5

全面改訂(何度もすみません)

2019/09/15 03:04

投稿

nomuken
nomuken

スコア1627

answer CHANGED
@@ -1,81 +1,15 @@
1
- 実際に演算ているのは
1
+ 内容を全面改訂ました。
2
- https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
3
- だと思うのでこれ読み解かないといけないと思います。
4
2
 
5
- ---
3
+ `print(pairwise_tukeyhsd(data_arr,ind_arr))`
4
+ を実行するとpairwise_tukeyhsd(data_arr,ind_arr)はTukeyHSDResultsインスタンスを返してきますがメソッド`__str__()`で文字列に変換されてしまいます。
6
5
 
7
- コピペ&可変で算出に関連るパラメタをデバッグ出力る版
6
+ 一旦、受けて、`vars()`で表示ればよいで
8
7
  ```Python
9
- from statsmodels.stats.multicomp import pairwise_tukeyhsd
10
- import numpy as np
11
- from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
12
- tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue, varcorrection_pairs_unbalanced, get_tukeyQcrit2)
13
-
14
- import copy
15
- import math
16
-
17
- import numpy as np
18
- from numpy.testing import assert_almost_equal, assert_equal
19
- from scipy import stats, interpolate
20
- from statsmodels.compat.python import lzip, lrange
21
- from statsmodels.iolib.table import SimpleTable
22
- #temporary circular import
23
- from statsmodels.stats.multitest import multipletests, _ecdf as ecdf, fdrcorrection as fdrcorrection0, fdrcorrection_twostage
24
- from statsmodels.graphics import utils
25
- from statsmodels.tools.sm_exceptions import ValueWarning
26
-
27
- class MultiComparison2(MultiComparison):
28
- def __init__(self, data, groups, group_order=None):
29
- super().__init__(data, groups, group_order)
30
-
31
- def tukeyhsd2(self, alpha=0.05):
32
-
33
- self.groupstats = GroupsStats(
34
- np.column_stack([self.data, self.groupintlab]),
35
- useranks=False)
36
-
37
- gmeans = self.groupstats.groupmean
38
- gnobs = self.groupstats.groupnobs
39
- # var_ = self.groupstats.groupvarwithin()
40
- # #possibly an error in varcorrection in this case
41
- var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
42
- # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
43
- # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
44
- res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
45
-
46
- resarr = np.array(lzip(self.groupsunique[res[0][0]],
47
- self.groupsunique[res[0][1]],
48
- np.round(res[2], 4),
49
- np.round(res[8], 4),
50
- np.round(res[4][:, 0], 4),
51
- np.round(res[4][:, 1], 4),
52
- res[1]),
53
- dtype=[('group1', object),
54
- ('group2', object),
55
- ('meandiff', float),
56
- ('p-adj', float),
57
- ('lower', float),
58
- ('upper', float),
59
- ('reject', np.bool8)])
60
- results_table = SimpleTable(resarr, headers=resarr.dtype.names)
61
- results_table.title = 'Multiple Comparison of Means - Tukey HSD, ' + \
62
- 'FWER=%4.2f' % alpha
63
- print(res)
64
- print("pvals is ", res[8])
65
- print("reject is ", res[1])
66
- print("std_pairs is ", res[3])
67
-
68
- st_range = np.abs(res[2]) / res[3]
69
- print("st_range is ", st_range)
70
- print("q_crit is ", res[5])
71
- print("st_range > q_crit is ", st_range > res[5])
72
- return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
73
- res[3], res[4], res[6], res[7], var_, res[8])
74
-
75
8
  def tukey_hsd( lst, ind, n ):
76
9
  data_arr = np.hstack( lst )
77
10
  ind_arr = np.repeat(ind, n)
78
- print(MultiComparison2(data_arr, ind_arr).tukeyhsd2(alpha=0.05))
11
+ res = pairwise_tukeyhsd(data_arr, ind_arr)
12
+ print(vars(res))
79
13
 
80
14
 
81
15
  A = np.array([15,9,18,14,18])
@@ -87,36 +21,16 @@
87
21
  ```
88
22
 
89
23
  ```result
90
- ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
91
- 1.31339255]), array([[-10.51452797, 0.11452797],
24
+ {'_multicomp': <statsmodels.sandbox.stats.multicomp.MultiComparison object at 0x0000015F51131C88>, '_results_table': <class 'statsmodels.iolib.table.SimpleTable'>, 'q_crit': 4.046412438282385, 'reject': array([False, True, True, False, False, False]), 'meandiffs': array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), 'std_pairs': array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
25
+ 1.31339255]), 'confint': array([[-10.51452797, 0.11452797],
92
26
  [-10.91452797, -0.28547203],
93
27
  [-13.71452797, -3.08547203],
94
28
  [ -5.71452797, 4.91452797],
95
29
  [ -8.51452797, 2.11452797],
96
- [ -8.11452797, 2.51452797]]), 4.046412438282385, 16, array([False, True, True, False, False, False]), array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
97
- 0.45735327]))
98
- pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]
99
- reject is [False True True False False False]
100
- std_pairs is [1.31339255 1.31339255 1.31339255 1.31339255 1.31339255 1.31339255]
101
- st_range is [3.95921234 4.26376713 6.3956507 0.3045548 2.43643836 2.13188357]
102
- q_crit is 4.046412438282385
103
- st_range > q_crit is [False True True False False False]
104
- Multiple Comparison of Means - Tukey HSD, FWER=0.05
105
- =====================================================
106
- group1 group2 meandiff p-adj lower upper reject
107
- -----------------------------------------------------
108
- A B -5.2 0.0563 -10.5145 0.1145 False
109
- A C -5.6 0.0371 -10.9145 -0.2855 True
110
- A D -8.4 0.0018 -13.7145 -3.0855 True
111
- B C -0.4 0.9 -5.7145 4.9145 False
112
- B D -3.2 0.345 -8.5145 2.1145 False
113
- C D -2.8 0.4574 -8.1145 2.5145 False
114
- -----------------------------------------------------
30
+ [ -8.11452797, 2.51452797]]), 'df_total': 16, 'reject2': array([False, True, True, False, False, False]), 'variance': 8.625000000000002, 'pvalues': array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
31
+ 0.45735327]), 'data': array([15, 9, 18, 14, 18, 13, 8, 8, 12, 7, 10, 6, 11, 7, 12, 10, 7,
32
+ 3, 5, 7]), 'groups': array(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C',
33
+ 'C', 'C', 'D', 'D', 'D', 'D', 'D'], dtype='<U1'), 'groupsunique': array(['A', 'B', 'C', 'D'], dtype='<U1')}
115
34
  ```
116
-
117
- > Attributesに(pvaluesadjusted p-values from the HSD test)と書かれているので、
118
- > そのTukeyHSDResultsインスタンスのAttributesの中身を見る方法があればこの問題は解決すると思うのですが。
119
-
120
- TukeyHSDResults()の引数`pvalues`に渡しているのが`res[8]`でその値が
121
- `pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]`
35
+ `'pvalues': array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
122
- になります、期待した出力になってるでしょうか?
36
+ 0.45735327])`所望のデータだと思ます。

4

いろいろ誤り修正

2019/09/15 03:04

投稿

nomuken
nomuken

スコア1627

answer CHANGED
@@ -3,14 +3,13 @@
3
3
  だと思うのでこれ読み解かないといけないと思います。
4
4
 
5
5
  ---
6
- 求め方じゃなくて値を確認したいんですかね。
7
6
 
8
7
  コピペ&可変で算出に関連するパラメタをデバッグ出力する版
9
8
  ```Python
10
9
  from statsmodels.stats.multicomp import pairwise_tukeyhsd
11
10
  import numpy as np
12
11
  from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
13
- tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue)
12
+ tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue, varcorrection_pairs_unbalanced, get_tukeyQcrit2)
14
13
 
15
14
  import copy
16
15
  import math
@@ -62,15 +61,17 @@
62
61
  results_table.title = 'Multiple Comparison of Means - Tukey HSD, ' + \
63
62
  'FWER=%4.2f' % alpha
64
63
  print(res)
64
+ print("pvals is ", res[8])
65
+ print("reject is ", res[1])
66
+ print("std_pairs is ", res[3])
67
+
65
- st_range = np.abs(res[2])
68
+ st_range = np.abs(res[2]) / res[3]
66
69
  print("st_range is ", st_range)
67
70
  print("q_crit is ", res[5])
68
71
  print("st_range > q_crit is ", st_range > res[5])
69
- print("reject is ", res[1])
70
72
  return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
71
73
  res[3], res[4], res[6], res[7], var_, res[8])
72
74
 
73
-
74
75
  def tukey_hsd( lst, ind, n ):
75
76
  data_arr = np.hstack( lst )
76
77
  ind_arr = np.repeat(ind, n)
@@ -84,7 +85,6 @@
84
85
 
85
86
  tukey_hsd( (A,B,C,D), list('ABCD') , 5)
86
87
  ```
87
- meandiff列の絶対値がq_critを上回るとreject列がTrueになるはずだけどA-BがなぜFalseになるか不明。
88
88
 
89
89
  ```result
90
90
  ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
@@ -95,10 +95,12 @@
95
95
  [ -8.51452797, 2.11452797],
96
96
  [ -8.11452797, 2.51452797]]), 4.046412438282385, 16, array([False, True, True, False, False, False]), array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
97
97
  0.45735327]))
98
+ pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]
99
+ reject is [False True True False False False]
100
+ std_pairs is [1.31339255 1.31339255 1.31339255 1.31339255 1.31339255 1.31339255]
98
- st_range is [5.2 5.6 8.4 0.4 3.2 2.8]
101
+ st_range is [3.95921234 4.26376713 6.3956507 0.3045548 2.43643836 2.13188357]
99
102
  q_crit is 4.046412438282385
100
- st_range > q_crit is [ True True True False False False]
103
+ st_range > q_crit is [False True True False False False]
101
- reject is [False True True False False False]
102
104
  Multiple Comparison of Means - Tukey HSD, FWER=0.05
103
105
  =====================================================
104
106
  group1 group2 meandiff p-adj lower upper reject
@@ -110,4 +112,11 @@
110
112
  B D -3.2 0.345 -8.5145 2.1145 False
111
113
  C D -2.8 0.4574 -8.1145 2.5145 False
112
114
  -----------------------------------------------------
113
- ```
115
+ ```
116
+
117
+ > Attributesに(pvaluesadjusted p-values from the HSD test)と書かれているので、
118
+ > そのTukeyHSDResultsインスタンスのAttributesの中身を見る方法があればこの問題は解決すると思うのですが。
119
+
120
+ TukeyHSDResults()の引数`pvalues`に渡しているのが`res[8]`でその値が
121
+ `pvals is [0.0562591 0.03714849 0.00177409 0.9 0.34502168 0.45735327]`
122
+ になりますが、期待した出力になっているでしょうか?

3

誤記修正

2019/09/15 02:36

投稿

nomuken
nomuken

スコア1627

answer CHANGED
@@ -84,7 +84,7 @@
84
84
 
85
85
  tukey_hsd( (A,B,C,D), list('ABCD') , 5)
86
86
  ```
87
- meandiff列の絶対値がq_critを上回るとFalseになるはずだけどA-BがなぜFalseになるか不明。
87
+ meandiff列の絶対値がq_critを上回るとreject列がTrueになるはずだけどA-BがなぜFalseになるか不明。
88
88
 
89
89
  ```result
90
90
  ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,

2

いろいろ更新

2019/09/14 11:32

投稿

nomuken
nomuken

スコア1627

answer CHANGED
@@ -1,3 +1,113 @@
1
1
  実際に演算しているのは
2
2
  https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
3
- だと思うのでこれ読み解かないといけないと思います。
3
+ だと思うのでこれ読み解かないといけないと思います。
4
+
5
+ ---
6
+ 求め方じゃなくて値を確認したいんですかね。
7
+
8
+ コピペ&可変で算出に関連するパラメタをデバッグ出力する版
9
+ ```Python
10
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
11
+ import numpy as np
12
+ from statsmodels.sandbox.stats.multicomp import ( # noqa:F401
13
+ tukeyhsd, MultiComparison, GroupsStats, TukeyHSDResults, get_tukey_pvalue)
14
+
15
+ import copy
16
+ import math
17
+
18
+ import numpy as np
19
+ from numpy.testing import assert_almost_equal, assert_equal
20
+ from scipy import stats, interpolate
21
+ from statsmodels.compat.python import lzip, lrange
22
+ from statsmodels.iolib.table import SimpleTable
23
+ #temporary circular import
24
+ from statsmodels.stats.multitest import multipletests, _ecdf as ecdf, fdrcorrection as fdrcorrection0, fdrcorrection_twostage
25
+ from statsmodels.graphics import utils
26
+ from statsmodels.tools.sm_exceptions import ValueWarning
27
+
28
+ class MultiComparison2(MultiComparison):
29
+ def __init__(self, data, groups, group_order=None):
30
+ super().__init__(data, groups, group_order)
31
+
32
+ def tukeyhsd2(self, alpha=0.05):
33
+
34
+ self.groupstats = GroupsStats(
35
+ np.column_stack([self.data, self.groupintlab]),
36
+ useranks=False)
37
+
38
+ gmeans = self.groupstats.groupmean
39
+ gnobs = self.groupstats.groupnobs
40
+ # var_ = self.groupstats.groupvarwithin()
41
+ # #possibly an error in varcorrection in this case
42
+ var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
43
+ # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
44
+ # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
45
+ res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
46
+
47
+ resarr = np.array(lzip(self.groupsunique[res[0][0]],
48
+ self.groupsunique[res[0][1]],
49
+ np.round(res[2], 4),
50
+ np.round(res[8], 4),
51
+ np.round(res[4][:, 0], 4),
52
+ np.round(res[4][:, 1], 4),
53
+ res[1]),
54
+ dtype=[('group1', object),
55
+ ('group2', object),
56
+ ('meandiff', float),
57
+ ('p-adj', float),
58
+ ('lower', float),
59
+ ('upper', float),
60
+ ('reject', np.bool8)])
61
+ results_table = SimpleTable(resarr, headers=resarr.dtype.names)
62
+ results_table.title = 'Multiple Comparison of Means - Tukey HSD, ' + \
63
+ 'FWER=%4.2f' % alpha
64
+ print(res)
65
+ st_range = np.abs(res[2])
66
+ print("st_range is ", st_range)
67
+ print("q_crit is ", res[5])
68
+ print("st_range > q_crit is ", st_range > res[5])
69
+ print("reject is ", res[1])
70
+ return TukeyHSDResults(self, results_table, res[5], res[1], res[2],
71
+ res[3], res[4], res[6], res[7], var_, res[8])
72
+
73
+
74
+ def tukey_hsd( lst, ind, n ):
75
+ data_arr = np.hstack( lst )
76
+ ind_arr = np.repeat(ind, n)
77
+ print(MultiComparison2(data_arr, ind_arr).tukeyhsd2(alpha=0.05))
78
+
79
+
80
+ A = np.array([15,9,18,14,18])
81
+ B = np.array([13,8,8,12,7])
82
+ C = np.array([10,6,11,7,12])
83
+ D = np.array([10,7,3,5,7])
84
+
85
+ tukey_hsd( (A,B,C,D), list('ABCD') , 5)
86
+ ```
87
+ meandiff列の絶対値がq_critを上回るとFalseになるはずだけどA-BがなぜFalseになるか不明。
88
+
89
+ ```result
90
+ ((array([0, 0, 0, 1, 1, 2], dtype=int64), array([1, 2, 3, 2, 3, 3], dtype=int64)), array([False, True, True, False, False, False]), array([-5.2, -5.6, -8.4, -0.4, -3.2, -2.8]), array([1.31339255, 1.31339255, 1.31339255, 1.31339255, 1.31339255,
91
+ 1.31339255]), array([[-10.51452797, 0.11452797],
92
+ [-10.91452797, -0.28547203],
93
+ [-13.71452797, -3.08547203],
94
+ [ -5.71452797, 4.91452797],
95
+ [ -8.51452797, 2.11452797],
96
+ [ -8.11452797, 2.51452797]]), 4.046412438282385, 16, array([False, True, True, False, False, False]), array([0.0562591 , 0.03714849, 0.00177409, 0.9 , 0.34502168,
97
+ 0.45735327]))
98
+ st_range is [5.2 5.6 8.4 0.4 3.2 2.8]
99
+ q_crit is 4.046412438282385
100
+ st_range > q_crit is [ True True True False False False]
101
+ reject is [False True True False False False]
102
+ Multiple Comparison of Means - Tukey HSD, FWER=0.05
103
+ =====================================================
104
+ group1 group2 meandiff p-adj lower upper reject
105
+ -----------------------------------------------------
106
+ A B -5.2 0.0563 -10.5145 0.1145 False
107
+ A C -5.6 0.0371 -10.9145 -0.2855 True
108
+ A D -8.4 0.0018 -13.7145 -3.0855 True
109
+ B C -0.4 0.9 -5.7145 4.9145 False
110
+ B D -3.2 0.345 -8.5145 2.1145 False
111
+ C D -2.8 0.4574 -8.1145 2.5145 False
112
+ -----------------------------------------------------
113
+ ```

1

勘違い修正

2019/09/14 11:23

投稿

nomuken
nomuken

スコア1627

answer CHANGED
@@ -1,1 +1,3 @@
1
+ 実際に演算しているのは
2
+ https://github.com/statsmodels/statsmodels/blob/bc5680db6265d275d89505815a5cec9e9f632239/statsmodels/sandbox/stats/multicomp.py#L1239
1
- `pairwise_tukeyhsd`関数の第三パラメタ`alpha`の値だと思います。省略する0.05にります。値を指定するこもできます。
3
+ だと思うのでこれ読み解かないといけ思います。