質問編集履歴
3
画像
test
CHANGED
File without changes
|
test
CHANGED
@@ -18,188 +18,188 @@
|
|
18
18
|
|
19
19
|
エラーメッセージ!
|
20
20
|
|
21
|
+
```
|
22
|
+
|
21
23
|
![イメージ説明](d4adab8a1717c750b1095bda0c57f6b7.png)
|
22
24
|
|
23
25
|
|
24
26
|
|
27
|
+
|
28
|
+
|
29
|
+
### 該当のソースコード
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
```python
|
34
|
+
|
35
|
+
import numpy as np
|
36
|
+
|
37
|
+
import pandas as pd
|
38
|
+
|
39
|
+
import seaborn as sns
|
40
|
+
|
41
|
+
import matplotlib.pyplot as plt
|
42
|
+
|
43
|
+
from xgboost import XGBClassifier
|
44
|
+
|
45
|
+
import xgboost as xgb
|
46
|
+
|
47
|
+
from sklearn.preprocessing import LabelEncoder
|
48
|
+
|
49
|
+
from sklearn.preprocessing import MinMaxScaler
|
50
|
+
|
51
|
+
from sklearn.model_selection import KFold
|
52
|
+
|
53
|
+
from sklearn.metrics import mean_squared_error
|
54
|
+
|
55
|
+
from sklearn.metrics import r2_score, log_loss, accuracy_score,f1_score, recall_score, precision_score,confusion_matrix
|
56
|
+
|
57
|
+
import itertools
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
test = pd.read_csv("test.csv")
|
62
|
+
|
63
|
+
train = pd.read_csv("train.csv")
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
train_x = train.drop(["disease","id"], axis=1)
|
68
|
+
|
69
|
+
train_x
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
train_y = train["disease"]
|
74
|
+
|
75
|
+
train_y
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
test = test.drop(["id"], axis=1)
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
train_x["Gender"] = train_x["Gender"].replace({"Male":1,"Female":0})
|
84
|
+
|
85
|
+
test["Gender"] = test["Gender"].replace({"Male":1,"Female":0})
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
scores_logloss = []
|
90
|
+
|
91
|
+
scores_accuracy = []
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
params = {
|
96
|
+
|
97
|
+
"booster": "gbtree",
|
98
|
+
|
99
|
+
"objective":"binary:logistic'",
|
100
|
+
|
101
|
+
"eta":0.1,
|
102
|
+
|
103
|
+
"gamma":0,
|
104
|
+
|
105
|
+
"alpha":0.1,
|
106
|
+
|
107
|
+
"lambda":3.0,
|
108
|
+
|
109
|
+
"min_child_weight":0,
|
110
|
+
|
111
|
+
"max_depth":7,
|
112
|
+
|
113
|
+
"subsample":0.8,
|
114
|
+
|
115
|
+
"colsample_bytree":0.8,
|
116
|
+
|
117
|
+
"silent":1,
|
118
|
+
|
119
|
+
"random_state":71}
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
model = XGBClassifier(n_estimators = 170, ramdom_state=71,
|
124
|
+
|
125
|
+
max_depth = 7,
|
126
|
+
|
127
|
+
min_child_weight = 0)
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
model.fit(tr_x,tr_y)
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
va_pred = model.predict(va_x)
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
logloss = log_loss(va_y, va_pred)
|
140
|
+
|
141
|
+
accuracy = accuracy_score(va_y, va_pred > 0.5)
|
142
|
+
|
143
|
+
f1 = f1_score(va_y, va_pred)
|
144
|
+
|
145
|
+
recall = recall_score(va_y,va_pred)
|
146
|
+
|
147
|
+
precision_train = precision_score(va_y, va_pred)
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
scores_logloss.append(logloss)
|
152
|
+
|
153
|
+
scores_accuracy.append(accuracy)
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
logloss = np.mean(scores_logloss)
|
158
|
+
|
159
|
+
accuracy = np.mean(scores_accuracy)
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
from sklearn.metrics import roc_curve, auc
|
164
|
+
|
165
|
+
import matplotlib.pyplot as plt
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
roc = roc_curve(va_y, va_pred,drop_intermediate=False)
|
170
|
+
|
171
|
+
auc = auc(fpr, tpr)
|
172
|
+
|
173
|
+
fpr, tpr, thresholds = roc_curve(va_y, va_pred,drop_intermediate=False)
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
plt.plot(fpr, tpr, label='ROC curve (area = %.2f)'%auc)
|
178
|
+
|
179
|
+
plt.plot(np.linspace(1, 0, len(fpr)), np.linspace(1, 0, len(fpr)), label='Random ROC curve (area = %.2f)'%0.5, linestyle = '--', color = 'gray')
|
180
|
+
|
181
|
+
|
182
|
+
|
183
|
+
plt.legend()
|
184
|
+
|
185
|
+
plt.title('ROC curve')
|
186
|
+
|
187
|
+
plt.xlabel('False Positive Rate')
|
188
|
+
|
189
|
+
plt.ylabel('True Positive Rate')
|
190
|
+
|
191
|
+
plt.grid(True)
|
192
|
+
|
193
|
+
plt.show()
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
|
198
|
+
|
25
199
|
```
|
26
200
|
|
27
201
|
|
28
202
|
|
29
|
-
### 該当のソースコード
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
```python
|
34
|
-
|
35
|
-
import numpy as np
|
36
|
-
|
37
|
-
import pandas as pd
|
38
|
-
|
39
|
-
import seaborn as sns
|
40
|
-
|
41
|
-
import matplotlib.pyplot as plt
|
42
|
-
|
43
|
-
from xgboost import XGBClassifier
|
44
|
-
|
45
|
-
import xgboost as xgb
|
46
|
-
|
47
|
-
from sklearn.preprocessing import LabelEncoder
|
48
|
-
|
49
|
-
from sklearn.preprocessing import MinMaxScaler
|
50
|
-
|
51
|
-
from sklearn.model_selection import KFold
|
52
|
-
|
53
|
-
from sklearn.metrics import mean_squared_error
|
54
|
-
|
55
|
-
from sklearn.metrics import r2_score, log_loss, accuracy_score,f1_score, recall_score, precision_score,confusion_matrix
|
56
|
-
|
57
|
-
import itertools
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
test = pd.read_csv("test.csv")
|
62
|
-
|
63
|
-
train = pd.read_csv("train.csv")
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
train_x = train.drop(["disease","id"], axis=1)
|
68
|
-
|
69
|
-
train_x
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
train_y = train["disease"]
|
74
|
-
|
75
|
-
train_y
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
test = test.drop(["id"], axis=1)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
train_x["Gender"] = train_x["Gender"].replace({"Male":1,"Female":0})
|
84
|
-
|
85
|
-
test["Gender"] = test["Gender"].replace({"Male":1,"Female":0})
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
scores_logloss = []
|
90
|
-
|
91
|
-
scores_accuracy = []
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
params = {
|
96
|
-
|
97
|
-
"booster": "gbtree",
|
98
|
-
|
99
|
-
"objective":"binary:logistic'",
|
100
|
-
|
101
|
-
"eta":0.1,
|
102
|
-
|
103
|
-
"gamma":0,
|
104
|
-
|
105
|
-
"alpha":0.1,
|
106
|
-
|
107
|
-
"lambda":3.0,
|
108
|
-
|
109
|
-
"min_child_weight":0,
|
110
|
-
|
111
|
-
"max_depth":7,
|
112
|
-
|
113
|
-
"subsample":0.8,
|
114
|
-
|
115
|
-
"colsample_bytree":0.8,
|
116
|
-
|
117
|
-
"silent":1,
|
118
|
-
|
119
|
-
"random_state":71}
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
model = XGBClassifier(n_estimators = 170, ramdom_state=71,
|
124
|
-
|
125
|
-
max_depth = 7,
|
126
|
-
|
127
|
-
min_child_weight = 0)
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
model.fit(tr_x,tr_y)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
va_pred = model.predict(va_x)
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
logloss = log_loss(va_y, va_pred)
|
140
|
-
|
141
|
-
accuracy = accuracy_score(va_y, va_pred > 0.5)
|
142
|
-
|
143
|
-
f1 = f1_score(va_y, va_pred)
|
144
|
-
|
145
|
-
recall = recall_score(va_y,va_pred)
|
146
|
-
|
147
|
-
precision_train = precision_score(va_y, va_pred)
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
scores_logloss.append(logloss)
|
152
|
-
|
153
|
-
scores_accuracy.append(accuracy)
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
logloss = np.mean(scores_logloss)
|
158
|
-
|
159
|
-
accuracy = np.mean(scores_accuracy)
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
from sklearn.metrics import roc_curve, auc
|
164
|
-
|
165
|
-
import matplotlib.pyplot as plt
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
roc = roc_curve(va_y, va_pred,drop_intermediate=False)
|
170
|
-
|
171
|
-
auc = auc(fpr, tpr)
|
172
|
-
|
173
|
-
fpr, tpr, thresholds = roc_curve(va_y, va_pred,drop_intermediate=False)
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
plt.plot(fpr, tpr, label='ROC curve (area = %.2f)'%auc)
|
178
|
-
|
179
|
-
plt.plot(np.linspace(1, 0, len(fpr)), np.linspace(1, 0, len(fpr)), label='Random ROC curve (area = %.2f)'%0.5, linestyle = '--', color = 'gray')
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
plt.legend()
|
184
|
-
|
185
|
-
plt.title('ROC curve')
|
186
|
-
|
187
|
-
plt.xlabel('False Positive Rate')
|
188
|
-
|
189
|
-
plt.ylabel('True Positive Rate')
|
190
|
-
|
191
|
-
plt.grid(True)
|
192
|
-
|
193
|
-
plt.show()
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
```
|
200
|
-
|
201
|
-
|
202
|
-
|
203
203
|
### 試したこと
|
204
204
|
|
205
205
|
|
2
画像
test
CHANGED
File without changes
|
test
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
|
19
19
|
エラーメッセージ!
|
20
20
|
|
21
|
-
![
|
21
|
+
![イメージ説明](d4adab8a1717c750b1095bda0c57f6b7.png)
|
22
22
|
|
23
23
|
|
24
24
|
|
1
画像挿入
test
CHANGED
File without changes
|
test
CHANGED
@@ -18,7 +18,9 @@
|
|
18
18
|
|
19
19
|
エラーメッセージ!
|
20
20
|
|
21
|
-
(
|
21
|
+
![ROC](448a43dd3ec76ee2b74aa413de359ff9.png)
|
22
|
+
|
23
|
+
|
22
24
|
|
23
25
|
```
|
24
26
|
|