質問編集履歴

2

コードを変更しました。

2020/10/09 06:27

投稿

Pablito
Pablito

スコア71

test CHANGED
File without changes
test CHANGED
@@ -320,11 +320,9 @@
320
320
 
321
321
  #Create a function that applies the fitted model's output to each row of the data
322
322
 
323
- import math
324
-
325
323
  def predict_score(x):
326
324
 
327
- return 1 / (1 + math.e ** (x.iloc[1] * -4.35516166 + x.iloc[2] * 490.43789796 + x.iloc[3] * -1.42371766 + x.iloc[4] * 1.0599378 + 69.30989536))
325
+ return 1 / (1 + np.exp(-(x.iloc[1] * -4.4 + x.iloc[2] * 490.4 + x.iloc[3] * -1.4 + x.iloc[4] * 1.1 + 69.3)))
328
326
 
329
327
 
330
328
 
@@ -332,8 +330,6 @@
332
330
 
333
331
  datrum.head(10)
334
332
 
335
-
336
-
337
333
  #出てきたメッセージ
338
334
 
339
335
  #<ipython-input-26-b3ac908bb86f>:4: RuntimeWarning: overflow encountered in double_scalars

1

ロジスティック回帰分析の部分のコードも追加しました。

2020/10/09 06:27

投稿

Pablito
Pablito

スコア71

test CHANGED
File without changes
test CHANGED
@@ -66,6 +66,214 @@
66
66
 
67
67
  ```Python
68
68
 
69
+ #元データ
70
+
71
+ ***
72
+
73
+ Amount Profit Quantity Frequency
74
+
75
+ Order ID
76
+
77
+ B-25601 1429.0 -1218.0 19 8
78
+
79
+ B-25602 3889.0 975.0 22 10
80
+
81
+ B-25603 2025.0 -180.0 25 16
82
+
83
+ B-25604 222.0 22.0 11 4
84
+
85
+ B-25605 75.0 0.0 7 2
86
+
87
+ ... ... ... ... ...
88
+
89
+ B-26096 1091.0 121.0 18 6
90
+
91
+ B-26097 1688.0 -504.0 23 7
92
+
93
+ B-26098 1189.0 350.0 21 6
94
+
95
+ B-26099 3417.0 859.0 15 4
96
+
97
+ B-26100 934.0 256.0 6 3
98
+
99
+ ***
100
+
101
+
102
+
103
+ #優良/休眠顧客情報を追加
104
+
105
+ def Dormant(x):
106
+
107
+ if x > 0:
108
+
109
+ return 1
110
+
111
+ else:
112
+
113
+ return 0
114
+
115
+
116
+
117
+ datrum['Customer Type'] = datrum['Profit'].apply(Dormant)
118
+
119
+ datrum.head(10)
120
+
121
+
122
+
123
+ ***
124
+
125
+ Amount Profit Quantity Frequency Customer Type
126
+
127
+ Order ID
128
+
129
+ B-25601 1429.0 -1218.0 19 8 0
130
+
131
+ B-25602 3889.0 975.0 22 10 1
132
+
133
+ B-25603 2025.0 -180.0 25 16 0
134
+
135
+ B-25604 222.0 22.0 11 4 1
136
+
137
+ B-25605 75.0 0.0 7 2 0
138
+
139
+ B-25606 87.0 4.0 2 2 1
140
+
141
+ B-25607 50.0 15.0 4 2 1
142
+
143
+ B-25608 2953.0 -1456.0 19 8 0
144
+
145
+ B-25609 510.0 24.0 8 4 1
146
+
147
+ B-25610 2105.0 -746.0 24 12 0
148
+
149
+ ***
150
+
151
+
152
+
153
+ #データを訓練用とテスト用に分ける
154
+
155
+ x_train, x_test, y_train, y_test = train_test_split(
156
+
157
+ datrum.iloc[:, 0:4],
158
+
159
+ datrum.iloc[:, 4],
160
+
161
+ test_size=0.3,
162
+
163
+ random_state=1
164
+
165
+ )
166
+
167
+
168
+
169
+ #データを標準化
170
+
171
+ scl = StandardScaler()
172
+
173
+ scl.fit(x_train) #学習用データで標準化
174
+
175
+ x_train_std = scl.transform(x_train)
176
+
177
+ x_test_std = scl.transform(x_test)
178
+
179
+
180
+
181
+ clf = LogisticRegression(C=1e5)
182
+
183
+ clf.fit(x_train_std, y_train)#訓練データから学習を行う
184
+
185
+ print( "正解率:{:.2f}%".format(accuracy_score(y_test, clf.predict(x_test_std)) * 100 ))
186
+
187
+
188
+
189
+ ***
190
+
191
+ 正解率:100.00%
192
+
193
+ ***
194
+
195
+
196
+
197
+ print('回帰係数:', clf.coef_)
198
+
199
+ ***
200
+
201
+ 回帰係数: [[ -4.35525949 490.44187802 -1.42342501 1.0620863 ]]
202
+
203
+ ***
204
+
205
+ print('切片:', clf.intercept_)
206
+
207
+ ***
208
+
209
+ 切片: [69.31399488]
210
+
211
+ ***
212
+
213
+
214
+
215
+ x = datrum[['Amount', 'Profit', 'Quantity', 'Frequency']]
216
+
217
+ y = datrum['Customer Type']
218
+
219
+ print('決定係数:', clf.score(x, y))
220
+
221
+
222
+
223
+ ***
224
+
225
+ 決定係数: 0.998
226
+
227
+ ***
228
+
229
+
230
+
231
+ #学習効果の検証
232
+
233
+ X_test = datrum.iloc[:, 0:4]
234
+
235
+ y_predict = clf.predict(X_test)
236
+
237
+
238
+
239
+ print('検証結果:', y_predict)
240
+
241
+
242
+
243
+ ***
244
+
245
+ 検証結果: [0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1
246
+
247
+ 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0
248
+
249
+ 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
250
+
251
+ 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1
252
+
253
+ 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1
254
+
255
+ 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
256
+
257
+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
258
+
259
+ 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
260
+
261
+ 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 0 0 0
262
+
263
+ 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1
264
+
265
+ 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1
266
+
267
+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
268
+
269
+ 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
270
+
271
+ 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1]
272
+
273
+ ***
274
+
275
+
276
+
69
277
  #Create an empty column
70
278
 
71
279
  import numpy as np