質問編集履歴

1

コードの追加

2019/06/24 05:54

投稿

tubusio
tubusio

スコア11

test CHANGED
File without changes
test CHANGED
@@ -12,6 +12,396 @@
12
12
 
13
13
 
14
14
 
15
+ ```
16
+
17
+ import numpy
18
+
19
+ import pandas
20
+
21
+ from keras.models import Sequential
22
+
23
+ from keras.layers import Dense
24
+
25
+ from keras import optimizers
26
+
27
+ from keras.wrappers.scikit_learn import KerasRegressor
28
+
29
+ from keras.wrappers.scikit_learn import KerasClassifier
30
+
31
+ from sklearn.model_selection import cross_val_score
32
+
33
+ from sklearn.model_selection import KFold
34
+
35
+ from sklearn.model_selection import GridSearchCV
36
+
37
+ from sklearn.preprocessing import StandardScaler
38
+
39
+ from sklearn.pipeline import Pipeline
40
+
41
+ from keras.models import load_model
42
+
43
+ import os
44
+
45
+ import argparse
46
+
47
+
48
+
49
+ #----------------------------
50
+
51
+ # get command line variables
52
+
53
+ #----------------------------
54
+
55
# Command-line interface.
# NOTE: --mode, --input_file and --method keep nargs='+' on purpose: the rest
# of the script reads them as lists (args.method[0], args.input_file[0],
# args.mode == ['create']).
parser = argparse.ArgumentParser(description='Make models by keras. Place Y on the head column in the cleaned dataset with header names on the top row. Rows containing null values will be deleted.')
parser.add_argument('--mode', choices=['create', 'predict'], dest='mode', metavar='create/predict', type=str, nargs='+', required=True,
                    help='create: train a model from the dataset. predict: load a saved model and predict the dataset.')
parser.add_argument('--input_file', dest='input_file', type=str, nargs='+', required=True,
                    help='path to dataset or model')
parser.add_argument('--method', choices=['binary', 'multiple', 'regression'], metavar='binary/multiple/regression', dest='method', type=str, nargs='+', required=True,
                    help='Model type you solve')
# No nargs here: when given, args.output_file is a plain string, not a list.
parser.add_argument('--output_file', dest='output_file', default=False, required=False,
                    help='If you input output_file it will save result as directed path.')
# nargs='*': when given, args.model_file is a (possibly empty) list.
parser.add_argument('--model_file', dest='model_file', default=False, nargs='*',
                    help='If you input model_file it will save or load a model.')
# nargs='*': when given, args.definition is a list of tokens, each of which
# may itself be a comma-separated run of dtype names.
parser.add_argument('--definition', metavar='array of data type such as str, int and float with delimiter [,]', dest='definition', default=False, nargs='*',
                    help='If you define data type of columns, send array of full column definitions.')

args = parser.parse_args()
84
+
85
+
86
+
87
+ #----------------------------
88
+
89
+ # functions
90
+
91
+ #----------------------------
92
+
93
class MakeModel:
    """Train, grid-search, save, load and apply a Keras MLP on a CSV dataset.

    The first CSV column is used as the target (Y) and every remaining
    column as a feature (X). ``method`` selects the problem type:
    ``'binary'``, ``'multiple'`` (one-hot encoded target) or anything else,
    which is treated as regression.
    """

    def __init__(self, args):
        """Copy the parsed command-line options onto the instance.

        Args:
            args: namespace with ``method`` and ``input_file`` (non-empty
                sequences), plus ``model_file``, ``output_file`` and
                ``definition`` (each either ``False`` or a string / sequence
                of strings).
        """
        # Separate lists: ``self.X = self.Y = []`` would alias one object.
        self.X = []
        self.Y = []
        self.row_length = 0
        self.column_length = 0
        self.method = args.method[0]
        self.ifp = args.input_file[0]

        # ``False`` means "option not given" throughout this class.
        self.mfp = self._first_or_false(args.model_file)
        self.ofp = self._first_or_false(args.output_file)
        self.dfin = self._parse_definition(args.definition)

    @staticmethod
    def _first_or_false(value):
        """Return a single path from an option value, or ``False`` if absent.

        A plain string is returned whole (indexing it would yield only its
        first character); a sequence yields its first element; ``False``,
        ``None`` and empty values yield ``False``.
        """
        if value is None or value is False:
            return False
        if isinstance(value, str):
            return value if value else False
        return value[0] if len(value) > 0 else False

    @staticmethod
    def _parse_definition(definition):
        """Flatten a dtype definition into a list of dtype names.

        Accepts a comma-separated string or a sequence of such strings, so
        both ``str,int,float`` and ``str int float`` style inputs work.
        Returns ``False`` when no dtype name is present.
        """
        if definition is None or definition is False:
            return False
        tokens = [definition] if isinstance(definition, str) else definition
        dtypes = [
            name.strip()
            for token in tokens
            for name in token.split(",")
            if name.strip()
        ]
        return dtypes if dtypes else False

    def create_model(self, evMethod, neurons, layers, act, learn_rate, cls, mtr):
        """Build and compile the network used as ``build_fn`` by the wrappers.

        Args:
            evMethod: loss passed to ``model.compile``.
            neurons: width of the first hidden layer.
            layers: number of hidden layers (the first one included).
            act: activation of the output layer.
            learn_rate: learning rate given to the Adam optimizer.
            cls: number of output units.
            mtr: metrics passed to ``model.compile``.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(neurons, input_dim=self.column_length, kernel_initializer='normal', activation='relu'))
        # Additional hidden layers: width is ceil(neurons ** (1 / depth) * 2).
        for depth in range(1, layers):
            width = int(numpy.ceil(numpy.power(neurons, 1 / depth) * 2))
            model.add(Dense(width, kernel_initializer='normal', activation='relu'))
        model.add(Dense(cls, kernel_initializer='normal', activation=act))

        adam = optimizers.Adam(lr=learn_rate)
        model.compile(loss=evMethod, optimizer=adam, metrics=mtr)
        return model

    def load_dataset(self):
        """Read the CSV at ``self.ifp`` and populate ``X``, ``Y`` and shapes.

        Rows containing nulls are dropped. When a dtype definition was
        supplied it is applied column by column before dummy encoding.

        Raises:
            ValueError: if the number of defined dtypes does not match the
                number of CSV columns.
        """
        dataframe = pandas.read_csv(self.ifp, header=0, encoding="sjis").dropna()
        if self.dfin is not False:
            if len(self.dfin) != len(dataframe.columns):
                raise ValueError(
                    "definition has %d dtypes but the dataset has %d columns"
                    % (len(self.dfin), len(dataframe.columns))
                )
            # astype returns a new frame; the result must be kept, otherwise
            # the requested dtypes are silently ignored.
            dataframe = dataframe.astype(dict(zip(dataframe.columns, self.dfin)))

        # Features: every column but the first, dummy-encoded.
        dataframe_X = pandas.get_dummies(dataframe[dataframe.columns[1:]])
        # Target: first column; one-hot encoded only for multi-class.
        if self.method == 'multiple':
            dataframe_Y = pandas.get_dummies(dataframe[dataframe.columns[0]])
        else:
            dataframe_Y = dataframe[dataframe.columns[0]]

        self.row_length, self.column_length = dataframe_X.shape
        self.X = dataframe_X.values
        self.Y = dataframe_Y.values

    def train_model(self):
        """Grid-search a StandardScaler + MLP pipeline and report the best fit.

        Prints the best estimator's parameters and its score on the full
        dataset, and saves the underlying Keras model to ``self.mfp`` when a
        model path was given.
        """
        estimators = [('standardize', StandardScaler())]

        # Loss, output activation, metrics, wrapper and output width all
        # depend on the chosen method. Each value is a one-element list
        # because it is fed to the grid as a fixed "search" dimension.
        if self.method == 'binary':
            evMethod = ['binary_crossentropy']
            activation = ['sigmoid']
            metr = [['accuracy']]
            estimators.append(('mlp', KerasClassifier(build_fn=self.create_model, epochs=10, batch_size=200, verbose=1)))
            cls = [1]
        elif self.method == 'multiple':
            # A plain loss name, consistent with the other two branches.
            evMethod = ['categorical_crossentropy']
            activation = ['softmax']
            metr = [['accuracy']]
            estimators.append(('mlp', KerasClassifier(build_fn=self.create_model, epochs=10, batch_size=200, verbose=1)))
            cls = [self.Y.shape[1]]
        else:
            evMethod = ['mean_squared_error']
            activation = [None]
            metr = [None]
            estimators.append(('mlp', KerasRegressor(build_fn=self.create_model, epochs=10, batch_size=200, verbose=1)))
            cls = [1]

        pipeline = Pipeline(estimators)

        # Candidate hyper-parameters, scaled from the dataset dimensions;
        # set() removes duplicates produced by the rounding.
        batch_size = list(set([int(numpy.ceil(self.row_length / i)) for i in [1000, 300, 100]]))
        epochs = [10, 50, 100]
        neurons = list(set([int(numpy.ceil(self.column_length / i) * 2) for i in numpy.arange(1, 3, 0.4)]))
        learn_rate = [0.001, 0.005, 0.01, 0.07]
        layers = [1, 2, 3, 4, 5]

        param_grid = dict(
            mlp__neurons=neurons,
            mlp__batch_size=batch_size,
            mlp__epochs=epochs,
            mlp__learn_rate=learn_rate,
            mlp__layers=layers,
            mlp__act=activation,
            mlp__evMethod=evMethod,
            mlp__cls=cls,
            mlp__mtr=metr,
        )
        grid = GridSearchCV(estimator=pipeline, param_grid=param_grid)
        grid_result = grid.fit(self.X, self.Y)

        # Report the best parameter combination and its score.
        clf = grid_result.best_estimator_
        print(clf.get_params())
        accuracy = clf.score(self.X, self.Y)
        if self.method in ['binary', 'multiple']:
            print("\nAccuracy: %.2f" % (accuracy))
        else:
            # numpy.mean/std accept a scalar as well as an array, whereas
            # calling .mean() on a plain float would raise.
            print("Results: %.2f (%.2f) MSE" % (numpy.mean(accuracy), numpy.std(accuracy)))

        # Only the Keras model (pipeline step 'mlp') is saved; the fitted
        # StandardScaler is not persisted.
        if self.mfp is not False:
            clf.named_steps['mlp'].model.save(self.mfp)

    def _evaluate(self, pr_Y):
        """Compare predictions with ``self.Y`` and return a dict of metrics.

        Returns ``None`` when ``self.Y`` is empty or contains the
        ``'__null__'`` marker (presumably "label unknown" - TODO confirm
        against the data producer).

        Returned keys:
            binary: ``accuracy``.
            multiple: ``accuracy``, ``positive_error``, ``negative_error``
                (fractions of one-hot cells; positive = predicted 1, actual 0).
            otherwise: ``mse``.
        """
        actual = numpy.asarray(self.Y)
        if actual.size == 0 or numpy.any(actual.astype(object) == '__null__'):
            return None
        predicted = numpy.asarray(pr_Y, dtype=float)

        if self.method == 'binary':
            hits = numpy.round(predicted).ravel() == actual.ravel()
            return {'accuracy': float(numpy.mean(hits))}

        if self.method == 'multiple':
            # Every cell of every row is compared (no cells are skipped).
            diff = numpy.rint(predicted) - actual.astype(float)
            return {
                'accuracy': float(numpy.mean(diff == 0)),
                'positive_error': float(numpy.mean(diff == 1)),
                'negative_error': float(numpy.mean(diff == -1)),
            }

        # Flatten both sides so an (n,) target and an (n, 1) prediction are
        # subtracted element by element instead of broadcasting to (n, n).
        residual = actual.astype(float).ravel() - predicted.ravel()
        return {'mse': float(numpy.mean(residual ** 2))}

    def predict_ds(self):
        """Load the saved model, predict ``self.X`` and report/save results.

        ``load_dataset`` must have been called first. Predictions are
        written to ``self.ofp`` when an output path was given.

        Raises:
            ValueError: if no model path was supplied.
        """
        if self.mfp is False:
            raise ValueError("a model file is required to predict")
        model = load_model(self.mfp)
        model.summary()

        # NOTE(review): the scaler is re-fitted on the prediction data, so
        # the scaling differs from the one used during training unless both
        # datasets share the same distribution.
        sc = StandardScaler()
        self.X = sc.fit_transform(self.X)
        pr_Y = model.predict(self.X)

        metrics = self._evaluate(pr_Y)
        if metrics is not None:
            if self.method == 'binary':
                print("Prediction Accuracy: %.2f%%" % (metrics['accuracy'] * 100))
            elif self.method == 'multiple':
                print("Prediction Accuracy: %.2f%% (positive-error:%.2f%%/negative-error:%.2f%%)" % (
                    metrics['accuracy'] * 100,
                    metrics['positive_error'] * 100,
                    metrics['negative_error'] * 100,
                ))
            else:
                # The printed value is the square root of the MSE.
                print("RMSE: %.2f" % (numpy.sqrt(metrics['mse'])))

        # Save the predicted result.
        if self.ofp is not False:
            numpy.savetxt(self.ofp, pr_Y, fmt='%5s')
370
+
371
+
372
+
373
+ #----------------------------
374
+
375
+ # select mode
376
+
377
+ #----------------------------
378
+
379
# Dispatch on the requested mode. args.mode is a list (nargs='+'), hence the
# comparison against ['create'].
m = MakeModel(args)
if args.mode == ['create']:
    # make model
    m.load_dataset()
    m.train_model()
else:
    # predict dataset: the data must be loaded before predict_ds() runs,
    # otherwise it would operate on the empty X/Y set up by __init__.
    m.load_dataset()
    m.predict_ds()
398
+
399
+
400
+
401
+ ```
402
+
403
+
404
+
15
405
 
16
406
 
17
407