前提・実現したいこと
GPUが2台あるので、Kerasの学習をマルチGPUで並列化して計算を速くしたいのですが、結果としてGPU1台のときより遅くなってしまいました。
参考にした記事はこちらです。http://tech.wonderpla.net/entry/2018/01/09/110000
この記事ではgeneratorがボトルネックになっているとして、fit_generator()のデータ生成も並列化しています。
同じ対応を行いましたが、それでもGPU1台のほうが速いです。
発生している問題・エラーメッセージ
GPU1台:9秒 GPU2台:15秒 GPU2台+fit_generatorを並列化:12秒
該当のソースコード
python
# Single-GPU case: train a small CNN on CIFAR-10 with real-time augmentation.
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os


batch_size = 32
num_classes = 10
epochs = 5

num_predictions = 20


# Load the dataset, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Build the model: two conv/conv/pool/dropout stages followed by a dense head.
model = Sequential([
    Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

# Compile with an RMSprop optimizer.
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

# Convert the images to float32 and scale them by 1/255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('Using real-time data augmentation.')
# Only random shifts (10% of width/height) and horizontal flips are enabled;
# every normalization/whitening option is switched off.
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=0,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)

# Computes the dataset statistics that the feature-wise options would use.
datagen.fit(x_train)

# Train on augmented batches; each epoch is a fixed 1000 generator steps.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
model.fit_generator(
    train_batches,
    steps_per_epoch=1000,
    epochs=epochs,
    validation_data=(x_test, y_test),
    workers=4,
)

# Score the trained model on the test set.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)
python
# Two-GPU case: the same CIFAR-10 CNN, trained data-parallel with
# keras.utils.multi_gpu_model.
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os
import tensorflow as tf  # add
from keras.utils import multi_gpu_model  # add

gpu_count = 2  # add

# multi_gpu_model splits every batch across the GPUs, so the global batch is
# scaled up to keep 32 samples per device.
per_gpu_batch_size = 32
batch_size = per_gpu_batch_size * gpu_count  # modify
num_classes = 10
epochs = 5
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'

# The single-GPU script runs 1000 steps of 32 samples per epoch. Keeping 1000
# steps with the larger batch would push twice as many samples through each
# epoch and make per-epoch timings incomparable, so the step count is derived
# from a fixed number of samples per epoch instead.
samples_per_epoch = 1000 * per_gpu_batch_size
steps_per_epoch = samples_per_epoch // batch_size


# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Build the template model under the CPU device scope, as the multi_gpu_model
# documentation recommends. A separate reference to it is kept because the
# template, not the parallel wrapper, is what must be saved later.
with tf.device("/cpu:0"):  # add
    template_model = Sequential()
    template_model.add(Conv2D(32, (3, 3), padding='same',
                              input_shape=x_train.shape[1:]))
    template_model.add(Activation('relu'))
    template_model.add(Conv2D(32, (3, 3)))
    template_model.add(Activation('relu'))
    template_model.add(MaxPooling2D(pool_size=(2, 2)))
    template_model.add(Dropout(0.25))

    template_model.add(Conv2D(64, (3, 3), padding='same'))
    template_model.add(Activation('relu'))
    template_model.add(Conv2D(64, (3, 3)))
    template_model.add(Activation('relu'))
    template_model.add(MaxPooling2D(pool_size=(2, 2)))
    template_model.add(Dropout(0.25))

    template_model.add(Flatten())
    template_model.add(Dense(512))
    template_model.add(Activation('relu'))
    template_model.add(Dropout(0.5))
    template_model.add(Dense(num_classes))
    template_model.add(Activation('softmax'))

# Parallel wrapper used for training and evaluation; it shares its weights
# with template_model.
model = multi_gpu_model(template_model, gpus=gpu_count)  # add

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Convert the images to float32 and scale them by 1/255.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # Only random shifts (10% of width/height) and horizontal flips are
    # enabled; every normalization/whitening option is switched off.
    datagen = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=0,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=False)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow(), producing
    # batches in worker processes so augmentation does not starve the GPUs.
    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=32,
                        max_queue_size=64,
                        use_multiprocessing=True)


# Save model and weights through the template model, not the multi-GPU
# wrapper, so the file holds the plain single-device model.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
template_model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
試したこと
ここに問題に対して試したことを記載してください。
補足情報(FW/ツールのバージョンなど)
ここにより詳細な情報を記載してください。
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。