最近、GPU搭載のPCを購入して機械学習を始めましたが、簡単なRNNでエラーが出てしまい、MNISTでコードを動かしたところ、以下のようなエラーが出ます。なお、RNNとMNISTで出るエラーは同じものです。ネットではメモリ不足などが原因とありますが、自分のGPUはGeForceであり、問題ないと思うのですが。ご回答よろしくお願いします。
解決方法を教えてください。コメントアウトしているところはそれぞれ試みましたが解決しませんでした。
以下コードです。
python
# Train a small fully connected classifier on MNIST with tf.keras and report
# the accuracy on the test split.
import os

# Set before importing TensorFlow so the option is already in place when
# TensorFlow initialises the GPU.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# ---------------------------------------------------------------------------
# GPU-memory workarounds that were each tried without success.  They are kept
# (commented out) for reference.
# NOTE(review): the variants that only build a tf.Session never hand it to
# Keras; in TF 1.x that presumably requires
# tf.compat.v1.keras.backend.set_session(session) -- TODO confirm.
#
# Attempt 1
# tf.Session(config=tf.ConfigProto(device_count = {'GPU': 1}))
#
# Attempt 2
# config = tf.ConfigProto()
# config.gpu_options.allow_growth=True
# sess = tf.Session(config=config)
#
# Attempt 3
# gpus = tf.config.experimental.list_physical_devices('GPU')
# for gpu in gpus:
#     tf.config.experimental.set_memory_growth(gpu, True)
#
# Attempt 4
# config = tf.compat.v1.ConfigProto()
# config.gpu_options.allow_growth = True
# session = tf.compat.v1.Session(config=config)
#
# Attempt 5
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
# tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,allow_soft_placement=True))
# ---------------------------------------------------------------------------

# Seed NumPy's global RNG before the train/validation split below.
np.random.seed(0)
(X_train_base, labels_train_base), (X_test, labels_test) = mnist.load_data()

# Hold out 20% of the training data for validation.
X_train, X_validation, labels_train, labels_validation = train_test_split(
    X_train_base, labels_train_base, test_size=0.2)

# Flatten every image to a 784-element vector.
X_train = X_train.reshape(-1, 784)
X_validation = X_validation.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Convert to float32 and divide by 255.
X_train = X_train.astype('float32')
X_validation = X_validation.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_validation /= 255
X_test /= 255

# One-hot encode the labels to match the categorical cross-entropy loss.
y_train = to_categorical(labels_train)
y_validation = to_categorical(labels_validation)
y_test = to_categorical(labels_test)

n_features = 784   # input vector length
n_hidden = 100     # units per hidden layer
rate = 0.01        # learning rate passed to Adam

# Three ReLU hidden layers followed by a 10-way softmax output.
model = Sequential()
model.add(Dense(n_hidden, activation='relu', input_shape=(n_features,)))
model.add(Dense(n_hidden, activation='relu'))
model.add(Dense(n_hidden, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.compile(optimizer=tf.optimizers.Adam(rate),
              loss='categorical_crossentropy',
              metrics=['mae', 'accuracy'])

# The epoch count is only an upper bound: EarlyStopping ends training once
# val_loss has failed to improve for 10 consecutive epochs.
log = model.fit(X_train, y_train, epochs=3000, batch_size=100, verbose=True,
                callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss',
                                                         min_delta=0, patience=10,
                                                         verbose=1)],
                validation_data=(X_validation, y_validation))

# Sequential.predict_classes() was removed in TensorFlow 2.6; taking the argmax
# of the softmax output gives the same class indices on every version.
pred_test = np.argmax(model.predict(X_test), axis=-1)

# Element-wise comparison against the integer labels, then count the matches.
validation = (pred_test == labels_test)
size = validation.size
correct = np.count_nonzero(validation)
print(f"{correct}/{size} correct ({correct*100/size:.3f}%)")
エラーは以下のように出ます。
python
1WARNING:tensorflow:From /home/suzukiharumasa/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version. 2Instructions for updating: 3If using Keras pass *_constraint arguments to layers. 4Train on 48000 samples, validate on 12000 samples 5Epoch 1/300 6--------------------------------------------------------------------------- 7InternalError Traceback (most recent call last) 8<ipython-input-3-a31cdad43806> in <module> 9 48 min_delta=0, patience=10, 10 49 verbose=1)], 11---> 50 validation_data=(X_validation, y_validation)) 12 51 13 52 # Test dataで予測を実行。 14 15~/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 16 725 max_queue_size=max_queue_size, 17 726 workers=workers, 18--> 727 use_multiprocessing=use_multiprocessing) 19 728 20 729 def evaluate(self, 21 22~/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs) 23 673 validation_steps=validation_steps, 24 674 validation_freq=validation_freq, 25--> 675 steps_name='steps_per_epoch') 26 676 27 677 def evaluate(self, 28 29~/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, 
shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs) 30 392 31 393 # Get outputs. 32--> 394 batch_outs = f(ins_batch) 33 395 if not isinstance(batch_outs, list): 34 396 batch_outs = [batch_outs] 35 36~/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs) 37 3474 38 3475 fetched = self._callable_fn(*array_vals, 39-> 3476 run_metadata=self.run_metadata) 40 3477 self._call_fetch_callbacks(fetched[-len(self._fetches):]) 41 3478 output_structure = nest.pack_sequence_as( 42 43~/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/client/session.py in __call__(self, *args, **kwargs) 44 1470 ret = tf_session.TF_SessionRunCallable(self._session._session, 45 1471 self._handle, args, 46-> 1472 run_metadata_ptr) 47 1473 if run_metadata: 48 1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr) 49 50InternalError: 2 root error(s) found. 51 (0) Internal: Blas GEMM launch failed : a.shape=(100, 100), b.shape=(100, 100), m=100, n=100, k=100 52 [[{{node dense_1/MatMul}}]] 53 [[loss/mul/_93]] 54 (1) Internal: Blas GEMM launch failed : a.shape=(100, 100), b.shape=(100, 100), m=100, n=100, k=100 55 [[{{node dense_1/MatMul}}]] 560 successful operations. 570 derived errors ignored. 58