<やりたいこと>
下記の環境で以下のシンプルなニューラルネットワークモデルを実行しました。
Code1 Denseレイヤのみを使用したモデル
Code2 Convolutionレイヤを使用したモデル(CNN)
Code1は正常に実行されましたが、Code2のモデル実行時に「UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.」
というエラーが出てしまいました。
色々と検索して原因を探りましたが、解決できませんでした。皆様のお力をお借りしたく、質問させていただきます。
##### <確認したこと>
- GPUを認識していること(tf.test.gpu_device_name() にて確認)
- CUDA、TensorFlow等のバージョンに互換性があること
- GPU RAMが別のプロセスによって不足になっていないこと
環境
Windows 10
NVIDIA GeForce GTX1650
python 3.6.12
conda 4.9.2
TensorFlow-GPU 2.0.0
Visual Studio Community 2017
CUDA 10.0
cuDNN 7.4.2
Code1
# Code1: train a small fully connected classifier on Fashion-MNIST.
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Load the train/test splits through the Keras dataset helper.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Scale pixel values to [0, 1]. The images are 8-bit grayscale, so the
# divisor must be 255.0 (the original 225.0 was a typo that left values
# slightly above 1.0).
train_images = train_images / 255.0
test_images = test_images / 255.0

# Flatten each 28x28 image, then one hidden Dense layer and a 10-way softmax.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(units=128, activation="relu"),
    keras.layers.Dense(units=10, activation="softmax"),
])

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10)
Code2
# Code2: train a small convolutional network (CNN) on CIFAR-10.
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

# Ask TensorFlow to allocate GPU memory on demand instead of reserving almost
# all of it up front. Without this, cuDNN can fail to initialize and Conv2D
# raises "Failed to get convolution algorithm".
# NOTE(review): if the error persists, check that the installed cuDNN version
# matches the one this TensorFlow build expects.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPU has been initialized
        # (e.g. restart the notebook kernel and run this cell first).
        print(err)

# Load the train/test splits and scale pixel values to [0, 1].
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Three Conv2D layers with max pooling after the first two, then a dense head.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(units=64, activation='relu'))
# The final layer has no activation: it outputs raw logits, which is why the
# loss below is created with from_logits=True.
model.add(layers.Dense(units=10))

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=10,
          validation_data=(test_images, test_labels))
Error
1Train on 50000 samples, validate on 10000 samples 2Epoch 1/10 3 32/50000 [..............................] - ETA: 29:19 4--------------------------------------------------------------------------- 5 6UnknownError Traceback (most recent call last) 7<ipython-input-48-ce520e3e9c1d> in <module>() 8 21 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 9 22 metrics=['accuracy']) 10---> 23 model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels)) 11 12C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 13 726 max_queue_size=max_queue_size, 14 727 workers=workers, 15--> 728 use_multiprocessing=use_multiprocessing) 16 729 17 730 def evaluate(self, 18 19C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs) 20 322 mode=ModeKeys.TRAIN, 21 323 training_context=training_context, 22--> 324 total_epochs=epochs) 23 325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN) 24 326 25 26C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs) 27 121 step=step, mode=mode, size=current_batch_size) as batch_logs: 28 122 try: 29--> 123 batch_outs = execution_function(iterator) 30 124 except (StopIteration, 
errors.OutOfRangeError): 31 125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError? 32 33C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn) 34 84 # `numpy` translates Tensors to values in Eager mode. 35 85 return nest.map_structure(_non_none_constant_value, 36---> 86 distributed_function(input_fn)) 37 87 38 88 return execution_function 39 40C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds) 41 455 42 456 tracing_count = self._get_tracing_count() 43--> 457 result = self._call(*args, **kwds) 44 458 if tracing_count == self._get_tracing_count(): 45 459 self._call_counter.called_without_tracing() 46 47C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds) 48 518 # Lifting succeeded, so variables are initialized and we can run the 49 519 # stateless function. 
50--> 520 return self._stateless_fn(*args, **kwds) 51 521 else: 52 522 canon_args, canon_kwds = \ 53 54C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs) 55 1821 """Calls a graph function specialized to the inputs.""" 56 1822 graph_function, args, kwargs = self._maybe_define_function(args, kwargs) 57-> 1823 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access 58 1824 59 1825 @property 60 61C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs) 62 1139 if isinstance(t, (ops.Tensor, 63 1140 resource_variable_ops.BaseResourceVariable))), 64-> 1141 self.captured_inputs) 65 1142 66 1143 def _call_flat(self, args, captured_inputs, cancellation_manager=None): 67 68C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager) 69 1222 if executing_eagerly: 70 1223 flat_outputs = forward_function.call( 71-> 1224 ctx, args, cancellation_manager=cancellation_manager) 72 1225 else: 73 1226 gradient_name = self._delayed_rewrite_functions.register() 74 75C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager) 76 509 inputs=args, 77 510 attrs=("executor_type", executor_type, "config_proto", config), 78--> 511 ctx=ctx) 79 512 else: 80 513 outputs = execute.execute_with_cancellation( 81 82C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) 83 65 else: 84 66 message = e.message 85---> 67 six.raise_from(core._status_to_exception(e.code, message), None) 86 68 except TypeError as e: 87 69 keras_symbolic_tensors = [ 88 
89C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\six.py in raise_from(value, from_value) 90 91UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. 92 [[node sequential_7/conv2d_12/Conv2D (defined at C:\Users\koyamashinji\anaconda3\envs\gpu_trial\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]] [Op:__inference_distributed_function_24004] 93 94Function call stack: 95distributed_function
回答1件
あなたの回答
tips
プレビュー