実現したいこと
下記のコードに少し手を加えたものを使い、セグメンテーションを行おうとしています。
https://www.tensorflow.org/tutorials/images/segmentation?hl=ja
前提
TensorFlowでセグメンテーションを行うプログラムを作成中です。
学習枚数60枚程度のデータセットでは学習が行えましたが、
200~300枚に増加させたデータセットを使用したところ、以下のエラーが表示されて学習が進みません。
発生しているエラー(長いため一部省略しています)
Train on 232 samples, validate on 58 samples Epoch 1/10 64/232 [=======>......................] - ETA: 10s --------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call last) <ipython-input-32-a6845ad9de9a> in <module> 6 model_history = model.fit(x=train_imgarray,y=trainmask_imgarray, batch_size=BATCH_SIZE,epochs=EPOCHS, 7 steps_per_epoch=STEPS_PER_EPOCH, ----> 8 validation_split=VALIDATAION_SPILIT) 9 10 #model_history = model.fit(train_batches, epochs=EPOCHS, ~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 817 max_queue_size=max_queue_size, 818 workers=workers, --> 819 use_multiprocessing=use_multiprocessing) 820 821 def evaluate(self, ~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs) 340 mode=ModeKeys.TRAIN, 341 training_context=training_context, --> 342 total_epochs=epochs) 343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN) 344 ~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs) 126 step=step, mode=mode, size=current_batch_size) as batch_logs: 127 try: --> 128 batch_outs = execution_function(iterator) 129 except (StopIteration, errors.OutOfRangeError): 130 # TODO(kaftan): File bug about tf function and 
errors.OutOfRangeError? ~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn) 96 # `numpy` translates Tensors to values in Eager mode. 97 return nest.map_structure(_non_none_constant_value, ---> 98 distributed_function(input_fn)) 99 100 return execution_function ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds) 566 xla_context.Exit() 567 else: --> 568 result = self._call(*args, **kwds) 569 570 if tracing_count == self._get_tracing_count(): ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds) 597 # In this case we have created variables on the first call, so we run the 598 # defunned version which is guaranteed to never create variables. --> 599 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable 600 elif self._stateful_fn is not None: 601 # Release the lock early so that multiple threads can perform the call ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs) 2361 with self._lock: 2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs) -> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access 2364 2365 @property ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs) 1609 if isinstance(t, (ops.Tensor, 1610 resource_variable_ops.BaseResourceVariable))), -> 1611 self.captured_inputs) 1612 1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None): ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager) 1690 # No tape is watching; skip to running the function. 
1691 return self._build_call_outputs(self._inference_function.call( -> 1692 ctx, args, cancellation_manager=cancellation_manager)) 1693 forward_backward = self._select_forward_and_backward_functions( 1694 args, ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager) 543 inputs=args, 544 attrs=("executor_type", executor_type, "config_proto", config), --> 545 ctx=ctx) 546 else: 547 outputs = execute.execute_with_cancellation( ~/.local/lib/python3.6/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name) 65 else: 66 message = e.message ---> 67 six.raise_from(core._status_to_exception(e.code, message), None) 68 except TypeError as e: 69 keras_symbolic_tensors = [ /usr/local/lib/python3.6/site-packages/six.py in raise_from(value, from_value) InvalidArgumentError: Received a label value of 128 which is outside the valid range of [0, 3). Label values: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 (この後数字が羅列されます) ##数字の羅列の終了後## [[node loss/conv2d_transpose_4_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-31-a6845ad9de9a>:8) ]] [Op:__inference_distributed_function_15878] Function call stack: distributed_function
該当セルのソースコード
Python
# Details of get_index_from_data_list, and what changes when the images are
# swapped for another set.
#
# The earlier version indexed rgb[tate, yoko, 0] and had to be hand-edited to
# rgb[tate, yoko] for single-channel masks; this version accepts both layouts.


def _remap_to_class_indices(channel, class_values, path):
    """Replace each raw pixel value with its position in *class_values*."""
    remapped = np.zeros(channel.shape)
    recognised = np.zeros(channel.shape, dtype=bool)
    for class_index, raw_value in enumerate(class_values):
        hits = channel == raw_value
        remapped[hits] = class_index
        recognised |= hits
    if not recognised.all():
        unknown = np.unique(channel[~recognised]).tolist()
        raise ValueError(
            "mask %r contains pixel values %r that are not listed in "
            "class_values %r" % (path, unknown, list(class_values))
        )
    return remapped


def get_index_from_data_list(data_list, class_values=None):
    """Load mask images into one label array for segmentation training.

    Each file is opened with PIL, resized to ``img_size`` x ``img_size`` and
    reduced to a single channel: channel 0 of a multi-channel image, or the
    image itself when it is already 2-D. Values that are not greater than
    zero are stored as 0; every other value is copied unchanged unless
    ``class_values`` is given.

    Args:
        data_list: Iterable of mask image paths (anything ``Image.open``
            accepts).
        class_values: Optional sequence of raw pixel values listed in class
            order. When given, a pixel equal to ``class_values[i]`` is stored
            as ``i``, so the result only holds ``0 .. len(class_values) - 1``.
            For example ``class_values=(0, 128, 255)`` turns a mask drawn
            with those three grey levels into labels 0, 1 and 2. When omitted
            the raw pixel values are returned, as before.

    Returns:
        A float64 array of shape ``(len(data_list), img_size, img_size, 1)``.

    Raises:
        ValueError: If ``data_list`` yields no paths, or if ``class_values``
            is given and a mask contains a pixel value that is not listed.
    """
    masks = []
    for path in data_list:
        with Image.open(path) as image:
            # NEAREST keeps the label values intact. An interpolating filter
            # blends neighbouring labels into in-between values that do not
            # correspond to any class.
            resized = image.resize((img_size, img_size), Image.NEAREST)
            pixels = np.asarray(resized)

        # Multi-channel image: read the first channel. 2-D image: use as is.
        channel = pixels[..., 0] if pixels.ndim == 3 else pixels
        # Same rule as the old per-pixel loop: keep values > 0, else store 0.
        channel = np.where(channel > 0, channel, 0)

        if class_values is not None:
            channel = _remap_to_class_indices(channel, class_values, path)

        labels = np.zeros((img_size, img_size, 1))
        labels[..., 0] = channel
        masks.append(labels)

    if not masks:
        raise ValueError("data_list is empty: no mask images to load")
    return np.stack(masks)
Python
# Build trainmask_imgarray from the PNG masks of the training set.

trainmask_path = '/home/user/sample_dataset/train/mask'
trainmask_lists = glob.glob(os.path.join(trainmask_path, '*.png'))
# Sort the paths so the order is the same on every run.
trainmask_lists.sort()
print(len(trainmask_lists))
# out: 230
trainmask_imgarray = get_index_from_data_list(trainmask_lists)
print(trainmask_imgarray.shape)
# out: (230, 256, 256, 1)
Python
# Cell in which the error is raised.

EPOCHS = 10
# Earlier attempts at setting the validation steps, kept for reference:
# VAL_SUBSPLITS = 1
# VALIDATION_STEPS = info.splits['test'].num_examples//BATCH_SIZE//VAL_SUBSPLITS
# VALIDATION_STEPS = train_n_sample//BATCH_SIZE//VAL_SUBSPLITS
# VALIDATION_STEPS = 10

# NOTE(review): VALIDATAION_SPILIT is spelled exactly as it is referenced in
# the original cell; the constant is defined outside this snippet.
model_history = model.fit(
    x=train_imgarray,
    y=trainmask_imgarray,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_split=VALIDATAION_SPILIT,
)

# Dataset-based call from the tutorial, left disabled:
# model_history = model.fit(train_batches, epochs=EPOCHS,
#                           steps_per_epoch=STEPS_PER_EPOCH,
#                           validation_steps=
#                           validation_data=test_batches,
#                           callbacks=[DisplayCallback()])
試したこと
デフォルトで64に設定してあるバッチサイズを、1, 10, 36, 128等に変更しましたが、エラー内容に変化はありませんでした。
また、
- png形式で読み込んでいた学習画像をjpeg形式に変更したデータセットを使用
- jsonファイルからマスク画像のpngファイルを作成し、データセットとして使用
以上の2つを試したところ、いずれの場合でもエラー文の
「InvalidArgumentError: Received a label value of 128 which is outside the valid range of [0, 3).」
が
「InvalidArgumentError: Received a label value of 3 which is outside the valid range of [0, 2).」
に変化しました。
学習が停止するのがバッチサイズ分の学習を終えた後なので、バッチが関係しているのかとも考えていますが、具体的な問題点が分からず行き詰まっています。
補足情報
- Jupyter Notebook
- Python 2.7.5
- TensorFlow 2.6.2
- keras 2.6.0
回答1件
あなたの回答
tips
プレビュー