I am trying to build hyperopt into /examples/cifar/train_cifar.py from the Chainer GitHub repository in order to optimize the number of filters in each convolutional layer.
The code is as follows:
```python
# coding:utf-8
from __future__ import print_function
import argparse
import random
from hyperopt import hp, tpe, Trials, fmin

import chainer
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer import Variable
from chainer import serializers

from chainer.datasets import get_cifar10
from chainer.datasets import get_cifar100
import json
import models.VGG
import optuna
import numpy as np


### Objective function
def objective(params):
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=32,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--line', '-line', default='',
                        help='line')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    ### Filter counts to be optimized
    gane = [int(params['conv' + str(i + 1)]) for i in range(13)]
    print(gane)
    model = L.Classifier(models.VGG.VGG(gane, class_labels))

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Store the accuracy of the final epoch
    log_file = open("result/log", "r")
    lines = log_file.readlines()
    log_file.close()
    train_acc = '"validation/main/accuracy": '
    count = 0
    for line in lines:
        if train_acc in line:
            count = count + 1
            first = line.find('0.')
            end = line.find(' ')
            slice = line[first:]
            if count == args.epoch:
                # print(slice)
                # print(type(slice))
                fitness = float(slice)

    err = 1.0 - fitness  # return the misclassification rate

    return err


if __name__ == '__main__':

    space = {'conv1': hp.choice('conv1', [32, 64, 128, 256, 512]),
             'conv2': hp.choice('conv2', [32, 64, 128, 256, 512]),
             'conv3': hp.choice('conv3', [32, 64, 128, 256, 512]),
             'conv4': hp.choice('conv4', [32, 64, 128, 256, 512]),
             'conv5': hp.choice('conv5', [32, 64, 128, 256, 512]),
             'conv6': hp.choice('conv6', [32, 64, 128, 256, 512]),
             'conv7': hp.choice('conv7', [32, 64, 128, 256, 512]),
             'conv8': hp.choice('conv8', [32, 64, 128, 256, 512]),
             'conv9': hp.choice('conv9', [32, 64, 128, 256, 512]),
             'conv10': hp.choice('conv10', [32, 64, 128, 256, 512]),
             'conv11': hp.choice('conv11', [32, 64, 128, 256, 512]),
             'conv12': hp.choice('conv12', [32, 64, 128, 256, 512]),
             'conv13': hp.choice('conv13', [32, 64, 128, 256, 512])}

    best = fmin(objective, space, algo=tpe.suggest, max_evals=500)
    print("best parameters", best)
```
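As an aside, the 13 entries of `space` differ only in their key names, so the same hyperopt search space can also be written as a loop. This is just a minimal, equivalent sketch of the dictionary above (the `FILTER_CANDIDATES` name is mine and not part of the original script):

```python
from hyperopt import hp

# Equivalent to the hand-written dictionary above: one categorical
# choice of filter count per convolutional layer (conv1 ... conv13).
FILTER_CANDIDATES = [32, 64, 128, 256, 512]
space = {'conv{}'.format(i + 1): hp.choice('conv{}'.format(i + 1), FILTER_CANDIDATES)
         for i in range(13)}
```

The objective function then reads `params['conv1']` through `params['conv13']` exactly as before.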
When I run this with the number of training epochs set to 10, cupy.cuda.memory.OutOfMemoryError is raised after a while. Where is the problem?
python 2.7.12
chainer 5.2.0
