yolo3の入力画像を416×416以外のサイズでも扱えるようにしたい

yolo3で画像検出を行っています。

・毎回写真を416×416に変更するのが大変であること
・VOTTで動画をアノテーションすると416×416のサイズで画像が出力されないこと

を考えると、416×416サイズの写真と、ほかのサイズの写真も入力できれば良いと考えています。
その時はどのようにコードをいじればよいのでしょうか。
また、416サイズでない写真で学習を行った場合、何がダメなのでしょうか。学習はしてくれるのでしょうか。

以下にtrain.pyのコードを添付しています。

python
1"""
2Retrain the YOLO model for your own dataset.
3"""
4
5import numpy as np
6import keras.backend as K
7import tensorflow as tf
8import tensorflow as tf
9from keras.layers import Input, Lambda
10from keras.models import Model
11from keras.optimizers import Adam
12from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
13
14from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
15from yolo3.utils import get_random_data
16
17from keras import backend as K
18
# Configure one TensorFlow session and hand it to Keras.
# The original pasted two configurations back to back: the first session was
# created and then immediately replaced by a second one built from a fresh
# config, which leaked the first session and dropped `visible_device_list`
# from the session actually registered with Keras.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        visible_device_list="0",  # specify GPU number
        allow_growth=True,  # allocate GPU memory on demand instead of all at once
    )
)
sess = tf.Session(config=config)
K.set_session(sess)
31
def _main(input_shape=(416, 416)):
    """Retrain YOLOv3 (or Tiny YOLOv3) on the annotations in ``2007_train.txt``.

    Training runs in two stages: first with most of the network frozen, then
    with every layer unfrozen for fine-tuning. Weights are written to
    ``logs/000/`` after each stage.

    Args:
        input_shape: ``(height, width)`` of the network input. Both values
            must be multiples of 32 because the ground-truth grids are built
            from ``h // 32`` and ``w // 32``. Defaults to ``(416, 416)``.

    Raises:
        ValueError: if ``input_shape`` is not a multiple of 32 in both axes.
    """
    annotation_path = '2007_train.txt'
    log_dir = 'logs/000/'
    classes_path = 'model_data/my_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    # NOTE(review): get_random_data receives input_shape and presumably
    # resizes every training image to it, so the source images themselves
    # should not need to be 416x416 -- confirm in yolo3/utils.py.
    h, w = input_shape
    if h % 32 or w % 32:
        raise ValueError(
            'input_shape must be a multiple of 32 in both dimensions, got {}'.format(input_shape))

    # Six anchors means the Tiny YOLOv3 layout (2 scales x 3 anchors).
    is_tiny_version = len(anchors) == 6  # default setting
    if is_tiny_version:
        model = create_tiny_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5')
    else:
        model = create_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/yolo_weights.h5')  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    # Fixed seed so the train/validation split is the same on every run;
    # the seed is reset afterwards so later augmentation stays random.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
    if True:
        model.compile(optimizer=Adam(lr=1e-3), loss={
            # use custom yolo_loss Lambda layer.
            'yolo_loss': lambda y_true, y_pred: y_pred})

        batch_size = 1
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        # initial_epoch must be below epochs: the original value of 100 made
        # this stage run zero epochs, so the frozen-stage training was skipped.
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train // batch_size),
                validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
                validation_steps=max(1, num_val // batch_size),
                epochs=50,
                initial_epoch=0,
                callbacks=[logging, checkpoint])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if True:
        for layer in model.layers:
            layer.trainable = True
        model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 1  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        # Continues the epoch counter from stage 1 (epochs 50..99).
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
            steps_per_epoch=max(1, num_train // batch_size),
            validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
            validation_steps=max(1, num_val // batch_size),
            epochs=100,
            initial_epoch=50,
            callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')

    # Further training if needed.
104
105
def get_classes(classes_path):
    """Load class names from a text file, one name per line.

    Args:
        classes_path: path to the class list file.

    Returns:
        A list with one whitespace-stripped string per line of the file,
        in file order.
    """
    with open(classes_path) as f:
        return [line.strip() for line in f]
112
def get_anchors(anchors_path):
    """Load anchor box sizes from the first line of a file.

    The line is a comma-separated list of numbers; consecutive pairs are
    grouped into rows.

    Args:
        anchors_path: path to the anchors file.

    Returns:
        A float ``numpy`` array of shape ``(num_anchors, 2)``.
    """
    with open(anchors_path) as f:
        first_line = f.readline()
    values = [float(x) for x in first_line.split(',')]
    return np.array(values).reshape(-1, 2)
119
120
def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/yolo_weights.h5'):
    """Create the YOLOv3 training model.

    The returned model takes the image plus three ground-truth tensors as
    inputs and outputs the value of the ``yolo_loss`` Lambda layer.

    Args:
        input_shape: ``(height, width)`` used to size the ground-truth grids
            (``h // 32``, ``h // 16``, ``h // 8`` and likewise for width).
        anchors: array of anchors; its length is divided by 3 to get the
            number of anchors per output scale.
        num_classes: number of object classes.
        load_pretrained: if true, load ``weights_path`` into the body by
            layer name, skipping mismatched layers.
        freeze_body: 1 freezes the first 185 layers (darknet53 body), 2
            freezes all but the last 3 layers; other values freeze nothing.
            Only applied when ``load_pretrained`` is true.
        weights_path: path to the pretrained weights file.

    Returns:
        The Keras training ``Model``.
    """
    # NOTE(review): clear_session resets the Keras backend state; this likely
    # also discards a session installed earlier via K.set_session -- confirm.
    K.clear_session()  # get a new session
    # The image input itself has no fixed spatial size.
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    anchors_per_scale = num_anchors // 3

    # One ground-truth input per output scale, coarsest grid first.
    y_true = [
        Input(shape=(h // stride, w // stride, anchors_per_scale, num_classes + 5))
        for stride in (32, 16, 8)
    ]

    model_body = yolo_body(image_input, anchors_per_scale, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            num = (185, len(model_body.layers) - 3)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer.
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model
150
def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/tiny_yolo_weights.h5'):
    """Create the Tiny YOLOv3 training model.

    The returned model takes the image plus two ground-truth tensors as
    inputs and outputs the value of the ``yolo_loss`` Lambda layer.

    Args:
        input_shape: ``(height, width)`` used to size the ground-truth grids
            (``h // 32`` and ``h // 16``, likewise for width).
        anchors: array of anchors; its length is divided by 2 to get the
            number of anchors per output scale.
        num_classes: number of object classes.
        load_pretrained: if true, load ``weights_path`` into the body by
            layer name, skipping mismatched layers.
        freeze_body: 1 freezes the first 20 layers (darknet body), 2 freezes
            all but the last 2 layers; other values freeze nothing. Only
            applied when ``load_pretrained`` is true.
        weights_path: path to the pretrained weights file.

    Returns:
        The Keras training ``Model``.
    """
    # NOTE(review): clear_session resets the Keras backend state; this likely
    # also discards a session installed earlier via K.set_session -- confirm.
    K.clear_session()  # get a new session
    # The image input itself has no fixed spatial size.
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    anchors_per_scale = num_anchors // 2

    # One ground-truth input per output scale, coarsest grid first.
    y_true = [
        Input(shape=(h // stride, w // stride, anchors_per_scale, num_classes + 5))
        for stride in (32, 16)
    ]

    model_body = tiny_yolo_body(image_input, anchors_per_scale, num_classes)
    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze the darknet body or freeze all but 2 output layers.
            num = (20, len(model_body.layers) - 2)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer.
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model
180
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Endless batch generator for ``fit_generator``.

    Args:
        annotation_lines: list of annotation lines; shuffled IN PLACE each
            time the read position wraps back to the start.
        batch_size: number of samples per yielded batch.
        input_shape: ``(height, width)`` passed to ``get_random_data`` and
            ``preprocess_true_boxes``.
        anchors: anchor array passed to ``preprocess_true_boxes``.
        num_classes: number of object classes.

    Yields:
        ``([image_data, *y_true], dummy_targets)`` where ``dummy_targets`` is
        a zero array of length ``batch_size``; the real loss is computed
        inside the model, so the targets are placeholders.
    """
    n = len(annotation_lines)
    i = 0
    while True:
        image_data = []
        box_data = []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the data.
            if i == 0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            i = (i + 1) % n
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)
199
def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Return a ``data_generator``, or ``None`` when there is nothing to yield.

    ``None`` is returned when ``annotation_lines`` is empty or ``batch_size``
    is not positive; otherwise all arguments are forwarded unchanged to
    ``data_generator``.
    """
    if not annotation_lines or batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
204
# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    _main()
207