質問編集履歴
6
修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -26,7 +26,7 @@
|
|
26
26
|
|
27
27
|
```
|
28
28
|
|
29
|
-
(tensorflow) C:\Users\
|
29
|
+
(tensorflow) C:\Users\User>python create_datasets.py
|
30
30
|
|
31
31
|
Traceback (most recent call last):
|
32
32
|
|
5
コードブロック
test
CHANGED
File without changes
|
test
CHANGED
@@ -40,6 +40,8 @@
|
|
40
40
|
|
41
41
|
### ソースコード
|
42
42
|
|
43
|
+
```ここに言語を入力
|
44
|
+
|
43
45
|
import os
|
44
46
|
|
45
47
|
import json
|
@@ -388,6 +390,8 @@
|
|
388
390
|
|
389
391
|
tf.app.run()
|
390
392
|
|
393
|
+
```
|
394
|
+
|
391
395
|
|
392
396
|
|
393
397
|
### 試したこと
|
4
見やすく
test
CHANGED
File without changes
|
test
CHANGED
@@ -40,268 +40,356 @@
|
|
40
40
|
|
41
41
|
### ソースコード
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
tf.flags.DEFINE_string("
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
EN
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
43
|
+
import os
|
44
|
+
|
45
|
+
import json
|
46
|
+
|
47
|
+
import numpy as np
|
48
|
+
|
49
|
+
from collections import namedtuple, Counter
|
50
|
+
|
51
|
+
import tensorflow as tf
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
tf.flags.DEFINE_string("train_img_dir", "data/img/train2014/", "Training image directory.")
|
56
|
+
|
57
|
+
tf.flags.DEFINE_string("val_img_dir", "data/img/val2014/", "Validation image directory.")
|
58
|
+
|
59
|
+
tf.flags.DEFINE_string("train_captions", "data/stair_captions_v1.1_train.json", "Training caption file.")
|
60
|
+
|
61
|
+
tf.flags.DEFINE_string("val_captions", "data/stair_captions_v1.1_val.json", "Validation caption file.")
|
62
|
+
|
63
|
+
tf.flags.DEFINE_string("out_dir", "data/tfrecords/", "Output TFRecords directiory.")
|
64
|
+
|
65
|
+
tf.flags.DEFINE_integer("min_word_count", 4, "The minimum number of occurrences of each word in th training set for includion in the vocab.")
|
66
|
+
|
67
|
+
tf.flags.DEFINE_string("word_list_file", "data/dictionary.txt", "Output word list file.")
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
FLAGS = tf.flags.FLAGS
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
START_WORD = '<S>'
|
76
|
+
|
77
|
+
END_WORD = '<E>'
|
78
|
+
|
79
|
+
UNKNOWN_WORD = '<UNW>'
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
NUM_TRAIN_FILE = 256
|
84
|
+
|
85
|
+
NUM_VAL_FILE = 4
|
86
|
+
|
87
|
+
NUM_TEST_FILE = 8
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
ImageMetadata = namedtuple("ImageMetadata",["img_id", "filename"])
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
#画像メタデータと辞書をもとに、指定されたファイル数に分割してバイナリ(TFRecord)を作成する
|
98
|
+
|
99
|
+
def _create_datasets(name, img_meta, captions, word_to_id, num_file):
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
#画像メタデータをだいたい等しく分割
|
104
|
+
|
105
|
+
img_chunk = np.array_split(img_meta, num_file)
|
106
|
+
|
107
|
+
counter = 0
|
108
|
+
|
109
|
+
for i in range(1, num_file + 1):
|
110
|
+
|
111
|
+
output_file_name = "%s-%.3d.tfrecord" % (name, i)
|
112
|
+
|
113
|
+
output_file_path = os.path.join(FLAGS.out_dir, output_file_name)
|
114
|
+
|
115
|
+
target_chunk = img_chunk[counter]
|
116
|
+
|
117
|
+
#対象画像群書ごとにWriterを定義
|
118
|
+
|
119
|
+
with tf.python_io.TFRecordWriter(output_file_path) as writer:
|
120
|
+
|
121
|
+
for img in target_chunk:
|
122
|
+
|
123
|
+
img_id = img[0]
|
124
|
+
|
125
|
+
filename = img[1]
|
126
|
+
|
127
|
+
#画像ファイルをバイト列として読み込み
|
128
|
+
|
129
|
+
with tf.gfile.FastGFile(filename, "rb") as f:
|
130
|
+
|
131
|
+
data = f.read()
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
#キャプションのid化
|
136
|
+
|
137
|
+
caption = captions[int(img_id)]
|
138
|
+
|
139
|
+
caption_ids = []
|
140
|
+
|
141
|
+
for w in caption:
|
142
|
+
|
143
|
+
if w in word_to_id:
|
144
|
+
|
145
|
+
caption_ids.append(word_to_id[w])
|
146
|
+
|
147
|
+
else:
|
148
|
+
|
149
|
+
caption_ids.append(word_to_id[UNKNOWN_WORD])
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
#固定長部分
|
154
|
+
|
155
|
+
context = tf.train.Features(feature={
|
156
|
+
|
157
|
+
"img_id": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
|
158
|
+
|
159
|
+
"data": tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),
|
160
|
+
|
161
|
+
})
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
#可変長部分
|
166
|
+
|
167
|
+
caption_feature = [tf.train.Feature(int64_list=tf.train.Int64List(value=[v])) for v in caption_ids]
|
168
|
+
|
169
|
+
feature_lists = tf.train.FeatureLists(feature_list={
|
170
|
+
|
171
|
+
"caption":tf.train.FeatureList(feature=caption_feature)
|
172
|
+
|
173
|
+
})
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
#TFRecordに書き込み
|
178
|
+
|
179
|
+
sequence_example = tf.train.SequenceExample(context=context, feature_lists=feature_lists)
|
180
|
+
|
181
|
+
writer.write(sequence_example.SerializeToString())
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
counter += 1
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
#jsonファイルを読み込み画像のid, ファイル名, キャプションを取得する。
|
190
|
+
|
191
|
+
def _load_metadata(caption_filename, img_dir):
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
#jsonファイルをロード
|
196
|
+
|
197
|
+
with open(caption_filename, 'r') as f:
|
198
|
+
|
199
|
+
meta_data = json.load(f)
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
#画像idとファイル名を持つnamedtupleのリストを作成
|
204
|
+
|
205
|
+
meta_list = [ImageMetadata(x['id'], os.path.join(img_dir, x['file_name'])) for x in meta_data['images']]
|
206
|
+
|
207
|
+
|
208
|
+
|
209
|
+
#スペース区切りのcaptionを単語の配列に変換
|
210
|
+
|
211
|
+
def _create_word_list(caption):
|
212
|
+
|
213
|
+
tokenized_captions = [START_WORD]
|
214
|
+
|
215
|
+
tokenized_captions.extend(caption.split())
|
216
|
+
|
217
|
+
tokenized_captions.append(END_WORD)
|
218
|
+
|
219
|
+
return tokenized_captions
|
220
|
+
|
221
|
+
|
222
|
+
|
223
|
+
#{画像id => キャプションのリスト}の辞書を作成
|
224
|
+
|
225
|
+
id_to_captions = {}
|
226
|
+
|
227
|
+
for annotation in meta_data["annotations"]:
|
228
|
+
|
229
|
+
img_id = annotation['image_id']
|
230
|
+
|
231
|
+
caption = annotation['tokenized_caption']
|
232
|
+
|
233
|
+
caption = _create_word_list(caption)
|
234
|
+
|
235
|
+
#キャプションはいくつかあるため1つだけを採用
|
236
|
+
|
237
|
+
id_to_captions[img_id] = caption
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
print("Loaded caption metadata for %d images from %s" % (len(meta_list), caption_filename))
|
242
|
+
|
243
|
+
|
244
|
+
|
245
|
+
return meta_list, id_to_captions
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
|
250
|
+
|
251
|
+
def _create_vocab(captions):
|
252
|
+
|
253
|
+
|
254
|
+
|
255
|
+
counter = Counter()
|
256
|
+
|
257
|
+
for c in captions:
|
258
|
+
|
259
|
+
counter.update(c)
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
print("total words:", len(counter))
|
264
|
+
|
265
|
+
#出現回数が一定数のものだけ辞書に採用。出現回数降順でソート
|
266
|
+
|
267
|
+
#word_countsは(単語, 出現回数)のリスト
|
268
|
+
|
269
|
+
word_counts = [x for x in counter.items() if x[1] >= FLAGS.min_word_count]
|
270
|
+
|
271
|
+
word_counts.sort(key=lambda x: x[1], reverse=True)
|
272
|
+
|
273
|
+
print("Words in vocab:", len(word_counts))
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
#辞書作成
|
280
|
+
|
281
|
+
word_list = [x[0] for x in word_counts]
|
282
|
+
|
283
|
+
#<S>と<E>のidを1,0で固定したいので、一度削除して先頭に追加する
|
284
|
+
|
285
|
+
word_list.remove(START_WORD)
|
286
|
+
|
287
|
+
word_list.remove(END_WORD)
|
288
|
+
|
289
|
+
word_list.insert(0, START_WORD)
|
290
|
+
|
291
|
+
word_list.insert(0, END_WORD)
|
292
|
+
|
293
|
+
|
294
|
+
|
295
|
+
word_list.append(UNKNOWN_WORD)
|
296
|
+
|
297
|
+
word_to_id = dict([(x, y) for (y, x) in enumerate(word_list)])
|
298
|
+
|
299
|
+
id_to_word = dict([(x, y) for (x, y) in enumerate(word_list)])
|
300
|
+
|
301
|
+
return word_to_id, id_to_word
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
|
306
|
+
|
307
|
+
def main(argv):
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
#jsonファイルからメタデータの読み込み
|
312
|
+
|
313
|
+
#(画像id, ファイルパス)のタプルの配列と{id=>キャプションのリスト}を取得
|
314
|
+
|
315
|
+
train_meta, train_captions = _load_metadata(FLAGS.train_captions, FLAGS.train_img_dir)
|
316
|
+
|
317
|
+
val_meta, val_captions = _load_metadata(FLAGS.val_captions, FLAGS.val_img_dir)
|
318
|
+
|
319
|
+
|
320
|
+
|
321
|
+
#キャプションをマージ
|
322
|
+
|
323
|
+
captions = {k:v for dic in [train_captions, val_captions] for k, v in dic.items()}
|
324
|
+
|
325
|
+
|
326
|
+
|
327
|
+
#訓練データ,バリデーションデータ,テストデータに分割
|
328
|
+
|
329
|
+
train_cutoff = int(0.85 * len(val_meta))
|
330
|
+
|
331
|
+
val_cutoff = int(0.90 * len(val_meta))
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
train_dataset = train_meta + val_meta[0:train_cutoff]
|
336
|
+
|
337
|
+
val_dataset = val_meta[train_cutoff:val_cutoff]
|
338
|
+
|
339
|
+
test_dataset = val_meta[val_cutoff:]
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
#訓練データから辞書作成
|
346
|
+
|
347
|
+
train_captions = []
|
348
|
+
|
349
|
+
for meta in train_dataset:
|
350
|
+
|
351
|
+
c = captions[meta.img_id]
|
352
|
+
|
353
|
+
train_captions.append(c)
|
354
|
+
|
355
|
+
|
356
|
+
|
357
|
+
word_to_id, id_to_word = _create_vocab(train_captions)
|
358
|
+
|
359
|
+
|
360
|
+
|
361
|
+
|
362
|
+
|
363
|
+
#画像を読み込みメタデータと結合したバイナリを作成
|
364
|
+
|
365
|
+
_create_datasets("train", train_dataset, captions, word_to_id, NUM_TRAIN_FILE)
|
366
|
+
|
367
|
+
_create_datasets("val", val_dataset, captions, word_to_id, NUM_VAL_FILE)
|
368
|
+
|
369
|
+
_create_datasets("test", test_dataset, captions, word_to_id, NUM_TEST_FILE)
|
370
|
+
|
371
|
+
|
372
|
+
|
373
|
+
# 単語リスト出力
|
374
|
+
|
375
|
+
with open(FLAGS.word_list_file, 'a') as f:
|
376
|
+
|
377
|
+
for k, v in id_to_word.items():
|
378
|
+
|
379
|
+
f.write(v)
|
380
|
+
|
381
|
+
f.write('\n')
|
382
|
+
|
383
|
+
|
384
|
+
|
385
|
+
|
386
|
+
|
387
|
+
if __name__ == "__main__":
|
388
|
+
|
389
|
+
tf.app.run()
|
90
390
|
|
91
391
|
|
92
392
|
|
93
|
-
meta_list=[ImageMetadata(x['id'],os.path.join(img_dir,x['file_name'])) for x in meta_data['images']]
|
94
|
-
|
95
|
-
def _create_word_list(caption):
|
96
|
-
|
97
|
-
tokenized_caption=[START_WORD]
|
98
|
-
|
99
|
-
tokenized_caption.extend(caption.split())
|
100
|
-
|
101
|
-
tokenized_captions.append(END_WORD)
|
102
|
-
|
103
|
-
return tokenized_captions
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
id_to_captions={}
|
110
|
-
|
111
|
-
for annotation in meta_data["annotations"]:
|
112
|
-
|
113
|
-
img_id=annotation['image_id']
|
114
|
-
|
115
|
-
caption = annotation['tokenized_caption']
|
116
|
-
|
117
|
-
caption =_create_word_list(caption)
|
118
|
-
|
119
|
-
id_to_captions[img_id]=caption
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
print("Loaded caption metadata for %d images from %s" %(len(meta_list),caption_filename))
|
124
|
-
|
125
|
-
return meta_list,id_to_captions
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
def main (argv):
|
130
|
-
|
131
|
-
train_meta,train_captions=_load_metadata(FLAGS.train_captions,FLAGS.train_img_dir)
|
132
|
-
|
133
|
-
val_meta,val_captions=_load_metadata(FLAGS.val_captions,FLAGS.val_img_dir)
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
captions={k:v for dic in [train_captions,val_captions]for k,v in dic.items()}
|
138
|
-
|
139
|
-
train_cutoff=int(0.85*len(val_meta))
|
140
|
-
|
141
|
-
val_cutoff=int(0.90*len(val_meta))
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
train_dataset=train_meta+val_meta[0:train_off]
|
146
|
-
|
147
|
-
val_dataset=val_meta[train_cutoff:val_cutoff]
|
148
|
-
|
149
|
-
test_dataset=val_meta[val_cutoff:]
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
train_captions=[]
|
154
|
-
|
155
|
-
for meta in train_dataset:
|
156
|
-
|
157
|
-
c=captions[meta.img_id]
|
158
|
-
|
159
|
-
train_captions.append(c)
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
word_to_id,id_to_word=_create_vocab(train_captions)
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
_create_datasets("train",train_dataset,captions,word_to_id,NUM_TRAIN_FILE)
|
168
|
-
|
169
|
-
_create_datasets("val",val_dataset,captions,word_to_id,NUM_VAL_FILE)
|
170
|
-
|
171
|
-
_create_datasets("test",test_dataset,captions,word_to_id,NUM_TEST_FILE)
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
with open(FLAGS.word_list_file,'a')as f:
|
176
|
-
|
177
|
-
for k,v in id_to_word.items():
|
178
|
-
|
179
|
-
f.write(v)
|
180
|
-
|
181
|
-
f.write('\n')
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
def _create_vocab(captions):
|
186
|
-
|
187
|
-
counter=Counter()
|
188
|
-
|
189
|
-
for c in captions:
|
190
|
-
|
191
|
-
counter.update(c)
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
print("total words:",len(counter))
|
196
|
-
|
197
|
-
word_counts=[x for x in counter.items()
|
198
|
-
|
199
|
-
if x[1]>=FLAGS.min_word_count]
|
200
|
-
|
201
|
-
word_counts.sort(key=lambda x:x[1],reverse=True)
|
202
|
-
|
203
|
-
print("Word in Vocab:",len(word_counts))
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
word_list=[x[0]for x in word_counts]
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
word_list.remove(START_WORD)
|
212
|
-
|
213
|
-
word_list.remove(END_WORD)
|
214
|
-
|
215
|
-
word_list.insert(0,START_WORD)
|
216
|
-
|
217
|
-
word_list.insert(0,END_WORD)
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
word_list.append(UNKNOWN_WORD)
|
222
|
-
|
223
|
-
word_to_id=dict([(x,y)for (y,x)in enumerate(word_list)])
|
224
|
-
|
225
|
-
id_to_word=dict([(x,y)for (x,y)in enumerate(word_list)])
|
226
|
-
|
227
|
-
return word_to_id, id_to_word
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
def _create_datasets(name,img_meta,captions,word_to_id,num_file):
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
img_chunk=np.array_split(img_meta,num_file)
|
240
|
-
|
241
|
-
counter=0
|
242
|
-
|
243
|
-
for i in range(1,num_file+1):
|
244
|
-
|
245
|
-
output_file_name="%s-%.3d.tfrecord" %(name,i)
|
246
|
-
|
247
|
-
output_file_path=os.path.join(FLAGS.out_dir,output_file_name)
|
248
|
-
|
249
|
-
target_chunk=img_chunk[counter]
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
with tf.python_io.TFRecordWriter(output_file_path)as writer:
|
254
|
-
|
255
|
-
for img in target_chunk:
|
256
|
-
|
257
|
-
img_id=img[0]
|
258
|
-
|
259
|
-
filename=img[1]
|
260
|
-
|
261
|
-
with tf.gfile.FastGFile(filename,"rb")as f:
|
262
|
-
|
263
|
-
data=f.read()
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
caption=captions[int(img_id)]
|
268
|
-
|
269
|
-
caption_ids=[]
|
270
|
-
|
271
|
-
for w in caption:
|
272
|
-
|
273
|
-
if w in word_to_id:
|
274
|
-
|
275
|
-
caption_ids.append(word_to_id[w])
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
else :
|
280
|
-
|
281
|
-
caption_ids.append(word_to_id[UNKNOWN_WORD])
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
context=tf.train.Features(feature={"img_id":tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
|
286
|
-
|
287
|
-
"data":tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),})
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
caption_feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))for v in caption_ids]
|
292
|
-
|
293
|
-
feature_lists=tf.train.FeatureLists(feature_list={"caption":tf.train.FeatureList(feature=caption_feature)})
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
sequence_example=tf.train.SequenceExample(context=context,feature_lists=feature_lists)
|
298
|
-
|
299
|
-
writer.write(sequence_example.SerializeToString())
|
300
|
-
|
301
|
-
counter +=1
|
302
|
-
|
303
|
-
|
304
|
-
|
305
393
|
### 試したこと
|
306
394
|
|
307
395
|
pyファイル内で `null = None` や `null = ''` を試しましたが、効果はありませんでした
|
3
ソースコード
test
CHANGED
File without changes
|
test
CHANGED
@@ -38,9 +38,269 @@
|
|
38
38
|
|
39
39
|
```
|
40
40
|
|
41
|
-
|
41
|
+
### ソースコード
|
42
|
+
|
42
|
-
|
43
|
+
tf.flags.DEFINE_string("train_img_dir","data/img/train2014/","Training image directory")
|
44
|
+
|
43
|
-
|
45
|
+
tf.flags.DEFINE_string("val_img_dir","data/img/val2014/","Validation image directory")
|
46
|
+
|
47
|
+
tf.flags.DEFINE_string("train_captions","data/stair_captions_v1.1_train.json","Training caption file")
|
48
|
+
|
49
|
+
tf.flags.DEFINE_string("val_captions","data/stair_captions_v1.1_val.json","Valdation caption file")
|
50
|
+
|
51
|
+
tf.flags.DEFINE_string("out_dir","data/tfrecords/","Output TFRecord directory")
|
52
|
+
|
53
|
+
tf.flags.DEFINE_integer("min_word_count",4,"The minimum number of occurrences of each word in the training set for includion in the vocab ")
|
54
|
+
|
55
|
+
tf.flags.DEFINE_string("word_list_file","data/dirctionary.txt","Output word list file")
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
FLAGS=tf.flags.FLAGS
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
START_WORD='<s>'
|
64
|
+
|
65
|
+
END_WORD='<E>'
|
66
|
+
|
67
|
+
UNKNOWN_WORD='<UNW>'
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
NUM_TRAIN_FILE=256
|
72
|
+
|
73
|
+
NUM_VAL_FILE=4
|
74
|
+
|
75
|
+
NUM_TEST_FILE=8
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
ImageMetadata=namedtuple("ImageMetadata",["img_id","filename"])
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
def _load_metadata(caption_filename,img_dir):
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
with open(caption_filename,'r')as f:
|
88
|
+
|
89
|
+
meta_data=json.load(f)
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
meta_list=[ImageMetadata(x['id'],os.path.join(img_dir,x['file_name'])) for x in meta_data['images']]
|
94
|
+
|
95
|
+
def _create_word_list(caption):
|
96
|
+
|
97
|
+
tokenized_caption=[START_WORD]
|
98
|
+
|
99
|
+
tokenized_caption.extend(caption.split())
|
100
|
+
|
101
|
+
tokenized_captions.append(END_WORD)
|
102
|
+
|
103
|
+
return tokenized_captions
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
id_to_captions={}
|
110
|
+
|
111
|
+
for annotation in meta_data["annotations"]:
|
112
|
+
|
113
|
+
img_id=annotation['image_id']
|
114
|
+
|
115
|
+
caption = annotation['tokenized_caption']
|
116
|
+
|
117
|
+
caption =_create_word_list(caption)
|
118
|
+
|
119
|
+
id_to_captions[img_id]=caption
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
print("Loaded caption metadata for %d images from %s" %(len(meta_list),caption_filename))
|
124
|
+
|
125
|
+
return meta_list,id_to_captions
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
def main (argv):
|
130
|
+
|
131
|
+
train_meta,train_captions=_load_metadata(FLAGS.train_captions,FLAGS.train_img_dir)
|
132
|
+
|
133
|
+
val_meta,val_captions=_load_metadata(FLAGS.val_captions,FLAGS.val_img_dir)
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
captions={k:v for dic in [train_captions,val_captions]for k,v in dic.items()}
|
138
|
+
|
139
|
+
train_cutoff=int(0.85*len(val_meta))
|
140
|
+
|
141
|
+
val_cutoff=int(0.90*len(val_meta))
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
train_dataset=train_meta+val_meta[0:train_off]
|
146
|
+
|
147
|
+
val_dataset=val_meta[train_cutoff:val_cutoff]
|
148
|
+
|
149
|
+
test_dataset=val_meta[val_cutoff:]
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
train_captions=[]
|
154
|
+
|
155
|
+
for meta in train_dataset:
|
156
|
+
|
157
|
+
c=captions[meta.img_id]
|
158
|
+
|
159
|
+
train_captions.append(c)
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
word_to_id,id_to_word=_create_vocab(train_captions)
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
_create_datasets("train",train_dataset,captions,word_to_id,NUM_TRAIN_FILE)
|
168
|
+
|
169
|
+
_create_datasets("val",val_dataset,captions,word_to_id,NUM_VAL_FILE)
|
170
|
+
|
171
|
+
_create_datasets("test",test_dataset,captions,word_to_id,NUM_TEST_FILE)
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
with open(FLAGS.word_list_file,'a')as f:
|
176
|
+
|
177
|
+
for k,v in id_to_word.items():
|
178
|
+
|
179
|
+
f.write(v)
|
180
|
+
|
181
|
+
f.write('\n')
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
def _create_vocab(captions):
|
186
|
+
|
187
|
+
counter=Counter()
|
188
|
+
|
189
|
+
for c in captions:
|
190
|
+
|
191
|
+
counter.update(c)
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
print("total words:",len(counter))
|
196
|
+
|
197
|
+
word_counts=[x for x in counter.items()
|
198
|
+
|
199
|
+
if x[1]>=FLAGS.min_word_count]
|
200
|
+
|
201
|
+
word_counts.sort(key=lambda x:x[1],reverse=True)
|
202
|
+
|
203
|
+
print("Word in Vocab:",len(word_counts))
|
204
|
+
|
205
|
+
|
206
|
+
|
207
|
+
word_list=[x[0]for x in word_counts]
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
word_list.remove(START_WORD)
|
212
|
+
|
213
|
+
word_list.remove(END_WORD)
|
214
|
+
|
215
|
+
word_list.insert(0,START_WORD)
|
216
|
+
|
217
|
+
word_list.insert(0,END_WORD)
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
word_list.append(UNKNOWN_WORD)
|
222
|
+
|
223
|
+
word_to_id=dict([(x,y)for (y,x)in enumerate(word_list)])
|
224
|
+
|
225
|
+
id_to_word=dict([(x,y)for (x,y)in enumerate(word_list)])
|
226
|
+
|
227
|
+
return word_to_id, id_to_word
|
228
|
+
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
def _create_datasets(name,img_meta,captions,word_to_id,num_file):
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
img_chunk=np.array_split(img_meta,num_file)
|
240
|
+
|
241
|
+
counter=0
|
242
|
+
|
243
|
+
for i in range(1,num_file+1):
|
244
|
+
|
245
|
+
output_file_name="%s-%.3d.tfrecord" %(name,i)
|
246
|
+
|
247
|
+
output_file_path=os.path.join(FLAGS.out_dir,output_file_name)
|
248
|
+
|
249
|
+
target_chunk=img_chunk[counter]
|
250
|
+
|
251
|
+
|
252
|
+
|
253
|
+
with tf.python_io.TFRecordWriter(output_file_path)as writer:
|
254
|
+
|
255
|
+
for img in target_chunk:
|
256
|
+
|
257
|
+
img_id=img[0]
|
258
|
+
|
259
|
+
filename=img[1]
|
260
|
+
|
261
|
+
with tf.gfile.FastGFile(filename,"rb")as f:
|
262
|
+
|
263
|
+
data=f.read()
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
caption=captions[int(img_id)]
|
268
|
+
|
269
|
+
caption_ids=[]
|
270
|
+
|
271
|
+
for w in caption:
|
272
|
+
|
273
|
+
if w in word_to_id:
|
274
|
+
|
275
|
+
caption_ids.append(word_to_id[w])
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
else :
|
280
|
+
|
281
|
+
caption_ids.append(word_to_id[UNKNOWN_WORD])
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
context=tf.train.Features(feature={"img_id":tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
|
286
|
+
|
287
|
+
"data":tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),})
|
288
|
+
|
289
|
+
|
290
|
+
|
291
|
+
caption_feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))for v in caption_ids]
|
292
|
+
|
293
|
+
feature_lists=tf.train.FeatureLists(feature_list={"caption":tf.train.FeatureList(feature=caption_feature)})
|
294
|
+
|
295
|
+
|
296
|
+
|
297
|
+
sequence_example=tf.train.SequenceExample(context=context,feature_lists=feature_lists)
|
298
|
+
|
299
|
+
writer.write(sequence_example.SerializeToString())
|
300
|
+
|
301
|
+
counter +=1
|
302
|
+
|
303
|
+
|
44
304
|
|
45
305
|
### 試したこと
|
46
306
|
|
2
修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
### 前提・実現したいこと
|
2
2
|
|
3
|
-
某参考書のコードで
|
3
|
+
某参考書のコードで画像キャプショニング
|
4
4
|
|
5
|
-
JSONファイルをTFRecord形式に整形したいのですが
|
5
|
+
STAIR Captionsの画像データの情報(JSONファイル)をTFRecord形式に整形したいのですが
|
6
6
|
|
7
|
-
コマンドプロンプトで実行しようとすると Nameerrorが出てしまいます
|
7
|
+
いざPythonのソースファイルをコマンドプロンプトで実行しようとすると、NameErrorが出てしまいます
|
8
8
|
|
9
9
|
Pythonではnullはサポートされていない?みたいです
|
10
10
|
|
1
文章の欠落
test
CHANGED
File without changes
|
test
CHANGED
@@ -8,7 +8,13 @@
|
|
8
8
|
|
9
9
|
Pythonではnullはサポートされていない?みたいです
|
10
10
|
|
11
|
-
pythonコード内にexecution_countは
|
11
|
+
Pythonコード内にexecution_countという項目は入れていないので
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
execution_countが何なのか、どこから来たのか分からないです。
|
16
|
+
|
17
|
+
|
12
18
|
|
13
19
|
execution_count:nullをなんとかしたいです
|
14
20
|
|