質問編集履歴

6

修正

2018/04/15 10:40

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -26,7 +26,7 @@
26
26
 
27
27
  ```
28
28
 
29
- (tensorflow) C:\Users\Tsai Tensei>python create_datasets.py
29
+ (tensorflow) C:\Users\User>python create_datasets.py
30
30
 
31
31
  Traceback (most recent call last):
32
32
 

5

コードブロック

2018/04/15 10:40

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -40,6 +40,8 @@
40
40
 
41
41
  ### ソースコード
42
42
 
43
+ ```ここに言語を入力
44
+
43
45
  import os
44
46
 
45
47
  import json
@@ -388,6 +390,8 @@
388
390
 
389
391
  tf.app.run()
390
392
 
393
+ ```
394
+
391
395
 
392
396
 
393
397
  ### 試したこと

4

見やすく

2018/04/15 10:38

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -40,268 +40,356 @@
40
40
 
41
41
  ### ソースコード
42
42
 
43
- tf.flags.DEFINE_string("train_img_dir","data/img/train2014/","Training image directory")
44
-
45
- tf.flags.DEFINE_string("val_img_dir","data/img/val2014/","Validation image directory")
46
-
47
- tf.flags.DEFINE_string("train_captions","data/stair_captions_v1.1_train.json","Training caption file")
48
-
49
- tf.flags.DEFINE_string("val_captions","data/stair_captions_v1.1_val.json","Valdation caption file")
50
-
51
- tf.flags.DEFINE_string("out_dir","data/tfrecords/","Output TFRecord directory")
52
-
53
- tf.flags.DEFINE_integer("min_word_count",4,"The minimum number of occurrences of each word in the training set for includion in the vocab ")
54
-
55
- tf.flags.DEFINE_string("word_list_file","data/dirctionary.txt","Output word list file")
56
-
57
-
58
-
59
- FLAGS=tf.flags.FLAGS
60
-
61
-
62
-
63
- START_WORD='<s>'
64
-
65
- END_WORD='<E>'
66
-
67
- UNKNOWN_WORD='<UNW>'
68
-
69
-
70
-
71
- NUM_TRAIN_FILE=256
72
-
73
- NUM_VAL_FILE=4
74
-
75
- NUM_TEST_FILE=8
76
-
77
-
78
-
79
- ImageMetadata=namedtuple("ImageMetadata",["img_id","filename"])
80
-
81
-
82
-
83
- def _load_metadata(caption_filename,img_dir):
84
-
85
-
86
-
87
- with open(caption_filename,'r')as f:
88
-
89
- meta_data=json.load(f)
43
+ import os
44
+
45
+ import json
46
+
47
+ import numpy as np
48
+
49
+ from collections import namedtuple, Counter
50
+
51
+ import tensorflow as tf
52
+
53
+
54
+
55
+ tf.flags.DEFINE_string("train_img_dir", "data/img/train2014/", "Training image directory.")
56
+
57
+ tf.flags.DEFINE_string("val_img_dir", "data/img/val2014/", "Validation image directory.")
58
+
59
+ tf.flags.DEFINE_string("train_captions", "data/stair_captions_v1.1_train.json", "Training caption file.")
60
+
61
+ tf.flags.DEFINE_string("val_captions", "data/stair_captions_v1.1_val.json", "Validation caption file.")
62
+
63
+ tf.flags.DEFINE_string("out_dir", "data/tfrecords/", "Output TFRecords directiory.")
64
+
65
+ tf.flags.DEFINE_integer("min_word_count", 4, "The minimum number of occurrences of each word in th training set for includion in the vocab.")
66
+
67
+ tf.flags.DEFINE_string("word_list_file", "data/dictionary.txt", "Output word list file.")
68
+
69
+
70
+
71
+ FLAGS = tf.flags.FLAGS
72
+
73
+
74
+
75
+ START_WORD = '<S>'
76
+
77
+ END_WORD = '<E>'
78
+
79
+ UNKNOWN_WORD = '<UNW>'
80
+
81
+
82
+
83
+ NUM_TRAIN_FILE = 256
84
+
85
+ NUM_VAL_FILE = 4
86
+
87
+ NUM_TEST_FILE = 8
88
+
89
+
90
+
91
+
92
+
93
+ ImageMetadata = namedtuple("ImageMetadata",["img_id", "filename"])
94
+
95
+
96
+
97
+ #画像メタデータと辞書をもとに、指定されたファイル数に分割してバイナリ(TFRecord)を作成する
98
+
99
+ def _create_datasets(name, img_meta, captions, word_to_id, num_file):
100
+
101
+
102
+
103
+ #画像メタデータをだいたい等しく分割
104
+
105
+ img_chunk = np.array_split(img_meta, num_file)
106
+
107
+ counter = 0
108
+
109
+ for i in range(1, num_file + 1):
110
+
111
+ output_file_name = "%s-%.3d.tfrecord" % (name, i)
112
+
113
+ output_file_path = os.path.join(FLAGS.out_dir, output_file_name)
114
+
115
+ target_chunk = img_chunk[counter]
116
+
117
+ #対象画像群書ごとにWriterを定義
118
+
119
+ with tf.python_io.TFRecordWriter(output_file_path) as writer:
120
+
121
+ for img in target_chunk:
122
+
123
+ img_id = img[0]
124
+
125
+ filename = img[1]
126
+
127
+ #画像ファイルをバイト列として読み込み
128
+
129
+ with tf.gfile.FastGFile(filename, "rb") as f:
130
+
131
+ data = f.read()
132
+
133
+
134
+
135
+ #キャプションのid化
136
+
137
+ caption = captions[int(img_id)]
138
+
139
+ caption_ids = []
140
+
141
+ for w in caption:
142
+
143
+ if w in word_to_id:
144
+
145
+ caption_ids.append(word_to_id[w])
146
+
147
+ else:
148
+
149
+ caption_ids.append(word_to_id[UNKNOWN_WORD])
150
+
151
+
152
+
153
+ #固定長部分
154
+
155
+ context = tf.train.Features(feature={
156
+
157
+ "img_id": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
158
+
159
+ "data": tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),
160
+
161
+ })
162
+
163
+
164
+
165
+ #可変長部分
166
+
167
+ caption_feature = [tf.train.Feature(int64_list=tf.train.Int64List(value=[v])) for v in caption_ids]
168
+
169
+ feature_lists = tf.train.FeatureLists(feature_list={
170
+
171
+ "caption":tf.train.FeatureList(feature=caption_feature)
172
+
173
+ })
174
+
175
+
176
+
177
+ #TFRecordに書き込み
178
+
179
+ sequence_example = tf.train.SequenceExample(context=context, feature_lists=feature_lists)
180
+
181
+ writer.write(sequence_example.SerializeToString())
182
+
183
+
184
+
185
+ counter += 1
186
+
187
+
188
+
189
+ #jsonファイルを読み込み画像のid, ファイル名, キャプションを取得する。
190
+
191
+ def _load_metadata(caption_filename, img_dir):
192
+
193
+
194
+
195
+ #jsonファイルをロード
196
+
197
+ with open(caption_filename, 'r') as f:
198
+
199
+ meta_data = json.load(f)
200
+
201
+
202
+
203
+ #画像idとファイル名を持つnamedtupleのリストを作成
204
+
205
+ meta_list = [ImageMetadata(x['id'], os.path.join(img_dir, x['file_name'])) for x in meta_data['images']]
206
+
207
+
208
+
209
+ #スペース区切りのcaptionを単語の配列に変換
210
+
211
+ def _create_word_list(caption):
212
+
213
+ tokenized_captions = [START_WORD]
214
+
215
+ tokenized_captions.extend(caption.split())
216
+
217
+ tokenized_captions.append(END_WORD)
218
+
219
+ return tokenized_captions
220
+
221
+
222
+
223
+ #{画像id => キャプションのリスト}の辞書を作成
224
+
225
+ id_to_captions = {}
226
+
227
+ for annotation in meta_data["annotations"]:
228
+
229
+ img_id = annotation['image_id']
230
+
231
+ caption = annotation['tokenized_caption']
232
+
233
+ caption = _create_word_list(caption)
234
+
235
+ #キャプションはいくつかあるため1つだけを採用
236
+
237
+ id_to_captions[img_id] = caption
238
+
239
+
240
+
241
+ print("Loaded caption metadata for %d images from %s" % (len(meta_list), caption_filename))
242
+
243
+
244
+
245
+ return meta_list, id_to_captions
246
+
247
+
248
+
249
+
250
+
251
+ def _create_vocab(captions):
252
+
253
+
254
+
255
+ counter = Counter()
256
+
257
+ for c in captions:
258
+
259
+ counter.update(c)
260
+
261
+
262
+
263
+ print("total words:", len(counter))
264
+
265
+ #出現回数が一定数のものだけ辞書に採用。出現回数降順でソート
266
+
267
+ #word_countsは(単語, 出現回数)のリスト
268
+
269
+ word_counts = [x for x in counter.items() if x[1] >= FLAGS.min_word_count]
270
+
271
+ word_counts.sort(key=lambda x: x[1], reverse=True)
272
+
273
+ print("Words in vocab:", len(word_counts))
274
+
275
+
276
+
277
+
278
+
279
+ #辞書作成
280
+
281
+ word_list = [x[0] for x in word_counts]
282
+
283
+ #<S>と<E>のidを1,0で固定したいので、一度削除して先頭に追加する
284
+
285
+ word_list.remove(START_WORD)
286
+
287
+ word_list.remove(END_WORD)
288
+
289
+ word_list.insert(0, START_WORD)
290
+
291
+ word_list.insert(0, END_WORD)
292
+
293
+
294
+
295
+ word_list.append(UNKNOWN_WORD)
296
+
297
+ word_to_id = dict([(x, y) for (y, x) in enumerate(word_list)])
298
+
299
+ id_to_word = dict([(x, y) for (x, y) in enumerate(word_list)])
300
+
301
+ return word_to_id, id_to_word
302
+
303
+
304
+
305
+
306
+
307
+ def main(argv):
308
+
309
+
310
+
311
+ #jsonファイルからメタデータの読み込み
312
+
313
+ #(画像id, ファイルパス)のタプルの配列と{id=>キャプションのリスト}を取得
314
+
315
+ train_meta, train_captions = _load_metadata(FLAGS.train_captions, FLAGS.train_img_dir)
316
+
317
+ val_meta, val_captions = _load_metadata(FLAGS.val_captions, FLAGS.val_img_dir)
318
+
319
+
320
+
321
+ #キャプションをマージ
322
+
323
+ captions = {k:v for dic in [train_captions, val_captions] for k, v in dic.items()}
324
+
325
+
326
+
327
+ #訓練データ,バリデーションデータ,テストデータに分割
328
+
329
+ train_cutoff = int(0.85 * len(val_meta))
330
+
331
+ val_cutoff = int(0.90 * len(val_meta))
332
+
333
+
334
+
335
+ train_dataset = train_meta + val_meta[0:train_cutoff]
336
+
337
+ val_dataset = val_meta[train_cutoff:val_cutoff]
338
+
339
+ test_dataset = val_meta[val_cutoff:]
340
+
341
+
342
+
343
+
344
+
345
+ #訓練データから辞書作成
346
+
347
+ train_captions = []
348
+
349
+ for meta in train_dataset:
350
+
351
+ c = captions[meta.img_id]
352
+
353
+ train_captions.append(c)
354
+
355
+
356
+
357
+ word_to_id, id_to_word = _create_vocab(train_captions)
358
+
359
+
360
+
361
+
362
+
363
+ #画像を読み込みメタデータと結合したバイナリを作成
364
+
365
+ _create_datasets("train", train_dataset, captions, word_to_id, NUM_TRAIN_FILE)
366
+
367
+ _create_datasets("val", val_dataset, captions, word_to_id, NUM_VAL_FILE)
368
+
369
+ _create_datasets("test", test_dataset, captions, word_to_id, NUM_TEST_FILE)
370
+
371
+
372
+
373
+ # 単語リスト出力
374
+
375
+ with open(FLAGS.word_list_file, 'a') as f:
376
+
377
+ for k, v in id_to_word.items():
378
+
379
+ f.write(v)
380
+
381
+ f.write('\n')
382
+
383
+
384
+
385
+
386
+
387
+ if __name__ == "__main__":
388
+
389
+ tf.app.run()
90
390
 
91
391
 
92
392
 
93
- meta_list=[ImageMetadata(x['id'],os.path.join(img_dir,x['file_name'])) for x in meta_data['images']]
94
-
95
- def _create_word_list(caption):
96
-
97
- tokenized_caption=[START_WORD]
98
-
99
- tokenized_caption.extend(caption.split())
100
-
101
- tokenized_captions.append(END_WORD)
102
-
103
- return tokenized_captions
104
-
105
-
106
-
107
-
108
-
109
- id_to_captions={}
110
-
111
- for annotation in meta_data["annotations"]:
112
-
113
- img_id=annotation['image_id']
114
-
115
- caption = annotation['tokenized_caption']
116
-
117
- caption =_create_word_list(caption)
118
-
119
- id_to_captions[img_id]=caption
120
-
121
-
122
-
123
- print("Loaded caption metadata for %d images from %s" %(len(meta_list),caption_filename))
124
-
125
- return meta_list,id_to_captions
126
-
127
-
128
-
129
- def main (argv):
130
-
131
- train_meta,train_captions=_load_metadata(FLAGS.train_captions,FLAGS.train_img_dir)
132
-
133
- val_meta,val_captions=_load_metadata(FLAGS.val_captions,FLAGS.val_img_dir)
134
-
135
-
136
-
137
- captions={k:v for dic in [train_captions,val_captions]for k,v in dic.items()}
138
-
139
- train_cutoff=int(0.85*len(val_meta))
140
-
141
- val_cutoff=int(0.90*len(val_meta))
142
-
143
-
144
-
145
- train_dataset=train_meta+val_meta[0:train_off]
146
-
147
- val_dataset=val_meta[train_cutoff:val_cutoff]
148
-
149
- test_dataset=val_meta[val_cutoff:]
150
-
151
-
152
-
153
- train_captions=[]
154
-
155
- for meta in train_dataset:
156
-
157
- c=captions[meta.img_id]
158
-
159
- train_captions.append(c)
160
-
161
-
162
-
163
- word_to_id,id_to_word=_create_vocab(train_captions)
164
-
165
-
166
-
167
- _create_datasets("train",train_dataset,captions,word_to_id,NUM_TRAIN_FILE)
168
-
169
- _create_datasets("val",val_dataset,captions,word_to_id,NUM_VAL_FILE)
170
-
171
- _create_datasets("test",test_dataset,captions,word_to_id,NUM_TEST_FILE)
172
-
173
-
174
-
175
- with open(FLAGS.word_list_file,'a')as f:
176
-
177
- for k,v in id_to_word.items():
178
-
179
- f.write(v)
180
-
181
- f.write('\n')
182
-
183
-
184
-
185
- def _create_vocab(captions):
186
-
187
- counter=Counter()
188
-
189
- for c in captions:
190
-
191
- counter.update(c)
192
-
193
-
194
-
195
- print("total words:",len(counter))
196
-
197
- word_counts=[x for x in counter.items()
198
-
199
- if x[1]>=FLAGS.min_word_count]
200
-
201
- word_counts.sort(key=lambda x:x[1],reverse=True)
202
-
203
- print("Word in Vocab:",len(word_counts))
204
-
205
-
206
-
207
- word_list=[x[0]for x in word_counts]
208
-
209
-
210
-
211
- word_list.remove(START_WORD)
212
-
213
- word_list.remove(END_WORD)
214
-
215
- word_list.insert(0,START_WORD)
216
-
217
- word_list.insert(0,END_WORD)
218
-
219
-
220
-
221
- word_list.append(UNKNOWN_WORD)
222
-
223
- word_to_id=dict([(x,y)for (y,x)in enumerate(word_list)])
224
-
225
- id_to_word=dict([(x,y)for (x,y)in enumerate(word_list)])
226
-
227
- return word_to_id, id_to_word
228
-
229
-
230
-
231
-
232
-
233
-
234
-
235
- def _create_datasets(name,img_meta,captions,word_to_id,num_file):
236
-
237
-
238
-
239
- img_chunk=np.array_split(img_meta,num_file)
240
-
241
- counter=0
242
-
243
- for i in range(1,num_file+1):
244
-
245
- output_file_name="%s-%.3d.tfrecord" %(name,i)
246
-
247
- output_file_path=os.path.join(FLAGS.out_dir,output_file_name)
248
-
249
- target_chunk=img_chunk[counter]
250
-
251
-
252
-
253
- with tf.python_io.TFRecordWriter(output_file_path)as writer:
254
-
255
- for img in target_chunk:
256
-
257
- img_id=img[0]
258
-
259
- filename=img[1]
260
-
261
- with tf.gfile.FastGFile(filename,"rb")as f:
262
-
263
- data=f.read()
264
-
265
-
266
-
267
- caption=captions[int(img_id)]
268
-
269
- caption_ids=[]
270
-
271
- for w in caption:
272
-
273
- if w in word_to_id:
274
-
275
- caption_ids.append(word_to_id[w])
276
-
277
-
278
-
279
- else :
280
-
281
- caption_ids.append(word_to_id[UNKNOWN_WORD])
282
-
283
-
284
-
285
- context=tf.train.Features(feature={"img_id":tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
286
-
287
- "data":tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),})
288
-
289
-
290
-
291
- caption_feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))for v in caption_ids]
292
-
293
- feature_lists=tf.train.FeatureLists(feature_list={"caption":tf.train.FeatureList(feature=caption_feature)})
294
-
295
-
296
-
297
- sequence_example=tf.train.SequenceExample(context=context,feature_lists=feature_lists)
298
-
299
- writer.write(sequence_example.SerializeToString())
300
-
301
- counter +=1
302
-
303
-
304
-
305
393
  ### 試したこと
306
394
 
307
395
  pyファイル内で null=Noneや null=''を試しましたが意味なかったです

3

ソースコード

2018/04/15 10:31

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -38,9 +38,269 @@
38
38
 
39
39
  ```
40
40
 
41
-
41
+ ### ソースコード
42
+
42
-
43
+ tf.flags.DEFINE_string("train_img_dir","data/img/train2014/","Training image directory")
44
+
43
-
45
+ tf.flags.DEFINE_string("val_img_dir","data/img/val2014/","Validation image directory")
46
+
47
+ tf.flags.DEFINE_string("train_captions","data/stair_captions_v1.1_train.json","Training caption file")
48
+
49
+ tf.flags.DEFINE_string("val_captions","data/stair_captions_v1.1_val.json","Valdation caption file")
50
+
51
+ tf.flags.DEFINE_string("out_dir","data/tfrecords/","Output TFRecord directory")
52
+
53
+ tf.flags.DEFINE_integer("min_word_count",4,"The minimum number of occurrences of each word in the training set for includion in the vocab ")
54
+
55
+ tf.flags.DEFINE_string("word_list_file","data/dirctionary.txt","Output word list file")
56
+
57
+
58
+
59
+ FLAGS=tf.flags.FLAGS
60
+
61
+
62
+
63
+ START_WORD='<s>'
64
+
65
+ END_WORD='<E>'
66
+
67
+ UNKNOWN_WORD='<UNW>'
68
+
69
+
70
+
71
+ NUM_TRAIN_FILE=256
72
+
73
+ NUM_VAL_FILE=4
74
+
75
+ NUM_TEST_FILE=8
76
+
77
+
78
+
79
+ ImageMetadata=namedtuple("ImageMetadata",["img_id","filename"])
80
+
81
+
82
+
83
+ def _load_metadata(caption_filename,img_dir):
84
+
85
+
86
+
87
+ with open(caption_filename,'r')as f:
88
+
89
+ meta_data=json.load(f)
90
+
91
+
92
+
93
+ meta_list=[ImageMetadata(x['id'],os.path.join(img_dir,x['file_name'])) for x in meta_data['images']]
94
+
95
+ def _create_word_list(caption):
96
+
97
+ tokenized_caption=[START_WORD]
98
+
99
+ tokenized_caption.extend(caption.split())
100
+
101
+ tokenized_captions.append(END_WORD)
102
+
103
+ return tokenized_captions
104
+
105
+
106
+
107
+
108
+
109
+ id_to_captions={}
110
+
111
+ for annotation in meta_data["annotations"]:
112
+
113
+ img_id=annotation['image_id']
114
+
115
+ caption = annotation['tokenized_caption']
116
+
117
+ caption =_create_word_list(caption)
118
+
119
+ id_to_captions[img_id]=caption
120
+
121
+
122
+
123
+ print("Loaded caption metadata for %d images from %s" %(len(meta_list),caption_filename))
124
+
125
+ return meta_list,id_to_captions
126
+
127
+
128
+
129
+ def main (argv):
130
+
131
+ train_meta,train_captions=_load_metadata(FLAGS.train_captions,FLAGS.train_img_dir)
132
+
133
+ val_meta,val_captions=_load_metadata(FLAGS.val_captions,FLAGS.val_img_dir)
134
+
135
+
136
+
137
+ captions={k:v for dic in [train_captions,val_captions]for k,v in dic.items()}
138
+
139
+ train_cutoff=int(0.85*len(val_meta))
140
+
141
+ val_cutoff=int(0.90*len(val_meta))
142
+
143
+
144
+
145
+ train_dataset=train_meta+val_meta[0:train_off]
146
+
147
+ val_dataset=val_meta[train_cutoff:val_cutoff]
148
+
149
+ test_dataset=val_meta[val_cutoff:]
150
+
151
+
152
+
153
+ train_captions=[]
154
+
155
+ for meta in train_dataset:
156
+
157
+ c=captions[meta.img_id]
158
+
159
+ train_captions.append(c)
160
+
161
+
162
+
163
+ word_to_id,id_to_word=_create_vocab(train_captions)
164
+
165
+
166
+
167
+ _create_datasets("train",train_dataset,captions,word_to_id,NUM_TRAIN_FILE)
168
+
169
+ _create_datasets("val",val_dataset,captions,word_to_id,NUM_VAL_FILE)
170
+
171
+ _create_datasets("test",test_dataset,captions,word_to_id,NUM_TEST_FILE)
172
+
173
+
174
+
175
+ with open(FLAGS.word_list_file,'a')as f:
176
+
177
+ for k,v in id_to_word.items():
178
+
179
+ f.write(v)
180
+
181
+ f.write('\n')
182
+
183
+
184
+
185
+ def _create_vocab(captions):
186
+
187
+ counter=Counter()
188
+
189
+ for c in captions:
190
+
191
+ counter.update(c)
192
+
193
+
194
+
195
+ print("total words:",len(counter))
196
+
197
+ word_counts=[x for x in counter.items()
198
+
199
+ if x[1]>=FLAGS.min_word_count]
200
+
201
+ word_counts.sort(key=lambda x:x[1],reverse=True)
202
+
203
+ print("Word in Vocab:",len(word_counts))
204
+
205
+
206
+
207
+ word_list=[x[0]for x in word_counts]
208
+
209
+
210
+
211
+ word_list.remove(START_WORD)
212
+
213
+ word_list.remove(END_WORD)
214
+
215
+ word_list.insert(0,START_WORD)
216
+
217
+ word_list.insert(0,END_WORD)
218
+
219
+
220
+
221
+ word_list.append(UNKNOWN_WORD)
222
+
223
+ word_to_id=dict([(x,y)for (y,x)in enumerate(word_list)])
224
+
225
+ id_to_word=dict([(x,y)for (x,y)in enumerate(word_list)])
226
+
227
+ return word_to_id, id_to_word
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+ def _create_datasets(name,img_meta,captions,word_to_id,num_file):
236
+
237
+
238
+
239
+ img_chunk=np.array_split(img_meta,num_file)
240
+
241
+ counter=0
242
+
243
+ for i in range(1,num_file+1):
244
+
245
+ output_file_name="%s-%.3d.tfrecord" %(name,i)
246
+
247
+ output_file_path=os.path.join(FLAGS.out_dir,output_file_name)
248
+
249
+ target_chunk=img_chunk[counter]
250
+
251
+
252
+
253
+ with tf.python_io.TFRecordWriter(output_file_path)as writer:
254
+
255
+ for img in target_chunk:
256
+
257
+ img_id=img[0]
258
+
259
+ filename=img[1]
260
+
261
+ with tf.gfile.FastGFile(filename,"rb")as f:
262
+
263
+ data=f.read()
264
+
265
+
266
+
267
+ caption=captions[int(img_id)]
268
+
269
+ caption_ids=[]
270
+
271
+ for w in caption:
272
+
273
+ if w in word_to_id:
274
+
275
+ caption_ids.append(word_to_id[w])
276
+
277
+
278
+
279
+ else :
280
+
281
+ caption_ids.append(word_to_id[UNKNOWN_WORD])
282
+
283
+
284
+
285
+ context=tf.train.Features(feature={"img_id":tf.train.Feature(int64_list=tf.train.Int64List(value=[int(img_id)])),
286
+
287
+ "data":tf.train.Feature(bytes_list=tf.train.BytesList(value=[data])),})
288
+
289
+
290
+
291
+ caption_feature=[tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))for v in caption_ids]
292
+
293
+ feature_lists=tf.train.FeatureLists(feature_list={"caption":tf.train.FeatureList(feature=caption_feature)})
294
+
295
+
296
+
297
+ sequence_example=tf.train.SequenceExample(context=context,feature_lists=feature_lists)
298
+
299
+ writer.write(sequence_example.SerializeToString())
300
+
301
+ counter +=1
302
+
303
+
44
304
 
45
305
  ### 試したこと
46
306
 

2

修正

2018/04/15 10:28

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -1,10 +1,10 @@
1
1
  ### 前提・実現したいこと
2
2
 
3
- 某参考書のコードで
3
+ 某参考書のコードで画像キャプショニング
4
4
 
5
- JSONファイルをTFRecord形式に整形したいのですが
5
+ STAIRcaptionの画像データのinfo(JSONファイル)をTFRecord形式に整形したいのですが
6
6
 
7
- コマンドプロンプトで実行しようとすると Nameerrorが出てしまいます
7
+ いざpyソースコードファイルをコマンドプロンプトで実行しようとすると NameErrorが出てしまいます
8
8
 
9
9
  pythonでnullは対応されてない?みたいです
10
10
 

1

文章の欠落

2018/04/15 10:22

投稿

moviethief319
moviethief319

スコア14

test CHANGED
File without changes
test CHANGED
@@ -8,7 +8,13 @@
8
8
 
9
9
  pythonでnullは対応されてない?みたいです
10
10
 
11
- pythonコード内にexecution_countは
11
+ pythonコード内にexecution_countという項目を入れてないので
12
+
13
+
14
+
15
+ execution_countが何なのか、どこから来たのか分からないです。
16
+
17
+
12
18
 
13
19
  execution_count:nullをなんとかしたいです
14
20