質問編集履歴

1

ソースコードを追加しました

2019/10/09 13:06

投稿

Mattcha
Mattcha

スコア8

test CHANGED
File without changes
test CHANGED
@@ -62,7 +62,291 @@
62
62
 
63
63
  ```
64
64
 
65
-
65
+ ※ソースコードを追加しました。(make_dataset.py)
66
+
67
+
68
+
69
+ ```Python
70
+
71
+ import os
72
+
73
+ import cv2
74
+
75
+ import h5py
76
+
77
+ import parmap
78
+
79
+ import argparse
80
+
81
+ import numpy as np
82
+
83
+ from pathlib import Path
84
+
85
+ from tqdm import tqdm as tqdm
86
+
87
+ import matplotlib.pylab as plt
88
+
89
+
90
+
91
+
92
+
93
+ def format_image(img_path, size, nb_channels):
94
+
95
+ """
96
+
97
+ Load img with opencv and reshape
98
+
99
+ """
100
+
101
+
102
+
103
+ if nb_channels == 1:
104
+
105
+ img = cv2.imread(img_path, 0)
106
+
107
+ img = np.expand_dims(img, axis=-1)
108
+
109
+ else:
110
+
111
+ img = cv2.imread(img_path)
112
+
113
+ img = img[:, :, ::-1] # BGR to RGB
114
+
115
+
116
+
117
+ w = img.shape[1]
118
+
119
+
120
+
121
+ # Slice image in 2 to get both parts
122
+
123
+ img_full = img[:, :w // 2, :]
124
+
125
+ img_sketch = img[:, w // 2:, :]
126
+
127
+
128
+
129
+ img_full = cv2.resize(img_full, (size, size), interpolation=cv2.INTER_AREA)
130
+
131
+ img_sketch = cv2.resize(img_sketch, (size, size), interpolation=cv2.INTER_AREA)
132
+
133
+
134
+
135
+ if nb_channels == 1:
136
+
137
+ img_full = np.expand_dims(img_full, -1)
138
+
139
+ img_sketch = np.expand_dims(img_sketch, -1)
140
+
141
+
142
+
143
+ img_full = np.expand_dims(img_full, 0).transpose(0, 3, 1, 2)
144
+
145
+ img_sketch = np.expand_dims(img_sketch, 0).transpose(0, 3, 1, 2)
146
+
147
+
148
+
149
+ return img_full, img_sketch
150
+
151
+
152
+
153
+
154
+
155
+ def build_HDF5(jpeg_dir, nb_channels, data_dir, size=256):
156
+
157
+ """
158
+
159
+ Gather the data in a single HDF5 file.
160
+
161
+ """
162
+
163
+
164
+
165
+ data_dir = os.path.join(data_dir, 'processed')
166
+
167
+
168
+
169
+ # Put train data in HDF5
170
+
171
+ file_name = os.path.basename(jpeg_dir.rstrip("/"))
172
+
173
+ hdf5_file = os.path.join(data_dir, "%s_data.h5" % file_name)
174
+
175
+ with h5py.File(hdf5_file, "w") as hfw:
176
+
177
+
178
+
179
+ for dset_type in ["train", "test", "val"]:
180
+
181
+
182
+
183
+ list_img = [img for img in Path(jpeg_dir).glob('%s/*.jpg' % dset_type)]
184
+
185
+ list_img = [str(img) for img in list_img]
186
+
187
+ list_img.extend(list(Path(jpeg_dir).glob('%s/*.png' % dset_type)))
188
+
189
+ list_img = list(map(str, list_img))
190
+
191
+ list_img = np.array(list_img)
192
+
193
+
194
+
195
+ data_full = hfw.create_dataset("%s_data_full" % dset_type,
196
+
197
+ (0, nb_channels, size, size),
198
+
199
+ maxshape=(None, 3, size, size),
200
+
201
+ dtype=np.uint8)
202
+
203
+
204
+
205
+ data_sketch = hfw.create_dataset("%s_data_sketch" % dset_type,
206
+
207
+ (0, nb_channels, size, size),
208
+
209
+ maxshape=(None, 3, size, size),
210
+
211
+ dtype=np.uint8)
212
+
213
+
214
+
215
+ num_files = len(list_img)
216
+
217
+ chunk_size = 100
218
+
219
+ num_chunks = num_files / chunk_size
220
+
221
+ arr_chunks = np.array_split(np.arange(num_files), num_chunks)
222
+
223
+
224
+
225
+ for chunk_idx in tqdm(arr_chunks):
226
+
227
+
228
+
229
+ list_img_path = list_img[chunk_idx].tolist()
230
+
231
+ output = parmap.map(format_image, list_img_path, size, nb_channels, pm_parallel=False)
232
+
233
+
234
+
235
+ arr_img_full = np.concatenate([o[0] for o in output], axis=0)
236
+
237
+ arr_img_sketch = np.concatenate([o[1] for o in output], axis=0)
238
+
239
+
240
+
241
+ # Resize HDF5 dataset
242
+
243
+ data_full.resize(data_full.shape[0] + arr_img_full.shape[0], axis=0)
244
+
245
+ data_sketch.resize(data_sketch.shape[0] + arr_img_sketch.shape[0], axis=0)
246
+
247
+
248
+
249
+ data_full[-arr_img_full.shape[0]:] = arr_img_full.astype(np.uint8)
250
+
251
+ data_sketch[-arr_img_sketch.shape[0]:] = arr_img_sketch.astype(np.uint8)
252
+
253
+
254
+
255
+ def check_HDF5(jpeg_dir, nb_channels):
256
+
257
+ """
258
+
259
+ Plot images with landmarks to check the processing
260
+
261
+ """
262
+
263
+
264
+
265
+ # Get hdf5 file
266
+
267
+ file_name = os.path.basename(jpeg_dir.rstrip("/"))
268
+
269
+ hdf5_file = os.path.join(data_dir, "%s_data.h5" % file_name)
270
+
271
+
272
+
273
+ with h5py.File(hdf5_file, "r") as hf:
274
+
275
+ data_full = hf["train_data_full"]
276
+
277
+ data_sketch = hf["train_data_sketch"]
278
+
279
+ for i in range(data_full.shape[0]):
280
+
281
+ plt.figure()
282
+
283
+ img = data_full[i, :, :, :].transpose(1,2,0)
284
+
285
+ img2 = data_sketch[i, :, :, :].transpose(1,2,0)
286
+
287
+ img = np.concatenate((img, img2), axis=1)
288
+
289
+ if nb_channels == 1:
290
+
291
+ plt.imshow(img[:, :, 0], cmap="gray")
292
+
293
+ else:
294
+
295
+ plt.imshow(img)
296
+
297
+ plt.show()
298
+
299
+ plt.clf()
300
+
301
+ plt.close()
302
+
303
+
304
+
305
+
306
+
307
+ if __name__ == '__main__':
308
+
309
+
310
+
311
+ parser = argparse.ArgumentParser(description='Build dataset')
312
+
313
+ parser.add_argument('jpeg_dir', type=str, help='path to jpeg images')
314
+
315
+ parser.add_argument('nb_channels', type=int, help='number of image channels')
316
+
317
+ parser.add_argument('--img_size', default=256, type=int,
318
+
319
+ help='Desired Width == Height')
320
+
321
+ parser.add_argument('--do_plot', action="store_true",
322
+
323
+ help='Plot the images to make sure the data processing went OK')
324
+
325
+ parser.add_argument('--data_dir', default='../../data', type=str, help='Data directory')
326
+
327
+ args = parser.parse_args()
328
+
329
+
330
+
331
+ build_HDF5(args.jpeg_dir,
332
+
333
+ args.nb_channels,
334
+
335
+ args.data_dir,
336
+
337
+ size=args.img_size)
338
+
339
+
340
+
341
+ if args.do_plot:
342
+
343
+ check_HDF5(args.jpeg_dir, args.nb_channels)
344
+
345
+
346
+
347
+
348
+
349
+ ```
66
350
 
67
351
  ### 試したこと
68
352