編集履歴

質問編集履歴

URLを追加しました

2020/04/13 05:54

投稿

ikayakioishii

スコア8

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -4,6 +4,10 @@
 GitHubに公開されているPeeking into the futureの論文のコード（next-prediction）を実行してみたところ，AssertionErrorが発生しました．エラーメッセージには何も表示されておらず，解決の糸口が見つからないため，手助けいただきたいです．
+GitHub 再現実験手順URL: https://github.com/JunweiLiang/next-prediction/blob/master/code/prepare_data/README.md
+対象のコードURL: https://github.com/JunweiLiang/next-prediction/blob/master/code/prepare_data/step4_generate_traj.py
 ### 発生している問題・エラーメッセージ

コードが違っていたので，直しました．

2020/04/13 05:54

投稿

ikayakioishii

スコア8

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -40,7 +40,7 @@
-"""Given a list of images, run scene semantic segmentation using deeplab."""
+"""Generate trajectory files and scene, person box, other box, activity files."""
@@ -48,17 +48,25 @@
 # pylint: disable=g-bad-import-order
+import argparse
 import os
-import argparse
+import operator
-import tensorflow as tf
+import numpy as np
-from PIL import Image
+import cPickle as pickle
 from tqdm import tqdm
-import numpy as np
+from glob import glob
+from utils import activity2id
+from utils import actev_scene2imgsize
+from utils import get_scene
@@ -66,73 +74,251 @@
 parser = argparse.ArgumentParser()
-parser.add_argument("imglst")
+parser.add_argument("npzpath")
-parser.add_argument("model_path", help="path to the model .pb file")
+parser.add_argument("split_path")
 parser.add_argument("out_path")
-parser.add_argument("--every", type=int, default=1,
-                    help="scene semantic segmentation doesn't have to be"
-                         " run on every frame")
-parser.add_argument("--down_rate", default=8.0, type=float,
-                    help="down-size how many times")
-parser.add_argument("--keep_full", action="store_true",
-                    help="get 512x288 feature")
-# ---- gpu stuff. Now only one gpu is used
-parser.add_argument("--gpuid", default=0, type=int)
-# For running parallel jobs, set --job 4 --curJob k, where k=1/2/3/4
-parser.add_argument("--job", type=int, default=1, help="total job")
-parser.add_argument("--curJob", type=int, default=1,
-                    help="this script run job Num")
-# ade20k -> 150 + 1 (bg) classes
-# city -> 18 + 1
-def resize_seg_map(seg, down_rate, keep_full=False):
-  img_ = Image.fromarray(seg.astype(dtype=np.uint8))
-  w_, h_ = img_.size
-  neww, newh = int(w_ / down_rate), int(h_ / down_rate)
-  if keep_full:
-    neww, newh = 512, 288
-  newimg = img_.resize((neww, newh))  # neareast neighbor
-  newdata = np.array(newimg)
-  return newdata
+parser.add_argument("--drop_frame", default=1, type=int,
+                    help="drop frame to match different fps, assuming "
+                         "the virat fps is 30fps, so to get 2.5fps, "
+                         "need to drop 12 frames every time")
+parser.add_argument("--scene_feat_path",
+                    help="the scene segmentation output path,"
+                         "under it should be frame_name.npy")
+# the following are the output paths
+parser.add_argument("--scene_map_path",
+                    help="frameidx mapping to actual scene feature file output")
+parser.add_argument("--person_box_path",
+                    help="Person box output")
+parser.add_argument("--other_box_path",
+                    help="Other object box output")
+parser.add_argument("--activity_path",
+                    help="activity annotation output")
+# for ETH/UCY you need to write your own video size mapping
+# In the PeekingFuture paper we resize ETH/UCY to 720x576 to extract features
+scene2imgsize = actev_scene2imgsize
+actid2name = {activity2id[n]: n for n in activity2id}
+def resize_xy(xy, vname, resize_w, resize_h):
+  """Resize the xy coordinates."""
+  x_, y_ = xy
+  w, h = scene2imgsize[get_scene(vname)]
+  diff_w = resize_w / float(w)
+  diff_h = resize_h / float(h)
+  x_ *= diff_w
+  y_ *= diff_h
+  # normalize coordinates?
+  return [x_, y_]
+def resize_box(box, vname, resize_w, resize_h):
+  """Resize the box coordintates."""
+  x1, y1, x2, y2 = [float(o) for o in box]
+  w, h = scene2imgsize[get_scene(vname)]
+  diff_w = resize_w / float(w)
+  diff_h = resize_h / float(h)
+  x1 *= diff_w
+  x2 *= diff_w
+  y1 *= diff_h
+  y2 *= diff_h
+  return [x1, y1, x2, y2]
+# frame_lst is [(videoname,frameidx)], assume sorted by the frameidx
+def get_nearest(frame_lst_, frame_idx):
+  """Since we don't run scene seg on every frame, we want to find the nearest one."""
+  frame_idxs = np.array([i_ for _, i_ in frame_lst_])
+  cloests_idx = (np.abs(frame_idxs - frame_idx)).argmin()
+  vname, closest_frame_idx = frame_lst_[cloests_idx]
+  return vname, closest_frame_idx, cloests_idx
+def get_act_list(act_data, frameidx, bgid):
+  """Given a frameidx, get this person' activities."""
+  # act_data is a list of sorted (start,end,actclassid)
+  # return current act list,
+  current_act_list = [(actid, e - frameidx) for s, e, actid in act_data
+                      if (frameidx >= s) and (frameidx <= e)]
+  current_act_list.sort(key=operator.itemgetter(1))  # dist to current act's end
+  current_actid_list_ = [actid for actid, _ in current_act_list]
+  current_dist_list_ = [dist for _, dist in current_act_list]
+  if not current_act_list:
+    current_actid_list_, current_dist_list_ = [bgid], [-1]
+  future_act_list = [(actid, s - frameidx) for s, e, actid in act_data
+                     if frameidx < s]
+  future_act_list.sort(key=operator.itemgetter(1))
+  if not future_act_list:
+    return (current_actid_list_, current_dist_list_, [bgid], [-1])
+  # only the nearest future activity?
+  # smallest_dist = future_act_list[0][1]
+  # future_act_list = [(actid,dist) for actid, dist in future_act_list
+  #                     if dist == smallest_dist]
+  future_actid_list_ = [actid for actid, _ in future_act_list]
+  future_dist_list_ = [dist for _, dist in future_act_list]
+  return (current_actid_list_, current_dist_list_,
+          future_actid_list_, future_dist_list_)
+def check_traj(newdata_, vname):
+  """Check and filter data."""
+  checkdata = np.array(newdata_, dtype="float")
+  frames_ = np.unique(checkdata[:, 0]).tolist()
+  checked_data_ = []
+  for frame_ in frames_:
+    # all personid in this frame
+    this_frame_data = checkdata[frame_ == checkdata[:, 0], :]  # [K,4]
+    ped_ids = this_frame_data[:, 1]
+    unique_ped_ids, unique_idxs = np.unique(ped_ids, return_index=True)
+    if len(ped_ids) != len(unique_ped_ids):
+      tqdm.write("\twarning, %s frame %s has duplicate person annotation person"
+                 " ids: %s/%s, removed the duplicate ones"
+                 % (vname, frame_, len(unique_ped_ids), len(ped_ids)))
+      this_frame_data = this_frame_data[unique_idxs]
+    for f_, p_, x_, y_ in this_frame_data:
+      checked_data_.append((f_, p_, x_, y_))
+  checked_data_.sort(key=operator.itemgetter(0))
+  return checked_data_
@@ -144,133 +330,121 @@
-  input_size = 513  # the model's input size, has to be this
-  # load the model graph
-  print("loading model...")
-  graph = tf.Graph()
-  with graph.as_default():
-    gd = tf.GraphDef()
-    with tf.gfile.GFile(args.model_path, "rb") as f:
-      sg = f.read()
-      gd.ParseFromString(sg)
-      tf.import_graph_def(gd, name="")
-    input_tensor = graph.get_tensor_by_name("ImageTensor:0")
-    output_tensor = graph.get_tensor_by_name("SemanticPredictions:0")
-  print("loaded.")
-  if not os.path.exists(args.out_path):
-    os.makedirs(args.out_path)
-  imgs = [one.strip()
-          for one in open(args.imglst, "r").readlines()][::args.every]
-  tfconfig = tf.ConfigProto()
-  tfconfig.gpu_options.allow_growth = True
-  tfconfig.gpu_options.visible_device_list = "%s" % (
-      ",".join(["%s" % i for i in [args.gpuid]]))
-  with graph.as_default():
-    with tf.Session(graph=graph, config=tfconfig) as sess:
-      count = 0
-      for img in tqdm(imgs):
-        count += 1
-        if (count % args.job) != (args.curJob - 1):
-          continue
-        imgname = os.path.splitext(os.path.basename(img))[0]
-        ori_img = Image.open(img)
-        w, h = ori_img.size
-        resize_r = 1.0 * input_size / max(w, h)
-        target_size = (int(resize_r * w), int(resize_r * h))
-        resize_img = ori_img.convert("RGB").resize(target_size, Image.ANTIALIAS)
-        seg_map, = sess.run([output_tensor],
-                            feed_dict={input_tensor: [np.asarray(resize_img)]})
-        seg_map = seg_map[0]  # single image input test
-        # print seg_map.shape
-        # print seg_map
-        """
-        (288, 513)
-        [[ 8  8  8 ...  8  8  8]
-         [ 8  8  8 ...  8  8  8]
-         [ 8  8  8 ...  8  8  8]
-         ...
-         [11 11 11 ... 11 11 11]
-         [11 11 11 ... 11 11 11]
-         [11 11 11 ... 11 11 11]]
-        """
-        seg_map = resize_seg_map(seg_map, args.down_rate, args.keep_full)
-        targetfile = os.path.join(args.out_path, "%s.npy" % imgname)
-        np.save(targetfile, seg_map)
+  # Hard coded for ActEV experiment.
+  # :P
+  args.resize = True
+  args.resize_h = 1080
+  args.resize_w = 1920
+  filelst = {
+      "train": [os.path.splitext(os.path.basename(line.strip()))[0]
+                for line in open(os.path.join(args.split_path,
+                                              "train.lst"), "r").readlines()],
+      "val": [os.path.splitext(os.path.basename(line.strip()))[0]
+              for line in open(os.path.join(args.split_path,
+                                            "val.lst"), "r").readlines()],
+      "test": [os.path.splitext(os.path.basename(line.strip()))[0]
+               for line in open(os.path.join(args.split_path,
+                                             "test.lst"), "r").readlines()],
+  }
+  for split in tqdm(filelst, ascii=True):
+    out_path = os.path.join(args.out_path, split)
+    if not os.path.exists(out_path):
+      os.makedirs(out_path)
+    if not os.path.exists(os.path.join(args.person_box_path, split)):
+      os.makedirs(os.path.join(args.person_box_path, split))
+    if not os.path.exists(os.path.join(args.other_box_path, split)):
+      os.makedirs(os.path.join(args.other_box_path, split))
+    if not os.path.exists(os.path.join(args.activity_path, split)):
+      os.makedirs(os.path.join(args.activity_path, split))
+    scene_map_path = os.path.join(args.scene_map_path, split)
+    if not os.path.exists(scene_map_path):
+      os.makedirs(scene_map_path)
+    for videoname in tqdm(filelst[split]):
+      npzfile = os.path.join(args.npzpath, "%s.npz" % videoname)
+      data = np.load(npzfile, allow_pickle=True)
+      # each frame's all boxes, for getting other boxes
+      frameidx2boxes = data["frameidx2boxes"]
+      # personId -> all related activity with timespan, sorted by timespan start
+      # (start, end, act_classid)
+      personid2acts = data["personid2acts"]
+      # load all the frames for this video first
+      frame_lst = glob(os.path.join(args.scene_feat_path,
+                                    "%s_F_*.npy"%videoname))
+      assert frame_lst
+      frame_lst = [(os.path.basename(frame),
+                    int(os.path.basename(frame).split(".")[0].split("_F_")[-1]))
+                   for frame in frame_lst]
+・・・
 ```