回答編集履歴

1

コード修正

2019/04/30 03:42

投稿

can110
can110

スコア38266

test CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
 
10
10
 
11
- df1 = pd.DataFrame({'name': ['A','B','C','D','E'],'a':[1,5,5,8,9]})
11
+ df1 = pd.DataFrame({'name': ['A','B','C','D','D-2','E'],'a':[1,5,5,5,8,9]})
12
12
 
13
13
  df2 = pd.DataFrame({'table': ['2','3-1','3-2','7'],'a':[2,3,3,7]})
14
14
 
@@ -16,31 +16,47 @@
16
16
 
17
17
  # df2の列値をタプル('table','a')のリストに展開
18
18
 
19
- df2_list = [(r[0],r[1]) for r in df2.values]
19
+ lst2 = [(r[0],r[1]) for r in df2.values]
20
20
 
21
21
 
22
22
 
23
- # df2から指定値に最も近い'table'値を返す
23
+ # lst2のうち、'a'値が指定値に最も近い要素の位置リストを返す
24
24
 
25
25
  def nearest(a):
26
26
 
27
- if not df2_list:
27
+ m = min(lst2, key=lambda v:(v[1]-a)*(v[1]-a))
28
28
 
29
- return np.nan
30
-
31
- m = min(df2_list, key=lambda v:(v[1]-a)*(v[1]-a))
29
+ return [i for i,v in enumerate(lst2) if v[1] == m[1]]
32
-
33
- del df2_list[df2_list.index(m)] # df1に紐づけ済みは削除
34
-
35
- return m[0]
36
30
 
37
31
 
38
32
 
33
+ used = set() # df1に割当済のlst2の要素位置
34
+
35
+
36
+
39
- # 割り当て
37
+ # 最近傍値に割り当て
40
38
 
41
39
  for i,r in df1.iterrows():
42
40
 
41
+ val = np.nan
42
+
43
+ idxs = nearest(r['a'])
44
+
45
+ while idxs:
46
+
47
+ idx = idxs.pop(0)
48
+
49
+ if idx not in used: # 未割当
50
+
51
+ val = lst2[idx][0]
52
+
53
+ used.add(idx) # 割当済を保持
54
+
55
+ break
56
+
57
+
58
+
43
- df1.loc[i,'table'] = nearest(r['a'])
59
+ df1.loc[i,'table'] = val
44
60
 
45
61
 
46
62
 
@@ -56,9 +72,11 @@
56
72
 
57
73
  2 C 5 3-2
58
74
 
59
- 3 D 8 7
75
+ 3 D 5 NaN
60
76
 
77
+ 4 D-2 8 7
78
+
61
- 4 E 9 NaN
79
+ 5 E 9 NaN
62
80
 
63
81
  """
64
82