回答編集履歴
1
コード修正
answer
CHANGED
@@ -3,31 +3,40 @@
|
|
3
3
|
import pandas as pd
|
4
4
|
import numpy as np
|
5
5
|
|
6
|
-
df1 = pd.DataFrame({'name': ['A','B','C','D','E'],'a':[1,5,5,8,9]})
|
6
|
+
df1 = pd.DataFrame({'name': ['A','B','C','D','D-2','E'],'a':[1,5,5,5,8,9]})
|
7
7
|
df2 = pd.DataFrame({'table': ['2','3-1','3-2','7'],'a':[2,3,3,7]})
|
8
8
|
|
9
9
|
# df2の列値をタプル('table','a')のリストに展開
|
10
|
-
|
10
|
+
lst2 = [(r[0],r[1]) for r in df2.values]
|
11
11
|
|
12
|
-
# df2から指定値に最も近い
|
12
|
+
# 指定値aに最も近い'a'値を持つlst2の要素位置リストを返す(同値が複数あれば全て)
|
13
13
|
def nearest(a):
|
14
|
-
if not df2_list:
|
15
|
-
return np.nan
|
16
|
-
m = min(
|
14
|
+
m = min(lst2, key=lambda v:(v[1]-a)*(v[1]-a))
|
17
|
-
|
15
|
+
return [i for i,v in enumerate(lst2) if v[1] == m[1]]
|
18
|
-
return m[0]
|
19
16
|
|
17
|
+
used = set() # df1に割当済のlst2の要素位置
|
18
|
+
|
20
|
-
# 割り当て
|
19
|
+
# 最近傍値に割り当て
|
21
20
|
for i,r in df1.iterrows():
|
21
|
+
val = np.nan
|
22
|
-
|
22
|
+
idxs = nearest(r['a'])
|
23
|
+
while idxs:
|
24
|
+
idx = idxs.pop(0)
|
25
|
+
if idx not in used: # 未割当
|
26
|
+
val = lst2[idx][0]
|
27
|
+
used.add(idx) # 割当済を保持
|
28
|
+
break
|
23
29
|
|
30
|
+
df1.loc[i,'table'] = val
|
31
|
+
|
24
32
|
print(df1)
|
25
33
|
"""
|
26
34
|
name a table
|
27
35
|
0 A 1 2
|
28
36
|
1 B 5 3-1
|
29
37
|
2 C 5 3-2
|
30
|
-
3 D
|
38
|
+
3 D 5 NaN
|
39
|
+
4 D-2 8 7
|
31
|
-
|
40
|
+
5 E 9 NaN
|
32
41
|
"""
|
33
42
|
```
|