回答編集履歴
4
test
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
※ 重複のない組み合わせ(`freq == 0`)を merge(right join) で補完。
|
1
|
+
※ 重複のない組み合わせ(`freq == 0`)を merge で補完。
|
2
2
|
```python
|
3
3
|
import pandas as pd
|
4
4
|
from itertools import combinations
|
3
test
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
※ 重複のない組み合わせ(`freq == 0`)を merge(left join) で補完。
|
1
|
+
※ 重複のない組み合わせ(`freq == 0`)を merge(right join) で補完。
|
2
2
|
```python
|
3
3
|
import pandas as pd
|
4
4
|
from itertools import combinations
|
2
test
CHANGED
@@ -1,7 +1,38 @@
|
|
1
|
+
※ 重複のない組み合わせ(`freq == 0`)を merge(left join) で補完。
|
2
|
+
```python
|
3
|
+
import pandas as pd
|
4
|
+
from itertools import combinations
|
5
|
+
|
6
|
+
df = pd.DataFrame({
|
7
|
+
'group': [*['A']*4, *['B']*3, *['C']*5, *['D']*4],
|
8
|
+
'person': [1, 2, 3, 4, 2, 5, 6, 4, 7, 8, 9, 10, 2, 3, 7, 12],
|
9
|
+
})
|
10
|
+
|
11
|
+
dfx = df.groupby('person')['group']\
|
12
|
+
.apply(lambda g: [*combinations(g, 2)])\
|
13
|
+
.explode().value_counts().sort_index().to_frame('freq')
|
14
|
+
comb = pd.DataFrame(index=[*combinations(df['group'].unique(), 2)])
|
15
|
+
dfx = pd.merge(comb, dfx, left_index=True, right_index=True, how='left')\
|
16
|
+
.fillna(0, downcast='infer')
|
17
|
+
dfx = pd.DataFrame(dfx['freq'], index=pd.MultiIndex.from_tuples(dfx.index))\
|
18
|
+
.rename_axis(['group1', 'group2']).reset_index()
|
19
|
+
|
20
|
+
print(dfx)
|
1
|
-
※ 削除
|
21
|
+
```
|
22
|
+
|
23
|
+
| group1 | group2 | freq |
|
24
|
+
|:--------:|:--------:|-------:|
|
25
|
+
| A | B | 1 |
|
26
|
+
| A | C | 1 |
|
27
|
+
| A | D | 2 |
|
28
|
+
| B | C | 0 |
|
29
|
+
| B | D | 1 |
|
30
|
+
| C | D | 1 |
|
2
31
|
|
3
32
|
|
4
33
|
|
5
34
|
|
6
35
|
|
7
36
|
|
37
|
+
|
38
|
+
|
1
test
CHANGED
@@ -1,29 +1,7 @@
|
|
1
|
-
※ 以前の質問の際にも述べましたが、組み合わせ数は `3000 * 2999 / 2 = 4,498,500` 通りになります。処理時間がどの程度になるのか、こちらでは確認していません。
|
2
|
-
```python
|
1
|
+
※ 削除
|
3
|
-
import pandas as pd
|
4
|
-
from itertools import combinations
|
5
2
|
|
6
|
-
df = pd.DataFrame({
|
7
|
-
'group': [*['A']*4, *['B']*3, *['C']*5, *['D']*4],
|
8
|
-
'person': [1, 2, 3, 4, 2, 5, 6, 4, 7, 8, 9, 10, 2, 3, 7, 12],
|
9
|
-
})
|
10
3
|
|
11
|
-
comb = [*combinations(df['group'].unique(), 2)]
|
12
|
-
dfx = df.set_index('group')
|
13
|
-
dfx = pd.DataFrame(
|
14
|
-
[dfx.loc[c,:].duplicated().sum() for c in comb],
|
15
|
-
index=pd.MultiIndex.from_tuples(comb))\
|
16
|
-
.reset_index().set_axis(['group1', 'group2', 'freq'], axis=1)
|
17
4
|
|
18
|
-
print(dfx)
|
19
|
-
```
|
20
5
|
|
21
|
-
| group1 | group2 | freq |
|
22
|
-
|:--------:|:--------:|-------:|
|
23
|
-
| A | B | 1 |
|
24
|
-
| A | C | 1 |
|
25
|
-
| A | D | 2 |
|
26
|
-
| B | C | 0 |
|
27
|
-
| B | D | 1 |
|
28
|
-
| C | D | 1 |
|
29
6
|
|
7
|
+
|