回答編集履歴
1
Update
answer
CHANGED
@@ -11,4 +11,52 @@
|
|
11
11
|
2,7.00,1.00,2.00
|
12
12
|
3,3.33,5.67,6.67
|
13
13
|
7,6.00,5.00,5.00
|
14
|
+
```
|
15
|
+
|
16
|
+
**追記**
|
17
|
+
|
18
|
+
> 文字列が含まれている場合ではどうしたらいいでしょうか?
|
19
|
+
|
20
|
+
```python
|
21
|
+
import numpy as np
|
22
|
+
|
23
|
+
# load
|
24
|
+
cols = ('num', 'a', 'b', 'c', 'name')
|
25
|
+
tbl = np.loadtxt(
|
26
|
+
'data.csv', delimiter=',', skiprows=1,
|
27
|
+
dtype={'names': cols,
|
28
|
+
'formats': (*(np.int64,)*4, (np.str_, 10))})
|
29
|
+
|
30
|
+
# mean
|
31
|
+
# Pick each group's name in np.unique(num) order so names[n] lines up with the n-th averaged row
# (np.unique on the names alone sorts alphabetically, which need not match the num order).
names = [str(tbl['name'][tbl['num'] == i][0]) for i in np.unique(tbl['num'])]
|
32
|
+
# Build the (rows x 4) integer matrix from the numeric fields only; leaving the string
# column out avoids converting every number to text and parsing it back.
tbl = np.array([tbl[n] for n in cols[:-1]]).T.astype(int)
|
33
|
+
# One row of column means per distinct value in the first column (the group key)
group_keys = np.unique(tbl[:, 0])
result = np.array([tbl[tbl[:, 0] == key].mean(axis=0) for key in group_keys])
|
34
|
+
|
35
|
+
# save
|
36
|
+
fmt = ['{:.0f}'] + ['{:.2f}']*3
|
37
|
+
# Format each averaged row column-by-column, then append the group's name as the last field
result = [
    [spec.format(value) for spec, value in zip(fmt, row)] + [names[idx]]
    for idx, row in enumerate(result.tolist())
]
|
38
|
+
np.savetxt('result.csv', result, delimiter=',', header=','.join(cols), fmt='%s')
|
39
|
+
|
40
|
+
# result.csv
|
41
|
+
# num,a,b,c,name
|
42
|
+
1,3.00,3.00,6.00,aa
|
43
|
+
2,7.00,1.00,2.00,bb
|
44
|
+
3,3.33,5.67,6.67,cc
|
45
|
+
7,6.00,5.00,5.00,dd
|
46
|
+
```
|
47
|
+
|
48
|
+
一方、Pandas を使うと簡単にできます。
|
49
|
+
```python
|
50
|
+
import pandas as pd
|
51
|
+
|
52
|
+
df = pd.read_csv('data.csv')
|
53
|
+
# Average the numeric columns per 'num', then attach each group's name keyed by 'num'.
# Joining the raw df['name'] would align on row position instead of the group key, and
# mean() without numeric_only=True fails on the string column in pandas 2.x.
dfx = df.groupby('num').mean(numeric_only=True).join(df.groupby('num')['name'].first())
|
54
|
+
dfx.to_csv('result.csv', float_format='%.2f')
|
55
|
+
|
56
|
+
# result.csv
|
57
|
+
num,a,b,c,name
|
58
|
+
1,3.00,3.00,6.00,aa
|
59
|
+
2,7.00,1.00,2.00,bb
|
60
|
+
3,3.33,5.67,6.67,cc
|
61
|
+
7,6.00,5.00,5.00,dd
|
14
62
|
```
|