質問編集履歴
2
マークダウンを追加しました
test
CHANGED
File without changes
|
test
CHANGED
@@ -42,6 +42,8 @@
|
|
42
42
|
|
43
43
|
#追加:df2の作成
|
44
44
|
|
45
|
+
```
|
46
|
+
|
45
47
|
import pandas as pd
|
46
48
|
|
47
49
|
import numpy as np
|
@@ -56,13 +58,17 @@
|
|
56
58
|
|
57
59
|
df.shape
|
58
60
|
|
61
|
+
```
|
62
|
+
|
59
63
|
>>>(8100, 2)
|
60
64
|
|
61
|
-
|
65
|
+
```
|
62
66
|
|
63
67
|
df2 = df.copy()
|
64
68
|
|
65
69
|
df2.isnull().sum()
|
70
|
+
|
71
|
+
```
|
66
72
|
|
67
73
|
>>>data 0
|
68
74
|
|
@@ -72,9 +78,13 @@
|
|
72
78
|
|
73
79
|
|
74
80
|
|
81
|
+
```
|
82
|
+
|
75
83
|
df2 = df2.dropna()
|
76
84
|
|
77
85
|
df2.isnull().sum()
|
86
|
+
|
87
|
+
```
|
78
88
|
|
79
89
|
>>>data 0
|
80
90
|
|
@@ -82,29 +92,37 @@
|
|
82
92
|
|
83
93
|
>>>dtype: int64
|
84
94
|
|
85
|
-
|
95
|
+
```
|
86
96
|
|
87
97
|
df2.shape
|
88
98
|
|
99
|
+
```
|
100
|
+
|
89
101
|
>>>(8035, 2)
|
90
102
|
|
91
|
-
|
103
|
+
```
|
92
104
|
|
93
105
|
df2 = df2.drop_duplicates()
|
94
106
|
|
95
107
|
df2.duplicated().sum()
|
96
108
|
|
109
|
+
```
|
110
|
+
|
97
111
|
>>>0
|
98
112
|
|
99
|
-
|
113
|
+
```
|
100
114
|
|
101
115
|
df2.shape
|
102
116
|
|
117
|
+
```
|
118
|
+
|
103
119
|
>>>(6731, 2)
|
104
120
|
|
105
|
-
|
121
|
+
```
|
106
122
|
|
107
123
|
df2.to_csv("data2.csv")
|
124
|
+
|
125
|
+
```
|
108
126
|
|
109
127
|
|
110
128
|
|
@@ -114,13 +132,17 @@
|
|
114
132
|
|
115
133
|
####df2を読み込み
|
116
134
|
|
135
|
+
```
|
136
|
+
|
117
137
|
df2 = pd.read_csv("data2.csv")
|
118
138
|
|
119
139
|
df2.shape
|
120
140
|
|
121
|
-
|
141
|
+
```
|
122
142
|
|
123
143
|
#####フォルダ内の画像データのpathを読み込み
|
144
|
+
|
145
|
+
```
|
124
146
|
|
125
147
|
files = glob.glob("/画像データのあるフォルダのpath/*")
|
126
148
|
|
@@ -128,11 +150,15 @@
|
|
128
150
|
|
129
151
|
len(files)
|
130
152
|
|
153
|
+
```
|
154
|
+
|
131
155
|
>>>6486
|
132
156
|
|
133
157
|
|
134
158
|
|
135
159
|
######filesのデータフレームを作成
|
160
|
+
|
161
|
+
```
|
136
162
|
|
137
163
|
df_files = pd.DataFrame({"data":files})
|
138
164
|
|
@@ -142,11 +168,15 @@
|
|
142
168
|
|
143
169
|
df_files.duplicated().sum()
|
144
170
|
|
171
|
+
```
|
172
|
+
|
145
173
|
>>>0
|
146
174
|
|
147
175
|
|
148
176
|
|
149
177
|
#####マージ
|
178
|
+
|
179
|
+
```
|
150
180
|
|
151
181
|
df_join = pd.merge(df2, df_files, how="inner",on="data",indicator=True)
|
152
182
|
|
@@ -156,6 +186,8 @@
|
|
156
186
|
|
157
187
|
df_join["_merge"].value_counts()
|
158
188
|
|
189
|
+
```
|
190
|
+
|
159
191
|
>>>both 6730
|
160
192
|
|
161
193
|
>>>right_only 0
|
1
欠損値と重複行の処理を追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -38,7 +38,9 @@
|
|
38
38
|
|
39
39
|
### 該当のソースコード
|
40
40
|
|
41
|
+
----------------------------------------------------------
|
41
42
|
|
43
|
+
#追加:df2の作成
|
42
44
|
|
43
45
|
import pandas as pd
|
44
46
|
|
@@ -47,6 +49,66 @@
|
|
47
49
|
import glob
|
48
50
|
|
49
51
|
import cv2
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
df = pd.read_csv("anotation_result.csv")
|
56
|
+
|
57
|
+
df.shape
|
58
|
+
|
59
|
+
>>>(8100, 2)
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
df2 = df.copy()
|
64
|
+
|
65
|
+
df2.isnull().sum()
|
66
|
+
|
67
|
+
>>>data 0
|
68
|
+
|
69
|
+
>>>smile 65
|
70
|
+
|
71
|
+
>>>dtype: int64
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
df2 = df2.dropna()
|
76
|
+
|
77
|
+
df2.isnull().sum()
|
78
|
+
|
79
|
+
>>>data 0
|
80
|
+
|
81
|
+
>>>smile 0
|
82
|
+
|
83
|
+
>>>dtype: int64
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
df2.shape
|
88
|
+
|
89
|
+
>>>(8035, 2)
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
df2 = df2.drop_duplicates()
|
94
|
+
|
95
|
+
df2.duplicated().sum()
|
96
|
+
|
97
|
+
>>>0
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
df2.shape
|
102
|
+
|
103
|
+
>>>(6731, 2)
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
df2.to_csv("data2.csv")
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
--------------------------------------------------------
|
50
112
|
|
51
113
|
|
52
114
|
|
@@ -66,9 +128,7 @@
|
|
66
128
|
|
67
129
|
len(files)
|
68
130
|
|
69
|
-
|
131
|
+
>>>6486
|
70
|
-
|
71
|
-
|
72
132
|
|
73
133
|
|
74
134
|
|
@@ -78,8 +138,28 @@
|
|
78
138
|
|
79
139
|
|
80
140
|
|
141
|
+
df_files.drop_duplicates()
|
142
|
+
|
143
|
+
df_files.duplicated().sum()
|
144
|
+
|
145
|
+
>>>0
|
146
|
+
|
147
|
+
|
148
|
+
|
81
149
|
#####マージ
|
82
150
|
|
83
151
|
df_join = pd.merge(df2, df_files, how="inner",on="data",indicator=True)
|
84
152
|
|
85
153
|
df_join
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
df_join["_merge"].value_counts()
|
158
|
+
|
159
|
+
>>>both 6730
|
160
|
+
|
161
|
+
>>>right_only 0
|
162
|
+
|
163
|
+
>>>left_only 0
|
164
|
+
|
165
|
+
>>>Name: _merge, dtype: int64
|