回答編集履歴
1
補足を追加
test
CHANGED
@@ -113,3 +113,143 @@
|
|
113
113
|
|
114
114
|
|
115
115
|
> ``astype()`` 以降はデータを分単位表記に変えているだけです
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
---
|
120
|
+
|
121
|
+
**【追記】**
|
122
|
+
|
123
|
+
休憩時間を処理するサンプル
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
```Python
|
128
|
+
|
129
|
+
import pandas as pd
|
130
|
+
|
131
|
+
import datetime
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
# Break period used by this sample (arbitrary placeholder values for now).
|
136
|
+
|
137
|
+
BREAK_START = datetime.time(9, 27)
|
138
|
+
|
139
|
+
BREAK_END = datetime.time(9, 32)
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
# Utility function that computes the difference between two datetime.time values.
def time_diff(start_time, end_time):
    """Return ``end_time - start_time`` for two ``datetime.time`` values.

    ``datetime.time`` objects cannot be subtracted directly, so both values
    are combined with the same calendar date and subtracted as datetimes.

    Args:
        start_time: ``datetime.time`` marking the start of the interval.
        end_time: ``datetime.time`` marking the end of the interval.

    Returns:
        A ``datetime.timedelta``; it is negative when ``end_time`` is
        earlier than ``start_time``.
    """
    # Read the date once: looking up date.today() separately for each
    # operand could straddle midnight and skew the result by a whole day.
    anchor_date = datetime.date.today()
    return (datetime.datetime.combine(anchor_date, end_time)
            - datetime.datetime.combine(anchor_date, start_time))
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
# Called through GroupBy.apply(); computes the working time for every row.
def calc_product_time(data):
    """Return the per-row elapsed time with the break period subtracted.

    Each row is treated as the interval running from the previous row's
    timestamp to its own timestamp. The part of that interval (compared by
    time of day) that overlaps ``BREAK_START``..``BREAK_END`` is subtracted
    from the elapsed time.

    Args:
        data: Series of datetimes (the ``.dt`` accessor is used on it).

    Returns:
        A Series named ``product_time`` with the same index as ``data``.
        The first row has no predecessor, so its value is NaT.
    """
    no_break = datetime.timedelta(0)
    break_times = []

    for start_time, end_time in zip(data.shift(1).dt.time, data.dt.time):
        # The first row has no previous timestamp, so its start is a
        # missing value rather than a datetime.time; it cannot be ordered
        # against the break bounds and contributes no break time.
        if not (isinstance(start_time, datetime.time)
                and isinstance(end_time, datetime.time)):
            break_times.append(no_break)
            continue

        # Overlap of [start_time, end_time] with the break period: this
        # covers "break fully inside", "only break start inside", "only
        # break end inside" and "interval fully inside the break" at once.
        overlap_start = max(start_time, BREAK_START)
        overlap_end = min(end_time, BREAK_END)
        if overlap_start < overlap_end:
            break_times.append(time_diff(overlap_start, overlap_end))
        else:
            # No overlap with the break period.
            break_times.append(no_break)

    break_time = pd.Series(pd.to_timedelta(break_times), index=data.index)
    product_time = data.diff() - break_time
    return product_time.rename('product_time')
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
# Load the data, merging the 'yyyymmdd' and 'hhmm' columns into 'datetime'.
# NOTE(review): passing a dict to parse_dates to combine columns is
# deprecated in recent pandas releases — confirm against the installed
# version before relying on it.
df = pd.read_csv('data.csv', parse_dates={'datetime': ['yyyymmdd', 'hhmm']})

# group_keys=False keeps the original row index on the applied result, so
# it aligns with df when assigned back as a new column.
df['min_per_product'] = (
    df.groupby(['id', df['datetime'].dt.date], group_keys=False)['datetime']
    .apply(calc_product_time)
)
print(df)
|
252
|
+
|
253
|
+
|
254
|
+
|
255
|
+
```
|