回答編集履歴

1

補足を追加

2019/09/02 02:15

投稿

magichan
magichan

スコア15898

test CHANGED
@@ -113,3 +113,143 @@
113
113
 
114
114
 
115
115
  > ``astype()`` 以降はデータを分単位表記に変えているだけです
116
+
117
+
118
+
119
+ ---
120
+
121
+ **【追記】**
122
+
123
+ 休憩時間を処理するサンプル
124
+
125
+
126
+
127
+ ```Python
128
+
129
+ import pandas as pd
130
+
131
+ import datetime
132
+
133
+
134
+
135
+ # Break window as times of day (arbitrary placeholder values for this sample)
136
+
137
+ BREAK_START = datetime.time(9, 27)
138
+
139
+ BREAK_END = datetime.time(9, 32)
140
+
141
+
142
+
143
+ # datetime.time 型同士の差を求めるUtility関数
144
+
145
def time_diff(start_time, end_time):
    """Return ``end_time - start_time`` as a ``datetime.timedelta``.

    ``datetime.time`` objects do not support subtraction, so both values are
    attached to the same calendar date and the resulting datetimes are
    subtracted.

    Args:
        start_time: ``datetime.time`` marking the start of the span.
        end_time: ``datetime.time`` marking the end of the span.

    Returns:
        The signed difference as a ``datetime.timedelta``; negative when
        ``end_time`` is earlier in the day than ``start_time``.
    """
    # A single fixed date is used for both operands: the date itself cancels
    # out in the subtraction, and not consulting the clock keeps the result
    # independent of when the function happens to be called.
    anchor = datetime.date(1970, 1, 1)
    return (datetime.datetime.combine(anchor, end_time)
            - datetime.datetime.combine(anchor, start_time))
148
+
149
+
150
+
151
+ # Groupby.apply() にて呼ばれる関数(各行に時間を求める)
152
+
153
def _time_of_day_offset(moment):
    """Return the time elapsed since midnight for a ``datetime.time``."""
    return datetime.timedelta(
        hours=moment.hour,
        minutes=moment.minute,
        seconds=moment.second,
        microseconds=moment.microsecond,
    )


def calc_product_time(data, break_start=None, break_end=None):
    """Compute the working time between consecutive timestamps, minus breaks.

    Intended to be passed to ``GroupBy.apply()``. Each row describes the
    interval from the previous timestamp in ``data`` to the row's own
    timestamp; the part of that interval that overlaps the break window is
    subtracted from the interval length.

    Args:
        data: Series of datetime values (one group), in chronological order.
        break_start: ``datetime.time`` at which the break begins. Defaults to
            the module-level ``BREAK_START``.
        break_end: ``datetime.time`` at which the break ends. Defaults to the
            module-level ``BREAK_END``.

    Returns:
        A timedelta Series named ``'product_time'`` with the same index as
        ``data``. The first row has no preceding timestamp, so its value is
        missing (NaT).
    """
    if break_start is None:
        break_start = BREAK_START
    if break_end is None:
        break_end = BREAK_END

    interval_start = data.shift(1)
    interval_end = data

    # Place the break window on the calendar day of each row's end timestamp
    # so that everything can be compared as full timestamps. The caller
    # groups by date, so start and end of an interval share that day.
    midnight = interval_end.dt.normalize()
    window_start = midnight + _time_of_day_offset(break_start)
    window_end = midnight + _time_of_day_offset(break_end)

    # Overlap of [interval_start, interval_end] with [window_start, window_end]
    # is min(ends) - max(starts), floored at zero. A missing interval_start
    # (first row) compares False and falls back to window_start; that row's
    # final result is still NaT because its total time is NaT.
    overlap_start = interval_start.where(interval_start > window_start,
                                         window_start)
    overlap_end = interval_end.where(interval_end < window_end, window_end)

    no_break = pd.Timedelta(0)
    overlap = overlap_end - overlap_start
    break_time = overlap.where(overlap > no_break, no_break)

    product_time = data.diff() - break_time
    return product_time.rename('product_time')
244
+
245
+
246
+
247
# Load the data, combining the 'yyyymmdd' and 'hhmm' columns into a single
# parsed 'datetime' column.
# NOTE(review): the dict form of parse_dates is deprecated in recent pandas
# releases - confirm against the pandas version in use.
df = pd.read_csv('data.csv', parse_dates={'datetime': ['yyyymmdd', 'hhmm']})

# Compute the per-row working time separately for every (id, calendar day).
# group_keys=False keeps the original row index on the result, so the
# assignment below aligns row-for-row with df instead of receiving a result
# indexed by (id, date, row).
per_id_and_day = df.groupby(['id', df['datetime'].dt.date], group_keys=False)
df['min_per_product'] = per_id_and_day['datetime'].apply(calc_product_time)
print(df)
252
+
253
+
254
+
255
+ ```