質問編集履歴
7
試行4追加しました。シンプルなループです。
test
CHANGED
File without changes
|
test
CHANGED
@@ -24,7 +24,7 @@
|
|
24
24
|
|
25
25
|
・試行3を追加しました。データ2の結合はうまくいきますがデータ1の結合がおかしくなります。
|
26
26
|
|
27
|
-
|
27
|
+
・試行4追加しました。シンプルなループです。
|
28
28
|
|
29
29
|
|
30
30
|
|
@@ -330,7 +330,7 @@
|
|
330
330
|
|
331
331
|
test3(df2)```
|
332
332
|
|
333
|
-
# 試行
|
333
|
+
# 試行3 データ1結果:失敗
|
334
334
|
|
335
335
|
# name status1 dtime1 ver n status2 dtime2
|
336
336
|
|
@@ -342,7 +342,7 @@
|
|
342
342
|
|
343
343
|
# 3 siro NaN NaN ver0.1 1 ED 2020-01-01 10:40:00
|
344
344
|
|
345
|
-
# 試行
|
345
|
+
# 試行3 データ1理想の結果
|
346
346
|
|
347
347
|
# name status1 dtime1 ver status2 dtime2
|
348
348
|
|
@@ -370,6 +370,140 @@
|
|
370
370
|
|
371
371
|
|
372
372
|
|
373
|
+
# 試行4 ループさせてみた
|
374
|
+
|
375
|
+
|
376
|
+
|
377
|
+
def test4(df):
|
378
|
+
|
379
|
+
dfs = df[df["status"] == "ST"]
|
380
|
+
|
381
|
+
df["p"] = False
|
382
|
+
|
383
|
+
|
384
|
+
|
385
|
+
cols = ["dtime1", "dtime2", "name", "ver"]
|
386
|
+
|
387
|
+
outdf = pd.DataFrame(index=[], columns=cols)
|
388
|
+
|
389
|
+
|
390
|
+
|
391
|
+
# start loop
|
392
|
+
|
393
|
+
for i, row1 in dfs.iterrows():
|
394
|
+
|
395
|
+
dfe = df[i + 1 :]
|
396
|
+
|
397
|
+
dfe = dfe[dfe["status"] == "ED"]
|
398
|
+
|
399
|
+
# df.loc[i, "p"] =
|
400
|
+
|
401
|
+
|
402
|
+
|
403
|
+
# end loop
|
404
|
+
|
405
|
+
for j, row2 in dfe.iterrows():
|
406
|
+
|
407
|
+
if row1["name"] == row2["name"] and row1["ver"] == row2["ver"]:
|
408
|
+
|
409
|
+
if df.loc[j, "p"] == True:
|
410
|
+
|
411
|
+
continue
|
412
|
+
|
413
|
+
addlist = [row1["dtime"], row2["dtime"], row1["name"], row1["ver"]]
|
414
|
+
|
415
|
+
record = pd.Series(addlist, index=outdf.columns)
|
416
|
+
|
417
|
+
outdf = outdf.append(record, ignore_index=True)
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
df.loc[i, "p"] = True
|
422
|
+
|
423
|
+
df.loc[j, "p"] = True
|
424
|
+
|
425
|
+
break
|
426
|
+
|
427
|
+
# print(df)
|
428
|
+
|
429
|
+
# 不一致結合
|
430
|
+
|
431
|
+
dfn = df[df["p"] == False]
|
432
|
+
|
433
|
+
alist = []
|
434
|
+
|
435
|
+
|
436
|
+
|
437
|
+
for i, row in dfn.iterrows():
|
438
|
+
|
439
|
+
addlist = None
|
440
|
+
|
441
|
+
record = None
|
442
|
+
|
443
|
+
addic = {}
|
444
|
+
|
445
|
+
if row["status"] == "ST":
|
446
|
+
|
447
|
+
addic["dtime1"] = row1["dtime"]
|
448
|
+
|
449
|
+
addic["dtime2"] = ""
|
450
|
+
|
451
|
+
addic["name"] = row1["name"]
|
452
|
+
|
453
|
+
addic["ver"] = row1["ver"]
|
454
|
+
|
455
|
+
alist.append(addic)
|
456
|
+
|
457
|
+
|
458
|
+
|
459
|
+
elif row["status"] == "ED":
|
460
|
+
|
461
|
+
addic["dtime1"] = ""
|
462
|
+
|
463
|
+
addic["dtime2"] = row["dtime"]
|
464
|
+
|
465
|
+
addic["name"] = row["name"]
|
466
|
+
|
467
|
+
addic["ver"] = row["ver"]
|
468
|
+
|
469
|
+
alist.append(addic)
|
470
|
+
|
471
|
+
# print(addic)
|
472
|
+
|
473
|
+
# print(df)
|
474
|
+
|
475
|
+
|
476
|
+
|
477
|
+
outdf = outdf.append(alist, ignore_index=True)
|
478
|
+
|
479
|
+
return outdf
|
480
|
+
|
481
|
+
|
482
|
+
|
483
|
+
# 試行4データ1結果:成功
|
484
|
+
|
485
|
+
rslt1
|
486
|
+
|
487
|
+
dtime1 dtime2 name ver
|
488
|
+
|
489
|
+
0 2020-01-01 00:35:00 2020-01-01 10:40:00 siro ver0.1
|
490
|
+
|
491
|
+
1 2020-01-01 00:35:00 siro ver0.1
|
492
|
+
|
493
|
+
2 2020-01-30 23:00:00 siro ver0.1
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
# 試行4データ1結果:成功
|
498
|
+
|
499
|
+
dtime1 dtime2 name ver
|
500
|
+
|
501
|
+
0 2020-01-01 00:35:00 2020-01-01 10:40:00 siro ver0.1
|
502
|
+
|
503
|
+
1 2020-01-01 00:35:00 2020-01-30 23:00:00 siro ver0.1
|
504
|
+
|
505
|
+
2 2020-01-01 00:34:00 siro ver0.1
|
506
|
+
|
373
507
|
```
|
374
508
|
|
375
509
|
|
6
修正:結合イメージ(一対一の結合+未結合も残す)
test
CHANGED
File without changes
|
test
CHANGED
@@ -72,17 +72,17 @@
|
|
72
72
|
|
73
73
|
)
|
74
74
|
|
75
|
-
# 結合イメージ
|
75
|
+
# 結合イメージ(一対一の結合、ただし未結合も残す)
|
76
76
|
|
77
77
|
# SELECT
|
78
78
|
|
79
|
-
# *
|
79
|
+
# *結合イメージ(一対一の結合、ただし未結合も残す)
|
80
80
|
|
81
81
|
# FROM
|
82
82
|
|
83
83
|
# df_st
|
84
84
|
|
85
|
-
# JOIN
|
85
|
+
# INNER JOIN
|
86
86
|
|
87
87
|
# df_ed
|
88
88
|
|
@@ -94,8 +94,6 @@
|
|
94
94
|
|
95
95
|
# df_st.index < df_ed.index
|
96
96
|
|
97
|
-
# ;
|
98
|
-
|
99
97
|
|
100
98
|
|
101
99
|
# 最終出力イメージ
|
5
追記修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -12,13 +12,19 @@
|
|
12
12
|
|
13
13
|
よろしくお願いいたします。
|
14
14
|
|
15
|
+
|
16
|
+
|
15
|
-
追記
|
17
|
+
追記
|
16
|
-
|
17
|
-
|
18
|
+
|
18
|
-
|
19
|
-
|
19
|
+
・短いコードで実現できる方法を探しています。
|
20
|
+
|
20
|
-
|
21
|
+
・データ1において試行1、試行2は問題なく動作しています。
|
22
|
+
|
21
|
-
|
23
|
+
・データ2を追加しました。
|
24
|
+
|
25
|
+
・試行3を追加しました。データ2の結合はうまくいきますがデータ1の結合がおかしくなります。
|
26
|
+
|
27
|
+
|
22
28
|
|
23
29
|
|
24
30
|
|
4
試行3追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -278,12 +278,98 @@
|
|
278
278
|
|
279
279
|
|
280
280
|
|
281
|
+
# 試行3 試行2とは逆でデータ2はイメージ通りだがデータ1がうまくいかない
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
def test3(df):
|
286
|
+
|
287
|
+
df["n"] = (
|
288
|
+
|
289
|
+
df.groupby([df["name"], df["ver"]])["status"].cumcount()
|
290
|
+
|
291
|
+
- (df["status"] == "ED")
|
292
|
+
|
293
|
+
+ (df["status"] == "ST")
|
294
|
+
|
295
|
+
)
|
296
|
+
|
297
|
+
|
298
|
+
|
299
|
+
odf = pd.merge(
|
300
|
+
|
301
|
+
df[df["status"] == "ST"],
|
302
|
+
|
303
|
+
df[df["status"] == "ED"],
|
304
|
+
|
305
|
+
on=["name", "ver", "n"],
|
306
|
+
|
307
|
+
how="outer",
|
308
|
+
|
309
|
+
suffixes=("1", "2"),
|
310
|
+
|
311
|
+
)
|
312
|
+
|
313
|
+
print(odf)
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
print("\ntest3")
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
test3(df1)
|
324
|
+
|
325
|
+
print("")
|
326
|
+
|
327
|
+
test3(df2)```
|
328
|
+
|
329
|
+
# 試行2 データ1結果:失敗
|
330
|
+
|
331
|
+
# name status1 dtime1 ver n status2 dtime2
|
332
|
+
|
333
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 2 NaN NaN
|
334
|
+
|
335
|
+
# 1 siro ST 2020-01-30 23:00:00 ver0.1 4 NaN NaN
|
336
|
+
|
337
|
+
# 2 siro NaN NaN ver0.1 -1 ED 2020-01-01 00:35:00
|
338
|
+
|
339
|
+
# 3 siro NaN NaN ver0.1 1 ED 2020-01-01 10:40:00
|
340
|
+
|
341
|
+
# 試行2 データ1理想の結果
|
342
|
+
|
343
|
+
# name status1 dtime1 ver status2 dtime2
|
344
|
+
|
345
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 ED 2020-01-01 10:40:00
|
346
|
+
|
347
|
+
# 1 siro ST 2020-01-30 23:00:00 ver0.1 NaN NaN
|
348
|
+
|
349
|
+
# 2 siro NaN NaN ver0.1 ED 2020-01-01 00:35:00
|
350
|
+
|
351
|
+
|
352
|
+
|
353
|
+
|
354
|
+
|
355
|
+
# 試行2 データ2結果:成功
|
356
|
+
|
357
|
+
|
358
|
+
|
359
|
+
# name status1 dtime1 ver n status2 dtime2
|
360
|
+
|
361
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 2 ED 2020-01-01 10:40:00
|
362
|
+
|
363
|
+
# 1 siro ST 2020-01-01 00:35:00 ver0.1 3 ED 2020-01-30 23:00:00
|
364
|
+
|
365
|
+
# 2 siro NaN NaN ver0.1 -1 ED 2020-01-01 00:34:00
|
366
|
+
|
367
|
+
|
368
|
+
|
281
369
|
```
|
282
370
|
|
283
371
|
|
284
372
|
|
285
|
-
|
286
|
-
|
287
373
|
### 補足情報(FW/ツールのバージョンなど)
|
288
374
|
|
289
375
|
python 3.6以上
|
3
データ2追加。試行2データ2の結果と理想を記載。
test
CHANGED
File without changes
|
test
CHANGED
@@ -32,7 +32,7 @@
|
|
32
32
|
|
33
33
|
|
34
34
|
|
35
|
-
df = pd.DataFrame(data=[
|
35
|
+
df1 = pd.DataFrame(data=[
|
36
36
|
|
37
37
|
['siro','ED','2020-01-01 00:35:00','ver0.1']
|
38
38
|
|
@@ -46,7 +46,25 @@
|
|
46
46
|
|
47
47
|
|
48
48
|
|
49
|
-
|
49
|
+
df2 = pd.DataFrame(
|
50
|
+
|
51
|
+
data=[
|
52
|
+
|
53
|
+
["siro", "ED", "2020-01-01 00:34:00", "ver0.1"],
|
54
|
+
|
55
|
+
["siro", "ST", "2020-01-01 00:35:00", "ver0.1"],
|
56
|
+
|
57
|
+
["siro", "ST", "2020-01-01 00:35:00", "ver0.1"],
|
58
|
+
|
59
|
+
["siro", "ED", "2020-01-01 10:40:00", "ver0.1"],
|
60
|
+
|
61
|
+
["siro", "ED", "2020-01-30 23:00:00", "ver0.1"],
|
62
|
+
|
63
|
+
],
|
64
|
+
|
65
|
+
columns=["name", "status", "dtime", "ver"],
|
66
|
+
|
67
|
+
)
|
50
68
|
|
51
69
|
# 結合イメージ
|
52
70
|
|
@@ -186,29 +204,43 @@
|
|
186
204
|
|
187
205
|
**試行2**
|
188
206
|
|
207
|
+
|
208
|
+
|
209
|
+
def test(df):
|
210
|
+
|
189
|
-
df["n"] = df.groupby([df["name"], df["ver"]])["status"].cumcount() - (
|
211
|
+
df["n"] = df.groupby([df["name"], df["ver"]])["status"].cumcount() - (
|
190
|
-
|
212
|
+
|
191
|
-
df["status"] == "ED"
|
213
|
+
df["status"] == "ED"
|
192
|
-
|
214
|
+
|
193
|
-
)
|
215
|
+
)
|
194
|
-
|
216
|
+
|
195
|
-
odf = pd.merge(
|
217
|
+
odf = pd.merge(
|
196
|
-
|
218
|
+
|
197
|
-
df[df["status"] == "ST"],
|
219
|
+
df[df["status"] == "ST"],
|
198
|
-
|
220
|
+
|
199
|
-
df[df["status"] == "ED"],
|
221
|
+
df[df["status"] == "ED"],
|
200
|
-
|
222
|
+
|
201
|
-
on=["name", "ver", "n"],
|
223
|
+
on=["name", "ver", "n"],
|
202
|
-
|
224
|
+
|
203
|
-
how="outer",
|
225
|
+
how="outer",
|
204
|
-
|
226
|
+
|
205
|
-
suffixes=("1", "2"),
|
227
|
+
suffixes=("1", "2"),
|
206
|
-
|
228
|
+
|
207
|
-
)
|
229
|
+
)
|
230
|
+
|
208
|
-
|
231
|
+
print(odf)
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
test(df1)
|
236
|
+
|
209
|
-
|
237
|
+
print("")
|
238
|
+
|
210
|
-
|
239
|
+
test(df2)
|
240
|
+
|
241
|
+
|
242
|
+
|
211
|
-
# 試行2結果:
|
243
|
+
# 試行2 データ1結果:成功
|
212
244
|
|
213
245
|
# name status1 dtime1 ver status2 dtime2
|
214
246
|
|
@@ -220,6 +252,32 @@
|
|
220
252
|
|
221
253
|
|
222
254
|
|
255
|
+
# 試行2 データ2結果:失敗
|
256
|
+
|
257
|
+
# name status1 dtime1 ver n status2 dtime2
|
258
|
+
|
259
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 1 NaN NaN
|
260
|
+
|
261
|
+
# 1 siro ST 2020-01-01 00:35:00 ver0.1 2 ED 2020-01-01 10:40:00
|
262
|
+
|
263
|
+
# 2 siro NaN NaN ver0.1 -1 ED 2020-01-01 00:34:00
|
264
|
+
|
265
|
+
# 3 siro NaN NaN ver0.1 3 ED 2020-01-30 23:00:00
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
# 試行2データ2理想の結果
|
270
|
+
|
271
|
+
# name status1 dtime1 ver status2 dtime2
|
272
|
+
|
273
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 ED 2020-01-01 10:40:00
|
274
|
+
|
275
|
+
# 1 siro ST 2020-01-01 00:35:00 ver0.1 ED 2020-01-30 23:00:00
|
276
|
+
|
277
|
+
# 2 siro NaN NaN ver0.1 ED 2020-01-01 00:34:00
|
278
|
+
|
279
|
+
|
280
|
+
|
223
281
|
```
|
224
282
|
|
225
283
|
|
2
試行1(成功)と試行2(失敗)を追加いたしました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -12,6 +12,14 @@
|
|
12
12
|
|
13
13
|
よろしくお願いいたします。
|
14
14
|
|
15
|
+
追記:
|
16
|
+
|
17
|
+
試行1(成功)と試行2(失敗)を追加いたしました。
|
18
|
+
|
19
|
+
試行2のような短いコードで実現できる方法を探しています。
|
20
|
+
|
21
|
+
諸々省いてしまって申し訳ありません
|
22
|
+
|
15
23
|
|
16
24
|
|
17
25
|
### 該当のソースコード
|
@@ -78,22 +86,14 @@
|
|
78
86
|
|
79
87
|
|
80
88
|
|
81
|
-
**
|
89
|
+
**試行1**
|
82
90
|
|
83
91
|
|
84
92
|
|
85
93
|
# 以前の質問させていただいた時のcan110様の回答を流用にさせていただいております
|
86
94
|
|
87
|
-
|
88
|
-
|
89
95
|
last = {}
|
90
96
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
97
|
def func(row):
|
98
98
|
|
99
99
|
idx = -1
|
@@ -172,13 +172,53 @@
|
|
172
172
|
|
173
173
|
print(df)
|
174
174
|
|
175
|
+
# 試行1結果:
|
176
|
+
|
175
|
-
name1 status1 ver1 dtime1 status2 ver2 dtime2
|
177
|
+
# name1 status1 ver1 dtime1 status2 ver2 dtime2
|
176
|
-
|
178
|
+
|
177
|
-
2 siro NaN NaN NaN ED ver0.1 2020-01-01 00:35:00
|
179
|
+
#2 siro NaN NaN NaN ED ver0.1 2020-01-01 00:35:00
|
178
|
-
|
180
|
+
|
179
|
-
0 siro ST ver0.1 2020-01-01 00:35:00 ED ver0.1 2020-01-01 10:40:00
|
181
|
+
#0 siro ST ver0.1 2020-01-01 00:35:00 ED ver0.1 2020-01-01 10:40:00
|
180
|
-
|
182
|
+
|
181
|
-
1 siro ST ver0.1 2020-01-30 23:00:00 NaN NaN NaN
|
183
|
+
#1 siro ST ver0.1 2020-01-30 23:00:00 NaN NaN NaN
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
**試行2**
|
188
|
+
|
189
|
+
df["n"] = df.groupby([df["name"], df["ver"]])["status"].cumcount() - (
|
190
|
+
|
191
|
+
df["status"] == "ED"
|
192
|
+
|
193
|
+
)
|
194
|
+
|
195
|
+
odf = pd.merge(
|
196
|
+
|
197
|
+
df[df["status"] == "ST"],
|
198
|
+
|
199
|
+
df[df["status"] == "ED"],
|
200
|
+
|
201
|
+
on=["name", "ver", "n"],
|
202
|
+
|
203
|
+
how="outer",
|
204
|
+
|
205
|
+
suffixes=("1", "2"),
|
206
|
+
|
207
|
+
)
|
208
|
+
|
209
|
+
# odf = odf.drop(columns="n")
|
210
|
+
|
211
|
+
# 試行2結果:失敗
|
212
|
+
|
213
|
+
# name status1 dtime1 ver status2 dtime2
|
214
|
+
|
215
|
+
# 0 siro ST 2020-01-01 00:35:00 ver0.1 ED 2020-01-01 10:40:00
|
216
|
+
|
217
|
+
# 1 siro ST 2020-01-30 23:00:00 ver0.1 NaN NaN
|
218
|
+
|
219
|
+
# 2 siro NaN NaN ver0.1 ED 2020-01-01 00:35:00
|
220
|
+
|
221
|
+
|
182
222
|
|
183
223
|
```
|
184
224
|
|
1
自分での試行を追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -78,6 +78,108 @@
|
|
78
78
|
|
79
79
|
|
80
80
|
|
81
|
+
**#試行**
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
# 以前の質問させていただいた時のcan110様の回答を流用にさせていただいております
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
last = {}
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
def func(row):
|
98
|
+
|
99
|
+
idx = -1
|
100
|
+
|
101
|
+
namever1 = row["name"] + row["ver"]
|
102
|
+
|
103
|
+
namever2 = row.name
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
# print(namever1, namever2)
|
108
|
+
|
109
|
+
if row["status"] == "ED":
|
110
|
+
|
111
|
+
last[namever1] = namever2
|
112
|
+
|
113
|
+
else:
|
114
|
+
|
115
|
+
idx = last.pop(namever1, idx)
|
116
|
+
|
117
|
+
# 終了行に、対応する開始行の位置をセット
|
118
|
+
|
119
|
+
if idx >= 0:
|
120
|
+
|
121
|
+
df.loc[idx, "st"] = namever2
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
# 逆順に走査して終了行の直近の開始行を決定
|
128
|
+
|
129
|
+
df["st"] = -1
|
130
|
+
|
131
|
+
df.iloc[::-1].apply(func, axis=1)
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
# 開始、終了行同士を結合
|
136
|
+
|
137
|
+
df = pd.merge(
|
138
|
+
|
139
|
+
df[df["status"] == "ST"],
|
140
|
+
|
141
|
+
df[df["status"] == "ED"],
|
142
|
+
|
143
|
+
left_index=True,
|
144
|
+
|
145
|
+
right_on="st",
|
146
|
+
|
147
|
+
how="outer",
|
148
|
+
|
149
|
+
suffixes=("1", "2"),
|
150
|
+
|
151
|
+
)
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
# # 開始行のない終了行に必要な情報をセットして元レコード順に並び替え
|
156
|
+
|
157
|
+
df = df.reset_index()
|
158
|
+
|
159
|
+
rows = df["st"] < 0
|
160
|
+
|
161
|
+
df.loc[rows, "st"] = df.loc[rows, "index"]
|
162
|
+
|
163
|
+
df.loc[rows, "name1"] = df.loc[rows, "name2"]
|
164
|
+
|
165
|
+
df = df.sort_values(["st"])
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
# 必要な列のみ
|
170
|
+
|
171
|
+
df = df.loc[:, ["name1", "status1", "ver1", "dtime1", "status2", "ver2", "dtime2"]]
|
172
|
+
|
173
|
+
print(df)
|
174
|
+
|
175
|
+
name1 status1 ver1 dtime1 status2 ver2 dtime2
|
176
|
+
|
177
|
+
2 siro NaN NaN NaN ED ver0.1 2020-01-01 00:35:00
|
178
|
+
|
179
|
+
0 siro ST ver0.1 2020-01-01 00:35:00 ED ver0.1 2020-01-01 10:40:00
|
180
|
+
|
181
|
+
1 siro ST ver0.1 2020-01-30 23:00:00 NaN NaN NaN
|
182
|
+
|
81
183
|
```
|
82
184
|
|
83
185
|
|