実現したいこと
- 下記に示すRのコードのうち、末尾にコメントを付けた箇所(addtionaltable を作成し、各会員のランキングを20件に補完する最後の2行)をPython(pandas)に書き換えたい
前提
- Python3を用いています。
- Pythonのコードでも、Rのコードと同じ変数名を使っています。
- コードはレコメンドエンジンのコードです。
- for文を回すことだけはしたくないです。
書き換えたいRのコード
# Recommendation baseline: score (member, auction) pairs from manual bids and
# watchlist entries, then emit 20 ranked auctions for every member.
library(data.table)

# All date columns are converted to "days relative to this date".
basedate = as.Date("2019-10-01")

# Members to predict for, one row per KaiinID.
submission = fread("data/sample_submission.csv",
                   col.names = c("KaiinID", "AuctionID"))
submission = unique(submission[, .(KaiinID)], by = NULL)

auction = fread("data/auction.csv",
                col.names = c("AuctionID", "ShouhinShubetsuID", "ShouhinID",
                              "SaishuppinKaisuu", "ConditionID", "BrandID",
                              "GenreID", "GenreGroupID", "LineID", "ColorID",
                              "DanjobetsuID", "SankouKakaku", "CreateDate"))
auction$CreateDateOrder =
  as.double(as.Date(substring(auction$CreateDate, 1, 10)) - basedate)

# Fixed: the original had a trailing comma inside c(...), which is an error.
watchlist = fread("data/watchlist.csv",
                  col.names = c("KaiinID", "AuctionID", "TourokuDate",
                                "SakujoFlag"))
# Fixed: the original assigned to TourokuDate while reading the not-yet-existing
# TourokuDateOrder column; the derived column is TourokuDateOrder.
watchlist$TourokuDateOrder =
  as.double(as.Date(substring(watchlist$TourokuDate, 1, 10)) - basedate)
watchlist = merge(watchlist, auction, by = "AuctionID")

shudounyuusatsu = fread("data/shudounyuusatsu.csv",
                        col.names = c("AuctionID", "KaiinID",
                                      "ShudouNyuusatsuDate", "Kingaku",
                                      "Suuryou", "SokketsuFlag", "SakujoFlag"))
shudounyuusatsu$ShudouNyuusatsuDateOrder =
  as.double(as.Date(substring(shudounyuusatsu$ShudouNyuusatsuDate, 1, 10)) - basedate)
shudounyuusatsu = merge(shudounyuusatsu, auction, by = "AuctionID")

dateorder = -7

# Fixed: the bid scores must come from shudounyuusatsu; auction has neither
# KaiinID nor ShudouNyuusatsuDateOrder.
predict = rbind(
  shudounyuusatsu[, .(KaiinID, AuctionID,
                      Point = 1 / (dateorder - CreateDateOrder) ** 0.5 /
                        (dateorder - ShudouNyuusatsuDateOrder))],
  watchlist[SakujoFlag == 1,
            .(KaiinID, AuctionID,
              Point = 4 / (dateorder - CreateDateOrder) ** 0.5 /
                (dateorder - TourokuDateOrder))]
)

# all.y keeps members without any scored auction (their AuctionID is NA).
predict = merge(predict, submission, by = "KaiinID", all.y = TRUE)
# Fixed: "predivt" typo.
predict = predict[, .(Point = max(Point)), .(KaiinID, AuctionID)]
predict = predict[order(-Point)]

# The two statements below are the part to be ported to pandas.
# 20 randomly sampled auctions created on day (dateorder - 5), used as padding.
addtionaltable = auction[CreateDateOrder == dateorder - 5,
                         .(AuctionID = sample(AuctionID, 20))]
# Per member: own auctions in score order, padded with the sampled auctions,
# truncated to exactly 20 ranked rows.
predict = predict[, .(Ranking = 1:20,
                      AuctionID = c(AuctionID[!is.na(AuctionID)],
                                    addtionaltable$AuctionID)[1:20]),
                  .(KaiinID)]
書き換え後のPythonのコード例
"""Recommendation baseline ported from the R/data.table script.

Scores (member, auction) pairs from manual bids and watchlist entries, then
emits a fixed number of ranked auctions per member, padding members that have
too few scored auctions with a shared list of fallback auctions.
"""
from datetime import datetime

import pandas as pd

# All date columns are converted to "days relative to this date".
BASE_DATE = datetime(2019, 10, 1)
# Number of ranked auctions emitted per member.
TOP_N = 20


def days_since(timestamps, base=BASE_DATE):
    """Return whole days between each timestamp's date part and ``base``.

    Args:
        timestamps: Series of strings whose first 10 characters are
            ``YYYY-MM-DD``.
        base: Reference date subtracted from every parsed date.

    Returns:
        Series of day counts (negative for dates before ``base``).
    """
    dates = pd.to_datetime(timestamps.str[:10], format="%Y-%m-%d")
    return (dates - pd.Timestamp(base)).dt.days


def fill_ranking(scored, members, fallback_ids, top_n=TOP_N):
    """Build the per-member ranking, padded with fallback auctions.

    Vectorized equivalent of the R expression
    ``c(AuctionID[!is.na(AuctionID)], addtionaltable$AuctionID)[1:20]``
    evaluated per ``KaiinID``; no Python-level loop is used.

    Args:
        scored: Frame with ``KaiinID`` and ``AuctionID`` columns, already
            sorted best-first. Rows whose ``AuctionID`` is missing (members
            without any scored auction) are ignored.
        members: Frame with a ``KaiinID`` column listing every member that
            must appear in the output.
        fallback_ids: Auction ids appended, in order, after each member's own
            auctions. As in the R code they are not de-duplicated against the
            member's own auctions.
        top_n: Maximum number of rows kept per member.

    Returns:
        Frame with columns ``KaiinID``, ``Ranking`` (1-based) and
        ``AuctionID``, ordered by ``KaiinID`` then ``Ranking``.
    """
    fallback = pd.Series(fallback_ids, name="AuctionID").reset_index(drop=True)
    own = scored.loc[scored["AuctionID"].notna(), ["KaiinID", "AuctionID"]]
    # Cross join: every member gets the full fallback list, in order.
    padding = members[["KaiinID"]].merge(fallback.to_frame(), how="cross")
    # Own auctions come first, so cumcount ranks them ahead of the padding.
    candidates = pd.concat([own, padding], ignore_index=True)
    candidates["Ranking"] = candidates.groupby("KaiinID").cumcount() + 1
    ranking = candidates[candidates["Ranking"] <= top_n].sort_values(
        ["KaiinID", "Ranking"]
    )
    # The earlier left merge introduces NaN and turns the ids into floats;
    # restore the dtype of the fallback ids.
    ranking = ranking.assign(
        AuctionID=ranking["AuctionID"].astype(fallback.dtype)
    )
    return ranking[["KaiinID", "Ranking", "AuctionID"]].reset_index(drop=True)


if __name__ == "__main__":
    auction = pd.read_csv("data/auction.csv")
    shudounyuusatsu = pd.read_csv("data/shudounyuusatsu.csv")
    watchlist = pd.read_csv("data/watchlist.csv")
    submission = pd.read_csv("data/sample_submission.csv")

    # One row per member to predict for.
    submission = submission[["KaiinID"]].drop_duplicates().reset_index(drop=True)

    auction["CreateDateOrder"] = days_since(auction.CreateDate)

    watchlist["TourokuDateOrder"] = days_since(watchlist.TourokuDate)
    watchlist = pd.merge(watchlist, auction, on="AuctionID")

    shudounyuusatsu["ShudouNyuusatsuDateOrder"] = days_since(
        shudounyuusatsu.ShudouNyuusatsuDate
    )
    shudounyuusatsu = pd.merge(shudounyuusatsu, auction, on="AuctionID")

    order = -7
    shudounyuusatsu["Point"] = (
        1
        / (order - shudounyuusatsu.CreateDateOrder) ** 0.5
        / (order - shudounyuusatsu.ShudouNyuusatsuDateOrder)
    )
    watchlist["Point"] = (
        4
        / (order - watchlist.CreateDateOrder) ** 0.5
        / (order - watchlist.TourokuDateOrder)
    )

    score_columns = ["KaiinID", "AuctionID", "Point"]
    predict = pd.concat(
        [
            shudounyuusatsu[score_columns],
            watchlist.loc[watchlist.SakujoFlag == 1, score_columns],
        ]
    )
    # Best score per (member, auction) pair.
    predict = predict.groupby(["KaiinID", "AuctionID"]).max().reset_index()
    # Left merge keeps members without any scored auction (AuctionID is NaN).
    predict = pd.merge(submission, predict, how="left", on="KaiinID")
    predict = predict.sort_values(
        ["Point", "KaiinID", "AuctionID"], ascending=[False, True, True]
    )

    # 20 randomly sampled auctions created on day (order - 5), used as padding.
    # Fixed: the original read from an undefined name `kari`.
    addtionaltable = auction.loc[
        auction.CreateDateOrder == order - 5, "AuctionID"
    ].sample(n=TOP_N, random_state=0)

    # Final step of the R script: exactly TOP_N ranked auctions per member.
    predict = fill_ranking(predict, submission, addtionaltable, top_n=TOP_N)
あなたの回答
tips
プレビュー