前提
python Chainerで電力使用量予測を行おうとしています。
参考サイト:https://qiita.com/mix_dvd/items/cf625bbb447797afa063#%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB
実現したいこと
Chainerで電力使用量予測
発生している問題・エラーメッセージ
KeyError(f"{not_found} not in index")
該当のソースコード
import pandas as pd
import numpy as np
df_kw = pd.read_csv("juyo-2021.csv",encoding="utf-8_sig",skiprows=2)
df_kw["MW"] = df_kw["kW"] * 10
df_kw["DATETIME"] = df_kw.index.map(lambda : pd.to_datetime(df_kw.DATE[] + " " + df_kw.TIME[_]))
df_kw["MW"].plot(figsize=(15,4))
def read_temp(filename):
df_temp = pd.read_csv(filename,encoding="utf-8_sig",skiprows=4)
df_temp.columns = ["DATETIME","TEMP"]
df_temp.DATETIME = df_temp.DATETIME.map(lambda : pd.to_datetime())
return df_temp
df_temp_Fukui = read_temp("data-Fukui-2021.csv")
df_temp_Fukui.rename(columns = {'TEMP':'TEMP_Fukui'}, inplace=True)
df_temp_Fukui.TEMP_Fukui.plot(figsize=(15,4))
df = df_kw.copy()
df = df.merge(df_temp_Fukui,how="inner", on="DATETIME")
df["MONTH"] = df.DATETIME.map(lambda _: _.month)
df["WEEK"] = df.DATETIME.map(lambda _: _.weekday())
df["HOUR"] = df.DATETIME.map(lambda _: _.hour)
df = df.dropna()
cols = ["MONTH","WEEK","HOUR"]
for col in cols:
df = df.join(pd.get_dummies(df[col], prefix=col))
x_cols = ["TEMP_Fukui"] + df.columns.tolist()[14:]
X = df[x_cols]
y = df["MW"]
from sklearn import model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=.2, random_state=42)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from chainer import Chain, optimizers, Variable
import chainer.functions as F
import chainer.links as L
class MyChain(Chain):
def __init__(self, n_units=10): super(MyChain, self).__init__( l1=L.Linear(len(x_cols), n_units), l2=L.Linear(n_units, n_units), l3=L.Linear(n_units, 1)) def __call__(self, x_data, y_data): x = Variable(x_data.astype(np.float32).reshape(len(x_data),len(x_cols))) y = Variable(y_data.values.astype(np.float32).reshape(len(y_data),1)) #print(x) pred = self.predict(x) #print(pred) return F.mean_squared_error(pred, y) def predict(self, x): h1 = F.relu(self.l1(x)) h2 = F.relu(self.l2(h1)) h3 = self.l3(h2) return h3 def get_predata(self, x): return self.predict(Variable(x.astype(np.float32).reshape(len(x),1))).data
batchsize = 16
n_epoch = 200
n_units = 10
model = MyChain(n_units)
optimizer = optimizers.Adam()
optimizer.setup(model)
train_losses =[]
test_losses =[]
N = len(X_train)
print(N)
print(X_train.shape)
print(y_train.shape)
for epoch in range(1, n_epoch + 1):
perm = np.random.permutation(N) sum_loss = 0 for i in range(0, N, batchsize): x_batch = X_train[perm[i:i + batchsize]] y_batch = y_train[perm[i:i + batchsize]] →エラー model.zerograds() loss = model(x_batch,y_batch) sum_loss += loss.data * batchsize loss.backward() optimizer.update() average_loss = sum_loss / N train_losses.append(average_loss) loss = model(X_test,y_test) test_losses.append(loss.data) if epoch % 10 == 0: print("epoch: {}/{} train loss: {} test loss: {}".format(epoch, n_epoch, average_loss, loss.data))
y_pred = model.predict(X_test.astype(np.float32))
pd.DataFrame(np.c_[y_test, y_pred.array], columns=["act","pred"])[:100].reset_index(drop=True).plot(figsize=(15,4))
plt.show()
python
試したこと
print(X_train.shape),print(y_train.shape)から
それぞれ(5280, 37),(5280,)となっていることが分かったのですが、
ここからどうしたらいいのかが分からなくなりました。
補足情報(FW/ツールのバージョンなど)
python 3.10
VScode
あなたの回答
tips
プレビュー