Pythonのjupyter notebookを利用して、重回帰分析を行っています。
以下のサイトのコードに従い、予測区間を算出しようとしております。
【参照サイト】https://cartman0.hatenablog.com/entry/2017/01/02/015025
【環境】Windows10 64bit, chrome
【問題点】以下、コードが長いため、jupyter notebookのセルごとにシャープ記号の列で
区切っております。最後のセルでエラーがでますが、原因が分からずにおります。
###########################
scipy
import scipy as sp
import scipy.stats as stats
pandas
import pandas as pd
statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.font_manager as fm
myrmesia_df = pd.read_csv("ant.csv", encoding="utf-8")
myrmesia_df.head(5)
全標本数を確認
total_n = myrmesia_df["ID-SpeciesName"].count()
欠損値"-"の抽出してNanに置き換え
myrmesia_nan = myrmesia_df.copy()
myrmesia_nan[myrmesia_df=="-"] = sp.nan
myrmesia_nan.head(9)
#欠損値を除外
myrmesia_remNan = myrmesia_nan.dropna(axis=0)
myrmesia_remNan.head(9)
数値データをintへ変換
myrmesia_remNan_int = myrmesia_remNan.copy()
myrmesia_remNan_int[["HW", "HL", "SL", "ML", "WL", "PrW", "HFL", "PtW", "PtL", "Ppw", "PpL", "GW"]] = myrmesia_remNan.iloc[:, 2:].astype(int)
myrmesia_remNan_int.dtypes
標本数を確認
n = myrmesia_remNan_int["ID-SpeciesName"].count()
n
myrmesia_remNan_int.describe()
%matplotlib inline
pd.tools.plotting.scatter_matrix(myrmesia_remNan_int, figsize=(7,7), grid=True)
plt.suptitle("Fig. ScatterMatrixPlot of myrmesia measurement", y=0, fontsize=14)
plt.show()
#####################
myrmesia_remNan_int.corr()
lmodel = smf.ols('HW ~ HL + SL + ML + WL + PrW + HFL + PtW + PtL + Ppw + PpL + GW', data=myrmesia_remNan_int.loc[:, "HW":"GW"])
fit = lmodel.fit()
fit.summary()
自由度調整済寄与率
fit.rsquared_adj
AIC
fit.aic
#####################
def AIC_polynomial(n, p, Se):
return n * sp.log(2 * sp.pi * Se / n) + n + 2 * p
#####################
AIC_polynomial(99, 12, fit.ssr)
import itertools
def step(df, model):
'''
Returns:
- Dataframe:
index = pair exog
columns = AIC, F0, adj.R^2
'''
exog = sp.array(model.exog_names)
exog = exog[exog!="Intercept"]
candi_exogs = []
AICs = []
for k in range(1,len(exog)+1):
for candi_exog in itertools.combinations(exog, k):
# Combination of exogs
predictors_df = df[list(candi_exog)].copy()
predictors_df['Intercept'] = 1
res = sm.OLS(model.endog, predictors_df).fit()
candi_exogs.append(str(candi_exog))
AICs.append([res.aic, res.fvalue, res.rsquared_adj])
return pd.DataFrame(AICs, columns=["AIC", "F0", "R2_adj"], index=candi_exogs)
#############################
aic_df = step(myrmesia_remNan_int, lmodel)
aic_df.sort_values("AIC").head()
opt_model = smf.ols(formula="HW ~ HL + ML + WL + PrW + PpL + GW", data=myrmesia_remNan_int)
opt_fit = opt_model.fit()
opt_fit.summary()
##############################
%matplotlib inline
fig, ax = plt.subplots(figsize=(12,8))
fig = sm.graphics.influence_plot(opt_fit, ax=ax)
plt.hlines([-3, 3], 0, 0.5, colors="red")
plt.hlines([-2.5, 2.5], 0, 0.5, colors="yellow")
leverage_warning = 2.5 * (6+1) / n
plt.vlines(leverage_warning, -3.5, 3.5, colors="red")
plt.grid()
plt.xlim(0, 0.25)
plt.ylim(-3.5, 3.5)
plt.show()
##########################
Sigma = sp.asmatrix(myrmesia_remNan_int[["HL", "ML", "WL", "PrW", "PpL", "GW"]].cov().as_matrix())
Sigma
#################
def Mahala2(vec_x, vec_mean, mat):
length = mat.shape[0]
vec = sp.asmatrix((vec_x - vec_mean).reshape(length, 1))
inv = sp.linalg.inv(mat)
mahala2 = vec.T.dot(inv.dot(vec))
return mahala2[0, 0]
######################
%matplotlib inline
plt.figure(figsize=(4.5, 4.5))
import scipy as sp
plt.scatter(x1, y)
hl_l = sp.linspace(20, 70, 100)
hat_y = []
hl以外変数固定
(ml, wl, prw, ppl, gw) = (47, 84, 40, 26, 50)
for hl in hl_l:
X = sp.array([1, hl, ml, wl, prw, ppl, gw])
hat_y.append(X.dot(opt_fit.params))
plt.plot(hl_l, hat_y)
interval
phi_e = n - opt_fit.df_model - 1
t_0025 = stats.t.isf(q=0.05/2, df=phi_e)
マハラノビス距離の二乗
vec_mean = myrmesia_remNan_int[["HL", "ML", "WL", "PrW", "PpL", "GW"]].mean() # 平均ベクトル
print(vec_mean)
print([hl, ml, wl, prw, ppl, gw])
D2 = []
for hl in hl_l:
D2_0 = Mahala2([hl, ml, wl, prw, ppl, gw], vec_mean, Sigma)
D2.append(D2_0)
D2 = sp.array(D2)
interval095 = t_0025 * sp.sqrt((1/n + D2 / (n-1)) * opt_fit.scale)
plt.plot(hl_l, hat_y - interval095, "--g", label="95% interval conf.")
plt.plot(hl_l, hat_y + interval095, "--g")
pred_interval095 = t_0025 * sp.sqrt((1 + 1/n + D2 / (n-1)) * opt_fit.scale)
plt.plot(hl_l, hat_y - pred_interval095, "--r", label="95% prediction conf.")
plt.plot(hl_l, hat_y + pred_interval095, "--r")
##################################################
【出力されるエラー】---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-17-7f6f9f705313> in <module>()
24 D2 = []
25 for hl in hl_l:
---> 26 D2_0 = Mahala2([hl, ml, wl, prw, ppl, gw], vec_mean, Sigma)
27 D2.append(D2_0)
28 D2 = sp.array(D2)
<ipython-input-11-e1909a47c424> in Mahala2(vec_x, vec_mean, mat)
1 def Mahala2(vec_x, vec_mean, mat):
2 length = mat.shape[0]
----> 3 vec = sp.asmatrix((vec_x - vec_mean).reshape(length, 1))
4 inv = sp.linalg.inv(mat)
5 mahala2 = vec.T.dot(inv.dot(vec))
~\Miniconda3\envs\stats\lib\site-packages\pandas\core\generic.py in getattr(self, name)
4370 if self._info_axis._can_hold_identifiers_and_holds_name(name):
4371 return self[name]
-> 4372 return object.getattribute(self, name)
4373
4374 def setattr(self, name, value):
AttributeError: 'Series' object has no attribute 'reshape'
対処方法につき、お分かりになる方がいらっしゃいましたら、ご教示下さい。

回答2件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。