質問編集履歴
1
該当のソースコードを追加しました。
title
CHANGED
File without changes
|
body
CHANGED
@@ -77,7 +77,54 @@
|
|
77
77
|
### 該当のソースコード
|
78
78
|
|
79
79
|
```
|
80
|
+
import pandas as pd
|
81
|
+
from sklearn.linear_model import LinearRegression
|
82
|
+
# Load the raw data set and preview its first two rows.
df = pd.read_csv('Bank.csv')
df.head(2)

# Categorical columns that are replaced by 0/1 indicator columns.
get_dummies = [
    'job', 'marital', 'education', 'default',
    'housing', 'loan', 'contact', 'month',
]
# drop_first=True omits the first level of each column.
GET_DUMMIES = pd.get_dummies(df[get_dummies], drop_first=True, dtype=int)

# Append the indicator columns, then remove the original categorical columns.
df2 = pd.concat([df, GET_DUMMIES], axis=1).drop(columns=get_dummies)
df2.head(2)
|
89
|
+
from sklearn.model_selection import train_test_split
|
90
|
+
# Hold out 20% of the rows as the test set (fixed seed for repeatability).
train_val, test = train_test_split(df2, test_size=0.2, random_state=0)

# Count missing values per column.
train_val.isnull().sum()

# Fill missing values with the per-column means of the numeric columns.
train_val_mean = train_val.mean(numeric_only=True)
train_val2 = train_val.fillna(train_val_mean)

# Scatter-plot every column against the target column 'y'.
colname = train_val2.columns
for name in colname:
    train_val2.plot(kind='scatter', x=name, y='y')

# Select rows with an extreme feature value whose target is below 0.2.
low_y = train_val2['y'] < 0.2
outline1 = train_val2[(train_val2['duration'] > 1400) & low_y]
outline2 = train_val2[(train_val2['amount'] > 100000) & low_y]
outline3 = train_val2[(train_val2['previous'] > 250) & low_y]
print(outline1, outline2, outline3)

# Drop the rows labelled 3140 and 16312.
# NOTE(review): presumably these are the outliers printed above - confirm.
train_val3 = train_val2.drop(index=[3140, 16312])
|
102
|
+
from sklearn.preprocessing import StandardScaler
|
103
|
+
def learn(x, t):
    """Fit a linear regression on standardized data and score it.

    The data is split 80/20 into training and validation parts with a
    fixed seed. Features and target are each standardized with a scaler
    fitted on the training part only, and the model is then fitted on the
    standardized training data.

    Args:
        x: Feature table (2-D, e.g. a DataFrame of the selected columns).
        t: Target table (2-D, e.g. ``df[['y']]``); StandardScaler requires
            2-D input.

    Returns:
        A ``(train_score, val_score)`` tuple of R^2 scores, as returned by
        ``LinearRegression.score``, on the standardized training and
        validation data respectively.
    """
    # train_test_split returns the splits grouped per input array:
    # (x_train, x_val, t_train, t_val). The previous unpacking order
    # (x_train, y_train, x_val, y_val) mixed features and targets.
    x_train, x_val, y_train, y_val = train_test_split(
        x, t, test_size=0.2, random_state=0
    )

    # Fit the scalers on the training part only so that no statistics of
    # the validation data leak into the model.
    sc_model_x = StandardScaler()
    sc_model_y = StandardScaler()
    sc_model_x.fit(x_train)
    sc_model_y.fit(y_train)
    sc_x_train = sc_model_x.transform(x_train)
    sc_y_train = sc_model_y.transform(y_train)

    model = LinearRegression()
    model.fit(sc_x_train, sc_y_train)

    # Apply the training-set scaling to the validation part.
    sc_x_val = sc_model_x.transform(x_val)
    sc_y_val = sc_model_y.transform(y_val)

    train_score = model.score(sc_x_train, sc_y_train)
    # Score validation features against the validation target (this was
    # previously compared with the training target by mistake).
    val_score = model.score(sc_x_val, sc_y_val)
    return train_score, val_score
|
119
|
+
# Correlation of every column with the target column 'y'.
train_cor = train_val3.corr()['y']
train_cor

# Rank the columns by the absolute value of that correlation.
abs_cor = train_cor.abs()
abs_cor.sort_values(ascending=False)

# Features used for the model; the target is kept as a one-column table.
x = train_val3[['duration', 'housing_yes', 'campaign', 'contact_sending _document']]
t = train_val3[['y']]

# Train the model and print the training and validation scores.
s1, s2 = learn(x, t)
print(s1, s2)
|
81
128
|
```
|
82
129
|
|
83
130
|
### 試したこと・調べたこと
|