teratail header banner
teratail header banner
質問する | ログイン | 新規登録

質問編集履歴

6

`clf = svm.SVC()`、`clf.fit(train_data, label_train)`、`pre = clf.predict()` の3行を削除

2017/11/28 09:07

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -7,16 +7,17 @@
7
7
 
8
8
  ・容量が大きくて実行できない。
9
9
 
10
- TypeError Traceback (most recent call last)
10
+ NameError Traceback (most recent call last)
11
- <ipython-input-20-71497c22a695> in <module>()
11
+ <ipython-input-24-b95cedfc1745> in <module>()
12
- 4 clf = svm.SVC()
13
- 5 clf.fit(train_data, label_train)
12
+ 28 print('k={0}: {1}'.format(k, ac_score))
13
+ 29
14
- ----> 6 pre = clf.predict()
14
+ ---> 30 accuracy_scores.append(score)
15
- 7
15
+ 31
16
- 8 # 使う近傍数ごとに正解率&各経過時間を計算
16
+ 32 # 各経過時間を表示
17
17
 
18
- TypeError: predict() missing 1 required positional argument: 'X'
18
+ NameError: name 'score' is not defined
19
19
 
20
+ ・Digitsデータではそのようなエラーはなかった
20
21
 
21
22
 
22
23
  ###Digitsデータで最適なkを探すknnのプログラム
@@ -78,12 +79,10 @@
78
79
  train_size = 500
79
80
  test_size = 100
80
81
  train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
82
+
81
83
  K = 10
82
84
  ks = range(1, K + 1)
83
85
 
84
- clf = svm.SVC()
85
- clf.fit(train_data, label_train)
86
- pre = clf.predict()
87
86
 
88
87
  # 使う近傍数ごとに正解率&各経過時間を計算
89
88
  accuracy_scores = []
@@ -103,7 +102,7 @@
103
102
  model.fit(train_data, label_train)
104
103
 
105
104
  # 一つだけ取り除いたテストデータを識別
106
- predicted_label = model.predict(features[test])
105
+ predicted_label = model.predict(train_data[test])
107
106
  predicted_labels.append(predicted_label)
108
107
 
109
108
  # 正解率を計算

5

修正しました。

2017/11/28 09:07

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -7,7 +7,18 @@
7
7
 
8
8
  ・容量が大きくて実行できない。
9
9
 
10
+ ・TypeError Traceback (most recent call last)
11
+ <ipython-input-20-71497c22a695> in <module>()
12
+ 4 clf = svm.SVC()
13
+ 5 clf.fit(train_data, label_train)
14
+ ----> 6 pre = clf.predict()
15
+ 7
16
+ 8 # 使う近傍数ごとに正解率&各経過時間を計算
10
17
 
18
+ TypeError: predict() missing 1 required positional argument: 'X'
19
+
20
+
21
+
11
22
  ###Digitsデータで最適なkを探すknnのプログラム
12
23
  ```python
13
24
  from matplotlib import pyplot as plt
@@ -66,14 +77,13 @@
66
77
 
67
78
  train_size = 500
68
79
  test_size = 100
69
- train_data, test_data_, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
80
+ train_data, test_data, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
70
-
71
81
  K = 10
72
82
  ks = range(1, K + 1)
73
83
 
74
84
  clf = svm.SVC()
75
- clf.fit(train_data, target_data)
85
+ clf.fit(train_data, label_train)
76
- pre = clf.predict(data_test)
86
+ pre = clf.predict()
77
87
 
78
88
  # 使う近傍数ごとに正解率&各経過時間を計算
79
89
  accuracy_scores = []
@@ -81,16 +91,17 @@
81
91
  for k in ks:
82
92
  predicted_labels = []
83
93
  loo = LeaveOneOut()
94
+
84
95
  for train, test in loo.split(mnist.data):
85
96
  train_data = mnist.data[train]
86
- target_data = mnist.target[train]
97
+ label_train = mnist.target[train]
87
98
 
88
99
  elapsed_time = time.time() - start
89
100
 
90
101
  # モデルを学習させる
91
102
  model = KNeighborsClassifier(n_neighbors=k)
92
- model.fit(train_data, target_data)
103
+ model.fit(train_data, label_train)
93
-
104
+
94
105
  # 一つだけ取り除いたテストデータを識別
95
106
  predicted_label = model.predict(features[test])
96
107
  predicted_labels.append(predicted_label)

4

書式の改善

2017/11/28 08:50

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -61,18 +61,18 @@
61
61
  if __name__ == '__main__':
62
62
  main()
63
63
  ```
64
- ###mnistデータで最適なkを探すknnのプログラム現段階
64
+ ###mnistデータで最適なkを探すknnのプログラム 改善版
65
65
  ```python
66
66
 
67
67
  train_size = 500
68
68
  test_size = 100
69
- data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
69
+ train_data, test_data_, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
70
70
 
71
71
  K = 10
72
72
  ks = range(1, K + 1)
73
73
 
74
74
  clf = svm.SVC()
75
- clf.fit(data_train, label_train)
75
+ clf.fit(train_data, target_data)
76
76
  pre = clf.predict(data_test)
77
77
 
78
78
  # 使う近傍数ごとに正解率&各経過時間を計算
@@ -82,8 +82,8 @@
82
82
  predicted_labels = []
83
83
  loo = LeaveOneOut()
84
84
  for train, test in loo.split(mnist.data):
85
- data_train = mnist.data[train]
85
+ train_data = mnist.data[train]
86
- data_test = mnist.target[train]
86
+ target_data = mnist.target[train]
87
87
 
88
88
  elapsed_time = time.time() - start
89
89
 

3

誤字

2017/11/28 08:26

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -60,9 +60,10 @@
60
60
 
61
61
  if __name__ == '__main__':
62
62
  main()
63
-
63
+ ```
64
64
  ###mnistデータで最適なkを探すknnのプログラム現段階
65
65
  ```python
66
+
66
67
  train_size = 500
67
68
  test_size = 100
68
69
  data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)

2

書式の改善

2017/11/28 07:42

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -6,39 +6,10 @@
6
6
  ###発生している問題・エラーメッセージ
7
7
 
8
8
  ・容量が大きくて実行できない。
9
- ・ValueError Traceback (most recent call last)
10
- <ipython-input-10-c9ec06272b57> in <module>()
11
- 3
12
- 4 clf = svm.SVC()
13
- ----> 5 clf.fit(data_train, label_train)
14
- 6 pre = clf.predict(data_test)
15
- 7
16
9
 
17
- ~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
18
- 147 self._sparse = sparse and not callable(self.kernel)
19
- 148
20
- --> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
21
- 150 y = self._validate_targets(y)
22
- 151
23
- ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
24
- 581 y = y.astype(np.float64)
25
- 582
26
- --> 583 check_consistent_length(X, y)
27
- 584
28
- 585 return X, y
29
10
 
30
- ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
31
- 202 if len(uniques) > 1:
32
- 203 raise ValueError("Found input variables with inconsistent numbers of"
33
- --> 204 " samples: %r" % [int(l) for l in lengths])
34
- 205
35
- 206
36
-
37
- ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
38
-
39
-
40
11
  ###Digitsデータで最適なkを探すknnのプログラム
41
-
12
+ ```python
42
13
  from matplotlib import pyplot as plt
43
14
  from sklearn import datasets
44
15
  from sklearn.model_selection import LeaveOneOut

1

現段階とエラー

2017/11/28 07:39

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -5,9 +5,38 @@
5
5
 
6
6
  ###発生している問題・エラーメッセージ
7
7
 
8
- 容量が大きくて実行できない。
8
+ 容量が大きくて実行できない。
9
+ ・ValueError Traceback (most recent call last)
10
+ <ipython-input-10-c9ec06272b57> in <module>()
11
+ 3
12
+ 4 clf = svm.SVC()
13
+ ----> 5 clf.fit(data_train, label_train)
14
+ 6 pre = clf.predict(data_test)
15
+ 7
9
16
 
17
+ ~\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
18
+ 147 self._sparse = sparse and not callable(self.kernel)
19
+ 148
20
+ --> 149 X, y = check_X_y(X, y, dtype=np.float64, order='C', accept_sparse='csr')
21
+ 150 y = self._validate_targets(y)
22
+ 151
23
+ ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
24
+ 581 y = y.astype(np.float64)
25
+ 582
26
+ --> 583 check_consistent_length(X, y)
27
+ 584
28
+ 585 return X, y
10
29
 
30
+ ~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
31
+ 202 if len(uniques) > 1:
32
+ 203 raise ValueError("Found input variables with inconsistent numbers of"
33
+ --> 204 " samples: %r" % [int(l) for l in lengths])
34
+ 205
35
+ 206
36
+
37
+ ValueError: Found input variables with inconsistent numbers of samples: [69999, 500]
38
+
39
+
11
40
  ###Digitsデータで最適なkを探すknnのプログラム
12
41
 
13
42
  from matplotlib import pyplot as plt
@@ -61,7 +90,58 @@
61
90
  if __name__ == '__main__':
62
91
  main()
63
92
 
93
+ ###mnistデータで最適なkを探すknnのプログラム現段階
94
+ ```python
95
+ train_size = 500
96
+ test_size = 100
97
+ data_train, data_test, label_train, label_test = model_selection.train_test_split(mnist_data, mnist_label, test_size=test_size, train_size=train_size)
64
98
 
99
+ K = 10
100
+ ks = range(1, K + 1)
101
+
102
+ clf = svm.SVC()
103
+ clf.fit(data_train, label_train)
104
+ pre = clf.predict(data_test)
105
+
106
+ # 使う近傍数ごとに正解率&各経過時間を計算
107
+ accuracy_scores = []
108
+ start = time.time()
109
+ for k in ks:
110
+ predicted_labels = []
111
+ loo = LeaveOneOut()
112
+ for train, test in loo.split(mnist.data):
113
+ data_train = mnist.data[train]
114
+ data_test = mnist.target[train]
115
+
116
+ elapsed_time = time.time() - start
117
+
118
+ # モデルを学習させる
119
+ model = KNeighborsClassifier(n_neighbors=k)
120
+ model.fit(train_data, target_data)
121
+
122
+ # 一つだけ取り除いたテストデータを識別
123
+ predicted_label = model.predict(features[test])
124
+ predicted_labels.append(predicted_label)
125
+
126
+ # 正解率を計算
127
+ ac_score = metrics.accuracy_score(label_test, pre)
128
+ print('k={0}: {1}'.format(k, ac_score))
129
+
130
+ accuracy_scores.append(score)
131
+
132
+ # 各経過時間を表示
133
+ print("経過時間:{0}".format(elapsed_time))
134
+
135
+ # 使う近傍数ごとの正解率を折れ線グラフ
136
+ X = list(ks)
137
+ plt.plot(X, ac_score)
138
+
139
+ plt.xlabel('k')
140
+ plt.ylabel('accuracy rate')
141
+ plt.show()
142
+ ```
143
+
144
+
65
145
  ###試したこと
66
146
  mnistのデータ
67
147
  mnist = datasets.fetch_mldata('MNIST original', data_home='data/src/download/')