機械学習で6577 rows × 3507 columnsのデータセットを扱っており、特徴量が3506(残り一つは目的変数です)と多いので、相関の高い二つの特徴量のうち目的変数との相関の低いほうを除きたいのですが、以下のエラーが解消できません。ちなみに以下のコードのdataが6577 rows × 3507 columnsのデータセットで、データフレームの一番左のコラムが目的変数(0か1の二値)です。どうぞよろしくお願いいたします。
python
1 2def correlation(dataset, threshold): 3 col_corr = set() # Set of all the names of deleted columns 4 corr_matrix = dataset.corr() 5 for i in range(len(corr_matrix.columns)): 6 for j in range(i): 7 if corr_matrix.iloc[i, j] >= threshold: 8 x=np.array(dataset.iloc[:, i], data['Target']) 9 y=np.array(dataset.iloc[:, j], data['Target']) 10 if np.corrcoef(x, rowvar=False) > np.corrcoef(y, rowvar=False): 11 12 colname = corr_matrix.columns[j] # getting the name of column 13 col_corr.add(colname) 14 if colname in dataset.columns: 15 del dataset[colname] 16 else: 17 colname = corr_matrix.columns[i] 18 col_corr.add(colname) 19 if colname in dataset.columns: 20 del dataset[colname] # deleting the column from the dataset 21 22 23data_corr=correlation(data.iloc[:, 1:], 0.7) 24 25 26 27IndexError Traceback (most recent call last) 28<ipython-input-31-a7760ef4bd26> in <module>() 29----> 1 data_corr=correlation(data.iloc[:, 1:], 0.7) 30 31<ipython-input-30-66af95ffe0ab> in correlation(dataset, threshold) 32 5 for j in range(i): 33 6 if corr_matrix.iloc[i, j] >= threshold: 34----> 7 x=np.array(dataset.iloc[:, i], data['Target']) 35 8 y=np.array(dataset.iloc[:, j], data['Target']) 36 9 if np.corrcoef(x, rowvar=False) > np.corrcoef(y, rowvar=False): 37 38~\Miniconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key) 39 1365 except (KeyError, IndexError): 40 1366 pass 41-> 1367 return self._getitem_tuple(key) 42 1368 else: 43 1369 # we by definition only have the 0th axis 44 45~\Miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup) 46 1735 def _getitem_tuple(self, tup): 47 1736 48-> 1737 self._has_valid_tuple(tup) 49 1738 try: 50 1739 return self._getitem_lowerdim(tup) 51 52~\Miniconda3\lib\site-packages\pandas\core\indexing.py in _has_valid_tuple(self, key) 53 202 if i >= self.obj.ndim: 54 203 raise IndexingError('Too many indexers') 55--> 204 if not self._has_valid_type(k, i): 56 205 raise ValueError("Location based indexing can only have " 57 206 "[{types}] types" 58 59~\Miniconda3\lib\site-packages\pandas\core\indexing.py in _has_valid_type(self, key, axis) 60 1670 return True 61 1671 elif is_integer(key): 62-> 1672 return self._is_valid_integer(key, axis) 63 1673 elif is_list_like_indexer(key): 64 1674 return self._is_valid_list_like(key, axis) 65 66~\Miniconda3\lib\site-packages\pandas\core\indexing.py in _is_valid_integer(self, key, axis) 67 1711 l = len(ax) 68 1712 if key >= l or key < -l: 69-> 1713 raise IndexError("single positional indexer is out-of-bounds") 70 1714 return True 71 1715 72 73IndexError: single positional indexer is out-of-bounds 74
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2020/07/05 08:52
2020/07/05 08:54