word2vecを用いて共起ネットワークの作成を試みています
以下のようなエラーが出てしまいました。
[] Traceback (most recent call last): File "plot_2.py", line 40, in <module> pca.fit(df.iloc[:,:-1]) File "C:\Users\tnaka\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py", line 351, in fit self._fit(X) File "C:\Users\tnaka\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py", line 397, in _fit X = self._validate_data(X, dtype=[np.float64, np.float32], File "C:\Users\tnaka\anaconda3\lib\site-packages\sklearn\base.py", line 420, in _validate_data X = check_array(X, **check_params) File "C:\Users\tnaka\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 72, in inner_f return f(**kwargs) File "C:\Users\tnaka\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 533, in check_array dtype_orig = np.result_type(*dtypes_orig) File "<__array_function__ internals>", line 5, in result_type ValueError: at least one array or dtype is required
実装しようと思っているコードはこのようなものです。↓
# -*- coding: utf-8 -*- """ Created on Thu Jul 30 00:13:40 2020 @author: taiga """ # import gensim # from gensim.models.word2vec import Word2Vec # model = Word2Vec.load("word2vec.gensim.model") from gensim.test.utils import common_texts from gensim.models import Word2Vec model = Word2Vec(sentences=common_texts, vector_size=100, window=5, min_count=1, workers=4) model.save("word2vec.model") model = Word2Vec.load("word2vec.model") with open("mecab_sanshirou_2.txt",encoding="utf-8_sig") as f: words = f.read().splitlines() re_words = set(words) re_re=[] for w in re_words: if w in model.wv.key_to_index: re_re.append(w) print(re_re) wv=[] for w in re_re: wv.append(model[w]) import pandas as pd from sklearn.decomposition import PCA import matplotlib.pyplot as plt from matplotlib.font_manager import FontProperties df = pd.DataFrame(wv) df["word"] = re_re #PCAで2次元に圧縮 pca = PCA(n_components=2) pca.fit(df.iloc[:,:-1]) feature = pca.transform(df.iloc[:,:-1]) #散布図プロット fp = FontProperties(fname=r"NotoSansCJKjp-Black.otf") plt.figure(figsize=(10,10)) for x, y, name in zip(feature[:, 0], feature[:, 1], df.iloc[:, -1]): plt.text(x, y, name,fontproperties=fp) plt.scatter(feature[:,0],feature[:,1]) plt.show()
よろしくお願い致します。
あなたの回答
tips
プレビュー