質問編集履歴
7
すみませんでした。インデントを修正しました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -14,67 +14,63 @@
|
|
14
14
|
|
15
15
|
#コード
|
16
16
|
|
17
|
-
|
17
|
+
```python
import csv
|
18
18
|
|
19
|
-
$import csv
|
20
|
-
|
21
|
-
|
19
|
+
from janome.tokenizer import Tokenizer
|
22
20
|
|
23
21
|
|
24
22
|
|
25
|
-
|
23
|
+
documents = [] # 形態素用の配列を用意
|
26
24
|
|
27
|
-
|
25
|
+
t = Tokenizer()
|
28
26
|
|
29
|
-
|
27
|
+
y = [] # クラスラベル用の配列を用意
|
30
28
|
|
31
|
-
|
29
|
+
with open('./test.csv') as f:
|
32
30
|
|
33
|
-
|
31
|
+
reader = csv.reader(f)
|
34
32
|
|
35
|
-
|
33
|
+
next(reader)
|
36
34
|
|
37
|
-
|
35
|
+
for columns in reader:
|
38
36
|
|
39
|
-
|
37
|
+
y.append(columns[1]) # 仕事分類をクラスラベルとしてまとめる
|
40
38
|
|
41
|
-
|
39
|
+
document = [] # 1行分の仮の配列を用意
|
42
40
|
|
43
|
-
|
41
|
+
for token in t.tokenize(columns[0]):
|
42
|
+
|
43
|
+
document.append(token.surface) # 仮の配列に形態素を追加
|
44
|
+
|
45
|
+
documents.append(' '.join(document))
|
44
46
|
|
45
47
|
|
46
48
|
|
47
|
-
|
49
|
+
import numpy as np
|
48
50
|
|
49
|
-
|
51
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
50
52
|
|
51
53
|
|
52
54
|
|
53
|
-
|
55
|
+
CountVect = CountVectorizer(min_df=1)
|
54
56
|
|
55
|
-
|
57
|
+
X = CountVect.fit_transform(documents)
|
56
58
|
|
57
59
|
|
58
60
|
|
59
|
-
|
61
|
+
from sklearn.externals import joblib
|
60
62
|
|
61
|
-
|
63
|
+
clf2 = joblib.load('clf.pkl')
|
62
64
|
|
65
|
+
clf2.predict(X)
|
63
66
|
|
67
|
+
print(clf2.score(X, y))
|
64
68
|
|
65
|
-
$from sklearn.externals import joblib
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
$clf2 = joblib.load('clf.pkl')
|
70
|
-
|
71
|
-
|
69
|
+
```
|
72
|
-
|
73
|
-
$print(clf2.score(X, y))
|
74
|
-
|
75
|
-
|
76
70
|
|
77
71
|
#エラー内容
|
72
|
+
|
73
|
+
```
|
78
74
|
|
79
75
|
ValueError Traceback (most recent call last)
|
80
76
|
|
@@ -149,3 +145,5 @@
|
|
149
145
|
|
150
146
|
|
151
147
|
ValueError: dimension mismatch
|
148
|
+
|
149
|
+
```
|
6
test
CHANGED
File without changes
|
test
CHANGED
@@ -44,7 +44,7 @@
|
|
44
44
|
|
45
45
|
|
46
46
|
|
47
|
-
$
|
47
|
+
$ document.append(token.surface) # 仮の配列に形態素を追加
|
48
48
|
|
49
49
|
$ documents.append(' '.join(document))
|
50
50
|
|
5
test
CHANGED
File without changes
|
test
CHANGED
@@ -14,59 +14,63 @@
|
|
14
14
|
|
15
15
|
#コード
|
16
16
|
|
17
|
-
|
17
|
+
※インデントが入らないため、$を入れています
|
18
18
|
|
19
|
+
$import csv
|
20
|
+
|
19
|
-
from janome.tokenizer import Tokenizer
|
21
|
+
$from janome.tokenizer import Tokenizer
|
20
22
|
|
21
23
|
|
22
24
|
|
23
|
-
documents = [] # 形態素用の配列を用意
|
25
|
+
$documents = [] # 形態素用の配列を用意
|
24
26
|
|
25
|
-
t = Tokenizer()
|
27
|
+
$t = Tokenizer()
|
26
28
|
|
27
|
-
y = [] # クラスラベル用の配列を用意
|
29
|
+
$y = [] # クラスラベル用の配列を用意
|
28
30
|
|
29
|
-
with open('./test.csv') as f:
|
31
|
+
$with open('./test.csv') as f:
|
30
32
|
|
31
|
-
reader = csv.reader(f)
|
33
|
+
$ reader = csv.reader(f)
|
32
34
|
|
33
|
-
next(reader)
|
35
|
+
$ next(reader)
|
34
36
|
|
35
|
-
for columns in reader:
|
37
|
+
$ for columns in reader:
|
36
38
|
|
37
|
-
y.append(columns[1]) # 仕事分類をクラスラベルとしてまとめる
|
39
|
+
$ y.append(columns[1]) # 仕事分類をクラスラベルとしてまとめる
|
38
40
|
|
39
|
-
document = [] # 1行分の仮の配列を用意
|
41
|
+
$ document = [] # 1行分の仮の配列を用意
|
40
42
|
|
41
|
-
for token in t.tokenize(columns[0]):
|
43
|
+
$ for token in t.tokenize(columns[0]):
|
42
|
-
|
43
|
-
document.append(token.surface) # 仮の配列に形態素を追加
|
44
|
-
|
45
|
-
documents.append(' '.join(document))
|
46
44
|
|
47
45
|
|
48
46
|
|
49
|
-
|
47
|
+
$ document.append(token.surface) # 仮の配列に形態素を追加
|
50
48
|
|
51
|
-
|
49
|
+
$ documents.append(' '.join(document))
|
52
50
|
|
53
51
|
|
54
52
|
|
55
|
-
|
53
|
+
$import numpy as np
|
56
54
|
|
57
|
-
|
55
|
+
$from sklearn.feature_extraction.text $import CountVectorizer
|
58
56
|
|
59
57
|
|
60
58
|
|
59
|
+
$CountVect = CountVectorizer(min_df=1)
|
60
|
+
|
61
|
-
|
61
|
+
$X = CountVect.fit_transform(documents)
|
62
62
|
|
63
63
|
|
64
64
|
|
65
|
-
|
65
|
+
$from sklearn.externals import joblib
|
66
66
|
|
67
|
-
clf2.predict(X)
|
68
67
|
|
68
|
+
|
69
|
+
$clf2 = joblib.load('clf.pkl')
|
70
|
+
|
71
|
+
$clf2.predict(X)
|
72
|
+
|
69
|
-
print(clf2.score(X, y))
|
73
|
+
$print(clf2.score(X, y))
|
70
74
|
|
71
75
|
|
72
76
|
|
4
test
CHANGED
File without changes
|
test
CHANGED
@@ -28,7 +28,7 @@
|
|
28
28
|
|
29
29
|
with open('./test.csv') as f:
|
30
30
|
|
31
|
-
reader = csv.reader(f)
|
31
|
+
reader = csv.reader(f)
|
32
32
|
|
33
33
|
next(reader)
|
34
34
|
|
3
test
CHANGED
File without changes
|
test
CHANGED
@@ -28,7 +28,7 @@
|
|
28
28
|
|
29
29
|
with open('./test.csv') as f:
|
30
30
|
|
31
|
-
|
31
|
+
reader = csv.reader(f)
|
32
32
|
|
33
33
|
next(reader)
|
34
34
|
|
2
test
CHANGED
File without changes
|
test
CHANGED
@@ -28,7 +28,7 @@
|
|
28
28
|
|
29
29
|
with open('./test.csv') as f:
|
30
30
|
|
31
|
-
reader = csv.reader(f)
|
31
|
+
<reader = csv.reader(f)
|
32
32
|
|
33
33
|
next(reader)
|
34
34
|
|
1
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|