質問編集履歴
9
誤記修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -92,7 +92,7 @@
|
|
92
92
|
|
93
93
|
```
|
94
94
|
|
95
|
-
>
|
95
|
+
>test5.py
|
96
96
|
|
97
97
|
(0, 1) 1
|
98
98
|
|
8
質問に情報を追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -34,47 +34,7 @@
|
|
34
34
|
|
35
35
|
(0, 1) 22
|
36
36
|
|
37
|
-
|
37
|
+
省略
|
38
|
-
|
39
|
-
(0, 4) 25
|
40
|
-
|
41
|
-
(0, 7) 1
|
42
|
-
|
43
|
-
(0, 9) 5
|
44
|
-
|
45
|
-
(0, 10) 10
|
46
|
-
|
47
|
-
(0, 11) 110
|
48
|
-
|
49
|
-
(0, 12) 2
|
50
|
-
|
51
|
-
(0, 13) 6
|
52
|
-
|
53
|
-
(0, 14) 40
|
54
|
-
|
55
|
-
(0, 15) 33
|
56
|
-
|
57
|
-
(0, 16) 11
|
58
|
-
|
59
|
-
(0, 17) 8
|
60
|
-
|
61
|
-
(0, 20) 28
|
62
|
-
|
63
|
-
(0, 22) 1
|
64
|
-
|
65
|
-
(0, 23) 1
|
66
|
-
|
67
|
-
(0, 24) 10
|
68
|
-
|
69
|
-
(0, 25) 13
|
70
|
-
|
71
|
-
(0, 26) 5
|
72
|
-
|
73
|
-
(0, 27) 14
|
74
|
-
|
75
|
-
(0, 29) 40
|
76
|
-
|
77
|
-
(0, 30) 7
|
78
38
|
|
79
39
|
(0, 31) 6
|
80
40
|
|
@@ -86,48 +46,86 @@
|
|
86
46
|
|
87
47
|
(0, 28749) 3
|
88
48
|
|
89
|
-
|
49
|
+
省略
|
90
|
-
|
91
|
-
(0, 28751) 1
|
92
|
-
|
93
|
-
(0, 28753) 5
|
94
|
-
|
95
|
-
(0, 28757) 1
|
96
|
-
|
97
|
-
(0, 28759) 10
|
98
|
-
|
99
|
-
(0, 28760) 1
|
100
|
-
|
101
|
-
(0, 28764) 2
|
102
|
-
|
103
|
-
(0, 28766) 6
|
104
|
-
|
105
|
-
(0, 28767) 93
|
106
|
-
|
107
|
-
(0, 28768) 1
|
108
|
-
|
109
|
-
(0, 28770) 12
|
110
|
-
|
111
|
-
(0, 28771) 25
|
112
|
-
|
113
|
-
(0, 28774) 2
|
114
|
-
|
115
|
-
(0, 28775) 7
|
116
|
-
|
117
|
-
(0, 28779) 58
|
118
|
-
|
119
|
-
(0, 28780) 21
|
120
|
-
|
121
|
-
(0, 28782) 2
|
122
|
-
|
123
|
-
(0, 28784) 13
|
124
|
-
|
125
|
-
(0, 28785) 7
|
126
|
-
|
127
|
-
(0, 28787) 1
|
128
|
-
|
129
|
-
(0, 28790) 2
|
130
50
|
|
131
51
|
(0, 28791) 1
|
132
52
|
|
133
53
|
(0, 28793) 26"
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
実際のdataは、test5.pyのサンプルのように単語がブランクで区切られた文字列の配列です。
|
58
|
+
|
59
|
+
countの型とサイズは以下の通りです。
|
60
|
+
|
61
|
+
print(type(count)): <class 'scipy.sparse.csr.csr_matrix'>
|
62
|
+
|
63
|
+
print(count.shape):(1, 28799)
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
<test5.py>
|
68
|
+
|
69
|
+
```python
|
70
|
+
|
71
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
72
|
+
|
73
|
+
import csv
|
74
|
+
|
75
|
+
data = ['Twinkle twinkle little star' 'How I wonder what you are' 'Up above the world so high' 'Like a diamond in the sky' 'Twinkle twinkle little star' 'How I wonder what you are']
|
76
|
+
|
77
|
+
csvc_vec = CountVectorizer()
|
78
|
+
|
79
|
+
count = csvc_vec.fit_transform(data)
|
80
|
+
|
81
|
+
with open('count.csv', 'wt') as f:
|
82
|
+
|
83
|
+
writer = csv.writer(f)
|
84
|
+
|
85
|
+
writer.writerows(count)
|
86
|
+
|
87
|
+
print(count)
|
88
|
+
|
89
|
+
print(type(count))
|
90
|
+
|
91
|
+
print(count.shape)
|
92
|
+
|
93
|
+
```
|
94
|
+
|
95
|
+
>115.test5.py
|
96
|
+
|
97
|
+
(0, 1) 1
|
98
|
+
|
99
|
+
(0, 7) 1
|
100
|
+
|
101
|
+
(0, 5) 1
|
102
|
+
|
103
|
+
(0, 3) 1
|
104
|
+
|
105
|
+
(0, 4) 1
|
106
|
+
|
107
|
+
(0, 8) 1
|
108
|
+
|
109
|
+
(0, 14) 1
|
110
|
+
|
111
|
+
(0, 10) 2
|
112
|
+
|
113
|
+
(0, 0) 1
|
114
|
+
|
115
|
+
(0, 2) 1
|
116
|
+
|
117
|
+
(0, 15) 2
|
118
|
+
|
119
|
+
(0, 12) 2
|
120
|
+
|
121
|
+
(0, 13) 2
|
122
|
+
|
123
|
+
(0, 9) 2
|
124
|
+
|
125
|
+
(0, 6) 2
|
126
|
+
|
127
|
+
(0, 11) 3
|
128
|
+
|
129
|
+
<class 'scipy.sparse.csr.csr_matrix'>
|
130
|
+
|
131
|
+
(1, 16)
|
7
出力の説明を補足
test
CHANGED
File without changes
|
test
CHANGED
@@ -28,7 +28,7 @@
|
|
28
28
|
|
29
29
|
```
|
30
30
|
|
31
|
-
以下、count.csvファイルを開いた
|
31
|
+
以下、count.csvファイルをエディタ等で開いた表示
|
32
32
|
|
33
33
|
" (0, 0) 29
|
34
34
|
|
6
出力結果の説明補足
test
CHANGED
File without changes
|
test
CHANGED
@@ -28,7 +28,7 @@
|
|
28
28
|
|
29
29
|
```
|
30
30
|
|
31
|
-
|
31
|
+
以下、count.csvファイルを開いた内容
|
32
32
|
|
33
33
|
" (0, 0) 29
|
34
34
|
|
5
サンプルコード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -10,13 +10,19 @@
|
|
10
10
|
|
11
11
|
```python
|
12
12
|
|
13
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
14
|
+
|
15
|
+
import csv
|
16
|
+
|
17
|
+
|
18
|
+
|
13
|
-
|
19
|
+
csvc_vec = CountVectorizer()
|
14
20
|
|
15
21
|
count = c_vec.fit_transform(data)
|
16
22
|
|
17
23
|
with open('count.csv', 'wt') as f:
|
18
24
|
|
19
|
-
writer = csv.writer(f)
|
25
|
+
writer = csv.writer(f)
|
20
26
|
|
21
27
|
writer.writerows(count)
|
22
28
|
|
4
コードのインデントを修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
with open('count.csv', 'wt') as f:
|
18
18
|
|
19
|
-
writer = csv.writer(f)
|
19
|
+
writer = csv.writer(f)
|
20
20
|
|
21
21
|
writer.writerows(count)
|
22
22
|
|
3
コードにMarkdown記法を適用
test
CHANGED
File without changes
|
test
CHANGED
@@ -8,17 +8,19 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
|
11
|
+
```python
|
12
|
+
|
11
13
|
import csvc_vec = CountVectorizer()
|
12
14
|
|
13
15
|
count = c_vec.fit_transform(data)
|
14
16
|
|
15
17
|
with open('count.csv', 'wt') as f:
|
16
18
|
|
17
|
-
|
19
|
+
writer = csv.writer(f)
|
18
20
|
|
19
21
|
writer.writerows(count)
|
20
22
|
|
21
|
-
|
23
|
+
```
|
22
24
|
|
23
25
|
|
24
26
|
|
2
サンプルコードを修正
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|
1
サンプルコードの入力ミスを修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -10,13 +10,13 @@
|
|
10
10
|
|
11
11
|
import csvc_vec = CountVectorizer()
|
12
12
|
|
13
|
-
t
|
13
|
+
count = c_vec.fit_transform(data)
|
14
14
|
|
15
|
-
with open('
|
15
|
+
with open('count.csv', 'wt') as f:
|
16
16
|
|
17
17
|
writer = csv.writer(f)
|
18
18
|
|
19
|
-
writer.writerows(
|
19
|
+
writer.writerows(count)
|
20
20
|
|
21
21
|
|
22
22
|
|