質問編集履歴
1
コード訂正
test
CHANGED
File without changes
|
test
CHANGED
@@ -48,11 +48,11 @@
|
|
48
48
|
|
49
49
|
if self.on_status(status) is False:
|
50
50
|
|
51
|
-
File "honban.py", line
|
51
|
+
File "honban.py", line 70, in on_status
|
52
52
|
|
53
|
-
|
53
|
+
for i in corpus :
|
54
54
|
|
55
|
-
|
55
|
+
UnboundLocalError: local variable 'corpus' referenced before assignment
|
56
56
|
|
57
57
|
```
|
58
58
|
|
@@ -64,7 +64,15 @@
|
|
64
64
|
|
65
65
|
#上記省略
|
66
66
|
|
67
|
+
def on_status(self, status):
|
68
|
+
|
69
|
+
text = str(status.text)
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
#日本語ツイートをファイルに書き込んでいる + ツイートの数を表示
|
74
|
+
|
67
|
-
if status.lang == "ja":
|
75
|
+
if status.lang == "ja":
|
68
76
|
|
69
77
|
with open("testd.txt", "a", encoding="utf-8") as f:
|
70
78
|
|
@@ -74,35 +82,57 @@
|
|
74
82
|
|
75
83
|
num_lines = sum(1 for line in open("testd.txt"))#総文書数
|
76
84
|
|
77
|
-
|
85
|
+
corpus = f.read().split("\n")
|
78
86
|
|
79
|
-
f
|
87
|
+
if len(text) != 0:
|
80
88
|
|
81
|
-
|
89
|
+
self.count += 1
|
82
90
|
|
83
|
-
texts.append(re.sub(r"@(\w+) ", "", i))
|
84
91
|
|
85
|
-
texts.append(re.sub(r"(^RT.*)", "", i, flags=re.MULTILINE | re.DOTALL))
|
86
92
|
|
87
|
-
|
93
|
+
mecab = MeCab('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
|
88
94
|
|
89
|
-
u"\U0001F600-\U0001F64F"
|
90
95
|
|
91
|
-
u"\U0001F300-\U0001F5FF"
|
92
96
|
|
93
|
-
|
97
|
+
stop_words = []
|
94
98
|
|
95
|
-
|
99
|
+
path = 'stop_words.txt'
|
96
100
|
|
97
|
-
|
101
|
+
with open(path) as g:
|
98
102
|
|
99
|
-
|
103
|
+
stop_words = g.readlines()
|
100
104
|
|
101
|
-
texts.append(i)
|
102
105
|
|
103
|
-
text = list[set(texts)]
|
104
106
|
|
107
|
+
texts=[]
|
108
|
+
|
109
|
+
for i in corpus:
|
110
|
+
|
111
|
+
texts.append(re.sub(r"http\S+", "", i))
|
112
|
+
|
113
|
+
texts.append(re.sub(r"@(\w+) ", "", i))
|
114
|
+
|
115
|
+
texts.append(re.sub(r"(^RT.*)", "", i, flags=re.MULTILINE | re.DOTALL))
|
116
|
+
|
117
|
+
emoji_pattern = re.compile("["
|
118
|
+
|
119
|
+
u"\U0001F600-\U0001F64F"
|
120
|
+
|
121
|
+
u"\U0001F300-\U0001F5FF"
|
122
|
+
|
123
|
+
u"\U0001F680-\U0001F6FF"
|
124
|
+
|
125
|
+
u"\U0001F1E0-\U0001F1FF"
|
126
|
+
|
127
|
+
"]+", flags=re.UNICODE)
|
128
|
+
|
129
|
+
texts.append(emoji_pattern.sub("", i))
|
130
|
+
|
131
|
+
texts.append(i)
|
132
|
+
|
133
|
+
text = list(set(texts))
|
134
|
+
|
105
|
-
print(te
|
135
|
+
print(text)
|
106
136
|
|
107
137
|
```
|
108
138
|
|