質問編集履歴
1
コード訂正
title
CHANGED
File without changes
|
body
CHANGED
@@ -23,34 +23,49 @@
|
|
23
23
|
if self.listener.on_data(data) is False:
|
24
24
|
File "/Users/macuser/Workspaces/jxpress/trendword/.direnv/python-3.7.3/lib/python3.7/site-packages/tweepy/streaming.py", line 54, in on_data
|
25
25
|
if self.on_status(status) is False:
|
26
|
-
File "honban.py", line
|
26
|
+
File "honban.py", line 70, in on_status
|
27
|
-
|
27
|
+
for i in corpus :
|
28
|
-
|
28
|
+
UnboundLocalError: local variable 'corpus' referenced before assignment
|
29
29
|
```
|
30
30
|
|
31
31
|
### コード
|
32
32
|
```renshu.py
|
33
33
|
#上記省略
|
34
|
+
def on_status(self, status):
|
35
|
+
text = str(status.text)
|
36
|
+
|
37
|
+
#日本語ツイートをファイルに書き込んでいる + ツイートの数を表示
|
34
|
-
|
38
|
+
if status.lang == "ja":
|
35
39
|
with open("testd.txt", "a", encoding="utf-8") as f:
|
36
40
|
f.write(text)
|
37
41
|
with codecs.open("testd.txt", "r", "utf-8") as f:
|
38
42
|
num_lines = sum(1 for line in open("testd.txt"))#総文書数
|
43
|
+
corpus = f.read().split("\n")
|
44
|
+
if len(text) != 0:
|
45
|
+
self.count += 1
|
46
|
+
|
47
|
+
mecab = MeCab('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
|
48
|
+
|
49
|
+
stop_words = []
|
50
|
+
path = 'stop_words.txt'
|
51
|
+
with open(path) as g:
|
52
|
+
stop_words = g.readlines()
|
53
|
+
|
39
|
-
|
54
|
+
texts=[]
|
40
|
-
|
55
|
+
for i in corpus:
|
41
|
-
|
56
|
+
texts.append(re.sub(r"http\S+", "", i))
|
42
|
-
|
57
|
+
texts.append(re.sub(r"@(\w+) ", "", i))
|
43
|
-
|
58
|
+
texts.append(re.sub(r"(^RT.*)", "", i, flags=re.MULTILINE | re.DOTALL))
|
44
|
-
|
59
|
+
emoji_pattern = re.compile("["
|
45
|
-
|
60
|
+
u"\U0001F600-\U0001F64F"
|
46
|
-
|
61
|
+
u"\U0001F300-\U0001F5FF"
|
47
|
-
|
62
|
+
u"\U0001F680-\U0001F6FF"
|
48
|
-
|
63
|
+
u"\U0001F1E0-\U0001F1FF"
|
49
|
-
|
64
|
+
"]+", flags=re.UNICODE)
|
50
|
-
|
65
|
+
texts.append(emoji_pattern.sub("", i))
|
51
|
-
|
66
|
+
texts.append(i)
|
52
|
-
text = list
|
67
|
+
text = list(set(texts))
|
53
|
-
print(
|
68
|
+
print(text)
|
54
69
|
```
|
55
70
|
### 補足情報(FW/ツールのバージョンなど)
|
56
71
|
|