質問編集履歴

1

コード訂正

2019/09/04 08:51

投稿

farinelli
farinelli

スコア61

test CHANGED
File without changes
test CHANGED
@@ -48,11 +48,11 @@
48
48
 
49
49
  if self.on_status(status) is False:
50
50
 
51
- File "honban.py", line 82, in on_status
51
+ File "honban.py", line 70, in on_status
52
52
 
53
- text = list[set(texts)]
53
+ for i in corpus :
54
54
 
55
- TypeError: 'type' object is not subscriptable
55
+ UnboundLocalError: local variable 'corpus' referenced before assignment
56
56
 
57
57
  ```
58
58
 
@@ -64,7 +64,15 @@
64
64
 
65
65
  #上記省略
66
66
 
67
+ def on_status(self, status):
68
+
69
+ text = str(status.text)
70
+
71
+
72
+
73
+ #日本語ツイートをファイルに書き込んでいる + ツイートの数を表示
74
+
67
- if status.lang == "ja":
75
+ if status.lang == "ja":
68
76
 
69
77
  with open("testd.txt", "a", encoding="utf-8") as f:
70
78
 
@@ -74,35 +82,57 @@
74
82
 
75
83
  num_lines = sum(1 for line in open("testd.txt"))#総文書数
76
84
 
77
- texts=[]
85
+ corpus = f.read().split("\n")
78
86
 
79
- for i in corpus :
87
+ if len(text) != 0:
80
88
 
81
- texts.append(re.sub(r"http\S+", "", i))
89
+ self.count += 1
82
90
 
83
- texts.append(re.sub(r"@(\w+) ", "", i))
84
91
 
85
- texts.append(re.sub(r"(^RT.*)", "", i, flags=re.MULTILINE | re.DOTALL))
86
92
 
87
- emoji_pattern = re.compile("["
93
+ mecab = MeCab('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
88
94
 
89
- u"\U0001F600-\U0001F64F"
90
95
 
91
- u"\U0001F300-\U0001F5FF"
92
96
 
93
- u"\U0001F680-\U0001F6FF"
97
+ stop_words = []
94
98
 
95
- u"\U0001F1E0-\U0001F1FF"
99
+ path = 'stop_words.txt'
96
100
 
97
- "]+", flags=re.UNICODE)
101
+ with open(path) as g:
98
102
 
99
- texts.append(emoji_pattern.sub("", i))
103
+ stop_words = g.readlines()
100
104
 
101
- texts.append(i)
102
105
 
103
- text = list[set(texts)]
104
106
 
107
+ texts=[]
108
+
109
+ for i in corpus:
110
+
111
+ texts.append(re.sub(r"http\S+", "", i))
112
+
113
+ texts.append(re.sub(r"@(\w+) ", "", i))
114
+
115
+ texts.append(re.sub(r"(^RT.*)", "", i, flags=re.MULTILINE | re.DOTALL))
116
+
117
+ emoji_pattern = re.compile("["
118
+
119
+ u"\U0001F600-\U0001F64F"
120
+
121
+ u"\U0001F300-\U0001F5FF"
122
+
123
+ u"\U0001F680-\U0001F6FF"
124
+
125
+ u"\U0001F1E0-\U0001F1FF"
126
+
127
+ "]+", flags=re.UNICODE)
128
+
129
+ texts.append(emoji_pattern.sub("", i))
130
+
131
+ texts.append(i)
132
+
133
+ text = list(set(texts))
134
+
105
- print(test)
135
+ print(text)
106
136
 
107
137
  ```
108
138