質問編集履歴
1
追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -15,3 +15,69 @@
|
|
15
15
|
特定の文字列の文字コードを判定するにはどのような方法があるでしょうか。
|
16
16
|
|
17
17
|
詳しい方、宜しくお願いします。
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
以下、該当コードの主要部分です。
|
22
|
+
|
23
|
+
```python
|
24
|
+
|
25
|
+
f = open('test.csv','a',encoding='utf-8-sig')
|
26
|
+
|
27
|
+
csvWriter = csv.writer(f)
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
#TwitterIDからプロフィール文を取得、形態素解析
|
32
|
+
|
33
|
+
for num in range(2):
|
34
|
+
|
35
|
+
if num == 1:
|
36
|
+
|
37
|
+
sheet = sheet_2
|
38
|
+
|
39
|
+
for row in range(sheet.nrows):
|
40
|
+
|
41
|
+
val = sheet.cell(row,col).value
|
42
|
+
|
43
|
+
i = i+1
|
44
|
+
|
45
|
+
print (val)
|
46
|
+
|
47
|
+
params = {
|
48
|
+
|
49
|
+
"screen_name": val
|
50
|
+
|
51
|
+
}
|
52
|
+
|
53
|
+
responce = oath.get(url,params = params)
|
54
|
+
|
55
|
+
tweet = json.loads(responce.text)
|
56
|
+
|
57
|
+
user_description = tweet['description']
|
58
|
+
|
59
|
+
print("users_desc:",user_description)
|
60
|
+
|
61
|
+
tokens = l.tokenize(user_description)
|
62
|
+
|
63
|
+
#形態素解析及び名詞抽出
|
64
|
+
|
65
|
+
for token in tokens:
|
66
|
+
|
67
|
+
partOfSpeech = token.part_of_speech.split(',')[0]
|
68
|
+
|
69
|
+
if partOfSpeech == u'名詞':
|
70
|
+
|
71
|
+
each_words.append(token.surface)
|
72
|
+
|
73
|
+
words.append(each_words)
|
74
|
+
|
75
|
+
print(each_words)
|
76
|
+
|
77
|
+
csvWriter.writerow([num,each_words])
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
each_words = []
|
82
|
+
|
83
|
+
```
|