質問編集履歴

1

test

2018/05/01 21:30

投稿

fujiji
fujiji

スコア6

test CHANGED
@@ -1 +1 @@
1
- python3でのunicodedecodeerror
1
+ unicodedecodeerror
test CHANGED
@@ -5,125 +5,3 @@
5
5
 
6
6
 
7
7
  ###発生している問題・エラーメッセージ
8
-
9
-
10
-
11
- ```
12
-
13
- Traceback (most recent call last):
14
-
15
- File "d2v.py", line 46, in <module>
16
-
17
- lines = f.readlines()
18
-
19
- File "/Users/admin/.pyenv/versions/3.5.0/lib/python3.5/codecs.py", line 321, in decode
20
-
21
- (result, consumed) = self._buffer_decode(data, self.errors, final)
22
-
23
- UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte
24
-
25
- ```
26
-
27
-
28
-
29
- ###python
30
-
31
- ```python
32
-
33
- import os
34
-
35
- import sys
36
-
37
- import MeCab
38
-
39
- import collections
40
-
41
- from gensim import models
42
-
43
- from gensim.models.doc2vec import LabeledSentence
44
-
45
- from gensim.models.doc2vec import TaggedDocument
46
-
47
- import codecs
48
-
49
- import MeCab
50
-
51
-
52
-
53
- from gensim.models import doc2vec
54
-
55
-
56
-
57
- from gensim.models.doc2vec import Doc2Vec
58
-
59
-
60
-
61
-
62
-
63
-
64
-
65
- # 現在のディレクトリを取得
66
-
67
- root_dir = os.path.abspath(os.path.dirname(__file__))
68
-
69
-
70
-
71
-
72
-
73
- # ディレクトリ以下のテキストファイルを取得
74
-
75
- def getFileList(path):
76
-
77
- file_list = []
78
-
79
- for (root, dirs, files) in os.walk(path):
80
-
81
- for file in files:
82
-
83
- # パスセパレータは\より/の方が好きなので置換
84
-
85
- file_list.append( os.path.join(root,file).replace("\\", "/") )
86
-
87
-
88
-
89
- return file_list
90
-
91
-
92
-
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
-
101
-
102
-
103
- # DS_Storeを省く
104
-
105
- all_files = getFileList(root_dir)[2:]
106
-
107
-
108
-
109
-
110
-
111
- print(all_files)
112
-
113
-
114
-
115
-
116
-
117
- all_sentences = []
118
-
119
- for file in all_files:
120
-
121
- f = open(file, encoding='utf-8')
122
-
123
- lines = f.readlines()
124
-
125
- for line in lines:
126
-
127
- all_sentences.append(line.replace('\n',''))
128
-
129
- ```