cls_names = [] total_images = 0 for gov in govs: file_list = os.listdir(base_path + gov + '/Annotations/') for file in file_list: total_images = total_images + 1 if file =='.DS_Store': pass else: infile_xml = open(base_path + gov + '/Annotations/' +file) tree = ElementTree.parse(infile_xml) root = tree.getroot() for obj in root.iter('object'): cls_name = obj.find('name').text cls_names.append(cls_name) print("total") print("# of images:" + str(total_images)) print("# of labels:" + str(len(cls_names)))
--------------------------------------------------------------------------- UnicodeDecodeError Traceback (most recent call last) <ipython-input-10-54a6927ce3e8> in <module>() 12 else: 13 infile_xml = open(base_path + gov + '/Annotations/' +file) ---> 14 tree = ElementTree.parse(infile_xml) 15 root = tree.getroot() 16 for obj in root.iter('object'): /anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(source, parser) 1194 """ 1195 tree = ElementTree() -> 1196 tree.parse(source, parser) 1197 return tree 1198 /anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(self, source, parser) 595 # It can be used to parse the whole source without feeding 596 # it with chunks. --> 597 self._root = parser._parse_whole(source) 598 return self._root 599 while True: /anaconda3/envs/TensorFlow2/lib/python3.6/codecs.py in decode(self, input, final) 319 # decode input (taking the buffer into account) 320 data = self.buffer + input --> 321 (result, consumed) = self._buffer_decode(data, self.errors, final) 322 # keep undecoded input until the next call 323 self.buffer = data[consumed:] UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 45: invalid start byte
python初心者です。詳しく教えて頂けると幸いです
GitHubから引用し。指示通り、jupyternotebookで動かしているのですが
エラーが出てしまい、進むことができません
どこかコードが間違えていると思われるのですが、検討もつかず困っています。
データセットから画像などを読み込んでいると思うのですが、'utf-8' と言うものが引っかかっており、エラーが出ていると思います。 コードの追加、削除するところがあれば教えて下さい。
・環境としてはmacOS High Sierra バージョン10.13.5です
・環境構築に関しては、ネットからanacondaをインストールしました。
今回githubで参考にしているのですがその途中でエラーが出てしまいました。
参考URLはこちらになります
https://github.com/sekilab/RoadDamageDetector/blob/master/RoadDamageDatasetTutorial.ipynb
一応、下記の文字コードを教えて見たのですがダメでした。
encoding = "utf-8" infile_xml= open(base_path + gov + '/Annotations/' +file) root = ElementTree.parse(infile_xml)
追加するものがあればするので
よろしくお願いします
import six.moves.urllib as urllib
>>> import os >>> os.getcwd()
'/Users/yokookentarou'
try: import urllib.request except ImportError: raise ImportError('You should use Python 3.x') if not os.path.exists('./RoadDamageDataset.tar.gz'): url_base = 'https://s3-ap-northeast-1.amazonaws.com/mycityreport/RoadDamageDataset.tar.gz' urllib.request.urlretrieve(url_base, './RoadDamageDataset.tar.gz') print("Download RoadDamageDataset.tar.gz Done") else: print("You have RoadDamageDataset.tar.gz")
You have RoadDamageDataset.tar.gz
if not os.path.exists('./trainedModels.tar.gz'): url_base = 'https://s3-ap-northeast-1.amazonaws.com/mycityreport/trainedModels.tar.gz' urllib.request.urlretrieve(url_base, './trainedModels.tar.gz') print("Download trainedModels.tar.gz Done") else: print("You have trainedModels.tar.gz")
You have trainedModels.tar.gz
!tar -zxf ./RoadDamageDataset.tar.gz !tar -zxf ./trainedModels.tar.gz
RoadDamageDataset/Muroran/JPEGImages/Muroran_20170907133426.jpg: (Empty error message)
tar: Error exit delayed from previous errors.
base_path = os.getcwd() + '/RoadDamageDataset/' damageTypes=["D00", "D01", "D10", "D11", "D20", "D40", "D43", "D44"] govs = ["Adachi", "Chiba", "Ichihara", "Muroran", "Nagakute", "Numazu", "Sumida"]
cls_names = [] total_images = 0 for gov in govs: file_list = os.listdir(base_path + gov + '/Annotations/') for file in file_list: total_images = total_images + 1 if file =='.DS_Store': pass else: infile_xml = open(base_path + gov + '/Annotations/' +file) tree = ElementTree.parse(infile_xml) root = tree.getroot() for obj in root.iter('object'): cls_name = obj.find('name').text cls_names.append(cls_name) print("total") print("# of images:" + str(total_images)) print("# of labels:" + str(len(cls_names)))
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-10-54a6927ce3e8> in <module>()
12 else:
13 infile_xml = open(base_path + gov + '/Annotations/' +file)
---> 14 tree = ElementTree.parse(infile_xml)
15 root = tree.getroot()
16 for obj in root.iter('object'):
/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(source, parser)
1194 """
1195 tree = ElementTree()
-> 1196 tree.parse(source, parser)
1197 return tree
1198
/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(self, source, parser)
595 # It can be used to parse the whole source without feeding
596 # it with chunks.
--> 597 self._root = parser._parse_whole(source)
598 return self._root
599 while True:
/anaconda3/envs/TensorFlow2/lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 45: invalid start byte
回答2件
あなたの回答
tips
プレビュー