python　エラー（utf-8）

cls_names = []
total_images = 0
for gov in govs:
    
    file_list = os.listdir(base_path + gov + '/Annotations/')

    for file in file_list:

        total_images = total_images + 1
        if file =='.DS_Store':
            pass
        else:
            infile_xml = open(base_path + gov + '/Annotations/' +file)
            tree = ElementTree.parse(infile_xml)
            root = tree.getroot()
            for obj in root.iter('object'):
                cls_name = obj.find('name').text
                cls_names.append(cls_name)
print("total")
print("# of images：" + str(total_images))
print("# of labels：" + str(len(cls_names)))

---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-10-54a6927ce3e8> in <module>()
     12         else:
     13             infile_xml = open(base_path + gov + '/Annotations/' +file)
---> 14             tree = ElementTree.parse(infile_xml)
     15             root = tree.getroot()
     16             for obj in root.iter('object'):

/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(source, parser)
   1194     """
   1195     tree = ElementTree()
-> 1196     tree.parse(source, parser)
   1197     return tree
   1198 

/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(self, source, parser)
    595                     # It can be used to parse the whole source without feeding
    596                     # it with chunks.
--> 597                     self._root = parser._parse_whole(source)
    598                     return self._root
    599             while True:

/anaconda3/envs/TensorFlow2/lib/python3.6/codecs.py in decode(self, input, final)
    319         # decode input (taking the buffer into account)
    320         data = self.buffer + input
--> 321         (result, consumed) = self._buffer_decode(data, self.errors, final)
    322         # keep undecoded input until the next call
    323         self.buffer = data[consumed:]

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 45: invalid start byte

python初心者です。詳しく教えて頂けると幸いです
GitHubから引用し。指示通り、jupyternotebookで動かしているのですが
エラーが出てしまい、進むことができません
どこかコードが間違えていると思われるのですが、検討もつかず困っています。
データセットから画像などを読み込んでいると思うのですが、'utf-8' と言うものが引っかかっており、エラーが出ていると思います。　コードの追加、削除するところがあれば教えて下さい。

・環境としてはmacOS　High Sierra バージョン10.13.5です
・環境構築に関しては、ネットからanacondaをインストールしました。
今回githubで参考にしているのですがその途中でエラーが出てしまいました。
参考URLはこちらになります
https://github.com/sekilab/RoadDamageDetector/blob/master/RoadDamageDatasetTutorial.ipynb

一応、下記の文字コードを教えて見たのですがダメでした。

encoding = "utf-8"
infile_xml= open(base_path + gov + '/Annotations/' +file)
root = ElementTree.parse(infile_xml)

追加するものがあればするので
よろしくお願いします

import six.moves.urllib as urllib

>>> import os
>>> os.getcwd()

'/Users/yokookentarou'

try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')

if not os.path.exists('./RoadDamageDataset.tar.gz'):
    url_base = 'https://s3-ap-northeast-1.amazonaws.com/mycityreport/RoadDamageDataset.tar.gz'
    urllib.request.urlretrieve(url_base, './RoadDamageDataset.tar.gz')
    
    print("Download RoadDamageDataset.tar.gz Done")
    
else:
    print("You have RoadDamageDataset.tar.gz")

You have RoadDamageDataset.tar.gz

if not os.path.exists('./trainedModels.tar.gz'):
    url_base = 'https://s3-ap-northeast-1.amazonaws.com/mycityreport/trainedModels.tar.gz'
    urllib.request.urlretrieve(url_base, './trainedModels.tar.gz')
    
    print("Download trainedModels.tar.gz Done")
    
else:
    print("You have trainedModels.tar.gz")

You have trainedModels.tar.gz

!tar -zxf ./RoadDamageDataset.tar.gz
!tar -zxf ./trainedModels.tar.gz

RoadDamageDataset/Muroran/JPEGImages/Muroran_20170907133426.jpg: (Empty error message)
tar: Error exit delayed from previous errors.

base_path = os.getcwd() + '/RoadDamageDataset/'

damageTypes=["D00", "D01", "D10", "D11", "D20", "D40", "D43", "D44"]

govs = ["Adachi", "Chiba", "Ichihara", "Muroran", "Nagakute", "Numazu", "Sumida"]

cls_names = []
total_images = 0
for gov in govs:
    
    file_list = os.listdir(base_path + gov + '/Annotations/')

    for file in file_list:

        total_images = total_images + 1
        if file =='.DS_Store':
            pass
        else:
            infile_xml = open(base_path + gov + '/Annotations/' +file)
            tree = ElementTree.parse(infile_xml)
            root = tree.getroot()
            for obj in root.iter('object'):
                cls_name = obj.find('name').text
                cls_names.append(cls_name)
print("total")
print("# of images：" + str(total_images))
print("# of labels：" + str(len(cls_names)))

UnicodeDecodeError Traceback (most recent call last)
<ipython-input-10-54a6927ce3e8> in <module>()
12 else:
13 infile_xml = open(base_path + gov + '/Annotations/' +file)
---> 14 tree = ElementTree.parse(infile_xml)
15 root = tree.getroot()
16 for obj in root.iter('object'):

/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(source, parser)
1194 """
1195 tree = ElementTree()
-> 1196 tree.parse(source, parser)
1197 return tree
1198

/anaconda3/envs/TensorFlow2/lib/python3.6/xml/etree/ElementTree.py in parse(self, source, parser)
595 # It can be used to parse the whole source without feeding
596 # it with chunks.
--> 597 self._root = parser._parse_whole(source)
598 return self._root
599 while True:

/anaconda3/envs/TensorFlow2/lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 45: invalid start byte

crimnut

2018/07/27 12:23

コードがすべて掲載していただけると回答しやすいです。あと、エラーは14行目のtree = ElementTree.parse(infile_xml)で発生しているので、そのあたりで問題がないか調べてみてください。また、最後にencoding='utf-8'としていますが、それはただ変数に'utf-8'という文字列を渡しているだけなので文字コードを指定できているわけではありません。