実現したいこと
GoogleColabの読み込みエラーの解消をしたい。
以前は正常に動作していたデータですが
アップデートなのか何らかの仕様変更?で影響が出ているので
原因を知りたいです。
また、もっと良い書き方が出来ればと思ってますが
ご指摘いただけないでしょうか。
以下の辺りでエラーが生じています。
filepath = xlsx_files[0]
def extract(filepath):
_df = pd.read_excel(filepath,sheet_name='円', header=None)
_df2 = pd.read_excel(filepath,sheet_name='ドル', header=None)
--------------------------**
発生している問題・エラーメッセージ
以下のようなエラーメッセージが出ています。
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-4-6f46388507d4> in <module> 28 29 for filepath in xlsx_files: ---> 30 _dfs = extract(filepath) 31 dfs = pd.concat([dfs, _dfs]) 32 9 frames <ipython-input-4-6f46388507d4> in extract(filepath) 15 16 def extract(filepath): ---> 17 _df = pd.read_excel(filepath,sheet_name='円', header=None) 18 _df2 = pd.read_excel(filepath,sheet_name='ドル', header=None) 19 _df2 = _df2[1:] /usr/local/lib/python3.8/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 309 stacklevel=stacklevel, 310 ) --> 311 return func(*args, **kwargs) 312 313 return wrapper /usr/local/lib/python3.8/dist-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options) 362 if not isinstance(io, ExcelFile): 363 should_close = True --> 364 io = ExcelFile(io, storage_options=storage_options, engine=engine) 365 elif engine and engine != io.engine: 366 raise ValueError( /usr/local/lib/python3.8/dist-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine, storage_options) 1231 self.storage_options = storage_options 1232 -> 1233 self._reader = self._engines[engine](self._io, storage_options=storage_options) 1234 1235 def __fspath__(self): /usr/local/lib/python3.8/dist-packages/pandas/io/excel/_openpyxl.py in __init__(self, filepath_or_buffer, storage_options) 520 """ 521 import_optional_dependency("openpyxl") --> 522 super().__init__(filepath_or_buffer, storage_options=storage_options) 523 524 @property /usr/local/lib/python3.8/dist-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer, storage_options) 418 self.handles.handle.seek(0) 419 try: --> 420 self.book = self.load_workbook(self.handles.handle) 421 except Exception: 422 self.close() /usr/local/lib/python3.8/dist-packages/pandas/io/excel/_openpyxl.py in load_workbook(self, filepath_or_buffer) 531 from openpyxl import load_workbook 532 --> 533 return load_workbook( 534 filepath_or_buffer, read_only=True, data_only=True, keep_links=False 535 ) /usr/local/lib/python3.8/dist-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links, rich_text) 344 reader = ExcelReader(filename, read_only, keep_vba, 345 data_only, keep_links, rich_text) --> 346 reader.read() 347 return reader.wb /usr/local/lib/python3.8/dist-packages/openpyxl/reader/excel.py in read(self) 301 self.read_worksheets() 302 action = "assign names" --> 303 self.parser.assign_names() 304 if not self.read_only: 305 self.archive.close() /usr/local/lib/python3.8/dist-packages/openpyxl/reader/workbook.py in assign_names(self) 107 reserved = defn.is_reserved 108 if reserved is None: --> 109 sheet.defined_names[name] = defn 110 111 elif reserved == "Print_Titles": AttributeError: 'ReadOnlyWorksheet' object has no attribute 'defined_names'
該当のソースコード
Python
1import openpyxl 2import pandas as pd 3import os 4from glob import glob 5from natsort import natsorted 6%load_ext google.colab.data_table 7from pathlib import Path 8%cd '/content' 9 10xlsx_dir = Path("./") 11xlsx_files = sorted(xlsx_dir.glob("*.xlsx")) #or xlsx 12 13filepath = xlsx_files[0] 14 15def extract(filepath): 16 _df = pd.read_excel(filepath,sheet_name='円', header=None) 17 _df2 = pd.read_excel(filepath,sheet_name='ドル', header=None) 18 _df2 = _df2[1:] 19 _dfs = pd.concat([_df,_df2], ignore_index=True) 20 columns = _dfs.iloc[0,[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52]] 21 dfs = _dfs.iloc[1:120, [3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52]] 22 dfs.columns = columns 23 #dfs.shift(-1) 24 return dfs 25 26dfs = pd.DataFrame() 27 28for filepath in xlsx_files: 29 _dfs = extract(filepath) 30 dfs = pd.concat([dfs, _dfs]) 31 32dfs.dropna(subset=['伝票日付'], how='any', inplace=True) 33dfs.replace('-', float('NaN'), inplace=True) 34 35_dir = "/content/output" 36 37if not os.path.exists(_dir): 38 # ディレクトリが存在しない場合、ディレクトリを作成する 39 os.mkdir(_dir) 40 41#df.to_csv("/content/output/ledgertable_output.csv", sep=",") 42dfs.to_excel('/content/output/revenuesheet_2021.xlsx', index=False )
試したこと
openpyxl の同バージョンの再インストールをしましたが現象変わりませんでした。
アドバイスいただけると大変助かります。
よろしくお願いいたします。
回答1件
あなたの回答
tips
プレビュー