質問編集履歴

3

タイトルを変えました。attribuから属性

2020/02/26 04:17

投稿

SnowBallEffect
SnowBallEffect

スコア28

test CHANGED
@@ -1 +1 @@
1
- BeautifulSoupで同じattribを探す方法
1
+ BeautifulSoupで同じ属性を探す方法
test CHANGED
File without changes

2

メモを詳しく書きました

2020/02/26 04:17

投稿

SnowBallEffect
SnowBallEffect

スコア28

test CHANGED
File without changes
test CHANGED
@@ -242,6 +242,8 @@
242
242
 
243
243
 
244
244
 
245
+ #id=list-titleの属性を探してテーブルの数だけループする
246
+
245
247
  for i in range(num_tables):
246
248
 
247
249
  df = soup.find('span', id='list-title')[i].get_text()
@@ -256,7 +258,7 @@
256
258
 
257
259
 
258
260
 
259
-
261
+ #列を書き換え
260
262
 
261
263
  df = df[["No","ファンドコード", "設定口数","設定金額","設定概算金額", "状況","ファンド名", "解約口数", "解約金額","解約概算金額","販社名","指定販売会社コード"]]
262
264
 
@@ -274,13 +276,13 @@
274
276
 
275
277
 
276
278
 
277
- df.to_excel(writer, index=False, sheet_name='BPNet', startrow=1)
279
+ df.to_excel(writer, index=False, sheet_name='Test', startrow=1)
278
280
 
279
281
 
280
282
 
281
283
  workbook = writer.book
282
284
 
283
- worksheet = writer.sheets['BPNet']
285
+ worksheet = writer.sheets['Test']
284
286
 
285
287
 
286
288
 

1

コードを全て掲載いたしました。

2020/02/26 01:03

投稿

SnowBallEffect
SnowBallEffect

スコア28

test CHANGED
File without changes
test CHANGED
@@ -174,35 +174,139 @@
174
174
 
175
175
  ```Python
176
176
 
177
+ import pandas as pd
178
+
179
+ from bs4 import BeautifulSoup
180
+
177
- #テーブルを読み取る
181
+ import numpy as np
182
+
183
+
184
+
185
+
186
+
187
+ filename = r'\server\user\someone\Desktop\test.html'
188
+
189
+ path = open(filename, 'r')
190
+
191
+ destination = r"\server\user\someone\Desktop\test_result.xlsx"
192
+
193
+
194
+
195
+ soup = BeautifulSoup(open(filename), 'html.parser')
196
+
197
+
178
198
 
179
199
  df = pd.io.html.read_html(filename, encoding='Shift JIS', attrs={'class':'list2'})
180
200
 
181
-
182
-
183
- #テーブルの数を計算
184
-
185
201
  num_tables = len(df)
186
202
 
187
203
  df = pd.concat(df)
188
204
 
189
205
 
190
206
 
191
- #list-titleというelementを探す
207
+ #remove all rows containing 合計
208
+
209
+ df = df[df.isin({'合計'}).sum(1) == 0]
210
+
211
+
212
+
213
+ #奇数行
214
+
215
+ df_tmp1 = df.iloc[::2].reset_index(drop=True)
216
+
217
+ df_tmp1.columns = df.columns.get_level_values(0)
218
+
219
+
220
+
221
+ # 偶数行
222
+
223
+ df_tmp2 = df.iloc[1::2].reset_index(drop=True)
224
+
225
+ df_tmp2.columns = df.columns.get_level_values(1)
226
+
227
+
228
+
229
+ new = df_tmp1['No状況']
230
+
231
+
232
+
233
+ #splitting
234
+
235
+ df_tmp1['No'] = new[:0]
236
+
237
+ #splitting
238
+
239
+ df_tmp1['状況'] = new[:0]
240
+
241
+
192
242
 
193
243
 
194
244
 
195
245
  for i in range(num_tables):
196
246
 
197
- df = soup.find('span', id='list-title').[i]get_text()
247
+ df = soup.find('span', id='list-title')[i].get_text()
198
-
248
+
199
- broker = df
249
+ broker = df
200
-
250
+
201
- df = pd.concat([df_tmp1, df_tmp2], axis=1)
251
+ df = pd.concat([df_tmp1, df_tmp2], axis=1)
202
-
252
+
203
- df.insert(5, 'Column E', broker)
253
+ df.insert(5, '指定販売会社コード', broker)
204
-
254
+
205
- df.insert(6, 'Column F', broker)
255
+ df.insert(6, '販社名', broker)
256
+
257
+
258
+
259
+
260
+
261
+ df = df[["No","ファンドコード", "設定口数","設定金額","設定概算金額", "状況","ファンド名", "解約口数", "解約金額","解約概算金額","販社名","指定販売会社コード"]]
262
+
263
+
264
+
265
+ df.to_string(index=False)
266
+
267
+ df['No'] = np.arange(1, len(df)+1)
268
+
269
+ df['状況'] = '当日有り'
270
+
271
+
272
+
273
+ writer = pd.ExcelWriter(destination, engine='xlsxwriter')
274
+
275
+
276
+
277
+ df.to_excel(writer, index=False, sheet_name='BPNet', startrow=1)
278
+
279
+
280
+
281
+ workbook = writer.book
282
+
283
+ worksheet = writer.sheets['BPNet']
284
+
285
+
286
+
287
+ number_format = workbook.add_format({'num_format': '#,##0', 'bold': False})
288
+
289
+ font_format = workbook.add_format({'font_name': 'MS Pゴシック', 'font_size': 10})
290
+
291
+
292
+
293
+ worksheet.set_column('A:A', 1, number_format)
294
+
295
+ worksheet.set_column('B:E', 16, number_format)
296
+
297
+ worksheet.set_column('G:G', 30, number_format)
298
+
299
+ worksheet.set_column('H:J', 13, number_format)
300
+
301
+ worksheet.set_column('K:K', 24, number_format)
302
+
303
+ worksheet.set_column('L:L', 21, number_format)
304
+
305
+ worksheet.set_column('A:L', None, font_format)
306
+
307
+ writer.save()
308
+
309
+
206
310
 
207
311
  ```
208
312