質問編集履歴
3
タイトルを変えました。attribuから属性
test
CHANGED
@@ -1 +1 @@
|
|
1
|
-
BeautifulSoupで同じ
|
1
|
+
BeautifulSoupで同じ属性を探す方法
|
test
CHANGED
File without changes
|
2
メモを詳しく書きました
test
CHANGED
File without changes
|
test
CHANGED
@@ -242,6 +242,8 @@
|
|
242
242
|
|
243
243
|
|
244
244
|
|
245
|
+
#id=list-titleの属性を探してテーブルの数だけループする
|
246
|
+
|
245
247
|
for i in range(num_tables):
|
246
248
|
|
247
249
|
df = soup.find('span', id='list-title')[i].get_text()
|
@@ -256,7 +258,7 @@
|
|
256
258
|
|
257
259
|
|
258
260
|
|
259
|
-
|
261
|
+
#列を書き換え
|
260
262
|
|
261
263
|
df = df[["No","ファンドコード", "設定口数","設定金額","設定概算金額", "状況","ファンド名", "解約口数", "解約金額","解約概算金額","販社名","指定販売会社コード"]]
|
262
264
|
|
@@ -274,13 +276,13 @@
|
|
274
276
|
|
275
277
|
|
276
278
|
|
277
|
-
df.to_excel(writer, index=False, sheet_name='
|
279
|
+
df.to_excel(writer, index=False, sheet_name='Test', startrow=1)
|
278
280
|
|
279
281
|
|
280
282
|
|
281
283
|
workbook = writer.book
|
282
284
|
|
283
|
-
worksheet = writer.sheets['
|
285
|
+
worksheet = writer.sheets['Test']
|
284
286
|
|
285
287
|
|
286
288
|
|
1
コードを全て掲載いたしました。
test
CHANGED
File without changes
|
test
CHANGED
@@ -174,35 +174,139 @@
|
|
174
174
|
|
175
175
|
```Python
|
176
176
|
|
177
|
+
import pandas as pd
|
178
|
+
|
179
|
+
from bs4 import BeautifulSoup
|
180
|
+
|
177
|
-
|
181
|
+
import numpy as np
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
filename = r'\server\user\someone\Desktop\test.html'
|
188
|
+
|
189
|
+
path = open(filename, 'r')
|
190
|
+
|
191
|
+
destination = r"\server\user\someone\Desktop\test_result.xlsx"
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
soup = BeautifulSoup(open(filename), 'html.parser')
|
196
|
+
|
197
|
+
|
178
198
|
|
179
199
|
df = pd.io.html.read_html(filename, encoding='Shift JIS', attrs={'class':'list2'})
|
180
200
|
|
181
|
-
|
182
|
-
|
183
|
-
#テーブルの数を計算
|
184
|
-
|
185
201
|
num_tables = len(df)
|
186
202
|
|
187
203
|
df = pd.concat(df)
|
188
204
|
|
189
205
|
|
190
206
|
|
191
|
-
#l
|
207
|
+
#remove all rows containing 合計
|
208
|
+
|
209
|
+
df = df[df.isin({'合計'}).sum(1) == 0]
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
#奇数行
|
214
|
+
|
215
|
+
df_tmp1 = df.iloc[::2].reset_index(drop=True)
|
216
|
+
|
217
|
+
df_tmp1.columns = df.columns.get_level_values(0)
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
# 偶数行
|
222
|
+
|
223
|
+
df_tmp2 = df.iloc[1::2].reset_index(drop=True)
|
224
|
+
|
225
|
+
df_tmp2.columns = df.columns.get_level_values(1)
|
226
|
+
|
227
|
+
|
228
|
+
|
229
|
+
new = df_tmp1['No状況']
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
#splitting
|
234
|
+
|
235
|
+
df_tmp1['No'] = new[:0]
|
236
|
+
|
237
|
+
#splitting
|
238
|
+
|
239
|
+
df_tmp1['状況'] = new[:0]
|
240
|
+
|
241
|
+
|
192
242
|
|
193
243
|
|
194
244
|
|
195
245
|
for i in range(num_tables):
|
196
246
|
|
197
|
-
df = soup.find('span', id='list-title')
|
247
|
+
df = soup.find('span', id='list-title')[i].get_text()
|
198
|
-
|
248
|
+
|
199
|
-
broker = df
|
249
|
+
broker = df
|
200
|
-
|
250
|
+
|
201
|
-
df = pd.concat([df_tmp1, df_tmp2], axis=1)
|
251
|
+
df = pd.concat([df_tmp1, df_tmp2], axis=1)
|
202
|
-
|
252
|
+
|
203
|
-
df.insert(5, '
|
253
|
+
df.insert(5, '指定販売会社コード', broker)
|
204
|
-
|
254
|
+
|
205
|
-
df.insert(6, '
|
255
|
+
df.insert(6, '販社名', broker)
|
256
|
+
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
df = df[["No","ファンドコード", "設定口数","設定金額","設定概算金額", "状況","ファンド名", "解約口数", "解約金額","解約概算金額","販社名","指定販売会社コード"]]
|
262
|
+
|
263
|
+
|
264
|
+
|
265
|
+
df.to_string(index=False)
|
266
|
+
|
267
|
+
df['No'] = np.arange(1, len(df)+1)
|
268
|
+
|
269
|
+
df['状況'] = '当日有り'
|
270
|
+
|
271
|
+
|
272
|
+
|
273
|
+
writer = pd.ExcelWriter(destination, engine='xlsxwriter')
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
df.to_excel(writer, index=False, sheet_name='BPNet', startrow=1)
|
278
|
+
|
279
|
+
|
280
|
+
|
281
|
+
workbook = writer.book
|
282
|
+
|
283
|
+
worksheet = writer.sheets['BPNet']
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
number_format = workbook.add_format({'num_format': '#,##0', 'bold': False})
|
288
|
+
|
289
|
+
font_format = workbook.add_format({'font_name': 'MS Pゴシック', 'font_size': 10})
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
worksheet.set_column('A:A', 1, number_format)
|
294
|
+
|
295
|
+
worksheet.set_column('B:E', 16, number_format)
|
296
|
+
|
297
|
+
worksheet.set_column('G:G', 30, number_format)
|
298
|
+
|
299
|
+
worksheet.set_column('H:J', 13, number_format)
|
300
|
+
|
301
|
+
worksheet.set_column('K:K', 24, number_format)
|
302
|
+
|
303
|
+
worksheet.set_column('L:L', 21, number_format)
|
304
|
+
|
305
|
+
worksheet.set_column('A:L', None, font_format)
|
306
|
+
|
307
|
+
writer.save()
|
308
|
+
|
309
|
+
|
206
310
|
|
207
311
|
```
|
208
312
|
|