質問編集履歴
1
詳細を記載
test
CHANGED
File without changes
|
test
CHANGED
@@ -18,9 +18,53 @@
|
|
18
18
|
|
19
19
|
```python
|
20
20
|
|
21
|
-
|
21
|
+
import requests
|
22
22
|
|
23
|
+
import pandas as pd
|
24
|
+
|
25
|
+
from google.colab import files
|
26
|
+
|
27
|
+
from bs4 import BeautifulSoup
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
def f1(url, suffix):
|
32
|
+
|
33
|
+
response = requests.get(url)
|
34
|
+
|
35
|
+
response.encoding = response.apparent_encoding
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
item_name_list = [] # 商品名を格納
|
42
|
+
|
43
|
+
pricenum_list = [] # 税抜き価格を格納
|
44
|
+
|
45
|
+
pricetax_list = [] # 税込価格を格納
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
# データフレームを定義
|
50
|
+
|
51
|
+
columns = ["商品名", "税抜き", "税込み"]
|
52
|
+
|
53
|
+
df = pd.DataFrame(columns=columns)
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
bs = BeautifulSoup(response.text, 'html.parser')
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
~~~~~~(ここにHTMLを解析してデータを抽出するスクリプトを記載)~~~~~~
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
# 各データをデータフレームに格納
|
66
|
+
|
23
|
-
se = pd.Series([item_name_list, price
|
67
|
+
se = pd.Series([item_name_list, price1_list, price2_list], columns)
|
24
68
|
|
25
69
|
df = df.append(se, columns)
|
26
70
|
|
@@ -28,7 +72,7 @@
|
|
28
72
|
|
29
73
|
# 収集したデータをエクセル形式で保存
|
30
74
|
|
31
|
-
filename = "c
|
75
|
+
filename = "ckeck_list_%d.xlsx"%(suffix)
|
32
76
|
|
33
77
|
df.to_excel(filename, encoding = 'utf-8-sig')
|
34
78
|
|
@@ -38,7 +82,7 @@
|
|
38
82
|
|
39
83
|
# 処理したいURLのリスト
|
40
84
|
|
41
|
-
urls = ["https://www
|
85
|
+
urls = ["https://www~~~", "https://www~~~", "https://www~~~"]
|
42
86
|
|
43
87
|
|
44
88
|
|