質問編集履歴
1
コードの訂正
test
CHANGED
File without changes
|
test
CHANGED
@@ -44,7 +44,105 @@
|
|
44
44
|
|
45
45
|
```python
|
46
46
|
|
47
|
+
import requests
|
48
|
+
|
49
|
+
import bs4
|
50
|
+
|
51
|
+
import time
|
52
|
+
|
53
|
+
import pandas as pd
|
54
|
+
|
55
|
+
from tqdm import tqdm
|
56
|
+
|
57
|
+
from google.colab import files
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
def getSoup(page_number):
    """Fetch one page of the web-shop product listing and return it parsed.

    Args:
        page_number: Listing page to request. It is converted with ``str``
            and appended to the listing URL.

    Returns:
        The ``bs4.BeautifulSoup`` object produced by ``getData`` for the
        listing URL.
    """
    # NOTE(review): "&page=" follows the path directly, with no "?" to start
    # the query string. Kept byte-for-byte; confirm against the live site.
    Target_URL = "http://www.sakamotofoods.co.jp/webshop/&page=" + str(page_number)

    # getData performs the same request / pause / parse sequence this function
    # used to repeat inline, so delegate to it instead of duplicating it.
    return getData(Target_URL)
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
def getData(URL):
    """Download ``URL`` and return its body parsed as HTML.

    Args:
        URL: Address of the page to fetch.

    Returns:
        bs4.BeautifulSoup: The response body parsed with ``html.parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"}

    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(URL, headers=headers, timeout=30)
    # Fail here on an error status instead of parsing an error page as data.
    response.raise_for_status()

    # Pause for one second after every request before returning.
    time.sleep(1)

    return bs4.BeautifulSoup(response.content, "html.parser")
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
def getDataframe(soup, df):
    """Append one row per product linked from a listing page to ``df``.

    Every element with class ``showcaseHd`` in ``soup`` is expected to contain
    a link; the linked page is fetched with ``getData`` and its title and JAN
    cell are extracted.

    Args:
        soup: Parsed listing page (as returned by ``getSoup``).
        df: DataFrame holding the rows collected so far.

    Returns:
        pandas.DataFrame: ``df`` followed by the new rows, with a fresh
        0..n-1 index. ``df`` itself is returned when the page has no
        ``showcaseHd`` entries.

    Note:
        The new rows are labelled with the module-level ``columns`` list, as
        in the original code.
    """
    rows = []
    for entry in soup.find_all(class_="showcaseHd"):
        URL = entry.find("a")["href"]
        soup_second = getData(URL)
        rows.append([
            soup_second.find("title").text,  # Product Name
            # Position-dependent lookup: first cell of the 7th row of the
            # second "tableType02" table on the product page.
            soup_second.find_all(class_="tableType02")[1].find_all("tr")[6].find("td").text,  # JAN
        ])

    if not rows:
        return df

    # DataFrame.append was removed in pandas 2.0, and the old call passed the
    # column list in the ignore_index position (truthy, so the index was
    # reset). A single concat with ignore_index=True gives the same frame
    # without copying it once per row.
    new_rows = pd.DataFrame(rows, columns=columns)
    return pd.concat([df, new_rows], ignore_index=True)
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
# Fetch listing page 1 and read the total page count from its "last" element.
soup_info = getSoup(1)
time.sleep(1)
page_count_str = soup_info.find(class_="last").text.strip()
page_count = int(page_count_str)

print("\n")
print(f"ページ数: {page_count}")

# Result table: one row per product, filled in page by page.
columns = ["Product Name", "JAN"]
df = pd.DataFrame(columns=columns)

for page in tqdm(range(1, page_count + 1)):
    soup_info = getSoup(page)
    df = getDataframe(soup_info, df)

# Bare expression: shows the frame when run as the last line of a notebook cell.
df
|
48
146
|
|
49
147
|
```
|
50
148
|
|