質問編集履歴

1

コードの訂正

2020/05/23 20:58

投稿

kenrou
kenrou

スコア0

test CHANGED
File without changes
test CHANGED
@@ -44,7 +44,105 @@
44
44
 
45
45
  ```python
46
46
 
47
+ import requests
48
+
49
+ import bs4
50
+
51
+ import time
52
+
53
+ import pandas as pd
54
+
55
+ from tqdm import tqdm
56
+
57
+ from google.colab import files
58
+
59
+
60
+
61
def getSoup(page_number):
    """Download one listing page of the web shop and return it parsed.

    Args:
        page_number: Page index; it is converted with ``str`` and appended
            to the listing URL.

    Returns:
        A ``bs4.BeautifulSoup`` built from the response body using the
        ``"html.parser"`` backend.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            the connection fails, or the server answers with an HTTP
            error status.
    """
    # NOTE(review): "/&page=" is an unusual query form ("?page=" would be the
    # conventional one) -- confirm against the real site before changing it.
    target_url = "http://www.sakamotofoods.co.jp/webshop/&page=" + str(page_number)
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"}

    # Without a timeout requests can block forever on a stalled server.
    response = requests.get(target_url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a listing.
    response.raise_for_status()

    # Pause between requests to keep the load on the server low.
    time.sleep(1)

    return bs4.BeautifulSoup(response.content, "html.parser")
74
+
75
+
76
+
77
def getData(URL):
    """Download the page at ``URL`` and return it parsed.

    Args:
        URL: Address of the page to fetch.

    Returns:
        A ``bs4.BeautifulSoup`` built from the response body using the
        ``"html.parser"`` backend.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            the connection fails, or the server answers with an HTTP
            error status.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36"}

    # Without a timeout requests can block forever on a stalled server.
    response = requests.get(URL, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a product.
    response.raise_for_status()

    # Pause between requests to keep the load on the server low.
    time.sleep(1)

    return bs4.BeautifulSoup(response.content, "html.parser")
90
+
91
+
92
+
93
def getDataframe(soup, df):
    """Append one row per product found on a listing page to ``df``.

    Every element with class ``showcaseHd`` in ``soup`` is expected to
    contain a link; the linked page is fetched with ``getData`` and the
    product name and JAN code are read from it.

    Args:
        soup: Parsed listing page (an object offering ``find_all``).
        df: DataFrame holding the rows collected so far.

    Returns:
        ``df`` itself when the page has no entries; otherwise a new
        DataFrame with the collected rows appended and the index renumbered.
        The new rows are labelled with the module-level ``columns`` list.
    """
    rows = []
    for entry in soup.find_all(class_="showcaseHd"):
        detail_url = entry.find("a")["href"]
        detail = getData(detail_url)
        rows.append([
            detail.find("title").text,  # Product Name
            # NOTE(review): fixed positions (second "tableType02" table,
            # seventh row) depend on the page layout -- confirm on the site.
            detail.find_all(class_="tableType02")[1].find_all("tr")[6].find("td").text,  # JAN
        ])

    if not rows:
        return df

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; build the new rows once and concatenate a single time.
    new_rows = pd.DataFrame(rows, columns=columns)
    return pd.concat([df, new_rows], ignore_index=True)
114
+
115
+
116
+
117
+
118
+
119
# Read the first listing page to find out how many pages exist.
soup_info = getSoup(1)
time.sleep(1)

# The element with class "last" carries the number of the final page.
last_link = soup_info.find(class_="last")
page_count_str = last_link.text.strip()
page_count = int(page_count_str)

print("\n")
print(f"ページ数: {page_count}")

# Accumulate one row per product across every listing page.
columns = ["Product Name", "JAN"]
df = pd.DataFrame(columns=columns)

page_numbers = range(1, page_count + 1)
for page in tqdm(page_numbers):
    soup_info = getSoup(page)
    df = getDataframe(soup_info, df)

# Bare expression: a notebook cell displays the resulting table.
df
48
146
 
49
147
  ```
50
148