Question edit history
Revision 7: Code fix
title: no changes
body: changed

@@ -104,7 +104,7 @@
     overview_1_c = []
     link_1_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(8):
             page_1_c = soup1.find_all("div", {"class":"main-inner-c"})[i]
 
@@ -137,7 +137,7 @@
     overview_2_c = []
     link_2_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(10):
             page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
 
@@ -170,7 +170,7 @@
     overview_3_c = []
     link_3_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(10):
             page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]
 
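Note: the try/except completed in this revision guards against the IndexError that `find_all(...)[i]` raises when the page has fewer blocks than the loop assumes. A minimal sketch of an alternative that needs no fixed count; the URL is a placeholder, and each block is assumed to contain an `h3` tag, as in the question's code.

```python
import requests
from bs4 import BeautifulSoup

# Placeholder URL standing in for url1/url2/url3 from the question.
soup1 = BeautifulSoup(requests.get("https://example.com").content, "html.parser")

title_1_c = []
# find_all() returns exactly as many blocks as the page has (possibly zero),
# so iterating over it directly can never raise an IndexError.
for page_1_c in soup1.find_all("div", {"class": "main-inner-c"}):
    title_1_c.append(page_1_c.find("h3").text.replace("\u3000", "").replace("\n", ""))
```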
Revision 6: Code fix
title: no changes
body: changed

@@ -6,6 +6,7 @@
 The web-scraping function works fine when run on its own,
 but an error occurs when it is processed via the schedule module.
 Could the try/except built into the function be the cause?
+Note: rewriting it as if/else produced the same message, so the updated code is posted below.
 
 ### Problem / error message
 
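Note, for context on the hypothesis above: a bare `except:` catches every exception, not only the "element is missing" case, so an unrelated bug inside the function can be silently turned into empty results. A minimal, self-contained illustration; the misspelled variable name is a deliberate, hypothetical example.

```python
try:
    # Hypothetical typo: "pge_1_a" is undefined, so this raises NameError.
    title_1_a = pge_1_a.find("h3").text
except:
    # The bare except swallows the NameError just like a missing element,
    # so the real bug never surfaces as an error message.
    title_1_a = []

print(title_1_a)  # [] -- indistinguishable from "element not found"
```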
@@ -73,30 +74,31 @@
 ```python
 def scrayping():
 
-    #
+    #page_1
+
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
-    page_1_a = soup1.find("div", {"class":"main-inner-a"})
-    try: # when block "a" exists
-        title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
-        overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
-        link_1_a = page_1_a.find("a").get("href")
-    except:
-        title_1_a = []
-        overview_1_a = []
-        link_1_a = []
+    if soup1.find("div", {"class":"main-inner-a"}) is None:
+        title_1_a = []
+        overview_1_a = []
+        link_1_a = []
+    else:
+        page_1_a = soup1.find("div", {"class":"main-inner-a"})
+        title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
+        overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
+        link_1_a = page_1_a.find("a").get("href")
 
-    page_1_b = soup1.find("div", {"class":"main-inner-b"})
-    try: # when block "b" exists
-        title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
-        overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
-        link_1_b = page_1_b.find("a").get("href")
-    except:
-        title_1_b = []
-        overview_1_b = []
-        link_1_b = []
+    if soup1.find("div", {"class":"main-inner-b"}) is None:
+        title_1_b = []
+        overview_1_b = []
+        link_1_b = []
+    else:
+        page_1_b = soup1.find("div", {"class":"main-inner-b"})
+        title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
+        overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
+        link_1_b = page_1_b.find("a").get("href")
 
     title_1_c = []
     overview_1_c = []
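Note: the "a" and "b" branches above repeat the same is-None pattern. A hypothetical helper, not part of the question's code, showing how it could be factored out; it assumes each block contains `h3`, `p`, and `a` tags, as the question's markup does.

```python
from bs4 import BeautifulSoup

def extract_block(soup, class_name):
    """Return (title, overview, link), or ([], [], []) when the block
    is absent -- the same None check introduced in this revision."""
    block = soup.find("div", {"class": class_name})
    if block is None:
        return [], [], []
    clean = lambda s: s.replace("\u3000", "").replace("\n", "")
    return (clean(block.find("h3").text),
            clean(block.find("p").text),
            block.find("a").get("href"))

# Usage, with soup1 parsed as in the question:
# title_1_a, overview_1_a, link_1_a = extract_block(soup1, "main-inner-a")
```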
@@ -125,7 +127,7 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
-    #
+    #page_2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -158,7 +160,7 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
-    #
+    #page_3
 
     result3 = requests.get(url3)
     c3 = result3.content
 
@@ -191,10 +193,11 @@
 
     page3_df = pd.concat([title_3_all, overview_3_all, link_3_all], axis=1)
 
-    #
+    #total
 
     page_df = pd.concat([page1_df, page2_df, page3_df], axis=0,ignore_index=True)
     page_df.columns=["Title","Overview","Link"]
+    page_df.index = np.arange(1, len(page_df)+1)
 
     page_df.to_excel("result.xlsx")
 
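Note: the `page_df.index = np.arange(1, len(page_df)+1)` line added here renumbers the rows from 1 instead of pandas' default 0, and those labels carry through to the exported Excel file. A minimal sketch with toy data in place of the scraped frame.

```python
import numpy as np
import pandas as pd

# Toy rows standing in for the scraped results.
page_df = pd.DataFrame({"Title": ["t1", "t2"],
                        "Overview": ["o1", "o2"],
                        "Link": ["l1", "l2"]})

# Same reindexing as the revision: row labels become 1..N instead of 0..N-1,
# which is what page_df.to_excel("result.xlsx") writes as the index column.
page_df.index = np.arange(1, len(page_df) + 1)
print(page_df.index.tolist())  # [1, 2]
```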
Revision 5: Code fix
title: no changes
body: changed

@@ -213,7 +213,7 @@
 
 url1 = "https://runda.jp/search/?q=" + keyword + "&page=1&search_type=" + period
 url2 = "https://runda.jp/search/?q=" + keyword + "&page=2&search_type=" + period
-url3 = "https://runda.jp/search/?q=" + keyword + "&page=
+url3 = "https://runda.jp/search/?q=" + keyword + "&page=3&search_type=" + period
 
 # repeated automatic processing
 
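Note: the truncated `url3` fixed here is the kind of slip that hand-concatenated query strings invite. For reference, requests can assemble and percent-encode the query string itself through its `params` argument; a sketch assuming the same `keyword` and `period` variables, with placeholder values.

```python
import requests

keyword, period = "example", "all"  # placeholders; defined elsewhere in the question

# Equivalent to the three hand-built URLs: requests produces
# https://runda.jp/search/?q=...&page=N&search_type=... and handles encoding.
for page in (1, 2, 3):
    response = requests.get("https://runda.jp/search/",
                            params={"q": keyword, "page": page, "search_type": period})
```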
Revision 4: Code fix
title: no changes
body: changed

@@ -69,7 +69,6 @@
 
 Apologies for the long post.
 
-# Function definition
 
 ```python
 def scrayping():
Revision 3: Code fix
title: no changes
body: changed

@@ -69,10 +69,12 @@
 
 Apologies for the long post.
 
+# Function definition
 
-
+```python
 def scrayping():
 
+    # page 1
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
@@ -124,6 +126,7 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
+    # page 2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -156,6 +159,7 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
+    # page 3
 
     result3 = requests.get(url3)
     c3 = result3.content
 
@@ -188,7 +192,7 @@
 
     page3_df = pd.concat([title_3_all, overview_3_all, link_3_all], axis=1)
 
-    #
+    # merge pages 1-3
 
     page_df = pd.concat([page1_df, page2_df, page3_df], axis=0,ignore_index=True)
     page_df.columns=["Title","Overview","Link"]
 
@@ -220,8 +224,10 @@
     schedule.run_pending()
     time.sleep(10)
 
+```
 
 
+
 ### What I tried
 
 Defining the function and running it on its own worked without problems, but
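Note: the fence closed in this revision ends right after the schedule loop shown as context above. For reference, this is the standard schedule-module idiom: `run_pending()` fires only the jobs whose interval has elapsed, so it has to sit inside a loop with a short sleep. A minimal self-contained sketch; the one-minute interval is an assumed example, and `job()` stands in for the question's `scrayping()`.

```python
import time

import schedule

def job():
    print("job ran")  # stands in for scrayping()

# Register the job; the interval here is an assumed example.
schedule.every(1).minutes.do(job)

while True:
    # run_pending() executes only jobs that are due; the loop plus a
    # short sleep keeps checking, exactly as in the question's code.
    schedule.run_pending()
    time.sleep(10)
```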
Revision 2: Code fix
title: no changes
body: changed

@@ -73,8 +73,6 @@
 
 def scrayping():
 
-    #page_1
-
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
@@ -126,7 +124,6 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
-    #page_2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -159,7 +156,6 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
-    #page_3
 
     result3 = requests.get(url3)
     c3 = result3.content
Revision 1: Typo fix
title: changed

@@ -1,1 +1,1 @@
-Python: error when running the schedule module
+Python: error when running the schedule module during web scraping

body: no changes