Question edit history
Revision 7: Code fix
title: no changes
body: changed

@@ -104,7 +104,7 @@
     overview_1_c = []
     link_1_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(8):
             page_1_c = soup1.find_all("div", {"class":"main-inner-c"})[i]
 
@@ -137,7 +137,7 @@
     overview_2_c = []
     link_2_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(10):
             page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
 
@@ -170,7 +170,7 @@
     overview_3_c = []
     link_3_c = []
 
-    try: #cが
+    try: # handle the case where there are fewer than 8 "c" blocks
         for i in range(10):
             page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]
 
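Note: the try/except completed in this revision guards against the IndexError that `find_all(...)[i]` raises when the page has fewer blocks than the loop assumes. A minimal sketch of an alternative that needs no fixed count; the URL is a placeholder, and each block is assumed to contain an `h3` tag, as in the question's code.

```python
import requests
from bs4 import BeautifulSoup

# Placeholder URL standing in for url1/url2/url3 from the question.
soup1 = BeautifulSoup(requests.get("https://example.com").content, "html.parser")

title_1_c = []
# find_all() returns exactly as many blocks as the page has (possibly zero),
# so iterating over it directly can never raise an IndexError.
for page_1_c in soup1.find_all("div", {"class": "main-inner-c"}):
    title_1_c.append(page_1_c.find("h3").text.replace("\u3000", "").replace("\n", ""))
```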
Revision 6: Code fix
title: no changes
body: changed

@@ -6,6 +6,7 @@
 The web-scraping function works fine when run on its own,
 but an error occurs when it is processed via the schedule module.
 Could the try/except built into the function be the cause?
+Note: rewriting it as if/else produced the same message, so the updated code is posted below.
 
 ### Problem / error message
 
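Note, for context on the hypothesis above: a bare `except:` catches every exception, not only the "element is missing" case, so an unrelated bug inside the function can be silently turned into empty results. A minimal, self-contained illustration; the misspelled variable name is a deliberate, hypothetical example.

```python
try:
    # Hypothetical typo: "pge_1_a" is undefined, so this raises NameError.
    title_1_a = pge_1_a.find("h3").text
except:
    # The bare except swallows the NameError just like a missing element,
    # so the real bug never surfaces as an error message.
    title_1_a = []

print(title_1_a)  # [] -- indistinguishable from "element not found"
```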
@@ -73,30 +74,31 @@
 ```python
 def scrayping():
 
-    #
+    #page_1
+
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
-    page_1_a = soup1.find("div", {"class":"main-inner-a"})
-    try: # when block "a" exists
-        title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
-        overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
-        link_1_a = page_1_a.find("a").get("href")
-    except:
-        title_1_a = []
-        overview_1_a = []
-        link_1_a = []
+    if soup1.find("div", {"class":"main-inner-a"}) is None:
+        title_1_a = []
+        overview_1_a = []
+        link_1_a = []
+    else:
+        page_1_a = soup1.find("div", {"class":"main-inner-a"})
+        title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
+        overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
+        link_1_a = page_1_a.find("a").get("href")
 
-    page_1_b = soup1.find("div", {"class":"main-inner-b"})
-    try: # when block "b" exists
-        title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
-        overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
-        link_1_b = page_1_b.find("a").get("href")
-    except:
-        title_1_b = []
-        overview_1_b = []
-        link_1_b = []
+    if soup1.find("div", {"class":"main-inner-b"}) is None:
+        title_1_b = []
+        overview_1_b = []
+        link_1_b = []
+    else:
+        page_1_b = soup1.find("div", {"class":"main-inner-b"})
+        title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
+        overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
+        link_1_b = page_1_b.find("a").get("href")
 
     title_1_c = []
     overview_1_c = []
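Note: the "a" and "b" branches above repeat the same is-None pattern. A hypothetical helper, not part of the question's code, showing how it could be factored out; it assumes each block contains `h3`, `p`, and `a` tags, as the question's markup does.

```python
from bs4 import BeautifulSoup

def extract_block(soup, class_name):
    """Return (title, overview, link), or ([], [], []) when the block
    is absent -- the same None check introduced in this revision."""
    block = soup.find("div", {"class": class_name})
    if block is None:
        return [], [], []
    clean = lambda s: s.replace("\u3000", "").replace("\n", "")
    return (clean(block.find("h3").text),
            clean(block.find("p").text),
            block.find("a").get("href"))

# Usage, with soup1 parsed as in the question:
# title_1_a, overview_1_a, link_1_a = extract_block(soup1, "main-inner-a")
```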
@@ -125,7 +127,7 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
-    #
+    #page_2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -158,7 +160,7 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
-    #
+    #page_3
 
     result3 = requests.get(url3)
     c3 = result3.content
 
@@ -191,10 +193,11 @@
 
     page3_df = pd.concat([title_3_all, overview_3_all, link_3_all], axis=1)
 
-    #
+    #total
 
     page_df = pd.concat([page1_df, page2_df, page3_df], axis=0,ignore_index=True)
     page_df.columns=["Title","Overview","Link"]
+    page_df.index = np.arange(1, len(page_df)+1)
 
     page_df.to_excel("result.xlsx")
 
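Note: the `page_df.index = np.arange(1, len(page_df)+1)` line added here renumbers the rows from 1 instead of pandas' default 0, and those labels carry through to the exported Excel file. A minimal sketch with toy data in place of the scraped frame.

```python
import numpy as np
import pandas as pd

# Toy rows standing in for the scraped results.
page_df = pd.DataFrame({"Title": ["t1", "t2"],
                        "Overview": ["o1", "o2"],
                        "Link": ["l1", "l2"]})

# Same reindexing as the revision: row labels become 1..N instead of 0..N-1,
# which is what page_df.to_excel("result.xlsx") writes as the index column.
page_df.index = np.arange(1, len(page_df) + 1)
print(page_df.index.tolist())  # [1, 2]
```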
Revision 5: Code fix
title: no changes
body: changed

@@ -213,7 +213,7 @@
 
 url1 = "https://runda.jp/search/?q=" + keyword + "&page=1&search_type=" + period
 url2 = "https://runda.jp/search/?q=" + keyword + "&page=2&search_type=" + period
-url3 = "https://runda.jp/search/?q=" + keyword + "&page=
+url3 = "https://runda.jp/search/?q=" + keyword + "&page=3&search_type=" + period
 
 # repeated automatic processing
 
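Note: the truncated `url3` fixed here is the kind of slip that hand-concatenated query strings invite. For reference, requests can assemble and percent-encode the query string itself through its `params` argument; a sketch assuming the same `keyword` and `period` variables, with placeholder values.

```python
import requests

keyword, period = "example", "all"  # placeholders; defined elsewhere in the question

# Equivalent to the three hand-built URLs: requests produces
# https://runda.jp/search/?q=...&page=N&search_type=... and handles encoding.
for page in (1, 2, 3):
    response = requests.get("https://runda.jp/search/",
                            params={"q": keyword, "page": page, "search_type": period})
```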
Revision 4: Code fix
title: no changes
body: changed

@@ -69,7 +69,6 @@
 
 Apologies for the long post.
 
-# Function definition
 
 ```python
 def scrayping():
Revision 3: Code fix
title: no changes
body: changed

@@ -69,10 +69,12 @@
 
 Apologies for the long post.
 
+# Function definition
 
-
+```python
 def scrayping():
 
+    # page 1
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
@@ -124,6 +126,7 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
+    # page 2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -156,6 +159,7 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
+    # page 3
 
     result3 = requests.get(url3)
     c3 = result3.content
 
@@ -188,7 +192,7 @@
 
     page3_df = pd.concat([title_3_all, overview_3_all, link_3_all], axis=1)
 
-    #
+    # merge pages 1-3
 
     page_df = pd.concat([page1_df, page2_df, page3_df], axis=0,ignore_index=True)
     page_df.columns=["Title","Overview","Link"]
 
@@ -220,8 +224,10 @@
     schedule.run_pending()
     time.sleep(10)
 
+```
 
 
+
 ### What I tried
 
 Defining the function and running it on its own worked without problems, but
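Note: the fence closed in this revision ends right after the schedule loop shown as context above. For reference, this is the standard schedule-module idiom: `run_pending()` fires only the jobs whose interval has elapsed, so it has to sit inside a loop with a short sleep. A minimal self-contained sketch; the one-minute interval is an assumed example, and `job()` stands in for the question's `scrayping()`.

```python
import time

import schedule

def job():
    print("job ran")  # stands in for scrayping()

# Register the job; the interval here is an assumed example.
schedule.every(1).minutes.do(job)

while True:
    # run_pending() executes only jobs that are due; the loop plus a
    # short sleep keeps checking, exactly as in the question's code.
    schedule.run_pending()
    time.sleep(10)
```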
Revision 2: Code fix
title: no changes
body: changed

@@ -73,8 +73,6 @@
 
 def scrayping():
 
-    #page_1
-
     result1 = requests.get(url1)
     c1 = result1.content
     soup1 = BeautifulSoup(c1)
 
@@ -126,7 +124,6 @@
 
     page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
 
-    #page_2
 
     result2 = requests.get(url2)
     c2 = result2.content
 
@@ -159,7 +156,6 @@
 
     page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
 
-    #page_3
 
     result3 = requests.get(url3)
     c3 = result3.content
Revision 1: Typo fix
title: changed

@@ -1,1 +1,1 @@
-Python: error when running the schedule module
+Python: error when running the schedule module during web scraping

body: no changes