Beautifulsoup 検索ページに取得したいデータがないときの飛ばし方がわからない

問題点
race_id_listからfor文で回してスクレイピングをする際にデータが空のページがあるので
'NoneType' object has no attribute 'find_all'
とエラーが出て止まってしまう。

try、exceptで改善しようと試みたが解決できなかった。

どのようにコードを書けばエラーなくスクレイピングできるか教えていただきたいです。

python
# Venue codes that are spliced into each candidate race ID.
venue_list = ["3601","1006","1106","1813","1914","2015","2135","2218","2320","2433","2726","2826","3129","3230"]

# Every candidate ID is "2020" + venue + month + day + race number, enumerated
# venue-major, then month (1-12), day (1-31) and race number (1-12).
# NOTE(review): month/day/race are not zero-padded, so different combinations
# can yield the same string (month=1, day=11 vs. month=11, day=1), and
# impossible dates such as month=2, day=31 are generated as well -- confirm
# the ID format the site really expects.
race_id_list = [
    "2020" + venue + str(month) + str(day) + str(race_no)
    for venue in venue_list
    for month in range(1, 13)
    for day in range(1, 32)
    for race_no in range(1, 13)
]

python
def _clean_text(tag):
    """Return the tag's text with newlines removed and outer whitespace stripped."""
    return tag.get_text().replace("\n", "").strip()


def _parse_race_page(soup):
    """Turn one parsed race-result page into a DataFrame.

    Args:
        soup: BeautifulSoup document of a race result page.

    Returns:
        A DataFrame with one row per ``td.order`` cell, or ``None`` when the
        page does not contain the result table, the track-state list, or any
        result row (i.e. there is nothing to scrape on this page).

    Raises:
        IndexError: when an expected sibling/list element is missing.
    """
    # find() returns None when nothing matches; calling find_all() on that
    # None is what produced "'NoneType' object has no attribute 'find_all'".
    record = soup.find("tbody", class_="record")
    table = soup.find("table", class_="dataTable")
    info = soup.find("ul", class_="trackState trackMainState")
    order_cells = soup.find_all("td", class_="order")
    if record is None or table is None or info is None or not order_cells:
        return None

    # Finishing order.
    ranks = [cell.get_text() for cell in order_cells]

    # Frame (gate) number, and inner/outer flag: "0" for frames 1-4, else "1".
    positions = [th.get_text() for th in record.find_all("th", class_="position")]
    in_out = ["0" if int(position) <= 4 else "1" for position in positions]

    # Horse number: the second sibling after each order cell.
    horse_numbers = [cell.find_next_siblings()[1].get_text() for cell in order_cells]
    # Kept as str, as in the original, which noted this avoids an error when
    # building the DataFrame.
    head_count = str(len(horse_numbers))

    horse_names = [_clean_text(td) for td in soup.find_all("td", class_="horse")]
    weight_tax = [td.get_text() for td in soup.find_all("td", class_="weightTax")]

    # The weight cell holds body weight (first 3 chars) followed by the change.
    weight_texts = [_clean_text(td) for td in soup.find_all("td", class_="weight")]
    body_weights = [text[:3] for text in weight_texts]
    weight_changes = [
        text[3:].replace("±", "").replace("+", "") for text in weight_texts
    ]

    # The original called .lstrip() on the Tag itself (no get_text()), which
    # raises TypeError on every page; extract the text first.
    jockeys = [_clean_text(td) for td in soup.find_all("td", class_="jockey")]

    race_times = [_clean_text(td) for td in record.find_all("td", class_="time")]
    leads = [
        td.get_text().replace("\n", "").replace("\u3000", " ").strip()
        for td in soup.find_all("td", class_="lead")
    ]
    spurts = [td.get_text() for td in soup.find_all("td", class_="spurt")]
    trainers = [td.get_text() for td in soup.find_all("td", class_="tamer")]

    # Popularity: restricted to the result table rather than the whole page.
    popularity = [td.get_text() for td in table.find_all("td", class_="rank")]

    # Course information.
    distance = _clean_text(info.find_all("li", class_="distance")[0])
    for unwanted in ("ダ", "m", ","):
        distance = distance.replace(unwanted, "")
    ground = info.find_all("dt")[1].get_text().replace("：", "")
    dd_items = info.find_all("dd")
    weather = dd_items[0].get_text()
    ground_state = dd_items[1].get_text()

    # horse_id: first run of digits in each horse link's href.
    horse_ids = [
        re.findall(r'\d+', td.find('a')['href'])[0]
        for td in record.find_all("td", attrs={'class': 'horse'})
    ]

    return pd.DataFrame({
        "距離": distance,
        "天候": weather,
        "馬場状態": ground_state,
        "馬場": ground,
        "着順": ranks,
        "枠": positions,
        "内or外": in_out,
        "馬番": horse_numbers,
        "頭数": head_count,
        "馬名": horse_names,
        "斤量": weight_tax,
        "馬体重": body_weights,
        "増減": weight_changes,
        "騎手": jockeys,
        "タイム": race_times,
        "着差": leads,
        "上がり": spurts,
        "調教師": trainers,
        "人気": popularity,
        "horse_id": horse_ids,
    })


def result_scrape(race_id_list):
    """Scrape the result table of every race ID that has one.

    Pages that lack the expected result elements are skipped instead of
    aborting the run, so a list containing many non-existent race IDs can be
    processed end to end.

    Args:
        race_id_list: iterable of race ID strings appended to the result URL.

    Returns:
        dict mapping race_id -> DataFrame for each page that yielded results.
        If the run is interrupted or hits an unexpected error, the results
        collected so far are returned.
    """
    race_results = {}
    for race_id in tqdm(race_id_list):
        time.sleep(1)  # pause between requests
        try:
            url = "https://keiba.rakuten.co.jp/race_performance/list/RACEID/" + race_id
            response = requests.get(url)
            response.encoding = "UTF-8"
            soup = BeautifulSoup(response.text, "html.parser")
            df = _parse_race_page(soup)
        except IndexError:
            # An expected element was missing: treat as a non-existent race.
            continue
        except KeyboardInterrupt:
            # Manual stop (the original comment mentions the Jupyter stop
            # button): keep what has been collected so far.
            break
        except Exception as e:
            # Anything unexpected (the original comment mentions the network
            # dropping): report it and stop, returning the partial results.
            print(e)
            break

        if df is not None:
            race_results[race_id] = df

    return race_results

行動規範の内容に同意します

回答2件

自己解決

try exceptで対処

投稿2021/06/02 10:15

rikuanpg9294

総合スコア15

otn

2021/06/02 13:03

初心者には使いこなしが難しい機能ですが、想定した例外（指定したタグが存在しない）だけを捕まえて、それ以外の例外（ネットワーク障害とかその他全部）を捕まえないように出来ていますか？

行動規範の内容に同意します

結果が空リストかどうかifで判断すれば良いだけでしょう。

Python
1xxx = soup.find_all(～～～)
2if xxx:
3  for x in xxx:
4    ～～～～
5else:
6  空の時の処理

投稿2021/05/25 15:17

編集2021/05/25 15:29

otn

総合スコア84505

あなたの回答

tips

プレビュー

行動規範の内容に同意します

質問の解決につながる回答をしましょう。サンプルコードなど、より具体的な説明があると質問者の理解の助けになります。また、読む側のことを考えた、分かりやすい文章を心がけましょう。

15分調べてもわからないことは
teratailで質問しよう！

ただいまの回答率
85.48%

質問をまとめることで
思考を整理して素早く解決

テンプレート機能で
簡単に質問をまとめる

質問する

質問をすることでしか得られない、回答やアドバイスがある。

15分調べてもわからないことは、質問しよう！

Beautifulsoup 検索ページに取得したいデータがないときの飛ばし方がわからない

関連した質問