スクレイピングをするために書いたコードでは1レース分のみなら取り出せたのですが、
for文を用いて数レースまとめてスクレイピングしようとするとエラーが出てしまいできません。
どのように書き換えればスムーズに行くでしょうか?アドバイスよろしくお願いいいたします。
python
1race_id_list = ["202105142726040301","202105142726040301"]
python
1def result_scrape(race_id_list): 2 race_results=[] 3 for race_id in tqdm(race_id_list): 4 time.sleep(1) 5 url = "https://keiba.rakuten.co.jp/race_performance/list/RACEID/"+ race_id 6 html = requests.get(url) 7 html.encoding = "UTF-8" 8 soup = BeautifulSoup(html.text, "html.parser") 9 10 #着順 11 Ranks= soup.find_all("td",class_="order") 12 Ranks_list = [] 13 for Rank in Ranks: 14 Rank = Rank.get_text() 15 Ranks_list.append(Rank) 16 Ranks_list = pd.DataFrame(Ranks_list) 17 18 #枠 19 Positions= soup.find_all("th",class_="position") 20 Positions_list = [] 21 for Position in Positions: 22 Position = Position.get_text() 23 Positions_list.append(Position) 24 Positions_list.remove("枠") 25 Positions_list = pd.DataFrame(Positions_list) 26 #馬番 find_next_siblings()の説明はhttps://senablog.com/python-bs4-method/#toc_id_6 27 horse_len_list = [] 28 a = soup.find_all("td",class_="order") 29 for a_1 in a: 30 b = a_1.find_next_siblings()[1] 31 b =b.get_text() 32 horse_len_list.append(b) 33 horse_len_list = pd.DataFrame(horse_len_list) 34 #horse_len_list = [ho.find_next_siblings()[1] for ho in soup.find_all("td",class_="order")]もあり 35 36 #馬名 37 horse_names= soup.find_all("td",class_="horse") 38 horse_name_list = [] 39 for horse_name in horse_names: 40 horse_name = horse_name.get_text() 41 horse_name = horse_name.replace("\n","").lstrip()#replaceで\を消し lstripで前のインデント(空白)をさくじょ 42 horse_name = horse_name.rstrip()#lstripで後ろのインデントを削除 43 horse_name_list.append(horse_name) 44 horse_name_list = pd.DataFrame(horse_name_list) 45 46 #性別年齢分ける 47 sex_olds = soup.find_all("td",class_="state") 48 sex_old_list= [] 49 for sex_old in sex_olds: 50 #毛色はいらない 51 sex_old = sex_old.get_text() 52 sex_old = sex_old.replace("\n","") 53 sex_old = sex_old.replace("/鹿毛","") 54 sex_old = sex_old.replace("/芦毛","") 55 sex_old = sex_old.replace("/黒鹿毛","") 56 sex_old = sex_old.replace("/青鹿毛","") 57 sex_old = sex_old.replace("/黒鹿毛","") 58 sex_old = sex_old.replace("/白毛","") 59 sex_old = sex_old.replace("/栗毛","") 60 sex_old = sex_old.replace("/栃栗毛","")##replaceで\を消し 61 sex_old = sex_old.rstrip()#lstripで後ろのインデントを削除 62 sex_old_list.append(sex_old) 63 64 #性別 65 sex_list = [] 66 for a in sex_old_list: 67 a = a[:1] #1文字目まで 68 sex_list.append(a) 69 sex_list = pd.DataFrame(sex_list) 70 71 #年齢 72 old_list = [] 73 for b in sex_old_list: 74 b = b[1:] #1文字め以降 75 old_list.append(b) 76 old_list = pd.DataFrame(old_list) 77 78 #斤量 79 weight_Tax = soup.find_all("td",class_="weightTax") 80 weight_Tax_list = [] 81 for weight_Tax_1 in weight_Tax: 82 weight_Tax_1 = weight_Tax_1.get_text() 83 weight_Tax_list.append(weight_Tax_1) 84 weight_Tax_list = pd.DataFrame(weight_Tax_list) 85 86 #馬体重と増減を分ける 87 weight = soup.find_all("td",class_="weight") 88 weight_list = [] 89 for weight_1 in weight: 90 weight_1 = weight_1.get_text() 91 weight_1 = weight_1.replace("\n","").lstrip().rstrip()#replaceで\を消し lstripで前のインデント(空白)をさくじょ 連結パターン 92 weight_list.append(weight_1) 93 94 #馬体重 95 weight_lists = [] 96 for a in weight_list: 97 a = a[:3] 98 weight_lists.append(a) 99 weight_lists = pd.DataFrame(weight_lists) 100 101 #増減 102 weiget_zougen_list = [] 103 weiget_zougen_lists = []#修正したやつ 104 for b in weight_list: 105 b = b[3:] 106 weiget_zougen_list.append(b) 107 108 for c in weiget_zougen_list: 109 c = c.replace("±","") 110 c = c.replace("+","") 111 weiget_zougen_lists.append(c) 112 weiget_zougen_lists = pd.DataFrame(weiget_zougen_lists) 113 114 #ジョッキー 115 jockey = soup.find_all("td",class_="jockey") 116 jockey_list = [] 117 for jockey_1 in jockey: 118 jockey_1 = jockey_1.get_text() 119 jockey_1 = jockey_1.replace("\n","") 120 jockey_1 = jockey_1.replace("▲","") 121 jockey_1 = jockey_1.replace("◇","") 122 jockey_1 = jockey_1.replace("★","") 123 jockey_1 = jockey_1.replace("☆","") 124 jockey_1 = jockey_1.replace("(園田)","") 125 jockey_1 = jockey_1.replace("(西脇)","") 126 jockey_1 = jockey_1.replace("(大井)","") 127 jockey_1 = jockey_1.replace("(浦和)","") 128 jockey_1 = jockey_1.replace("(川崎)","") 129 jockey_1 = jockey_1.replace("(船橋)","") 130 jockey_1 = jockey_1.replace("(盛岡)","") 131 jockey_1 = jockey_1.replace("(水沢)","") 132 jockey_1 = jockey_1.replace("(門別)","") 133 jockey_1 = jockey_1.replace("(金沢)","") 134 jockey_1 = jockey_1.replace("(笠松)","") 135 jockey_1 = jockey_1.replace("(姫路)","") 136 jockey_1 = jockey_1.replace("(高知)","") 137 jockey_1 = jockey_1.replace("(佐賀)","") 138 jockey_1 = jockey_1.replace("(北海道)","") 139 jockey_1 = jockey_1.replace("(名古屋)","") 140 jockey_1 = jockey_1.lstrip().rstrip()#replaceで\を消し lstripで前のインデント(空白)をさくじょ 連結パターン 141 jockey_list.append(jockey_1) 142 143 jockey_list = pd.DataFrame(jockey_list) 144 145 #タイム 146 times = soup.find("tbody",class_="record") #classの中にあるclassを抜くとき、最初はfind 147 time = times.find_all("td",class_="time") 148 time_list = [] 149 for time_1 in time: 150 time_1 = time_1.get_text() 151 time_1 = time_1.replace("\n","").lstrip().rstrip()#replaceで\を消し lstripで前のインデント(空白)をさくじょ 連結パターン 152 time_list.append(time_1) 153 time_list = pd.DataFrame(time_list) 154 155 #着差 156 lead = soup.find_all("td", class_="lead") 157 lead_list = [] 158 for lead_1 in lead: 159 lead_1 = lead_1.get_text() 160 lead_1 = lead_1.replace("\n","") 161 lead_1 = lead_1.replace("\u3000"," ") 162 lead_1 = lead_1.lstrip().rstrip() 163 lead_list.append(lead_1) 164 lead_list = pd.DataFrame(lead_list) 165 166 #上がり 167 spurt = soup.find_all("td", class_="spurt") 168 spurt_list = [] 169 for spurt_1 in spurt: 170 spurt_1 = spurt_1.get_text() 171 spurt_list.append(spurt_1) 172 spurt_list = pd.DataFrame(spurt_list) 173 174 #調教師 175 tamer = soup.find_all("td", class_="tamer") 176 tamer_list = [] 177 for tamer_1 in tamer: 178 tamer_1 = tamer_1.get_text() 179 tamer_list.append(tamer_1) 180 tamer_list = pd.DataFrame(tamer_list) 181 182 #人気 183 # 一度、着順情報があるテーブルだけを取り出す 184 table = soup.find("table", class_="dataTable") 185 # HTML全体ではなく、上記テーブルのみから、条件を満たす要素を取り出す 186 rank = table.find_all("td", class_="rank") 187 rank_list = [] 188 for rank_1 in rank: 189 rank_1 = rank_1.get_text() 190 rank_list.append(rank_1) 191 rank_list = pd.DataFrame(rank_list) 192 193 #出馬表にするために結合 194 df = pd.concat([Ranks_list,Positions_list,horse_len_list,horse_name_list,sex_list,old_list,weight_Tax_list,weight_lists,weiget_zougen_lists,jockey_list,time_list,lead_list,spurt_list,tamer_list,rank_list],axis=1) 195 #columnsの用意 そしてカラムに代入する 196 listed =["着順","枠","馬番","馬名","性別","年齢","斤量","馬体重","増減","騎手","タイム","着差","上がり","調教師","人気"] 197 df.columns = listed 198 race_results.append(df) 199 return df
python
1UnboundLocalError Traceback (most recent call last) 2<ipython-input-129-699e08fd9315> in <module> 3----> 1 a = result_scrape(race_id_list) 4 5<ipython-input-127-b6a25ff2fb48> in result_scrape(race_id_list) 6 2 race_results=[] 7 3 for race_id in tqdm(race_id_list): 8----> 4 time.sleep(1) 9 5 url = "https://keiba.rakuten.co.jp/race_performance/list/RACEID/"+ race_id 10 6 html = requests.get(url) 11 12UnboundLocalError: local variable 'time' referenced before assignment 13
回答3件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2021/05/23 13:26
2021/05/23 13:33
2021/05/23 13:38
2021/05/23 14:57