予想する地方競馬の出馬表をスクレイピングを行った際以下のようなエラーが発生しました。
なぜ範囲外になるかが分かりません。
どのように書き換えれば上手くいくかご教示ください。よろしくお願いいたします。
python
1race_id_list = "202130051304" 2st = ShutubaTable.scrape(race_id_list, '2021/5/13')
python
1--------------------------------------------------------------------------- 2IndexError Traceback (most recent call last) 3<ipython-input-18-da19651b9347> in <module> 4 1 race_id_list = "202130051304" 5----> 2 st = ShutubaTable.scrape(race_id_list, '2021/5/13') 6 7<ipython-input-9-ff97b4e36758> in scrape(cls, race_id_list, date) 8 20 for text in texts: 9 21 if 'm' in text: 10---> 22 df['course_len'] = [int(re.findall(r'\d+', text)[0])] * len(df) 11 23 if text in ["曇", "晴", "雨", "小雨", "小雪", "雪"]: 12 24 df["weather"] = [text] * len(df) 13 14IndexError: list index out of range
python
1class ShutubaTable(DataProcessor): 2 def __init__(self, shutuba_tables): 3 super(ShutubaTable, self).__init__() 4 self.data = shutuba_tables 5 6 @classmethod 7 def scrape(cls, race_id_list, date): 8 data = pd.DataFrame() 9 for race_id in tqdm(race_id_list): 10 url = 'https://nar.netkeiba.com/race/shutuba.html?race_id=' + race_id #narに変更 11 df = pd.read_html(url)[0] 12 df = df.T.reset_index(level=0, drop=True).T 13 14 html = requests.get(url) 15 html.encoding = "EUC-JP" 16 soup = BeautifulSoup(html.text, "html.parser") 17 18 texts = soup.find('div', attrs={'class': 'RaceData01'}).text 19 texts = re.findall(r'\w+', texts) 20 for text in texts: 21 if 'm' in text: 22 df['course_len'] = [int(re.findall(r'\d+', text)[0])] * len(df) 23 if text in ["曇", "晴", "雨", "小雨", "小雪", "雪"]: 24 df["weather"] = [text] * len(df) 25 if text in ["良", "稍重", "重"]: 26 df["ground_state"] = [text] * len(df) 27 if '不' in text: 28 df["ground_state"] = ['不良'] * len(df) 29 # 2020/12/13追加 30 if '稍' in text: 31 df["ground_state"] = ['稍重'] * len(df) 32 if '芝' in text: 33 df['race_type'] = ['芝'] * len(df) 34 if '障' in text: 35 df['race_type'] = ['障害'] * len(df) 36 if 'ダ' in text: 37 df['race_type'] = ['ダート'] * len(df) 38 df['date'] = [date] * len(df) 39 40 # horse_id 41 horse_id_list = [] 42 horse_td_list = soup.find_all("td", attrs={'class': 'HorseInfo'}) 43 for td in horse_td_list: 44 horse_id = re.findall(r'\d+', td.find('a')['href'])[0] 45 horse_id_list.append(horse_id) 46 # jockey_id 47 jockey_id_list = [] 48 jockey_td_list = soup.find_all("td", attrs={'class': 'Jockey'}) 49 for td in jockey_td_list: 50 jockey_id = re.findall(r'\d+', td.find('a')['href'])[0] 51 jockey_id_list.append(jockey_id) 52 df['horse_id'] = horse_id_list 53 df['jockey_id'] = jockey_id_list 54 55 df.index = [race_id] * len(df) 56 data = data.append(df) 57 time.sleep(1) 58 return cls(data) 59
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2021/05/13 23:29
2021/05/14 09:14 編集
2021/05/14 13:41