実現したいこと
cssセレクタで指定した要素を抽出する
環境
Anacondaで環境を作り、VS Code(Python)でスクレイピング用のコードを記述しています。
詰まっている部分
同じ画面内で抽出できる要素とできない要素(nullになる要素)がある。
現在の状況
URLから別の画面(詳細ページ)に遷移した後、その詳細ページの情報を取得しようとしているのですが、
以下に添付している【spider結果】のように、'syuboba' の値だけが null として返ってきます。
原因がわからず調べても出てこず、困っています。よろしくお願いします。
spider
import scrapy
import logging


class KeibaDbSpider(scrapy.Spider):
    """Follow each link in a netkeiba race-result table and scrape the linked detail page.

    ``parse`` reads the result table on the start URL and follows the link
    found in every row; ``parse_item`` yields one dict per detail page.
    """

    name = 'keiba_db'
    allowed_domains = ['race.netkeiba.com', 'db.netkeiba.com']
    start_urls = ['https://race.netkeiba.com/race/result.html?race_id=202207011001&rf=race_list']

    def parse(self, response):
        """Follow the detail-page link of every row in the result table.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            One request per row that contains a link, handled by ``parse_item``.
        """
        # Narrow the selection down to the rows of the result table.
        race_results = response.css('#All_Result_Table > tbody > tr')

        for race_result in race_results:
            url = race_result.css('td > span > a::attr(href)').get()
            # .get() returns None when the row has no matching link;
            # response.follow() cannot build a request from None, so skip it.
            if url is None:
                continue
            yield response.follow(url=url, callback=self.parse_item)

    def parse_item(self, response):
        """Extract the fields of interest from a detail page.

        Args:
            response: Response for a detail page followed from ``parse``.

        Yields:
            A dict with the keys ``hiduke``, ``kaisai``, ``waku``, ``ninnki``,
            ``junni``, ``kisyu``, ``kyori``, ``baba`` and ``syuboba``. A value
            is None when its selector matches nothing.
        """
        # Every field except 'syuboba' is read from the first row of this table.
        first_row = '#contents > div.db_main_race.fc > div > table > tbody > tr:nth-child(1)'

        def cell(suffix):
            """Return the first match of ``suffix`` below the first row, or None."""
            return response.css(f'{first_row} > {suffix}').get()

        yield {
            'hiduke': cell('td:nth-child(1) > a::text'),
            'kaisai': cell('td:nth-child(2) > a::text'),
            'waku': cell('td:nth-child(8)::text'),
            'ninnki': cell('td:nth-child(11)::text'),
            'junni': cell('td + td+ td+ td+ td+ td+ td+ td+ td+ td+ td+ td::text'),
            'kisyu': cell('td:nth-child(13) > a::attr(title)'),
            'kyori': cell('td:nth-child(15)::text'),
            'baba': cell('td:nth-child(16)::text'),

            # The original selector required "table > tbody > tr" and always
            # returned None. Browsers add <tbody> to tables whose markup omits
            # it, so a selector copied from DevTools may not match the raw HTML
            # Scrapy receives. The descendant combinator ("table tr") matches
            # with or without <tbody>.
            # NOTE(review): presumed cause -- confirm with `scrapy shell <url>`
            # or `view(response)` against the real page.
            'syuboba': response.css(
                '#db_main_box > div.db_main_deta > div > div.db_prof_area_02 > div > dl > dd '
                '> table tr:nth-child(1) > td:nth-child(1) > a::attr(title)'
            ).get(),
        }
spider結果
1[ 2{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "6", "ninnki": "7", "junni": "1", "kisyu": "酒井学", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 3{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "7", "ninnki": "16", "junni": "16", "kisyu": "的場勇人", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 4{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "7", "ninnki": "12", "junni": "15", "kisyu": "古川奈穂", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 5{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "2", "ninnki": "10", "junni": "14", "kisyu": "永島まな", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 6{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "6", "ninnki": "14", "junni": "13", "kisyu": "柴山雄一", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 7{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "5", "ninnki": "13", "junni": "12", "kisyu": "川島信二", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 8{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "1", "ninnki": "15", "junni": "11", "kisyu": "水口優也", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 9{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "2", "ninnki": "9", "junni": "10", "kisyu": "松田大作", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 10{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "4", "ninnki": "8", "junni": "9", "kisyu": "川須栄彦", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 11{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "5", "ninnki": "11", "junni": "8", "kisyu": "長岡禎仁", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 12{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "4", "ninnki": "5", "junni": "7", "kisyu": "和田竜二", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 13{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "3", "ninnki": "1", "junni": "6", "kisyu": "松山弘平", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 14{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "3", "ninnki": "3", "junni": "5", "kisyu": "幸英明", "kyori": "ダ1200", "baba": "良", 
"syuboba": null}, 15{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "1", "ninnki": "6", "junni": "4", "kisyu": "亀田温心", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 16{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "8", "ninnki": "4", "junni": "3", "kisyu": "国分優作", "kyori": "ダ1200", "baba": "良", "syuboba": null}, 17{"hiduke": "2022/01/30", "kaisai": "1中京10", "waku": "8", "ninnki": "2", "junni": "2", "kisyu": "西谷凜", "kyori": "ダ1200", "baba": "良", "syuboba": null} 18]

回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2022/02/09 23:38