Pythonを勉強している初学者です。
特定のURLからスクレイピングをしようとしているのですが、時折エラーが出たり出なかったりするのはなぜでしょうか?
ご教示いただけると幸いです。
# Mercari search-result scraper (originally a Jupyter/Colab notebook cell).
#
# NOTE: "!pip install ..." lines are notebook shell magics and are a syntax
# error in a plain .py file — install the packages once from a terminal:
#   pip install selenium requests fake_useragent beautifulsoup4 pandas
# !pip install selenium
# !pip install requests
# !pip install fake_useragent

from selenium import webdriver
from time import sleep
import pandas as pd
import requests
import re
from fake_useragent import UserAgent
from bs4 import BeautifulSoup as bs
import csv
import bs4
import time

# Fixed Mercari search URL (keyword + category/size/condition filters baked in).
url = "https://www.mercari.com/jp/search/?sort_order=&keyword=%E3%83%91%E3%83%A9%E3%83%96%E3%83%BC%E3%83%84&category_root=2&category_child=33&category_grand_child%5B349%5D=1&brand_name=&brand_id=&size_group=2&size_id%5B135%5D=1&size_id%5B136%5D=1&price_min=&price_max=&item_condition_id%5B1%5D=1&item_condition_id%5B2%5D=1&item_condition_id%5B3%5D=1&status_on_sale=1"

browser = webdriver.Chrome()  # typo fixed: was "browesr"
browser.get(url)

ua = UserAgent()
# A fresh random User-Agent every run — NOTE(review): this also makes the
# site's response vary between runs, one likely cause of the intermittent
# errors (some UAs get served different/blocked HTML).
useragent = ua.random


def get_html(page_url):
    """Fetch *page_url* over plain HTTP with the randomized User-Agent.

    Returns the ``requests.Response`` object (callers read ``.content``).
    """
    headers = {"User-Agent": useragent}
    return requests.get(page_url, headers=headers)


# BUG FIX (the NameError in the question): in the original, `res` existed only
# as a local variable inside get_html(), yet the next line used it at module
# level.  It "sometimes worked" only when a leftover `res` from a previously
# executed notebook cell was still alive in the kernel.  Fetch it explicitly:
res = get_html(url)
soup = bs(res.content, "html.parser")

# Scrape the listing cards from the search-result page.
items = soup.findAll(class_="items-box")
output = []
for item in items:
    output.append({
        "title": item.find(class_="items-box-name font-2").text,
        "url": "https://www.mercari.com" + item.find("a").get("href"),
        "picture": item.find("img").get("data-src"),
        "price": item.find(class_="items-box-price font-5").text,
    })
items_df = pd.DataFrame(output)


def detail(detail_sp):
    """Extract one item's fields from a parsed item-detail page soup."""
    return {
        "title": detail_sp.find(class_="item-name").text,
        "url": ("https://www.mercari.com/jp/items/" + detail_sp.find(class_="item-action-text").get("data-key")).replace("id=", ""),
        "place": detail_sp.find(class_="item-price bold").text,
        "pictures": "|".join(
            img.find("img").get("data-src")
            for img in detail_sp.findAll(class_="owl-item-inner")
        ),
        "Description": detail_sp.find(class_="item-description-inner").text,
        "spec_table": spec_table(detail_sp),
    }


def spec_table(detail_sp):
    """Flatten the item-detail table into tab-joined "header|value" pairs."""
    spec_list = []
    table = detail_sp.find(class_="item-detail-table")  # hoisted: was looked up twice
    ths = table.findAll("th")
    tds = table.findAll("td")
    for th, td in zip(ths, tds):
        _td = re.sub(r"\n+", "\n", td.text.strip())
        if td.find("a"):
            _url = td.find("a").get("href")
            if re.search(r"u/\d+", _url):
                # Seller-profile link: append the URL on its own line.
                # (In the original paste this statement was broken across two
                # lines — `_td = _td +` / `"\n" + _url` — a syntax error.)
                _td = _td + "\n" + _url
        spec_list.append("|".join([th.text, _td]))
    return "\t".join(spec_list)


# Fetch the first 30 detail pages and keep the parsed soups.
results = []
for idx, detail_url in enumerate(items_df.url.head(30)):
    detail_res = get_html(detail_url)
    detail_sp = bs(detail_res.content, "html.parser")
    results.append(detail_sp)
    print(f"now {idx} of numbers")
    print(detail_res)
    print(detail_url)
    # Throttle requests — hammering the site is another likely cause of the
    # sporadic failures (rate limiting / temporary blocks).
    time.sleep(5)
回答2件
あなたの回答
tips
プレビュー