※※非常に急を要しております。Python、スクレイピングで取得値が何も得られない。(BeautifulSoup)

前提

Pythonの勉強中の初心者です。
某通販サイトのスクレイピングにトライしています。

実現したいこと

取得したいURLをまとめたcsvファイルを読み込み、1ページずつアクセスし、
該当のソースコード内の関数定義ゾーンで定義した要素を取得し、csvに書き出してエクスポートするのがゴールです。
同時にページ内の画像を、指定したディレクトリにダウンロードします。

発生している問題・エラーメッセージ

VSCやJupyter Labでコードを回しており、エラー文が出力されることなく最後までいきますが、
エクスポートされたcsvには、指定したカラムのヘッダー以外、何も出力されません。
※同時にページ内の画像を指定したディレクトリにダウンロードしてくる処理を書いていますが、これだけは上手く成功しています。

該当のソースコード

import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver.chrome.options import Options
import re
import time
from retrying import retry
import smtplib
from email.mime.text import MIMEText
from email.utils import formatdate
import signal
import sys
from urllib import request
import random
t1 = time.time()

###############################################
#読み込むcsvファイルの名前
in_1 = "test"

#正常に終わった時に出力されるファイルの名前
out_1 = "kansei"

#タイムアウトした時に出力されるファイルの名前
time_out = "time-out"
################################################


########## 関数定義ゾーン ############

@retry(stop_max_attempt_number=100, wait_fixed=3000)
def get_url(a):
    driver.get(a)
    time.sleep(3)
    try:
        driver.find_element_by_css_selector("#nav-hamburger-menu").click()
    except Exception as e:
        pass

    else:
        time.sleep(2)
        html = driver.page_source.encode('utf-8')

        return html

@retry(stop_max_attempt_number=30, wait_fixed=3000)
def get_image_url(a):
    ua = 'Mozilla/5.0～～～省略～～～～'
    headers = {'User-Agent': ua}
    response = requests.get(a,headers=headers)
    image = response.content

    return image

def brand():
    brand_1 = soup.find("td", {"class": "a-size-base prodDetAttrValue"})

    brand_2 = ""
    row_1 = ""

    if not brand_1:
        brand_2 = soup.find("a", {"id": "bylineInfo"})
        brand_2_text = brand_2.text
        row_1 = brand_2_text

    else:
        brand_1_text = brand_1.text
        brand_1_text_trim = brand_1_text.replace("&lrm;","")
        row_1 = brand_1_text_trim
    
    return row_1


def name():
    name_1 = soup.find("span", {"id": "productTitle"})

    if not name_1:
        name_title_1_2 = ""

    else: 
        name_title = name_1.text
        name_title_1 = name_title.replace(" ","")
        name_title_1_2 = name_title_1.replace("\n\t","")

    return name_title_1_2

def price_original():
    price_original_1 = soup.find("span", {"class": "a-size-medium a-color-price"})

    if not price_original_1:
        price_original_title_2 = ""

    else: 
        price_original_title = price_original_1.text
        price_original_title_1 = price_original_title.replace(" ","")
        price_original_title_2 = price_original_title_1.replace("\n\t","")

    return price_original_title_2

def product_explanation():
    product_explanation_1 = soup.find("div", {"class": "a-section a-spacing-top-small"})

    if not product_explanation_1:
        product_explanation_2 = ""

    else: 
        product_explanation_1_text = product_explanation_1.text
        product_explanation_2 = product_explanation_1_text.find_all("li")
   
    return product_explanation_2

def Product_description():
    Product_description_1 = soup.find("div", {"id": "productDescription"})
    Product_description_2 = Product_description_1.find("p")

    if not Product_description_2:
        Product_description_2_text = ""

    else: 
        Product_description_2_text = Product_description_2.text
   
    return Product_description_2_text

def technical_details():
    technical_details_1 = soup.find("table", {"id": "productDetails_techSpec_section_1"})

    if not technical_details_1:
        technical_details_2 = ""

    else:
        technical_details_1_text = technical_details_1.text
        technical_details_2 = technical_details_1_text.replace("&lrm;","")
    
    return technical_details_2


def asin():
    asin_1 = soup.find("td", {"class": "a-size-base prodDetAttrValue"})

    if not asin_1:
        asin_2 = ""

    else:
        asin_1_text = asin_1.text
        asin_2 = asin_1_text.replace("&lrm;","")
    
    return asin_2


def image_list():
    count_2 = ""
    count_3 = 0

    image_list = soup.find("ul", {"class":"a-unordered-list a-nostyle a-button-list a-vertical a-spacing-top-micro regularAltImageViewLayout"})

    if not image_list:
        image_lists = ""
    else:
        image_lists = image_list.find_all("li", {"class":"a-spacing-small item imageThumbnail a-declarative"})

    for i in image_lists:
        if ("hidden" in str(i)) == False:
            count = i.find("img")
            count_1 = count["src"]
            count_2 = count_2 + "\n" + count_1
            count_3 = count_3 + 1
    return [count_2, count_3]

def foo(sig, frame):
    x = input("""
Ctrl-Cが押されました。一時停止中です。続行にはEnterを押してください。
プログラム終了する場合は、q を入力：""")
    if x == "q":
        #一旦停止された時に出力されるファイル
        filename = "stop_1_{}.csv".format(time_out)
        with open(filename, mode="w", encoding="cp932", errors="ignore") as f:
          df.to_csv(f)
          t2 = time.time()
          t = t2 - t1
          print(t)
        sys.exit()

########## 関数定義ゾーン_終わり ############

driver_path = "PCのドライバーのパス"
csv_path = "読み込むCSVのパス"
image_path = "画像DLの吐き出しディレクトリ"

columns = ["brand","Name","price_original","product_explanation","Product_description","Technical_Details","asin"]
df = pd.DataFrame(columns=columns)

options = Options()
options.set_headless((False))
options.add_argument('--user-agent=Mozilla/5.0～～～省略～～～～')

driver = webdriver.Chrome("{}".format(driver_path),options=options)

base = "https://www.○○○○.co.jp"

# csvファイルの読み込み
csv_file = open("{}\\{}.csv".format(csv_path,in_1), "r", encoding='utf-8-sig', errors="", newline="" )
f = csv.reader(csv_file, delimiter=",", doublequote=True, lineterminator="\r\n", quotechar='"', skipinitialspace=True)

signal.signal(signal.SIGINT,foo)

########## 処理スタート ############
try:

    for row in f:
        try:
            a = row[0]
            url = base + a 

            html = get_url(url)
            soup = BeautifulSoup(html, "html.parser")

            product_code_title_1 = url.split("/")
            product_code_title_2 = url.split("/")
            pcn = 0
            for row_2 in product_code_title_1:
                if row_2 == "dp":
                    product_code_title_3 = product_code_title_2[pcn + 1]
                pcn = pcn + 1
            print(product_code_title_3)

            count_2 = ""
            count_6 = 0
            p = 1

            image_list = soup.find("div", {"id":"altImages"})
            if not image_list:
                image_lists = ""
            else:
                image_lists = image_list.find_all("li", {"class":"a-spacing-small item imageThumbnail a-declarative"})

            for i in image_lists:
                if ("play" in str(i)) == False:
                    count = i.find("img")
                    count_1 = count["src"]
                    count_2 = count_1.replace("38","800")
                    count_3 = count_2.replace("50","800")
                    count_4 = count_3.replace("40","800")
                    count_5 = count_4.replace("40","800")
                    if p <= 5:
                        image = get_image_url(count_5)
                        save_path = "{}\\".format(image_path) + product_code_title_3.lower() + "_{}.jpg".format(p)

                        with open(save_path, "wb") as aaa:
                            aaa.write(image)
                    else:
                        break
                    
                    time.sleep(1)
                    count_6 = count_6 + 1
                    p = p + 1

            brand_title = brand()

            name_title = name()

            price_original_title = price_original()

            product_explanation_title = product_explanation()

            product_description_title = product_description()

            technical_details_title = technical_details()

            asin_title = asin()

            se = pd.Series([brand_title,name_title, price_original_title, product_explanation_title, product_description_title, technical_details_title, asin_title], columns)

            df = df.append(se,columns)
            time.sleep(random.randint(2,5))

        except Exception as e:
            pass

except Exception as e:
#タイムアウトしたときに出力されるファイル
    filename = "{}.csv".format(time_out)
    with open(filename, mode="w", encoding="cp932", errors="ignore") as f:
        df.to_csv(f)
        t2 = time.time()
        t = t2 - t1
        print(t)
    driver.quit()

else:
#正常に終わった時に出力されるファイル
    filename = "{}.csv".format(out_1)
    with open(filename, mode="w", encoding="cp932", errors="ignore") as f:
        df.to_csv(f)
        t2 = time.time()
        t = t2 - t1
        print(t)
    driver.quit()

試したこと

来週までにこちらを完成させなければならない為、この度初めてこちらに投稿をさせていただきました。
こちらのサイトは以前より参考にさせて頂いております。

細かな部分を修正しながら、既に何百回と試していますが、原因が全く分からなくなってしまいました。
助けて頂けませんでしょうか。

何卒、ご指導のほどを宜しくお願いいたします。

Zuishin

2022/11/04 02:51 編集

■□■□■□■□■□■□■□■□■□■□■ 【重要】※※勉強に非常に急を要するんですか？※※【至急読んでください】 ◆ ※※効率的に勉強するなら、このような応用から始めるのではなく、まず基礎を固めるのが良いと思います。※※ ◆ ※※結果的にそれが近道になります。※※ ■□■□■□■□■□■□■□■□■□■□■

int32_t

2022/11/04 02:51

except Exception as e: pass こんなことしてたら例外が出ても気づかないですよ。とりあえずステップ実行などしてどこに問題があるか突き止めましょう。

Kei4414

2022/11/04 03:04

ご返信いただき、ありがとうございます。また、ご指摘いただきまして、感謝申し上げます。色々なサイトを閲覧し見様見まねで書いたものも多く、その処理の意味を理解し切れておりませんでした。反省したうえ、基礎をしっかりと固める努力をします。

m.ts10806

2022/11/04 03:20

本当に急ぎなら業者に依頼されたほうが良いかと。「急ぎ」って書いたからと作業依頼に応えてもらえる場所ではないです。そもそもスクレイピング許可とれてるのか？というところから突っ込まれる場所です。

退会済みユーザー

2022/11/04 06:12 編集

スクレイピングは、サイトの管理者に許可を得ているとか、規約に従って専用の API にアクセスしているとかでなければ、迷惑行為になるかもしれないということは認識してますか？クローラーを作って某図書館サイトにアクセスしたら業務妨害とかで逮捕された事例もありますので、甘く見ない方がいいと思います。逮捕までいかなくても、帯域ごとアクセスを遮断され、あなたのやったことで多数利用者が巻き添えを喰らうことになるかも。