編集履歴

質問編集履歴

解決しました

2022/07/03 10:28

投稿

OK_13ros

スコア1

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -9,172 +9,3 @@
 このようなサイトから値をうまく取得する方法を、
 ご教示いただきたく思います。
-from cgitb import html
-from selenium import webdriver
-from selenium.webdriver.chrome import service
-from selenium.webdriver.chrome.options import Options
-from time import sleep
-from bs4 import BeautifulSoup
-import csv
-#最初のページ
-url = "http://www.jasdec.com/reading/sbmei.php"
-# ヘッドレスモードの設定。
-# True => ブラウザを描写しない。
-# False => ブラウザを描写する。
-options = Options()
-options.add_argument('--headless')
-#ChromeDriverのパスを変数に設定
-ChromeDriver = r"C:\Users\k-saito\Downloads\chromedriver_win32\chromedriver.exe"
-#ChromeDriverのstartとstopを制御するServiceオブジェクトを介してパスを渡す
-chrome_service = service.Service(executable_path=ChromeDriver)
-#Chromeを起動
-driver = webdriver.Chrome(service=chrome_service)
-#保振のサイトに遷移
-driver.get(url)
-#カレントウインドウを最大化する
-driver.maximize_window()
-#テキストボックスを指定
-ISINcode = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/form/table/tbody/tr[1]/td[2]/input")
-#テキストボックスに入力
-ISINcode.send_keys("")
-#テキストボックスを指定
-meigara_name = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/form/table/tbody/tr[2]/td[2]/input")
-#銘柄の正式名称に入力
-meigara_name.send_keys("")
-#検索ボタンを指定
-search_btn = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[4]/tbody/tr/td/input[2]")
-#検索ボタンを押して検索を実行
-search_btn.click()
-#ページ読み込み待機
-sleep(10)
-#変数名
-i = 2
-count = 0 #画面遷移用
-allcount = 0 #処理回数を数える用
-HEADER = [
-  '銘柄の正式名称', '銘柄略称', 'ISINコード', '発行代理人','支払代理人','発行通貨','社債の総額',
-  '各社債の金額','方式','利付割引区分','利払通貨','利払日','償還通貨','償還日','「各社債の金額」あたりの償還プレミアム',
-  '払込日','利払期日(今回)','利率(今回)','1通貨あたりの利子額(今回)','利払期日(次回)','利率(次回)','1通貨あたりの利子額(次回)'
-  '利払期日(最終回)','利率(最終回)','1通貨あたりの利子額(最終回)','社債管理者等'
-] #ヘッダー部分
-#出力先
-with open(r'\\svr-kyo4\全社共有\3_6000_新事業推進部\商品企画室\98.個人作業用フォルダ\saito\Pythonテスト\Scraping\test.csv', 'w', encoding='utf-8',newline='') as file:
-  writer = csv.writer(file)
-  #ヘッダーの出力
-  writer.writerow(HEADER)
-  #繰り返し処理(練習で30回繰り返し)
-  for ch in range(30):
-    #検索先のhtmlを取得
-    html = driver.page_source
-    #soupオブジェクトを作る
-    soup = BeautifulSoup(html, 'html.parser')
-    #一つ一つ選択(今試してる)
-    meisyo = soup.find_all('span',class_='hy')[1].get_text().strip() #銘柄の正式名称
-    ryakusyo = soup.find_all('span',class_='hy')[3].get_text().strip() #銘柄略称
-    ISINcode = soup.find_all('span',class_='hy')[5].get_text().strip() #ISINコード
-    issue_agent = soup.find_all('span',class_='hy')[7].get_text().strip() #発行代理人
-    paying_agent = soup.find_all('span',class_='hy')[9].get_text().strip() #支払代理人
-    issue_currency = soup.find_all('span',class_='hy')[11].get_text().strip() #発行通貨
-    total_amount = soup.find_all('span',class_='hy')[13].get_text().strip() #社債の総額
-    amount_each_bond = soup.find_all('span',class_='hy')[15].get_text().strip() #各社債の金額
-    system = soup.find_all('span',class_='hy')[16].get_text().strip() #方式
-    interest_bearing_division = soup.find_all('span',class_='hy')[18].get_text().strip() #利付割引区分
-    interest_bearing_currency = soup.find_all('span',class_='hy')[20].get_text().strip() #利払通貨
-    pay_date1 = soup.find_all('span',class_='hy')[22].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    pay_date2 = soup.find_all('span',class_='hy')[23].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    pay_date3 = soup.find_all('span',class_='hy')[24].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    pay_date4 = soup.find_all('span',class_='hy')[25].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    pay_date5 = soup.find_all('span',class_='hy')[26].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    pay_date6 = soup.find_all('span',class_='hy')[27].get_text().strip() #利払日　分割されて入力欄があるため要変更
-    redemption_Currency = soup.find_all('span',class_='hy')[35].get_text().strip() #償還通貨
-    redemption_date = soup.find_all('span',class_='hy')[37].get_text().strip() #償還日
-    premium = soup.find_all('span',class_='hy')[38].get_text().strip() #「各社債の金額」あたりの償還プレミアム　値が入ってる状態で要確認
-    date_of_payment = soup.find_all('span',class_='hy')[40].get_text().strip() #払込日
-    interest_payment_date_now = soup.find_all('span',class_='hy')[42].get_text().strip() #利払期日(今回)
-    interest_rate_now = soup.find_all('span',class_='hy')[46].get_text().strip() #利率(今回)
-    interest_per_currency_now = soup.find_all('span',class_='hy')[50].get_text().strip() #1通貨あたりの利子額(今回)
-    interest_payment_date_next = soup.find_all('span',class_='hy')[44].get_text().strip() #利払期日(次回) 値が入ってる状態で要確認
-    interest_rate_next = soup.find_all('span',class_='hy')[48].get_text().strip() #利率(次回) 値が入ってる状態で要確認
-    interest_per_currency_next = soup.find_all('span',class_='hy')[52].get_text().strip() #1通貨あたりの利子額(次回) 値が入ってる状態で要確認
-    final_payment_date = soup.find_all('span',class_='hy')[54].get_text().strip() #最終償還期日
-    interest_rate_last = soup.find_all('span',class_='hy')[56].get_text().strip() #利率(最終回)
-    interest_per_currency_last = soup.find_all('span',class_='hy')[58].get_text().strip() #1通貨あたりの利子額(最終回)
-    social_bond_manager1 = soup.find_all('span',class_='hy')[60].get_text().strip() #社債管理者等
-    social_bond_manager2 = soup.find_all('span',class_='hy')[61].get_text().strip() #社債管理者等
-    social_bond_manager3 = soup.find_all('span',class_='hy')[62].get_text().strip() #社債管理者等
-    social_bond_manager4 = soup.find_all('span',class_='hy')[63].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager5 = soup.find_all('span',class_='hy')[64].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager6 = soup.find_all('span',class_='hy')[65].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager7 = soup.find_all('span',class_='hy')[66].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager8 = soup.find_all('span',class_='hy')[67].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager9 = soup.find_all('span',class_='hy')[68].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    social_bond_manager10 = soup.find_all('span',class_='hy')[69].get_text().strip() #社債管理者等 値が入ってる状態で要確認
-    row = [
-      meisyo,ryakusyo,ISINcode,issue_agent,paying_agent,issue_currency,total_amount,amount_each_bond,
-      system,interest_bearing_division,interest_bearing_currency,redemption_date,premium,date_of_payment,
-      interest_payment_date_now,interest_rate_now,interest_per_currency_now,interest_payment_date_next,
-      interest_rate_next,interest_per_currency_next,final_payment_date,interest_rate_last,interest_per_currency_last,
-      social_bond_manager1,social_bond_manager2,social_bond_manager3,social_bond_manager4,social_bond_manager5,
-      social_bond_manager6,social_bond_manager7,social_bond_manager8,social_bond_manager9,social_bond_manager10
-    ]
-    writer.writerow(row)
-    #ページ読み込み待機
-    sleep(8)
-    if count < 19:
-      #この処理で1-20までは処理できる(次へボタンが12なので押せない)
-      #ボタンを指定
-      next_btn = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/a[" +str(i) +"]")
-      #ボタンを押して次ページに進む
-      next_btn.click()
-    if count == 19:
-      #次へボタン(12)専用
-      #ボタンを指定
-      next_btn1 = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/a[12]")
-      #ボタンを押して次ページに進む
-      next_btn1.click()
-      #countを10にリセットする(これで2回目以降の次へボタンに対応)
-      count = 10
-    #ページ読み込み待機
-    sleep(8)
-    #処理したら+1
-    i += 1
-    count += 1
-    allcount += 1
-    #iが12になったらリセット
-    if i == 12:
-      i = 3
-  k = 0
-  #出力用(処理回数の分だけ繰り返す)
-  for out in range(allcount):
-    print (k)
-    k += 1
-driver.quit()

スクレイピング Anaconda Python

コードを掲載しました

2022/07/03 09:17

投稿

OK_13ros

スコア1

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -1,6 +1,4 @@
-```ここに言語を入力
-コード
-```スクレイピング初心者です。
+スクレイピング初心者です。
 PythonにてWebスクレイピングを練習し、
 練習として、

スクレイピング Anaconda Python

コードを載せさせていただきました

2022/07/03 09:15

投稿

OK_13ros

スコア1

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -1,4 +1,6 @@
+```ここに言語を入力
+コード
-スクレイピング初心者です。
+```スクレイピング初心者です。
 PythonにてWebスクレイピングを練習し、
 練習として、
@@ -10,3 +12,171 @@
 このようなサイトから値をうまく取得する方法を、
 ご教示いただきたく思います。
+from cgitb import html
+from selenium import webdriver
+from selenium.webdriver.chrome import service
+from selenium.webdriver.chrome.options import Options
+from time import sleep
+from bs4 import BeautifulSoup
+import csv
+#最初のページ
+url = "http://www.jasdec.com/reading/sbmei.php"
+# ヘッドレスモードの設定。
+# True => ブラウザを描写しない。
+# False => ブラウザを描写する。
+options = Options()
+options.add_argument('--headless')
+#ChromeDriverのパスを変数に設定
+ChromeDriver = r"C:\Users\k-saito\Downloads\chromedriver_win32\chromedriver.exe"
+#ChromeDriverのstartとstopを制御するServiceオブジェクトを介してパスを渡す
+chrome_service = service.Service(executable_path=ChromeDriver)
+#Chromeを起動
+driver = webdriver.Chrome(service=chrome_service)
+#保振のサイトに遷移
+driver.get(url)
+#カレントウインドウを最大化する
+driver.maximize_window()
+#テキストボックスを指定
+ISINcode = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/form/table/tbody/tr[1]/td[2]/input")
+#テキストボックスに入力
+ISINcode.send_keys("")
+#テキストボックスを指定
+meigara_name = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/form/table/tbody/tr[2]/td[2]/input")
+#銘柄の正式名称に入力
+meigara_name.send_keys("")
+#検索ボタンを指定
+search_btn = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[4]/tbody/tr/td/input[2]")
+#検索ボタンを押して検索を実行
+search_btn.click()
+#ページ読み込み待機
+sleep(10)
+#変数名
+i = 2
+count = 0 #画面遷移用
+allcount = 0 #処理回数を数える用
+HEADER = [
+  '銘柄の正式名称', '銘柄略称', 'ISINコード', '発行代理人','支払代理人','発行通貨','社債の総額',
+  '各社債の金額','方式','利付割引区分','利払通貨','利払日','償還通貨','償還日','「各社債の金額」あたりの償還プレミアム',
+  '払込日','利払期日(今回)','利率(今回)','1通貨あたりの利子額(今回)','利払期日(次回)','利率(次回)','1通貨あたりの利子額(次回)'
+  '利払期日(最終回)','利率(最終回)','1通貨あたりの利子額(最終回)','社債管理者等'
+] #ヘッダー部分
+#出力先
+with open(r'\\svr-kyo4\全社共有\3_6000_新事業推進部\商品企画室\98.個人作業用フォルダ\saito\Pythonテスト\Scraping\test.csv', 'w', encoding='utf-8',newline='') as file:
+  writer = csv.writer(file)
+  #ヘッダーの出力
+  writer.writerow(HEADER)
+  #繰り返し処理(練習で30回繰り返し)
+  for ch in range(30):
+    #検索先のhtmlを取得
+    html = driver.page_source
+    #soupオブジェクトを作る
+    soup = BeautifulSoup(html, 'html.parser')
+    #一つ一つ選択(今試してる)
+    meisyo = soup.find_all('span',class_='hy')[1].get_text().strip() #銘柄の正式名称
+    ryakusyo = soup.find_all('span',class_='hy')[3].get_text().strip() #銘柄略称
+    ISINcode = soup.find_all('span',class_='hy')[5].get_text().strip() #ISINコード
+    issue_agent = soup.find_all('span',class_='hy')[7].get_text().strip() #発行代理人
+    paying_agent = soup.find_all('span',class_='hy')[9].get_text().strip() #支払代理人
+    issue_currency = soup.find_all('span',class_='hy')[11].get_text().strip() #発行通貨
+    total_amount = soup.find_all('span',class_='hy')[13].get_text().strip() #社債の総額
+    amount_each_bond = soup.find_all('span',class_='hy')[15].get_text().strip() #各社債の金額
+    system = soup.find_all('span',class_='hy')[16].get_text().strip() #方式
+    interest_bearing_division = soup.find_all('span',class_='hy')[18].get_text().strip() #利付割引区分
+    interest_bearing_currency = soup.find_all('span',class_='hy')[20].get_text().strip() #利払通貨
+    pay_date1 = soup.find_all('span',class_='hy')[22].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    pay_date2 = soup.find_all('span',class_='hy')[23].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    pay_date3 = soup.find_all('span',class_='hy')[24].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    pay_date4 = soup.find_all('span',class_='hy')[25].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    pay_date5 = soup.find_all('span',class_='hy')[26].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    pay_date6 = soup.find_all('span',class_='hy')[27].get_text().strip() #利払日　分割されて入力欄があるため要変更
+    redemption_Currency = soup.find_all('span',class_='hy')[35].get_text().strip() #償還通貨
+    redemption_date = soup.find_all('span',class_='hy')[37].get_text().strip() #償還日
+    premium = soup.find_all('span',class_='hy')[38].get_text().strip() #「各社債の金額」あたりの償還プレミアム　値が入ってる状態で要確認
+    date_of_payment = soup.find_all('span',class_='hy')[40].get_text().strip() #払込日
+    interest_payment_date_now = soup.find_all('span',class_='hy')[42].get_text().strip() #利払期日(今回)
+    interest_rate_now = soup.find_all('span',class_='hy')[46].get_text().strip() #利率(今回)
+    interest_per_currency_now = soup.find_all('span',class_='hy')[50].get_text().strip() #1通貨あたりの利子額(今回)
+    interest_payment_date_next = soup.find_all('span',class_='hy')[44].get_text().strip() #利払期日(次回) 値が入ってる状態で要確認
+    interest_rate_next = soup.find_all('span',class_='hy')[48].get_text().strip() #利率(次回) 値が入ってる状態で要確認
+    interest_per_currency_next = soup.find_all('span',class_='hy')[52].get_text().strip() #1通貨あたりの利子額(次回) 値が入ってる状態で要確認
+    final_payment_date = soup.find_all('span',class_='hy')[54].get_text().strip() #最終償還期日
+    interest_rate_last = soup.find_all('span',class_='hy')[56].get_text().strip() #利率(最終回)
+    interest_per_currency_last = soup.find_all('span',class_='hy')[58].get_text().strip() #1通貨あたりの利子額(最終回)
+    social_bond_manager1 = soup.find_all('span',class_='hy')[60].get_text().strip() #社債管理者等
+    social_bond_manager2 = soup.find_all('span',class_='hy')[61].get_text().strip() #社債管理者等
+    social_bond_manager3 = soup.find_all('span',class_='hy')[62].get_text().strip() #社債管理者等
+    social_bond_manager4 = soup.find_all('span',class_='hy')[63].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager5 = soup.find_all('span',class_='hy')[64].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager6 = soup.find_all('span',class_='hy')[65].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager7 = soup.find_all('span',class_='hy')[66].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager8 = soup.find_all('span',class_='hy')[67].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager9 = soup.find_all('span',class_='hy')[68].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    social_bond_manager10 = soup.find_all('span',class_='hy')[69].get_text().strip() #社債管理者等 値が入ってる状態で要確認
+    row = [
+      meisyo,ryakusyo,ISINcode,issue_agent,paying_agent,issue_currency,total_amount,amount_each_bond,
+      system,interest_bearing_division,interest_bearing_currency,redemption_date,premium,date_of_payment,
+      interest_payment_date_now,interest_rate_now,interest_per_currency_now,interest_payment_date_next,
+      interest_rate_next,interest_per_currency_next,final_payment_date,interest_rate_last,interest_per_currency_last,
+      social_bond_manager1,social_bond_manager2,social_bond_manager3,social_bond_manager4,social_bond_manager5,
+      social_bond_manager6,social_bond_manager7,social_bond_manager8,social_bond_manager9,social_bond_manager10
+    ]
+    writer.writerow(row)
+    #ページ読み込み待機
+    sleep(8)
+    if count < 19:
+      #この処理で1-20までは処理できる(次へボタンが12なので押せない)
+      #ボタンを指定
+      next_btn = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/a[" +str(i) +"]")
+      #ボタンを押して次ページに進む
+      next_btn.click()
+    if count == 19:
+      #次へボタン(12)専用
+      #ボタンを指定
+      next_btn1 = driver.find_element_by_xpath(r"/html/body/div[3]/div/div/div/div/div/div[1]/div[2]/table/tbody/tr/td/font/table[3]/tbody/tr/td/a[12]")
+      #ボタンを押して次ページに進む
+      next_btn1.click()
+      #countを10にリセットする(これで2回目以降の次へボタンに対応)
+      count = 10
+    #ページ読み込み待機
+    sleep(8)
+    #処理したら+1
+    i += 1
+    count += 1
+    allcount += 1
+    #iが12になったらリセット
+    if i == 12:
+      i = 3
+  k = 0
+  #出力用(処理回数の分だけ繰り返す)
+  for out in range(allcount):
+    print (k)
+    k += 1
+driver.quit()

スクレイピング Anaconda Python