selenium,beautifulsoupにて、取得出来ない値があり、質問させて頂きました。
以下のコードを実行しますと、企業の詳細リンクを取得することが出来るのですが、一部取得できない部分があります。
# coding: UTF-8 from django.core.management.base import BaseCommand import requests, requests_cache from random import randint import re, os, math, datetime, codecs from pprint import pprint from bs4 import BeautifulSoup from time import sleep from urllib.parse import urljoin import re from ...models import Company from selenium import webdriver from selenium.webdriver.support.ui import Select import time def get_response(targetURL, waitsec_from=2, waitsec_to=5): headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0'} response = requests.get(targetURL, headers=headers) # print(response.status_code) msg_headder = "from cache" if not response.from_cache: s = randint(waitsec_from,waitsec_to) sleep(s) msg_headder = 'random sleep ' + str(s) + 'sec' print(str(datetime.datetime.now()) + ',' + msg_headder + ',' + response.url, file=codecs.open('requests.log', 'a', 'utf-8')) return response # BaseCommandを継承して作成 class Command(BaseCommand): # コマンドが実行された際に呼ばれるメソッド def handle(self, *args, **options): BASE_URL = 'https://job.mynavi.jp' SEARCH_BASE_URL = BASE_URL + '/21/pc/toppage/displayTopPage/index' requests_cache.install_cache('mynavi_cache', backend='sqlite', expire_after=60*120) try: driver = webdriver.Chrome() driver.get("https://job.mynavi.jp//21/pc/toppage/displayTopPage/index") time.sleep(3) select_button = driver.find_element_by_id("OC") time.sleep(3) select_elem = Select(select_button) select_elem.select_by_value('OC:293,294,340,350,360,370,380,390,400,410') driver.find_element_by_xpath("//input[@class='btn-search-01']").click() driver.find_element_by_xpath("//input[@id='industryGroupInfoArray_6']").click() driver.find_element_by_xpath("//input[@id='srchNarrowDownButton']").click() search_url = driver.current_url # source = driver.page_source # print(source) except Exception as e: print(e) soup = BeautifulSoup(get_response(search_url).content,'lxml') # print(type(soup)) # exit() company_list = soup.select('.boxSearchresultEach h3 a') for company_link_list in company_list: # print(company_link_list) company_link = company_link_list.attrs['href'] company_detail_link = BASE_URL + company_link print('ok') print(company_detail_link) time.sleep(35)
実行結果は以下になります
ok https://job.mynavi.jp/21/pc/search/corp74882/employment.html ok https://job.mynavi.jp/21/pc/search/corp99702/employment.html ok https://job.mynavi.jp/21/pc/search/corp54899/employment.html ok https://job.mynavi.jp/21/pc/search/corp98907/employment.html ok https://job.mynavi.jp/21/pc/search/corp95820/employment.html ok https://job.mynavi.jp/21/pc/search/corp38962/employment.html ok https://job.mynavi.jp/21/pc/search/corp91150/employment.html ok https://job.mynavi.jp/21/pc/search/corp63613/employment.html ok https://job.mynavi.jp/21/pc/search/corp102493/employment.html ok https://job.mynavi.jp/21/pc/search/corp68653/employment.html ok https://job.mynavi.jp/21/pc/search/corp212087/employment.html ok https://job.mynavi.jp/21/pc/search/corp64526/employment.html ok https://job.mynavi.jp/21/pc/search/corp107245/employment.html ok https://job.mynavi.jp/21/pc/search/corp50828/employment.html ok https://job.mynavi.jp/21/pc/search/corp74232/employment.html ok https://job.mynavi.jp/21/pc/search/corp112066/employment.html ok https://job.mynavi.jp/21/pc/search/corp81394/employment.html ok https://job.mynavi.jp/21/pc/search/corp55786/employment.html ok https://job.mynavi.jp/21/pc/search/corp62347/employment.html ok https://job.mynavi.jp/21/pc/search/corp63229/employment.html ok https://job.mynavi.jp/21/pc/search/corp72466/employment.html ok https://job.mynavi.jp/21/pc/search/corp74170/employment.html ok https://job.mynavi.jp/21/pc/search/corp74102/employment.html ok https://job.mynavi.jp/21/pc/search/corp58662/employment.html ok https://job.mynavi.jp/21/pc/search/corp65810/employment.html ok https://job.mynavi.jp/21/pc/search/corp81532/employment.html ok https://job.mynavi.jp/21/pc/search/corp50857/employment.html ok https://job.mynavi.jp/21/pc/search/corp64785/employment.html ok https://job.mynavi.jp/21/pc/search/corp233673/employment.html ok https://job.mynavi.jp/21/pc/search/corp51702/employment.html ok https://job.mynavi.jp/21/pc/search/corp75032/employment.html ok https://job.mynavi.jp/21/pc/search/corp59056/employment.html ok https://job.mynavi.jp/21/pc/search/corp70316/employment.html ok https://job.mynavi.jp/21/pc/search/corp70580/employment.html ok https://job.mynavi.jp/21/pc/search/corp106270/employment.html ok https://job.mynavi.jp/21/pc/search/corp90130/employment.html ok https://job.mynavi.jp/21/pc/search/corp203149/employment.html ok https://job.mynavi.jp/21/pc/search/corp100226/employment.html ok https://job.mynavi.jp/21/pc/search/corp5931/employment.html ok https://job.mynavi.jp/21/pc/search/corp242011/employment.html ok https://job.mynavi.jp/21/pc/search/corp104537/employment.html ok https://job.mynavi.jp/21/pc/search/corp111936/employment.html ok https://job.mynavi.jp/21/pc/search/corp75962/employment.html ok https://job.mynavi.jp/21/pc/search/corp223271/employment.html ok https://job.mynavi.jp/21/pc/search/corp68756/employment.html ok https://job.mynavi.jp/21/pc/search/corp212784/employment.html ok https://job.mynavi.jp/21/pc/search/corp224182/employment.html ok https://job.mynavi.jp/21/pc/search/corp113021/employment.html ok https://job.mynavi.jp/21/pc/search/corp91559/employment.html ok https://job.mynavi.jp/21/pc/search/corp208468/employment.html ok https://job.mynavi.jp/21/pc/search/corp56587/employment.html ok https://job.mynavi.jp/21/pc/search/corp3186/employment.html ok https://job.mynavi.jp/21/pc/search/corp76596/employment.html ok https://job.mynavi.jp/21/pc/search/corp243785/employment.html ok https://job.mynavi.jp/21/pc/search/corp88089/employment.html ok https://job.mynavi.jp/21/pc/search/corp1626/employment.html ok https://job.mynavi.jp/21/pc/search/corp72320/employment.html ok https://job.mynavi.jp/21/pc/search/corp68141/employment.html ok https://job.mynavi.jp/21/pc/search/corp85586/employment.html ok https://job.mynavi.jp/21/pc/search/corp70537/employment.html ok https://job.mynavi.jp/21/pc/search/corp201658/employment.html ok https://job.mynavi.jp/21/pc/search/corp564/employment.html ok https://job.mynavi.jp/21/pc/search/corp87218/employment.html ok https://job.mynavi.jp/21/pc/search/corp492/employment.html ok https://job.mynavi.jp/21/pc/search/corp66408/employment.html ok https://job.mynavi.jp/21/pc/search/corp64963/employment.html ok https://job.mynavi.jp/21/pc/search/corp201655/employment.html ok https://job.mynavi.jp/21/pc/search/corp112048/employment.html ok https://job.mynavi.jp/21/pc/search/corp207237/employment.html ok https://job.mynavi.jp/21/pc/search/corp207542/employment.html ok https://job.mynavi.jp/21/pc/search/corp96661/employment.html ok https://job.mynavi.jp/21/pc/search/corp208646/employment.html ok https://job.mynavi.jp/21/pc/search/corp88148/employment.html ok https://job.mynavi.jp/21/pc/search/corp65990/employment.html ok https://job.mynavi.jp/21/pc/search/corp73290/employment.html ok https://job.mynavi.jp/21/pc/search/corp63265/employment.html ok https://job.mynavi.jp/21/pc/search/corp202579/employment.html ok https://job.mynavi.jp/21/pc/search/corp54422/employment.html ok https://job.mynavi.jp/21/pc/search/corp83336/employment.html ok https://job.mynavi.jp/21/pc/search/corp69054/employment.html ok https://job.mynavi.jp/21/pc/search/corp206033/employment.html ok https://job.mynavi.jp/21/pc/search/corp80067/employment.html ok https://job.mynavi.jp/21/pc/search/corp52230/employment.html ok https://job.mynavi.jp/21/pc/search/corp203901/employment.html ok https://job.mynavi.jp/21/pc/search/corp70125/employment.html ok https://job.mynavi.jp/21/pc/search/corp203993/employment.html ok https://job.mynavi.jp/21/pc/search/corp79699/employment.html ok https://job.mynavi.jp/21/pc/search/corp92355/employment.html ok https://job.mynavi.jp/21/pc/search/corp210341/employment.html ok https://job.mynavi.jp/21/pc/search/corp1469/employment.html ok https://job.mynavi.jp/21/pc/search/corp103756/employment.html ok https://job.mynavi.jp/21/pc/search/corp102887/employment.html ok https://job.mynavi.jp/21/pc/search/corp88947/employment.html ok https://job.mynavi.jp/21/pc/search/corp73340/employment.html ok https://job.mynavi.jp/21/pc/search/corp63660/employment.html ok https://job.mynavi.jp/21/pc/search/corp102084/employment.html ok https://job.mynavi.jp/21/pc/search/corp81145/employment.html ok https://job.mynavi.jp/21/pc/search/corp209900/employment.html ok https://job.mynavi.jp/21/pc/search/corp86969/employment.html ok https://job.mynavi.jp/21/pc/search/corp14182/employment.html
取得できない値は、例えば「/21/pc/search/corp89443/employment.html」こちらのリンクや、「/21/pc/search/corp75427/employment.html」こちらのリンクが取得できません。
こちらの問題の解決の為、ご助言頂けましたら幸いです。
追記です
該当のスクレイピング対象のURLはこちらになります。
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。