スクレイピングで取得した情報をDBに保存する記述を教えていただきたいです。
ruby
1require 'nokogiri' 2require 'anemone' 3require 'pry-byebug' 4require 'uri' 5 6URL = 'https://******************* 7 8html = URI.encode_www_form_component(URL) 9 10area_urls = [] 11prefecture_urls = [] 12city_urls = [] 13 14Anemone.crawl(html, depth_limit: 0, delay: 3) do |anemone| 15 anemone.focus_crawl do |page| 16 page.links.keep_if do |link| 17 link.to_s.match(%r{************/[0-9]{1,2}}) 18 end 19 page.links.each do |link| 20 area_urls << link 21 end 22 end 23end 24 25area_urls.each do |area| 26 Anemone.crawl(area, depth_limit: 0, delay: 3) do |anemone| 27 anemone.focus_crawl do |page| 28 page.links.keep_if do |link| 29 link.to_s.match(%r{***********/[0-9]{1,2}/[0-9]{5}}) 30 end 31 page.links.each do |link| 32 prefecture_urls << link 33 end 34 end 35 end 36end 37 38prefecture_urls.each do |prefecture| 39 Anemone.crawl(prefecture, depth_limit: 1, delay: 3, skip_query_strings: true) do |anemone| 40 anemone.focus_crawl do |page| 41 page.lins.keep_if do |link| 42 link.to_s.match(%r{**************/[0-9]{1,2}/[0-9]{5}/[0-9]}) 43 end 44 page.links.each do |link| 45 city_urls << link 46 end 47 end 48 49 anemone.on_every_page do |page| 50 url = page.url 51 html = open(url) 52 53 doc = Nokogiri::HTML.parse(html, nil, 'UTF-8') 54 55 name = doc.xpath('/html/body/div[4]/div/div[2]/div[1]/h1').text 56 postcode = doc.xpath('/html/body/div[4]/div/div[2]/table[1]/tbody/tr[3]/td/text()[1]') 57 tel = doc.xpath('/html/body/div[4]/div/div[2]/table[1]/tbody/tr[4]/td').text 58 fax = doc.xpath('/html/body/div[4]/div/div[2]/table[1]/tbody/tr[5]/td').text 59 address = doc.xpath('/html/body/div[4]/div/div[2]/table[1]/tbody/tr[3]/td/text()[2]') 60 staff_number = doc.xpath('/html/body/div[4]/div/div[2]/table[4]/tbody/tr[1]/td/p').text 61 company = doc.xpath('/html/body/div[4]/div/div[2]/table[5]/tbody/tr[2]/td').text 62 office_url = doc.xpath('/html/body/div[4]/div/div[2]/table[1]/tbody/tr[6]/td/a').text 63 64 office = Office.new(name: name, postcode: postcode, tel: tel, fax: fax, address: 
address, staff_number: staff_number, url: office_url) 65 office.save 66 end 67 end 68end
現状は以下のように記述していますが、データが大量すぎて検証できていません。
ruby
# Build an Office record from the scraped values and persist it.
# NOTE(review): Office is presumably an ActiveRecord model - confirm.
office = Office.new(
  name: name,
  postcode: postcode,
  tel: tel,
  fax: fax,
  address: address,
  staff_number: staff_number,
  url: office_url
)
# `save` returns false on failure instead of raising, so report it explicitly.
warn "Failed to save office: #{name}" unless office.save
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2020/10/12 05:57
2020/10/12 05:58
2020/10/12 06:10
2020/10/12 06:50
2020/10/12 07:27