# SUUMO rental-listing scraper, as pasted into the question body (the page had
# collapsed it onto one line; the line structure is restored here).
require 'rubygems'
require 'mechanize'
# Image attached to the post: ![イメージ説明](a9fc1816373faa9b7a7912652fc9b808.jpeg)
require 'kconv'

### Setting #################
# Search-result URL that the crawl starts from.
url = 'https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=060&bs=040&ta=26&sc=26102&cb=0.0&ct=9999999&et=9999999&cn=9999999&mb=0&mt=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&fw2=&srch_navi=1'
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
#'http://suumo.jp/chintai/hyogo/sc_kobeshisuma/'
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/nj_204/'
#############################

# User-Agent strings the agent may present; only the first one is used.
AGENT_ALIASES = [
  'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
]

agent = Mechanize.new
# NOTE(review): VERIFY_NONE turns off TLS certificate verification.
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
agent.cookie_jar.clear!
agent.follow_meta_refresh = true
agent.user_agent = AGENT_ALIASES[0]
agent.max_history = 2

# Page counter; incremented right before each result page is requested.
i = 0

# Start from an empty result file; rows are appended to it below.
open("結果.csv", 'w') { |out| out.write("") }

loop do
  page = agent.get("#{url}pnz1#{i += 1}.html")
  #open('1.html', 'w'){|io| io.write(page.body) }
  puts "#{i} Page"

  # div.error_pop carries the "no property matches the conditions; change
  # them and search again" message, so its presence ends the crawl.
  break unless page.search('div.error_pop').empty?

  # Collect the "詳細を見る" (see details) links.
  result = []
  page.search("#js-bukkenList").search('.cassetteitem').each do |cassette|
    info = {}
    info['賃貸種類'] = cassette.search('.ui-pct--util1').first.text
    info['物件名'] = cassette.search('.cassetteitem_content-title').first.text

    cassette.search('tbody').each do |room|
      cells = room.search('td')
      info['階数'] = cells[2].text.strip
      info['賃料'] = cells[3].text.strip
      info['管理費'] = cells[4].text.strip

      # The sixth cell holds four '/'-separated fee values.
      fees = cells[5].text.strip.split('/')
      info['敷礼'] = fees[0]
      info['保証'] = fees[1]
      info['敷引'] = fees[2]
      info['償却'] = fees[3]

      info['間取り'] = cells[6].text.strip
      info['専有面積'] = cells[7].text.strip

      # Searches every link of the whole cassette; the last match wins.
      cassette.search('td a').each do |anchor|
        info['url'] = anchor[:href] if anchor.text == '詳細を見る'
      end

      # The same hash object is pushed once per room.
      result << info
    end

    # info['住所'] is never assigned in this script, so that column is empty.
    # The leading character of the literal is reproduced as it appears in the
    # paste (it may have been a newline before the page flattened the code).
    res = " #{info['賃貸種類']},#{info['物件名']},#{info['住所']},#{info['賃料']},#{info['管理費']},#{info['敷礼']},#{info['保証']},#{info['敷引']},#{info['償却']},#{info['間取り']},#{info['専有面積']}"
    open("結果.csv", 'a') { |out| out.write(res.tosjis) }
    open('m_url.txt', 'w') { |out| out.write(res.to_s + "\n") }
  end

  # Visit each collected detail link and log the URL it resolved to.
  result.each do |entry|
    if entry['url'] =~ /^http/
      page = agent.get(entry['url'])
      open('html.txt', 'w') { |out| out.write(page.body) }
    else
      page = agent.get("https://suumo.jp#{entry['url']}")
      #open('2.html', 'w'){|io| io.write(page.body) }
    end
    puts page.uri.to_s
    open('url.txt', 'a') { |out| out.write(page.uri.to_s + "\n") }
  end
end
# コード (section label "code" from the original page)
下記の部分ですが、なぜ勝手に改行されたのか、不明です。教えてください。
#################################################################################
# -*- coding: utf-8 -*-
require 'rubygems'
require 'mechanize'
require 'kconv'
### Setting #################
# Search-result URL that the crawl starts from.
url = 'https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=060&bs=040&ta=26&sc=26102&cb=0.0&ct=9999999&et=9999999&cn=9999999&mb=0&mt=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&fw2=&srch_navi=1'
# Alternative start URLs, kept for reference:
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
#'http://suumo.jp/chintai/hyogo/sc_kobeshisuma/'
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/nj_204/'
#############################

# User-Agent strings the agent may present; only the first one is used.
AGENT_ALIASES = [
  'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
]

# HTTP client shared by every request the script makes.
agent = Mechanize.new.tap do |client|
  # NOTE(review): VERIFY_NONE turns off TLS certificate verification, so the
  # server's identity is not checked. Confirm this is intentional.
  client.verify_mode = OpenSSL::SSL::VERIFY_NONE
  client.cookie_jar.clear!
  client.follow_meta_refresh = true
  client.user_agent = AGENT_ALIASES.first
  client.max_history = 2
end

# Page counter; incremented right before each result page is requested.
i = 0

# Truncate the result file so this run starts from an empty CSV.
open('結果.csv', 'w') { |csv| csv.write('') }
# Crawl the SUUMO search-result pages in order, append one CSV row per room to
# 結果.csv (converted with String#tosjis), then open every room's detail page
# and log the URL it resolved to in url.txt.
# Relies on `url`, `agent` and `i`, which are set up earlier in the script.
#
# About the unexpected line breaks in the CSV: the building type and building
# name were written straight from Nokogiri's `text`, which returns the node
# text together with whatever newlines/indentation the HTML contains, and the
# fee cell may contain line breaks between its parts. Every value is now
# whitespace-normalised before it is written.

# Output columns, in the order the original row string used.
columns = %w[賃貸種類 物件名 住所 賃料 管理費 敷礼 保証 敷引 償却 間取り 専有面積]

# Text of a node with every whitespace run (including newlines and full-width
# spaces) collapsed to one space; '' when the node is missing.
clean = lambda do |node|
  node ? node.text.gsub(/[[:space:]]+/, ' ').strip : ''
end

# Quote a value when it contains a comma, quote or line break so that it
# cannot split a CSV row.
csv_field = lambda do |value|
  text = value.to_s
  text =~ /[",\r\n]/ ? '"' + text.gsub('"', '""') + '"' : text
end

loop do
  i += 1

  # The path-style URLs take a "pnz1<N>.html" suffix. Appending that suffix to
  # a URL that already ends in a query string only corrupts its last
  # parameter, so the page number is sent as a parameter of its own there.
  # NOTE(review): "page" is assumed to be SUUMO's pagination parameter for
  # query-style search URLs; verify against the live site.
  page_url = url.include?('?') ? "#{url}&page=#{i}" : "#{url}pnz1#{i}.html"
  page = agent.get(page_url)
  #open('1.html', 'w'){|io| io.write(page.body) }
  puts "#{i} Page"

  # div.error_pop carries the "no property matches the conditions; change
  # them and search again" message, so its presence ends the crawl.
  break unless page.search('div.error_pop').empty?

  # Also stop when a page has no listings at all; otherwise a site that keeps
  # answering past the last page would make this loop run forever.
  listings = page.search('#js-bukkenList').search('.cassetteitem')
  break if listings.empty?

  result = []
  rows = []

  listings.each do |div|
    # Values shared by every room of this building.
    # NOTE(review): '.cassetteitem_detail-col1' is assumed to hold the
    # address (the original never filled '住所'); if the selector does not
    # match, the column simply stays empty as before.
    building = {
      '賃貸種類' => clean.call(div.search('.ui-pct--util1').first),
      '物件名' => clean.call(div.search('.cassetteitem_content-title').first),
      '住所' => clean.call(div.search('.cassetteitem_detail-col1').first)
    }

    div.search('tbody').each do |tbody|
      cells = tbody.search('td')
      next if cells.size < 8 # not a room row in the expected layout

      # The sixth cell holds four '/'-separated fee values.
      fees = clean.call(cells[5]).split('/').map(&:strip)

      # Look for the "詳細を見る" (see details) link inside this room only, so
      # each room gets its own detail URL.
      link = tbody.search('td a').find { |a| a.text.strip == '詳細を見る' }

      # A fresh hash per room; reusing one hash made every entry of a
      # building end up with the values of its last room.
      info = building.merge(
        '階数' => clean.call(cells[2]),
        '賃料' => clean.call(cells[3]),
        '管理費' => clean.call(cells[4]),
        '敷礼' => fees[0],
        '保証' => fees[1],
        '敷引' => fees[2],
        '償却' => fees[3],
        '間取り' => clean.call(cells[6]),
        '専有面積' => clean.call(cells[7]),
        'url' => link && link[:href]
      )

      result << info
      rows << columns.map { |key| csv_field.call(info[key]) }.join(',') + "\n"
    end
  end

  unless rows.empty?
    # One row per room, each terminated by exactly one newline.
    open('結果.csv', 'a') { |io| io.write(rows.join.tosjis) }
    # m_url.txt is overwritten on purpose: it only keeps the latest row.
    open('m_url.txt', 'w') { |io| io.write(rows.last) }
  end

  # Visit each collected detail link and log the URL it resolved to.
  result.each do |e|
    href = e['url']
    # Without a link the original requested "https://suumo.jp" itself.
    next if href.nil? || href.empty?

    if href =~ /\Ahttp/
      detail = agent.get(href)
      # html.txt keeps only the most recently fetched absolute-URL page.
      open('html.txt', 'w') { |io| io.write(detail.body) }
    else
      detail = agent.get("https://suumo.jp#{href}")
      #open('2.html', 'w'){|io| io.write(detail.body) }
    end

    puts detail.uri.to_s
    open('url.txt', 'a') { |io| io.write(detail.uri.to_s + "\n") }
  end
end
あなたの回答
tips
プレビュー