質問編集履歴
1
code
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,3 +1,187 @@
|
|
1
|
+
```ruby
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
require 'mechanize' # ![イメージ説明](a9fc1816373faa9b7a7912652fc9b808.jpeg)
|
6
|
+
|
7
|
+
require 'kconv'
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
### Setting #################
|
16
|
+
|
17
|
+
url = 'https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=060&bs=040&ta=26&sc=26102&cb=0.0&ct=9999999&et=9999999&cn=9999999&mb=0&mt=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&fw2=&srch_navi=1'
|
18
|
+
|
19
|
+
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
|
20
|
+
|
21
|
+
#'http://suumo.jp/chintai/hyogo/sc_kobeshisuma/'
|
22
|
+
|
23
|
+
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
#'https://suumo.jp/chintai/hyogo/sc_kobeshinada/nj_204/'
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
#############################
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
AGENT_ALIASES = [
|
38
|
+
|
39
|
+
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
40
|
+
|
41
|
+
]
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
agent = Mechanize.new
|
46
|
+
|
47
|
+
agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
48
|
+
|
49
|
+
agent.cookie_jar.clear!
|
50
|
+
|
51
|
+
agent.follow_meta_refresh = true
|
52
|
+
|
53
|
+
agent.user_agent = AGENT_ALIASES[0]
|
54
|
+
|
55
|
+
agent.max_history = 2
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
i = 0
|
60
|
+
|
61
|
+
open("結果.csv", 'w'){|io| io.write("") }
|
62
|
+
|
63
|
+
begin
|
64
|
+
|
65
|
+
page = agent.get("#{url}pnz1#{i += 1}.html")
|
66
|
+
|
67
|
+
#open('1.html', 'w'){|io| io.write(page.body) }
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
puts "#{i} Page"
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
# 条件にあう物件がありません。条件を変更して再度検索してください。
|
76
|
+
|
77
|
+
break if !page.search('div.error_pop').empty?
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
# 詳細を見るリンクを収集する
|
82
|
+
|
83
|
+
result = []
|
84
|
+
|
85
|
+
page.search("#js-bukkenList").search('.cassetteitem').each do |div|
|
86
|
+
|
87
|
+
info = {}
|
88
|
+
|
89
|
+
info['賃貸種類'] = div.search('.ui-pct--util1').first.text
|
90
|
+
|
91
|
+
info['物件名'] = div.search('.cassetteitem_content-title').first.text
|
92
|
+
|
93
|
+
div.search('tbody').each do |tbody|
|
94
|
+
|
95
|
+
info['階数'] = tbody.search('td')[2].text.strip
|
96
|
+
|
97
|
+
info['賃料'] = tbody.search('td')[3].text.strip
|
98
|
+
|
99
|
+
info['管理費'] = tbody.search('td')[4].text.strip
|
100
|
+
|
101
|
+
temp = tbody.search('td')[5].text.strip
|
102
|
+
|
103
|
+
info['敷礼'] = temp.split('/')[0]
|
104
|
+
|
105
|
+
info['保証'] = temp.split('/')[1]
|
106
|
+
|
107
|
+
info['敷引'] = temp.split('/')[2]
|
108
|
+
|
109
|
+
info['償却'] = temp.split('/')[3]
|
110
|
+
|
111
|
+
info['間取り'] = tbody.search('td')[6].text.strip
|
112
|
+
|
113
|
+
info['専有面積'] = tbody.search('td')[7].text.strip
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
div.search('td a').each do |link|
|
118
|
+
|
119
|
+
next if link.text != '詳細を見る'
|
120
|
+
|
121
|
+
info['url'] = link[:href]
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
result << info
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
res = " #{info['賃貸種類']},#{info['物件名']},#{info['住所']},#{info['賃料']},#{info['管理費']},#{info['敷礼']},#{info['保証']},#{info['敷引']},#{info['償却']},#{info['間取り']},#{info['専有面積']}"
|
136
|
+
|
137
|
+
open("結果.csv", 'a'){|io| io.write(res.tosjis) }
|
138
|
+
|
139
|
+
open('m_url.txt', 'w'){|io| io.write(res.to_s + "\n") }
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
result.each_with_index do |e, i|
|
152
|
+
|
153
|
+
if e['url'] =~ /^http/
|
154
|
+
|
155
|
+
page = agent.get(e['url'])
|
156
|
+
|
157
|
+
open('html.txt', 'w'){|io| io.write(page.body) }
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
else
|
162
|
+
|
163
|
+
page = agent.get("https://suumo.jp#{e['url']}")
|
164
|
+
|
165
|
+
#open('2.html', 'w'){|io| io.write(page.body) }
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
end
|
170
|
+
|
171
|
+
puts page.uri.to_s
|
172
|
+
|
173
|
+
open('url.txt', 'a'){|io| io.write(page.uri.to_s + "\n") }
|
174
|
+
|
175
|
+
end
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
end until false
|
180
|
+
|
181
|
+
コード
|
182
|
+
|
183
|
+
```
|
184
|
+
|
1
185
|
下記の部分ですが、なぜ勝手に改行されたのかが分かりません。原因を教えてください。
|
2
186
|
|
3
187
|
|