質問編集履歴

1

code

2019/01/12 05:01

投稿

tyoujobfideshom
tyoujobfideshom

スコア13

test CHANGED
File without changes
test CHANGED
@@ -1,3 +1,187 @@
1
+ ```# -*- coding: utf-8 -*-
2
+
3
+ require 'rubygems'
4
+
5
+ require 'mechanize' # ![イメージ説明](a9fc1816373faa9b7a7912652fc9b808.jpeg)
6
+
7
+ require 'kconv'
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+ ### Setting #################
16
+
17
+ url = 'https://suumo.jp/jj/chintai/ichiran/FR301FC001/?ar=060&bs=040&ta=26&sc=26102&cb=0.0&ct=9999999&et=9999999&cn=9999999&mb=0&mt=9999999&shkr1=03&shkr2=03&shkr3=03&shkr4=03&fw2=&srch_navi=1'
18
+
19
+ #'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
20
+
21
+ #'http://suumo.jp/chintai/hyogo/sc_kobeshisuma/'
22
+
23
+ #'https://suumo.jp/chintai/hyogo/sc_kobeshinada/'
24
+
25
+
26
+
27
+ #'https://suumo.jp/chintai/hyogo/sc_kobeshinada/nj_204/'
28
+
29
+
30
+
31
+ #############################
32
+
33
+
34
+
35
+
36
+
37
+ AGENT_ALIASES = [
38
+
39
+ 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
40
+
41
+ ]
42
+
43
+
44
+
45
+ agent = Mechanize.new
46
+
47
+ agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
48
+
49
+ agent.cookie_jar.clear!
50
+
51
+ agent.follow_meta_refresh = true
52
+
53
+ agent.user_agent = AGENT_ALIASES[0]
54
+
55
+ agent.max_history = 2
56
+
57
+
58
+
59
+ i = 0
60
+
61
+ open("結果.csv", 'w'){|io| io.write("") }
62
+
63
+ begin
64
+
65
+ page = agent.get("#{url}pnz1#{i += 1}.html")
66
+
67
+ #open('1.html', 'w'){|io| io.write(page.body) }
68
+
69
+
70
+
71
+ puts "#{i} Page"
72
+
73
+
74
+
75
+ # 条件にあう物件がありません。条件を変更して再度検索してください。
76
+
77
+ break if !page.search('div.error_pop').empty?
78
+
79
+
80
+
81
+ # 詳細を見るリンクを収集する
82
+
83
+ result = []
84
+
85
+ page.search("#js-bukkenList").search('.cassetteitem').each do |div|
86
+
87
+ info = {}
88
+
89
+ info['賃貸種類'] = div.search('.ui-pct--util1').first.text
90
+
91
+ info['物件名'] = div.search('.cassetteitem_content-title').first.text
92
+
93
+ div.search('tbody').each do |tbody|
94
+
95
+ info['階数'] = tbody.search('td')[2].text.strip
96
+
97
+ info['賃料'] = tbody.search('td')[3].text.strip
98
+
99
+ info['管理費'] = tbody.search('td')[4].text.strip
100
+
101
+ temp = tbody.search('td')[5].text.strip
102
+
103
+ info['敷礼'] = temp.split('/')[0]
104
+
105
+ info['保証'] = temp.split('/')[1]
106
+
107
+ info['敷引'] = temp.split('/')[2]
108
+
109
+ info['償却'] = temp.split('/')[3]
110
+
111
+ info['間取り'] = tbody.search('td')[6].text.strip
112
+
113
+ info['専有面積'] = tbody.search('td')[7].text.strip
114
+
115
+
116
+
117
+ div.search('td a').each do |link|
118
+
119
+ next if link.text != '詳細を見る'
120
+
121
+ info['url'] = link[:href]
122
+
123
+
124
+
125
+ end
126
+
127
+
128
+
129
+
130
+
131
+ result << info
132
+
133
+ end
134
+
135
+ res = " #{info['賃貸種類']},#{info['物件名']},#{info['住所']},#{info['賃料']},#{info['管理費']},#{info['敷礼']},#{info['保証']},#{info['敷引']},#{info['償却']},#{info['間取り']},#{info['専有面積']}"
136
+
137
+ open("結果.csv", 'a'){|io| io.write(res.tosjis) }
138
+
139
+ open('m_url.txt', 'w'){|io| io.write(res.to_s + "\n") }
140
+
141
+
142
+
143
+
144
+
145
+ end
146
+
147
+
148
+
149
+
150
+
151
+ result.each_with_index do |e, i|
152
+
153
+ if e['url'] =~ /^http/
154
+
155
+ page = agent.get(e['url'])
156
+
157
+ open('html.txt', 'w'){|io| io.write(page.body) }
158
+
159
+
160
+
161
+ else
162
+
163
+ page = agent.get("https://suumo.jp#{e['url']}")
164
+
165
+ #open('2.html', 'w'){|io| io.write(page.body) }
166
+
167
+
168
+
169
+ end
170
+
171
+ puts page.uri.to_s
172
+
173
+ open('url.txt', 'a'){|io| io.write(page.uri.to_s + "\n") }
174
+
175
+ end
176
+
177
+
178
+
179
+ end until false
180
+
181
+ コード
182
+
183
+ ```
184
+
1
185
  下記の部分ですが、なぜ勝手に改行されてしまうのか分かりません。原因を教えてください。
2
186
 
3
187