回答編集履歴
7
修正
answer
CHANGED
@@ -47,7 +47,6 @@
|
|
47
47
|
end
|
48
48
|
sleep(rand(3..5))
|
49
49
|
page = agent.get("https://teratail.com/feed/active/#{i + 1}")
|
50
|
-
break
|
51
50
|
end
|
52
51
|
rescue Mechanize::ResponseCodeError
|
53
52
|
# Do Nothing
|
6
修正
answer
CHANGED
@@ -25,4 +25,46 @@
|
|
25
25
|
sleep(rand(3..5))
|
26
26
|
end
|
27
27
|
|
28
|
-
```
|
28
|
+
```
|
29
|
+
|
30
|
+
追記
|
31
|
+
```Ruby
|
32
|
+
require 'mechanize'
|
33
|
+
require 'csv'
|
34
|
+
|
35
|
+
agent = Mechanize.new
|
36
|
+
agent.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
|
37
|
+
+ ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
|
38
|
+
|
39
|
+
urls = []
|
40
|
+
|
41
|
+
begin
|
42
|
+
page = agent.get('https://teratail.com/')
|
43
|
+
|
44
|
+
1.step do |i|
|
45
|
+
page.css('.C-questionFeedItemTitle > a').each do |a|
|
46
|
+
urls << 'https://teratail.com' + a['href']
|
47
|
+
end
|
48
|
+
sleep(rand(3..5))
|
49
|
+
page = agent.get("https://teratail.com/feed/active/#{i + 1}")
|
50
|
+
break
|
51
|
+
end
|
52
|
+
rescue Mechanize::ResponseCodeError
|
53
|
+
# Do Nothing
|
54
|
+
end
|
55
|
+
|
56
|
+
if urls.any?
|
57
|
+
lines = urls.map do |url|
|
58
|
+
sleep(rand(3..5))
|
59
|
+
page = agent.get(url)
|
60
|
+
[page.css('.p-questionHead__ttl').text.encode('cp932')]
|
61
|
+
end
|
62
|
+
CSV.open("file.csv", "wb") do |csv|
|
63
|
+
lines.each { |line| csv << line }
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
```
|
68
|
+
参考
|
69
|
+
[class CSV](https://docs.ruby-lang.org/ja/latest/class/CSV.html)
|
70
|
+
[CSV を文字コード変換しつつロード](https://qiita.com/labocho/items/8559576b71642b79df67)
|
5
修正
answer
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
|
13
13
|
loop do
|
14
14
|
page.css('.article-title > a').each do |a|
|
15
|
-
urls << a
|
15
|
+
urls << a['href']
|
16
16
|
end
|
17
17
|
break if page.css('.paging-next').empty?
|
18
18
|
sleep(rand(3..5))
|
@@ -22,6 +22,7 @@
|
|
22
22
|
urls.each do |url|
|
23
23
|
page = agent.get(url)
|
24
24
|
puts page.css('.article-title > a').text
|
25
|
+
sleep(rand(3..5))
|
25
26
|
end
|
26
27
|
|
27
28
|
```
|
4
修正
answer
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
|
22
22
|
urls.each do |url|
|
23
23
|
page = agent.get(url)
|
24
|
-
puts page.css('.article-title > a').text
|
24
|
+
puts page.css('.article-title > a').text
|
25
25
|
end
|
26
26
|
|
27
27
|
```
|
3
修正
answer
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
page.css('.article-title > a').each do |a|
|
15
15
|
urls << a.attributes['href'].text
|
16
16
|
end
|
17
|
-
break
|
17
|
+
break if page.css('.paging-next').empty?
|
18
18
|
sleep(rand(3..5))
|
19
19
|
page = page.link_with(text: '次のページ').click
|
20
20
|
end
|
2
修正
answer
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
|
13
13
|
loop do
|
14
14
|
page.css('.article-title > a').each do |a|
|
15
|
-
urls << a.attributes[
|
15
|
+
urls << a.attributes['href'].text
|
16
16
|
end
|
17
17
|
break unless page.css('.paging-next')
|
18
18
|
sleep(rand(3..5))
|
1
修正
answer
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
記事一覧をすべて取得して各ページのタイトルを表示するクローラー。
|
2
2
|
```Ruby
|
3
3
|
require 'mechanize'
|
4
4
|
|
@@ -15,8 +15,8 @@
|
|
15
15
|
urls << a.attributes["href"].text
|
16
16
|
end
|
17
17
|
break unless page.css('.paging-next')
|
18
|
+
sleep(rand(3..5))
|
18
19
|
page = page.link_with(text: '次のページ').click
|
19
|
-
sleep(rand(3..5))
|
20
20
|
end
|
21
21
|
|
22
22
|
urls.each do |url|
|