質問編集履歴
1
当サイトで仮に抽出を行う場合の例
title
CHANGED
File without changes
|
body
CHANGED
@@ -34,4 +34,33 @@
|
|
34
34
|
end
|
35
35
|
p ""
|
36
36
|
}
|
37
|
+
```
|
38
|
+
|
39
|
+
|
40
|
+
```example
|
41
|
+
require 'mechanize'

# HTTP client used for every request made by this script.
agent = Mechanize.new

# The two adjacent string literals are joined into one by the trailing
# backslash. Previously the second line began with `+`, which Ruby parses as
# a separate statement (unary plus on a String) after the assignment has
# already ended, so the User-Agent that was actually sent stopped at "x64)".
agent.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' \
                   ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'

# Accumulates the href values collected while walking the feed pages.
urls = []
|
48
|
+
|
49
|
+
# Start from the first page of the feed. `agent` and `urls` are created
# earlier in this script.
page = agent.get('https://teratail.com/feed/new/1')

# Walk the feed page by page, appending the href of each matched anchor to `urls`.
loop do
  # NOTE(review): `li:nth-child(1)` limits the match to anchors inside a
  # first-child <li>, so this likely picks up only the first item of each
  # page -- confirm that is intended before relying on the result.
  page.css('#mainContainer > div.boxContentWrap.btnNew.j-feedContentsWrapper > ul > li:nth-child(1) > div.C-questionFeedItem__main.boxItemContent > h2 > a').each do |a|
    urls << a['href']
  end

  # No pagination link element at all: nothing further to fetch.
  break if page.css('#mainContainer > div.j-feedPagination.u-mt40 > div > p > a > span').empty?

  # `link_with` returns nil when no link carries this text (pagination markup
  # can exist without a "next page" link), so stop here instead of calling
  # `click` on nil and raising NoMethodError.
  next_link = page.link_with(text: '次のページ')
  break unless next_link

  # Random 3-5 second pause between page requests.
  sleep(rand(3..5))
  page = next_link.click
end
|
59
|
+
|
60
|
+
# Fetch every collected URL in turn and print the text of its heading element.
title_selector = '#l-headContents > div > div.p-questionHead__main > h1'

urls.each do |question_url|
  # `page` is deliberately reused (not a fresh local) so the variable is
  # rebound exactly as before.
  page = agent.get(question_url)
  heading_text = page.css(title_selector).text
  puts heading_text

  # Random 3-5 second pause before the next request.
  sleep(rand(3..5))
end
|
65
|
+
|
37
66
|
```
|