質問編集履歴
1
当サイトで仮に抽出を行う場合の例
test
CHANGED
File without changes
|
test
CHANGED
@@ -71,3 +71,61 @@
|
|
71
71
|
}
|
72
72
|
|
73
73
|
```
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
```example
|
80
|
+
|
81
|
+
require 'mechanize'
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
# Create the Mechanize agent that performs every HTTP request in this script.
agent = Mechanize.new

# Send a desktop Chrome User-Agent string. The two halves are joined with a
# trailing `+`: if the operator begins the following line instead, Ruby ends
# the assignment at the newline and evaluates the second literal as a separate
# (discarded) statement, so only the first half would be assigned.
agent.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' +
                   ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'

# hrefs collected from the feed pages; filled in by the pagination loop.
urls = []

# Fetch the feed page the crawl starts from.
page = agent.get('https://teratail.com/feed/new/1')
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
# Walk the feed page by page, appending the href of every matched title link
# to `urls`, until there is no further page to follow.

# No `:nth-child(1)` on the <li> step: that pseudo-class would limit the match
# to the first item of the list, so `each` would see at most one link per page.
feed_link_selector = '#mainContainer > div.boxContentWrap.btnNew.j-feedContentsWrapper > ul > li' \
                     ' > div.C-questionFeedItem__main.boxItemContent > h2 > a'
pagination_selector = '#mainContainer > div.j-feedPagination.u-mt40 > div > p > a > span'

loop do
  page.css(feed_link_selector).each do |a|
    urls << a['href']
  end

  # Stop when the page has no pagination links at all.
  break if page.css(pagination_selector).empty?

  # link_with returns nil when no link carries this text (pagination can be
  # present without a "next page" link), so guard before calling click.
  next_link = page.link_with(text: '次のページ')
  break unless next_link

  # Random 3-5 second pause before requesting the next page.
  sleep(rand(3..5))
  page = next_link.click
end
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
# Visit each collected URL in order and print the text of the element(s)
# matched by the heading selector, pausing between requests.
title_selector = '#l-headContents > div > div.p-questionHead__main > h1'

urls.each do |question_url|
  page = agent.get(question_url)
  heading = page.css(title_selector).text
  puts heading
  # Random 3-5 second pause before the next request.
  sleep(rand(3..5))
end
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
```
|