質問編集履歴
4
test
CHANGED
File without changes
|
test
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
|
23
23
|
const puppeteer = require("puppeteer");
|
24
24
|
|
25
|
-
var TARGET_URL = "
|
25
|
+
var TARGET_URL = "ヤフーニュース"
|
26
26
|
|
27
27
|
var datas = [];
|
28
28
|
|
3
test
CHANGED
File without changes
|
test
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
|
23
23
|
const puppeteer = require("puppeteer");
|
24
24
|
|
25
|
-
var TARGET_URL = "
|
25
|
+
var TARGET_URL = "/"
|
26
26
|
|
27
27
|
var datas = [];
|
28
28
|
|
2
コードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -22,27 +22,25 @@
|
|
22
22
|
|
23
23
|
const puppeteer = require("puppeteer");
|
24
24
|
|
25
|
-
var TARGET_URL = "https://n
|
25
|
+
var TARGET_URL = "https://anond.hatelabo.jp/"
|
26
|
-
|
27
|
-
var LINK_LEVEL = 3;
|
28
26
|
|
29
27
|
var datas = [];
|
30
28
|
|
31
|
-
var items =
|
29
|
+
var items = {};
|
32
30
|
|
33
|
-
|
31
|
+
var r = 0;
|
34
32
|
|
35
33
|
puppeteer.launch({
|
36
34
|
|
37
|
-
args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
|
35
|
+
args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
|
38
36
|
|
39
|
-
ignoreDefaultArgs: ['--disable-extentions'],
|
37
|
+
ignoreDefaultArgs: ['--disable-extentions'],
|
40
38
|
|
41
|
-
ignoreHTTPSErrors: false,
|
39
|
+
ignoreHTTPSErrors: false,
|
42
40
|
|
43
|
-
headless:false,
|
41
|
+
headless:false,
|
44
42
|
|
45
|
-
slowMo :300
|
43
|
+
slowMo :300
|
46
44
|
|
47
45
|
|
48
46
|
|
@@ -56,61 +54,87 @@
|
|
56
54
|
|
57
55
|
|
58
56
|
|
59
|
-
const page = await browser.newPage();
|
57
|
+
const page = await browser.newPage();
|
60
58
|
|
61
|
-
await page.goto(url,{waitUntil:"domcontentloaded"});
|
59
|
+
await page.goto(url,{waitUntil:"domcontentloaded"});
|
60
|
+
|
61
|
+
console.log(url);
|
62
|
+
|
63
|
+
//-----------------------リンク一覧を配列に入れる----------------------
|
62
64
|
|
63
65
|
|
64
66
|
|
65
|
-
|
67
|
+
let lists2 = await page.$$("a[href]");
|
66
68
|
|
67
|
-
|
69
|
+
|
68
70
|
|
69
|
-
|
70
|
-
|
71
|
-
if (isLoadingSucceeded){
|
72
|
-
|
73
|
-
while (isLoadingSucceeded) {
|
74
|
-
|
75
|
-
let lists2 = await page.$$("a[href]");
|
76
|
-
|
77
|
-
// 3ページ目まで
|
78
|
-
|
79
|
-
if(level >= LINK_LEVEL )return;
|
80
|
-
|
81
|
-
//基準ページ以外であれば無視
|
82
|
-
|
83
|
-
var us =TARGET_URL.split("/");
|
84
|
-
|
85
|
-
us.pop();
|
86
|
-
|
87
|
-
|
71
|
+
for (let i = 0; i < lists2.length; i++) {
|
88
|
-
|
89
|
-
if (url.indexOf(base) < 0)return;
|
90
72
|
|
91
73
|
//既出のサイトであれば無視
|
92
74
|
|
93
|
-
i
|
75
|
+
var removeDuplicates = function(object) {
|
94
76
|
|
95
|
-
|
77
|
+
var result = [], comparisons = [], key, comparison;
|
96
78
|
|
97
|
-
|
79
|
+
for (key in object) {
|
98
80
|
|
99
|
-
|
81
|
+
comparison = JSON.stringify(object[key]);
|
100
82
|
|
101
|
-
|
83
|
+
if (comparisons.indexOf(comparison) === -1) {
|
102
84
|
|
103
|
-
|
85
|
+
result.push(object[key]);
|
104
86
|
|
87
|
+
}
|
105
88
|
|
89
|
+
comparisons.push(comparison);
|
106
90
|
|
107
|
-
}
|
91
|
+
}
|
108
92
|
|
109
|
-
|
93
|
+
|
110
94
|
|
111
|
-
|
95
|
+
return result;
|
112
96
|
|
97
|
+
};
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
datas.push(await (await lists2[i].getProperty('href')).jsonValue());
|
104
|
+
|
105
|
+
result2 = await result[i].indexOf(TARGET_URL);
|
106
|
+
|
107
|
+
//await console.log(result2);
|
108
|
+
|
109
|
+
//外部サイトであれば無視
|
110
|
+
|
111
|
+
if (result2 != -1){
|
112
|
+
|
113
|
+
var result = removeDuplicates(datas);
|
114
|
+
|
115
|
+
items[r] = result[i] ;
|
116
|
+
|
117
|
+
r = r + 1;
|
118
|
+
|
119
|
+
};
|
120
|
+
|
121
|
+
await console.log(items);
|
122
|
+
|
123
|
+
};
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
await page.goto(url,{waitUntil:"domcontentloaded"});
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
await console.log(items[r]);
|
134
|
+
|
113
|
-
downloadRec(
|
135
|
+
await downloadRec(items[r+1],level+1,i+1);
|
136
|
+
|
137
|
+
await console.log(downloadRec);
|
114
138
|
|
115
139
|
};
|
116
140
|
|
1
test
CHANGED
File without changes
|
test
CHANGED
File without changes
|