質問編集履歴
4
title
CHANGED
File without changes
|
body
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
```
|
12
12
|
const puppeteer = require("puppeteer");
|
13
|
-
var TARGET_URL = "/"
|
13
|
+
var TARGET_URL = "ヤフーニュース"
|
14
14
|
var datas = [];
|
15
15
|
var items = {};
|
16
16
|
var r = 0;
|
3
title
CHANGED
File without changes
|
body
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
```
|
12
12
|
const puppeteer = require("puppeteer");
|
13
|
-
var TARGET_URL = "https://anond.hatelabo.jp/"
|
13
|
+
var TARGET_URL = "/"
|
14
14
|
var datas = [];
|
15
15
|
var items = {};
|
16
16
|
var r = 0;
|
2
コードの修正
title
CHANGED
File without changes
|
body
CHANGED
@@ -10,52 +10,64 @@
|
|
10
10
|
|
11
11
|
```
|
12
12
|
const puppeteer = require("puppeteer");
|
13
|
-
var TARGET_URL = "https://
|
13
|
+
var TARGET_URL = "https://anond.hatelabo.jp/"
|
14
|
-
var LINK_LEVEL = 3;
|
15
14
|
var datas = [];
|
16
|
-
var items =
|
15
|
+
var items = {};
|
17
|
-
|
16
|
+
var r = 0;
|
18
17
|
puppeteer.launch({
|
19
|
-
args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
|
18
|
+
args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
|
20
|
-
ignoreDefaultArgs: ['--disable-extentions'],
|
19
|
+
ignoreDefaultArgs: ['--disable-extentions'],
|
21
|
-
ignoreHTTPSErrors: false,
|
20
|
+
ignoreHTTPSErrors: false,
|
22
|
-
headless:false,
|
21
|
+
headless:false,
|
23
|
-
slowMo :300
|
22
|
+
slowMo :300
|
24
23
|
|
25
24
|
}).then(async browser => {
|
26
25
|
try{
|
27
26
|
|
28
27
|
async function downloadRec(url,level,i){
|
29
28
|
|
30
|
-
const page = await browser.newPage();
|
29
|
+
const page = await browser.newPage();
|
31
|
-
await page.goto(url,{waitUntil:"domcontentloaded"});
|
30
|
+
await page.goto(url,{waitUntil:"domcontentloaded"});
|
32
|
-
|
31
|
+
console.log(url);
|
33
32
|
//-----------------------リンク一覧を配列に入れる----------------------
|
34
|
-
const isLoadingSucceeded = await page.$('li.next a[href]').then(res => !!res);
|
35
33
|
|
36
|
-
if (isLoadingSucceeded){
|
37
|
-
while (isLoadingSucceeded) {
|
38
|
-
let lists2 = await page.$$("a[href]");
|
34
|
+
let lists2 = await page.$$("a[href]");
|
39
|
-
|
35
|
+
|
40
|
-
if(level >= LINK_LEVEL )return;
|
41
|
-
//基準ページ以外であれば無視
|
42
|
-
var us =TARGET_URL.split("/");
|
43
|
-
us.pop();
|
44
|
-
|
36
|
+
for (let i = 0; i < lists2.length; i++) {
|
45
|
-
if (url.indexOf(base) < 0)return;
|
46
37
|
//既出のサイトであれば無視
|
47
|
-
if(i[datas[i]]);
|
48
|
-
|
38
|
+
var removeDuplicates = function(object) {
|
39
|
+
var result = [], comparisons = [], key, comparison;
|
40
|
+
for (key in object) {
|
41
|
+
comparison = JSON.stringify(object[key]);
|
42
|
+
if (comparisons.indexOf(comparison) === -1) {
|
43
|
+
result.push(object[key]);
|
44
|
+
}
|
45
|
+
comparisons.push(comparison);
|
46
|
+
}
|
47
|
+
|
48
|
+
return result;
|
49
|
+
};
|
50
|
+
|
51
|
+
|
49
|
-
datas.push(await (await lists2[i].getProperty('href')).jsonValue());
|
52
|
+
datas.push(await (await lists2[i].getProperty('href')).jsonValue());
|
53
|
+
result2 = await result[i].indexOf(TARGET_URL);
|
54
|
+
//await console.log(result2);
|
55
|
+
//外部サイトであれば無視
|
56
|
+
if (result2 != -1){
|
57
|
+
var result = removeDuplicates(datas);
|
50
|
-
items
|
58
|
+
items[r] = result[i] ;
|
59
|
+
r = r + 1;
|
60
|
+
};
|
51
|
-
await
|
61
|
+
await console.log(items);
|
62
|
+
};
|
63
|
+
|
52
|
-
await page.
|
64
|
+
await page.goto(url,{waitUntil:"domcontentloaded"});
|
65
|
+
|
66
|
+
|
53
|
-
|
67
|
+
await console.log(items[r]);
|
68
|
+
await downloadRec(items[r+1],level+1,i+1);
|
69
|
+
await console.log(downloadRec);
|
54
70
|
};
|
55
|
-
};
|
56
|
-
};
|
57
|
-
downloadRec(datas[i+1],level+1,i+1);
|
58
|
-
};
|
59
71
|
//----------------------------------------------------------------------------
|
60
72
|
|
61
73
|
await downloadRec(TARGET_URL,0,0);
|
1
title
CHANGED
File without changes
|
body
CHANGED
File without changes
|