質問編集履歴

4

2019/05/13 22:41

投稿

RMBQsKe5AP10gjx
RMBQsKe5AP10gjx

スコア24

test CHANGED
File without changes
test CHANGED
@@ -22,7 +22,7 @@
 22  22  
 23  23   const puppeteer = require("puppeteer");
 24  24  
 25     - var TARGET_URL = "/"
     25 + var TARGET_URL = "ヤフーニュース"
 26  26  
 27  27   var datas = [];
 28  28  

3

2019/05/13 22:41

投稿

RMBQsKe5AP10gjx
RMBQsKe5AP10gjx

スコア24

test CHANGED
File without changes
test CHANGED
@@ -22,7 +22,7 @@
 22  22  
 23  23   const puppeteer = require("puppeteer");
 24  24  
 25     - var TARGET_URL = "https://anond.hatelabo.jp/"
     25 + var TARGET_URL = "/"
 26  26  
 27  27   var datas = [];
 28  28  

2

コードの修正

2019/05/13 22:41

投稿

RMBQsKe5AP10gjx
RMBQsKe5AP10gjx

スコア24

test CHANGED
File without changes
test CHANGED
@@ -22,27 +22,25 @@
 22  22  
 23  23   const puppeteer = require("puppeteer");
 24  24  
 25     - var TARGET_URL = "https://news.yahoo.co.jp/list/"
     25 + var TARGET_URL = "https://anond.hatelabo.jp/"
 26     -
 27     - var LINK_LEVEL = 3;
 28  26  
 29  27   var datas = [];
 30  28  
 31     - var items = [];
     29 + var items = {};
 32  30  
 33     -
     31 + var r = 0;
 34  32  
 35  33   puppeteer.launch({
 36  34  
 37     - args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
     35 + args:['--no-sandbox','--disable-gpu','--ignore-certificate-errors'],
 38  36  
 39     - ignoreDefaultArgs: ['--disable-extentions'],
     37 + ignoreDefaultArgs: ['--disable-extentions'],
 40  38  
 41     - ignoreHTTPSErrors: false,
     39 + ignoreHTTPSErrors: false,
 42  40  
 43     - headless:false,
     41 + headless:false,
 44  42  
 45     - slowMo :300
     43 + slowMo :300
 46  44  
 47  45  
 48  46  
@@ -56,61 +54,87 @@
 56  54  
 57  55  
 58  56  
 59     - const page = await browser.newPage();
     57 + const page = await browser.newPage();
 60  58  
 61     - await page.goto(url,{waitUntil:"domcontentloaded"});
     59 + await page.goto(url,{waitUntil:"domcontentloaded"});
     60 +
     61 + console.log(url);
     62 +
     63 + //-----------------------リンク一覧を配列に入れる----------------------
 62  64  
 63  65  
 64  66  
 65     - //-----------------------リンク一覧を配列に入れる----------------------
     67 + let lists2 = await page.$$("a[href]");
 66  68  
 67     - const isLoadingSucceeded = await page.$('li.next a[href]').then(res => !!res);
     69 +
 68  70  
 69     -
 70     -
 71     - if (isLoadingSucceeded){
 72     -
 73     - while (isLoadingSucceeded) {
 74     -
 75     - let lists2 = await page.$$("a[href]");
 76     -
 77     - // 3ページ目まで
 78     -
 79     - if(level >= LINK_LEVEL )return;
 80     -
 81     - //基準ページ以外であれば無視
 82     -
 83     - var us =TARGET_URL.split("/");
 84     -
 85     - us.pop();
 86     -
 87     - var base = us.join("/");
     71 + for (let i = 0; i < lists2.length; i++) {
 88     -
 89     - if (url.indexOf(base) < 0)return;
 90  72  
 91  73   //既出のサイトであれば無視
 92  74  
 93     - if(i[datas[i]]);
     75 + var removeDuplicates = function(object) {
 94  76  
 95     - for (let i = 0; i < lists2.length; i++) {
     77 + var result = [], comparisons = [], key, comparison;
 96  78  
 97     - datas.push(await (await lists2[i].getProperty('href')).jsonValue());
     79 + for (key in object) {
 98  80  
 99     - items.push( {[i] : datas[i]} );
     81 + comparison = JSON.stringify(object[key]);
100  82  
101     - await console.log(datas);
     83 + if (comparisons.indexOf(comparison) === -1) {
102  84  
103     - await page.click('li.next a[href]',{waitUntil:"domcontentloaded"});
     85 + result.push(object[key]);
104  86  
     87 + }
105  88  
     89 + comparisons.push(comparison);
106  90  
107     - };
     91 + }
108  92  
109     - };
     93 +
110  94  
111     - };
     95 + return result;
112  96  
     97 + };
     98 +
     99 +
    100 +
    101 +
    102 +
    103 + datas.push(await (await lists2[i].getProperty('href')).jsonValue());
    104 +
    105 + result2 = await result[i].indexOf(TARGET_URL);
    106 +
    107 + //await console.log(result2);
    108 +
    109 + //外部サイトであれば無視
    110 +
    111 + if (result2 != -1){
    112 +
    113 + var result = removeDuplicates(datas);
    114 +
    115 + items[r] = result[i] ;
    116 +
    117 + r = r + 1;
    118 +
    119 + };
    120 +
    121 + await console.log(items);
    122 +
    123 + };
    124 +
    125 +
    126 +
    127 + await page.goto(url,{waitUntil:"domcontentloaded"});
    128 +
    129 +
    130 +
    131 +
    132 +
    133 + await console.log(items[r]);
    134 +
113     - downloadRec(datas[i+1],level+1,i+1);
    135 + await downloadRec(items[r+1],level+1,i+1);
    136 +
    137 + await console.log(downloadRec);
114 138  
115 139   };
116 140  

1

2019/05/13 22:32

投稿

RMBQsKe5AP10gjx
RMBQsKe5AP10gjx

スコア24

test CHANGED
File without changes
test CHANGED
File without changes