質問編集履歴

1

全文の追加

2019/01/20 11:26

投稿

退会済みユーザー
test CHANGED
File without changes
test CHANGED
@@ -4,9 +4,107 @@
4
4
 
5
5
  ```R
6
6
 
7
+ library(RSelenium)
8
+
9
+ library(rvest)
10
+
11
+ library(XML)
12
+
13
+
14
+
15
+ remDr = remoteDriver(remoteServerAddr = "localhost", port = 4444, browserName = "chrome")
16
+
17
+ remDr$open()
18
+
19
+
20
+
21
+ iterater <- 1
22
+
23
+ max_page <- 3
24
+
25
+ patent_linkDF <-data.frame('', '')
26
+
27
+ sleep <- 3
28
+
29
+
30
+
31
+ url <- 'https://jglobal.jst.go.jp/search/patents#{"category":"3","keyword":"農薬"}'
32
+
33
+ remDr$navigate(url)
34
+
35
+ planeHtmlList <- remDr$getPageSource()
36
+
37
+ fileNmae <- paste(formatC(iterater, width = 5, flag = 0), ".html", sep = "")
38
+
39
+ write(unlist(planeHtmlList), fileNmae)
40
+
41
+
42
+
43
+ path <- paste(getwd(), fileNmae, sep = "/")
44
+
45
+ html <- read_html(path)
46
+
47
+ parsed_doc <- htmlParse(html)
48
+
49
+ title <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlValue)
50
+
51
+ link <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlGetAttr, "href")
52
+
53
+ tempDF <- data.frame(title, link)
54
+
55
+ patent_linkDF <- tempDF[-c(1,2, nrow(tempDF)), ]
56
+
57
+ iterater <- iterater + 1
58
+
59
+ Sys.sleep(sleep)
60
+
61
+
62
+
7
63
  while(iterater <= max_page){
8
64
 
9
- url <- paste('https://hogehoge.jp/search/fugafuga#{"category":"3","page":', iterater, "}", sep='')
65
+ url <- paste('https://jglobal.jst.go.jp/search/patents#{"category":"3","keyword":"農薬","page":', iterater, "}", sep=''); remDr$navigate(url)
66
+
67
+ planeHtmlList <- remDr$getPageSource()
68
+
69
+ fileNmae <- paste(formatC(iterater, width = 5, flag = 0), ".html", sep = "")
70
+
71
+ write(unlist(planeHtmlList), fileNmae)
72
+
73
+
74
+
75
+ path <- paste(getwd(), fileNmae, sep = "/")
76
+
77
+ html <- read_html(path)
78
+
79
+ parsed_doc <- htmlParse(html)
80
+
81
+ title <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlValue)
82
+
83
+ link <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlGetAttr, "href")
84
+
85
+ tempDF <- data.frame(title, link)
86
+
87
+ patent_page_linkDF <- tempDF[-c(1,2, nrow(tempDF)), ]
88
+
89
+ patent_linkDF <- rbind(patent_linkDF, tempDF);
90
+
91
+ iterater <- iterater + 1
92
+
93
+ Sys.sleep(sleep)
94
+
95
+ }
96
+
97
+
98
+
99
+
100
+
101
+ write.csv(patent_linkDF, "patent_link.csv")
102
+
103
+
104
+
105
+
106
+
107
+
10
108
 
11
109
  ```
12
110