teratail header banner
teratail header banner
質問する / ログイン / 新規登録

質問編集履歴

1

全文の追加

2019/01/20 11:26

投稿

退会済みユーザー
title CHANGED
File without changes
body CHANGED
@@ -1,8 +1,57 @@
1
1
  R でスクレイピングをするために URL 操作をしたいのです。
2
2
  以下がコードです。
3
3
  ```R
4
+ library(RSelenium)
5
+ library(rvest)
6
+ library(XML)
7
+
8
+ remDr = remoteDriver(remoteServerAddr = "localhost", port = 4444, browserName = "chrome")
9
+ remDr$open()
10
+
11
+ iterater <- 1
12
+ max_page <- 3
13
+ patent_linkDF <-data.frame('', '')
14
+ sleep <- 3
15
+
16
+ url <- 'https://jglobal.jst.go.jp/search/patents#{"category":"3","keyword":"農薬"}'
17
+ remDr$navigate(url)
18
+ planeHtmlList <- remDr$getPageSource()
19
+ fileNmae <- paste(formatC(iterater, width = 5, flag = 0), ".html", sep = "")
20
+ write(unlist(planeHtmlList), fileNmae)
21
+
22
+ path <- paste(getwd(), fileNmae, sep = "/")
23
+ html <- read_html(path)
24
+ parsed_doc <- htmlParse(html)
25
+ title <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlValue)
26
+ link <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlGetAttr, "href")
27
+ tempDF <- data.frame(title, link)
28
+ patent_linkDF <- tempDF[-c(1,2, nrow(tempDF)), ]
29
+ iterater <- iterater + 1
30
+ Sys.sleep(sleep)
31
+
4
32
  while(iterater <= max_page){
5
- url <- paste('https://hogehoge.jp/search/fugafuga#{"category":"3","page":', iterater, "}", sep='')
33
+ url <- paste('https://jglobal.jst.go.jp/search/patents#{"category":"3","keyword":"農薬","page":', iterater, "}", sep='') remDr$navigate(url)
34
+ planeHtmlList <- remDr$getPageSource()
35
+ fileNmae <- paste(formatC(iterater, width = 5, flag = 0), ".html", sep = "")
36
+ write(unlist(planeHtmlList), fileNmae)
37
+
38
+ path <- paste(getwd(), fileNmae, sep = "/")
39
+ html <- read_html(path)
40
+ parsed_doc <- htmlParse(html)
41
+ title <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlValue)
42
+ link <- xpathSApply(doc = parsed_doc , path = "//a[@href]", xmlGetAttr, "href")
43
+ tempDF <- data.frame(title, link)
44
+ patent_page_linkDF <- tempDF[-c(1,2, nrow(tempDF)), ]
45
+ patent_linkDF <- rbind(patent_linkDF, tempDF);
46
+ iterater <- iterater + 1
47
+ Sys.sleep(sleep)
48
+ }
49
+
50
+
51
+ write.csv(patent_linkDF, "patent_link.csv")
52
+
53
+
54
+
6
55
  ```
7
56
  以下がエラー内容です。
8
57
  ```