質問編集履歴
7
初心者
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
### python3でseleniumとbs4を使用(使用機材はmac)
|
2
2
|
|
3
|
-
|
3
|
+
|
4
4
|
|
5
5
|
seleniumとbs4を用いてスクレイピングをしています。社会科のキーワード集作成のためにキーワードとそのリンク先の説明を取得したいです。
|
6
6
|
|
6
ソースコードの変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -112,11 +112,15 @@
|
|
112
112
|
|
113
113
|
```python
|
114
114
|
|
115
|
-
import requests, bs4
|
115
|
+
import requests, bs4, sys
|
116
116
|
|
117
117
|
from selenium import webdriver
|
118
118
|
|
119
119
|
from time import sleep
|
120
|
+
|
121
|
+
import urllib
|
122
|
+
|
123
|
+
urllib.request.http.client
|
120
124
|
|
121
125
|
|
122
126
|
|
@@ -124,13 +128,13 @@
|
|
124
128
|
|
125
129
|
print('Next...')
|
126
130
|
|
127
|
-
driver = webdriver.Firefox(executable_path='/Users/
|
131
|
+
driver = webdriver.Firefox(executable_path='/Users/****/opt/anaconda3/bin/geckodriver')
|
128
132
|
|
129
133
|
url = 'http://ssd.cswiki.jp/index.php?%E6%AD%B4%E5%8F%B2%EF%BC%A1%E3%83%A9%E3%83%B3%E3%82%AF'
|
130
134
|
|
131
135
|
driver.get(url)
|
132
136
|
|
133
|
-
sleep(
|
137
|
+
sleep(10)
|
134
138
|
|
135
139
|
|
136
140
|
|
@@ -151,6 +155,8 @@
|
|
151
155
|
time.sleep(30)
|
152
156
|
|
153
157
|
driver.back()
|
158
|
+
|
159
|
+
|
154
160
|
|
155
161
|
|
156
162
|
|
5
requestsの再インストール
test
CHANGED
File without changes
|
test
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
|
23
23
|
File "Selelelen.py", line 1, in <module>
|
24
24
|
|
25
|
-
import requests, bs4
|
25
|
+
import requests, bs4, sys
|
26
26
|
|
27
27
|
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/requests/__init__.py", line 43, in <module>
|
28
28
|
|
@@ -80,7 +80,7 @@
|
|
80
80
|
|
81
81
|
from . import _html5lib
|
82
82
|
|
83
|
-
File "/Users/****
|
83
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/bs4/builder/_html5lib.py", line 20, in <module>
|
84
84
|
|
85
85
|
import html5lib
|
86
86
|
|
@@ -101,8 +101,6 @@
|
|
101
101
|
if hasattr(http.client, 'HTTPSConnection'):
|
102
102
|
|
103
103
|
AttributeError: module 'http' has no attribute 'client'
|
104
|
-
|
105
|
-
|
106
104
|
|
107
105
|
```
|
108
106
|
|
4
エラーメッセージ全文に変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -17,8 +17,6 @@
|
|
17
17
|
|
18
18
|
|
19
19
|
|
20
|
-
|
21
|
-
(base) ****@****noMacBook-Air program % python Selelelen.py
|
22
20
|
|
23
21
|
Traceback (most recent call last):
|
24
22
|
|
@@ -82,7 +80,7 @@
|
|
82
80
|
|
83
81
|
from . import _html5lib
|
84
82
|
|
85
|
-
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/bs4/builder/_html5lib.py", line 20, in <module>
|
83
|
+
File "/Users/****a/opt/anaconda3/lib/python3.7/site-packages/bs4/builder/_html5lib.py", line 20, in <module>
|
86
84
|
|
87
85
|
import html5lib
|
88
86
|
|
@@ -103,8 +101,6 @@
|
|
103
101
|
if hasattr(http.client, 'HTTPSConnection'):
|
104
102
|
|
105
103
|
AttributeError: module 'http' has no attribute 'client'
|
106
|
-
|
107
|
-
|
108
104
|
|
109
105
|
|
110
106
|
|
3
エラーメッセージ全文に変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,7 +16,97 @@
|
|
16
16
|
|
17
17
|
|
18
18
|
|
19
|
+
|
20
|
+
|
21
|
+
(base) ****@****noMacBook-Air program % python Selelelen.py
|
22
|
+
|
23
|
+
Traceback (most recent call last):
|
24
|
+
|
25
|
+
File "Selelelen.py", line 1, in <module>
|
26
|
+
|
27
|
+
import requests, bs4
|
28
|
+
|
29
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/requests/__init__.py", line 43, in <module>
|
30
|
+
|
31
|
+
import urllib3
|
32
|
+
|
33
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/__init__.py", line 7, in <module>
|
34
|
+
|
35
|
+
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
|
36
|
+
|
37
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py", line 11, in <module>
|
38
|
+
|
39
|
+
from .exceptions import (
|
40
|
+
|
41
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/exceptions.py", line 2, in <module>
|
42
|
+
|
43
|
+
from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead
|
44
|
+
|
45
|
+
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
|
46
|
+
|
47
|
+
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
|
48
|
+
|
49
|
+
File "<frozen importlib._bootstrap>", line 668, in _load_unlocked
|
50
|
+
|
51
|
+
File "<frozen importlib._bootstrap>", line 638, in _load_backward_compatible
|
52
|
+
|
53
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py", line 199, in load_module
|
54
|
+
|
55
|
+
mod = mod._resolve()
|
56
|
+
|
57
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py", line 113, in _resolve
|
58
|
+
|
59
|
+
return _import_module(self.mod)
|
60
|
+
|
61
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/urllib3/packages/six.py", line 82, in _import_module
|
62
|
+
|
63
|
+
__import__(name)
|
64
|
+
|
65
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/http/client.py", line 72, in <module>
|
66
|
+
|
67
|
+
import email.message
|
68
|
+
|
69
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/email/message.py", line 10, in <module>
|
70
|
+
|
71
|
+
import uu
|
72
|
+
|
73
|
+
File "/Users/****/Desktop/Program/uu.py", line 1, in <module>
|
74
|
+
|
75
|
+
import requests, sys, webbrowser, bs4
|
76
|
+
|
77
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/bs4/__init__.py", line 31, in <module>
|
78
|
+
|
79
|
+
from .builder import builder_registry, ParserRejectedMarkup
|
80
|
+
|
81
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/bs4/builder/__init__.py", line 475, in <module>
|
82
|
+
|
83
|
+
from . import _html5lib
|
84
|
+
|
85
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/bs4/builder/_html5lib.py", line 20, in <module>
|
86
|
+
|
87
|
+
import html5lib
|
88
|
+
|
89
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/html5lib/__init__.py", line 28, in <module>
|
90
|
+
|
91
|
+
from .serializer import serialize
|
92
|
+
|
93
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/site-packages/html5lib/serializer.py", line 11, in <module>
|
94
|
+
|
95
|
+
from xml.sax.saxutils import escape
|
96
|
+
|
97
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/xml/sax/saxutils.py", line 6, in <module>
|
98
|
+
|
99
|
+
import os, urllib.parse, urllib.request
|
100
|
+
|
101
|
+
File "/Users/****/opt/anaconda3/lib/python3.7/urllib/request.py", line 1351, in <module>
|
102
|
+
|
103
|
+
if hasattr(http.client, 'HTTPSConnection'):
|
104
|
+
|
19
105
|
AttributeError: module 'http' has no attribute 'client'
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
20
110
|
|
21
111
|
```
|
22
112
|
|
2
エラーメッセージの訂正、sleepの秒数変更
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
|
18
18
|
|
19
|
-
AttributeError: '
|
19
|
+
AttributeError: module 'http' has no attribute 'client'
|
20
20
|
|
21
21
|
```
|
22
22
|
|
@@ -46,7 +46,7 @@
|
|
46
46
|
|
47
47
|
driver.get(url)
|
48
48
|
|
49
|
-
sleep(
|
49
|
+
sleep(30)
|
50
50
|
|
51
51
|
|
52
52
|
|
1
スペルミスの修正を行った。
test
CHANGED
File without changes
|
test
CHANGED
@@ -50,13 +50,13 @@
|
|
50
50
|
|
51
51
|
|
52
52
|
|
53
|
-
explain_url = []
|
53
|
+
explain_urls = []
|
54
54
|
|
55
55
|
elems_explain_url = driver.find_elements_by_css_selector('#body p a')
|
56
56
|
|
57
57
|
for elem_explain_url in elems_explain_url:
|
58
58
|
|
59
|
-
explain_url
|
59
|
+
explain_url = elem_explain_url.get_attribute('href')
|
60
60
|
|
61
61
|
explain_urls.append(explain_url)
|
62
62
|
|