質問編集履歴
4
質問への回答を追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -56,168 +56,18 @@
|
|
56
56
|
|
57
57
|
|
58
58
|
|
59
|
-
```
|
60
|
-
|
61
|
-
/usr/local/lib64/python3.6/site-packages/bs4/__init__.py in __init__
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
% ",".join(features))でエラー
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
if features is None or len(features) == 0:
|
70
|
-
|
71
|
-
features = self.DEFAULT_BUILDER_FEATURES
|
72
|
-
|
73
|
-
builder_class = builder_registry.lookup(*features)
|
74
|
-
|
75
|
-
if builder_class is None:
|
76
|
-
|
77
|
-
raise FeatureNotFound(
|
78
|
-
|
79
|
-
"Couldn't find a tree builder with the features you "
|
80
|
-
|
81
|
-
"requested: %s. Do you need to install a parser library?"
|
82
|
-
|
83
|
-
% ",".join(features)) ...
|
84
|
-
|
85
|
-
builder = builder_class()
|
86
|
-
|
87
|
-
if not (original_features == builder.NAME or
|
88
|
-
|
89
|
-
original_features in builder.ALTERNATE_NAMES):
|
90
|
-
|
91
|
-
if builder.is_xml:
|
92
|
-
|
93
|
-
markup_type = "XML"
|
94
|
-
|
95
|
-
else:
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
```
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
```
|
106
|
-
|
107
|
-
builder None
|
108
|
-
|
109
|
-
builder_class None
|
110
|
-
|
111
|
-
deprecated_argument <function BeautifulSoup.__init__.<locals>.deprecated_argument at 0x7f6729947bf8> exclude_encodings None
|
112
|
-
|
113
|
-
features ['html5lib']
|
114
|
-
|
115
|
-
from_encoding None
|
116
|
-
|
117
|
-
kwargs {}
|
118
|
-
|
119
|
-
markup ('<!doctype html><html itemscope="" '
|
120
|
-
|
121
|
-
'itemtype="http://schema.org/SearchResultsPage" lang="ja"><head><meta '
|
122
|
-
|
123
|
-
'content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta '
|
124
|
-
|
125
|
-
'content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" '
|
126
|
-
|
127
|
-
'itemprop="image"><noscript><meta '
|
128
|
-
|
129
|
-
'content="0;url=/search?q=aaa&ie=UTF-8&gbv=1&sei=PBq0W9mdCpTj-AbUvrOwAQ" '
|
130
|
-
|
131
|
-
'http-equiv="refresh"><style>table,div,span,p{display:none}</style><div '
|
132
|
-
|
133
|
-
'style="display:block">数秒たってもリダイレクトされない場合は、<a '
|
134
|
-
|
135
|
-
'href="/search?q=aaa&ie=UTF-8&gbv=1&sei=PBq0W9mdCpTj-AbUvrOwAQ">ここをクリック</a>してください。</div></noscript><title>aaa '
|
136
|
-
|
137
|
-
'- Google 検索</title><style>#gbar,#guser{font-size:13px;padding-top:1px '
|
138
|
-
|
139
|
-
'!important;}#gbar{height:22px}#guser{padding-bottom:7px '
|
140
|
-
|
141
|
-
'!important;text-align:right}.gbh,.gbd{border-top:1px solid '
|
142
|
-
|
143
|
-
'#c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media '
|
144
|
-
|
145
|
-
'all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline '
|
146
|
-
|
147
|
-
'!important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 '
|
148
|
-
|
149
|
-
'!important}.gbf .gb4{color:#900 !important} '
|
150
|
-
|
151
|
-
'</style><style>.star{float:left;margin-top:1px;overflow:hidden}.ybhkme{font-size:11px}.j{width:34em}body,td,div,.p,a{font-family:arial,sans-serif;tap-highlight-color:rgba(255,255,255,0)}body{margin:0}a '
|
152
|
-
|
153
|
-
'img{border:0}#gbar{float:left;height:22px;padding-left:2px;font-size:13px}.gsfi,.gsfs{font-size:17px}.w,.q:active,.q:visited,.tbotu{color:#11c}a.gl{text-decoration:none}#foot{padding:0 '
|
154
|
-
|
155
|
-
'8px}#foot '
|
156
|
-
|
157
|
-
'a{white-space:nowrap}h3{font-size:16px;font-weight:normal;margin:0;padding:0}#res '
|
158
|
-
|
159
|
-
'h3{display:inline}.hd{height:1px;position:absolute;top:-1000em}.g,body,html,table,.std{font-size:13px}.g{margin-bottom:23px;margin-top:0;zoom:1}ol '
|
160
|
-
|
161
|
-
'li,ul li{list-style:none}h1,ol,ul,li{margin:0;padding:0}.e{margin:2px 0 '
|
162
|
-
|
163
|
-
'0.75em}#leftnav a{text-decoration:none}#leftnav '
|
164
|
-
|
165
|
-
'h2{color:#767676;font-weight:normal;margin:0}#nav{border-collapse:collapse;margin-top:17px;text-align:left}#nav '
|
166
|
-
|
167
|
-
'td{text-align:center}.nobr{white-space:nowrap}.ts{border-collapse:collapse}.s '
|
168
|
-
|
169
|
-
'br{display:none}.csb{display:block;height:40px}.images_table '
|
170
|
-
|
171
|
-
'td{line-height:17px;padding-bottom:16px}.images_table img{border:1px solid '
|
172
|
-
|
173
|
-
'#ccc;padding:1px}#tbd,#abd{display:block;min-height:1px}#abd{padding-top:3px}#tbd '
|
174
|
-
|
175
|
-
'li{display:inline}.TIrJXe,.UU5df{margin-bottom:8px}#tbd .tbt '
|
176
|
-
|
177
|
-
'li{display:block;font-size:13px;line-height:1.2;padding-bottom:3px;padding-left:8px;text-indent:-8px}.tbos,.b{font-weight:bold}em{font-weight:bold;font-style:normal}.mime{color:#1a0dab;font-weight:bold;font-size:x-small}.soc '
|
178
|
-
|
179
|
-
'a{text-decoration:none}.soc{color:#808080}.ul7Gbc{color:#e7711b}#Db7kif{border:1px '
|
180
|
-
|
181
|
-
'solid #e0e0e0;margin-left:-8px;margin-right:-8px;padding:15px 20px '
|
182
|
-
|
183
|
-
'5px}.mrH1y{font-size:32px}.PZ6wOb{color:#777;font-size:16px;margin-top:5px}.gwrItc{color:#777;font-size:14px;margin-top:5px}.SVob4e{border:1px '
|
184
|
-
|
185
|
-
'solid #e0e0e0;padding-left:20px}.mYu5Hb{border:1px solid #e0e0e0;padding:5px '
|
186
|
-
|
187
|
-
'20px}#vob{border:1px solid #e0e0e0;padding:15px '
|
188
|
-
|
189
|
-
'15px}#ZjIC2e{font-size:22px;line-height:22px;padding-bottom:5px}#vob_st{line-height:1.24}.DfLGHd{border-width:1px;border-style:solid;border-color:#eee;background-color:#fff;position:relative;margin-bottom:26px}.uRIxYb,.NjTIc,.PftIHd,.DXoZmb{font-family:Arial;font-weight:lighter}.uRIxYb{margin-bottom:5px}.uRIxYb{font-size:xx-large}.NjTIc{font-size:medium}.PftIHd{font-size:large}.DXoZmb{font-size:small}.DfLGHd{margin-left:-8px;margin-right:-15px;padding:20px '
|
190
|
-
|
191
|
-
'20px 24px}.ernfsc{border-spacing:0px '
|
192
|
-
|
193
|
-
'2px}.D3VFNd{max-width:380px;text-overflow:ellipsis;white-space:nowrap;overflow:hidden;padding-left:0px}.c1Ujmc{padding-left:15px;white-space:nowrap;color:#666}.EjZtie{padding-left:0px}.SFt5jb{color:#212121}.Pt7r9e{color:#878787}.bkcGhd{color:#093}.fIP9ce{color:#c00}.LDBB9d{padding:1px}.gssb_a{padding:0 '
|
194
|
-
|
195
|
-
'10px !important}.gssb_c{left:132px !important;right:295px '
|
196
|
-
|
197
|
-
'!important;top:78px !important;width:572px !important}.gssb_c '
|
198
|
-
|
199
|
-
'table{font-size:16px !important}.gssb_e{border:1px solid #ccc '
|
200
|
-
|
201
|
-
'... <trimmed 92941 bytes string>
|
202
|
-
|
203
|
-
original_features
|
204
|
-
|
205
|
-
'html5lib'
|
206
|
-
|
207
|
-
parse_only
|
208
|
-
|
209
|
-
None
|
210
|
-
|
211
|
-
self
|
212
|
-
|
213
|
-
Error in formatting: AttributeError: '<class 'bs4.BeautifulSoup'>' object has no attribute 'contents'
|
214
|
-
|
215
|
-
```
|
216
|
-
|
217
|
-
|
218
|
-
|
219
59
|
どのようにすればエラーが解決できるでしょうか?
|
220
60
|
|
221
61
|
|
222
62
|
|
223
63
|
なお,[stackoverflow](https://stackoverflow.com/questions/24398302/bs4-featurenotfound-couldnt-find-a-tree-builder-with-the-features-you-requeste)やteratailでの同様の質問は参照済みです。
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
環境は,ローカルがmacOS High Sierra,VPSがCentOS 7です。
|
68
|
+
|
69
|
+
いただいた質問に関する出力結果は,
|
70
|
+
|
71
|
+
[https://docs.google.com/document/d/1DzycCIAjIUONikpp109OzexD4qOR0oyY4zKc4WJju6U/edit?usp=sharing](https://docs.google.com/document/d/1DzycCIAjIUONikpp109OzexD4qOR0oyY4zKc4WJju6U/edit?usp=sharing)
|
72
|
+
|
73
|
+
に記載してあります(長すぎて質問本文に収まらなかったため)。
|
3
エラーが出る行について
test
CHANGED
File without changes
|
test
CHANGED
@@ -50,12 +50,22 @@
|
|
50
50
|
|
51
51
|
|
52
52
|
|
53
|
+
エラーは `bs = BeautifulSoup(html.text, 'html5lib')` の行で生じます。
|
54
|
+
|
55
|
+
また,直前にhtml.encoding('utf-8')を書き加えた場合にはhtml.encoding('utf-8')でエラーとなります。
|
56
|
+
|
57
|
+
|
58
|
+
|
53
59
|
```
|
54
60
|
|
55
61
|
/usr/local/lib64/python3.6/site-packages/bs4/__init__.py in __init__
|
56
62
|
|
57
63
|
|
58
64
|
|
65
|
+
% ",".join(features))でエラー
|
66
|
+
|
67
|
+
|
68
|
+
|
59
69
|
if features is None or len(features) == 0:
|
60
70
|
|
61
71
|
features = self.DEFAULT_BUILDER_FEATURES
|
2
ローカル環境では正常であることについて
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
VPS上でDjangoとMeCabを用いたスクレイピングプログラムを作成しています。
|
2
2
|
|
3
|
+
ローカル環境では問題なく動くのですが,VPS上ではエラーが出ます。
|
4
|
+
|
3
5
|
|
4
6
|
|
5
7
|
```bash
|
1
エラーメッセージの追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -30,9 +30,13 @@
|
|
30
30
|
|
31
31
|
```python
|
32
32
|
|
33
|
+
models.py
|
34
|
+
|
35
|
+
|
36
|
+
|
33
37
|
html = requests.get(searchUrl)
|
34
38
|
|
35
|
-
bs = BeautifulSoup(html.text, 'html
|
39
|
+
bs = BeautifulSoup(html.text, 'html5lib') ...
|
36
40
|
|
37
41
|
for el in bs.select("h3.r a"):
|
38
42
|
|
@@ -44,8 +48,164 @@
|
|
44
48
|
|
45
49
|
|
46
50
|
|
51
|
+
```
|
52
|
+
|
53
|
+
/usr/local/lib64/python3.6/site-packages/bs4/__init__.py in __init__
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
if features is None or len(features) == 0:
|
58
|
+
|
59
|
+
features = self.DEFAULT_BUILDER_FEATURES
|
60
|
+
|
61
|
+
builder_class = builder_registry.lookup(*features)
|
62
|
+
|
63
|
+
if builder_class is None:
|
64
|
+
|
65
|
+
raise FeatureNotFound(
|
66
|
+
|
67
|
+
"Couldn't find a tree builder with the features you "
|
68
|
+
|
69
|
+
"requested: %s. Do you need to install a parser library?"
|
70
|
+
|
71
|
+
% ",".join(features)) ...
|
72
|
+
|
73
|
+
builder = builder_class()
|
74
|
+
|
75
|
+
if not (original_features == builder.NAME or
|
76
|
+
|
77
|
+
original_features in builder.ALTERNATE_NAMES):
|
78
|
+
|
79
|
+
if builder.is_xml:
|
80
|
+
|
81
|
+
markup_type = "XML"
|
82
|
+
|
83
|
+
else:
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
```
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
```
|
94
|
+
|
95
|
+
builder None
|
96
|
+
|
97
|
+
builder_class None
|
98
|
+
|
99
|
+
deprecated_argument <function BeautifulSoup.__init__.<locals>.deprecated_argument at 0x7f6729947bf8> exclude_encodings None
|
100
|
+
|
101
|
+
features ['html5lib']
|
102
|
+
|
103
|
+
from_encoding None
|
104
|
+
|
105
|
+
kwargs {}
|
106
|
+
|
107
|
+
markup ('<!doctype html><html itemscope="" '
|
108
|
+
|
109
|
+
'itemtype="http://schema.org/SearchResultsPage" lang="ja"><head><meta '
|
110
|
+
|
111
|
+
'content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta '
|
112
|
+
|
113
|
+
'content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" '
|
114
|
+
|
115
|
+
'itemprop="image"><noscript><meta '
|
116
|
+
|
117
|
+
'content="0;url=/search?q=aaa&ie=UTF-8&gbv=1&sei=PBq0W9mdCpTj-AbUvrOwAQ" '
|
118
|
+
|
119
|
+
'http-equiv="refresh"><style>table,div,span,p{display:none}</style><div '
|
120
|
+
|
121
|
+
'style="display:block">数秒たってもリダイレクトされない場合は、<a '
|
122
|
+
|
123
|
+
'href="/search?q=aaa&ie=UTF-8&gbv=1&sei=PBq0W9mdCpTj-AbUvrOwAQ">ここをクリック</a>してください。</div></noscript><title>aaa '
|
124
|
+
|
125
|
+
'- Google 検索</title><style>#gbar,#guser{font-size:13px;padding-top:1px '
|
126
|
+
|
127
|
+
'!important;}#gbar{height:22px}#guser{padding-bottom:7px '
|
128
|
+
|
129
|
+
'!important;text-align:right}.gbh,.gbd{border-top:1px solid '
|
130
|
+
|
131
|
+
'#c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media '
|
132
|
+
|
133
|
+
'all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline '
|
134
|
+
|
135
|
+
'!important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 '
|
136
|
+
|
137
|
+
'!important}.gbf .gb4{color:#900 !important} '
|
138
|
+
|
139
|
+
'</style><style>.star{float:left;margin-top:1px;overflow:hidden}.ybhkme{font-size:11px}.j{width:34em}body,td,div,.p,a{font-family:arial,sans-serif;tap-highlight-color:rgba(255,255,255,0)}body{margin:0}a '
|
140
|
+
|
141
|
+
'img{border:0}#gbar{float:left;height:22px;padding-left:2px;font-size:13px}.gsfi,.gsfs{font-size:17px}.w,.q:active,.q:visited,.tbotu{color:#11c}a.gl{text-decoration:none}#foot{padding:0 '
|
142
|
+
|
143
|
+
'8px}#foot '
|
144
|
+
|
145
|
+
'a{white-space:nowrap}h3{font-size:16px;font-weight:normal;margin:0;padding:0}#res '
|
146
|
+
|
147
|
+
'h3{display:inline}.hd{height:1px;position:absolute;top:-1000em}.g,body,html,table,.std{font-size:13px}.g{margin-bottom:23px;margin-top:0;zoom:1}ol '
|
148
|
+
|
149
|
+
'li,ul li{list-style:none}h1,ol,ul,li{margin:0;padding:0}.e{margin:2px 0 '
|
150
|
+
|
151
|
+
'0.75em}#leftnav a{text-decoration:none}#leftnav '
|
152
|
+
|
153
|
+
'h2{color:#767676;font-weight:normal;margin:0}#nav{border-collapse:collapse;margin-top:17px;text-align:left}#nav '
|
154
|
+
|
155
|
+
'td{text-align:center}.nobr{white-space:nowrap}.ts{border-collapse:collapse}.s '
|
156
|
+
|
157
|
+
'br{display:none}.csb{display:block;height:40px}.images_table '
|
158
|
+
|
159
|
+
'td{line-height:17px;padding-bottom:16px}.images_table img{border:1px solid '
|
160
|
+
|
161
|
+
'#ccc;padding:1px}#tbd,#abd{display:block;min-height:1px}#abd{padding-top:3px}#tbd '
|
162
|
+
|
163
|
+
'li{display:inline}.TIrJXe,.UU5df{margin-bottom:8px}#tbd .tbt '
|
164
|
+
|
165
|
+
'li{display:block;font-size:13px;line-height:1.2;padding-bottom:3px;padding-left:8px;text-indent:-8px}.tbos,.b{font-weight:bold}em{font-weight:bold;font-style:normal}.mime{color:#1a0dab;font-weight:bold;font-size:x-small}.soc '
|
166
|
+
|
167
|
+
'a{text-decoration:none}.soc{color:#808080}.ul7Gbc{color:#e7711b}#Db7kif{border:1px '
|
168
|
+
|
169
|
+
'solid #e0e0e0;margin-left:-8px;margin-right:-8px;padding:15px 20px '
|
170
|
+
|
171
|
+
'5px}.mrH1y{font-size:32px}.PZ6wOb{color:#777;font-size:16px;margin-top:5px}.gwrItc{color:#777;font-size:14px;margin-top:5px}.SVob4e{border:1px '
|
172
|
+
|
173
|
+
'solid #e0e0e0;padding-left:20px}.mYu5Hb{border:1px solid #e0e0e0;padding:5px '
|
174
|
+
|
175
|
+
'20px}#vob{border:1px solid #e0e0e0;padding:15px '
|
176
|
+
|
177
|
+
'15px}#ZjIC2e{font-size:22px;line-height:22px;padding-bottom:5px}#vob_st{line-height:1.24}.DfLGHd{border-width:1px;border-style:solid;border-color:#eee;background-color:#fff;position:relative;margin-bottom:26px}.uRIxYb,.NjTIc,.PftIHd,.DXoZmb{font-family:Arial;font-weight:lighter}.uRIxYb{margin-bottom:5px}.uRIxYb{font-size:xx-large}.NjTIc{font-size:medium}.PftIHd{font-size:large}.DXoZmb{font-size:small}.DfLGHd{margin-left:-8px;margin-right:-15px;padding:20px '
|
178
|
+
|
179
|
+
'20px 24px}.ernfsc{border-spacing:0px '
|
180
|
+
|
181
|
+
'2px}.D3VFNd{max-width:380px;text-overflow:ellipsis;white-space:nowrap;overflow:hidden;padding-left:0px}.c1Ujmc{padding-left:15px;white-space:nowrap;color:#666}.EjZtie{padding-left:0px}.SFt5jb{color:#212121}.Pt7r9e{color:#878787}.bkcGhd{color:#093}.fIP9ce{color:#c00}.LDBB9d{padding:1px}.gssb_a{padding:0 '
|
182
|
+
|
183
|
+
'10px !important}.gssb_c{left:132px !important;right:295px '
|
184
|
+
|
185
|
+
'!important;top:78px !important;width:572px !important}.gssb_c '
|
186
|
+
|
187
|
+
'table{font-size:16px !important}.gssb_e{border:1px solid #ccc '
|
188
|
+
|
189
|
+
'... <trimmed 92941 bytes string>
|
190
|
+
|
191
|
+
original_features
|
192
|
+
|
193
|
+
'html5lib'
|
194
|
+
|
195
|
+
parse_only
|
196
|
+
|
197
|
+
None
|
198
|
+
|
199
|
+
self
|
200
|
+
|
201
|
+
Error in formatting: AttributeError: '<class 'bs4.BeautifulSoup'>' object has no attribute 'contents'
|
202
|
+
|
203
|
+
```
|
204
|
+
|
205
|
+
|
206
|
+
|
47
207
|
どのようにすればエラーが解決できるでしょうか?
|
48
208
|
|
49
209
|
|
50
210
|
|
51
|
-
なお,[stackoverflo](https://stackoverflow.com/questions/24398302/bs4-featurenotfound-couldnt-find-a-tree-builder-with-the-features-you-requeste)やteratailでの同様の質問は参照済みです。
|
211
|
+
なお,[stackoverflow](https://stackoverflow.com/questions/24398302/bs4-featurenotfound-couldnt-find-a-tree-builder-with-the-features-you-requeste)やteratailでの同様の質問は参照済みです。
|