回答編集履歴
7
テキスト修正
test
CHANGED
@@ -235,3 +235,15 @@
|
|
235
235
|
print(P_list)
|
236
236
|
|
237
237
|
```
|
238
|
+
|
239
|
+
上記を実行すると以下が表示されます。
|
240
|
+
|
241
|
+
|
242
|
+
|
243
|
+
```
|
244
|
+
|
245
|
+
[<h3>見出し1</h3>, <h3>見出し2</h3>, <h3>見出し3</h3>]
|
246
|
+
|
247
|
+
[[<p>本文1-A</p>, <p>本文1-B</p>, <p>本文1-C</p>], [<p>本文2-A</p>, <p>本文2-B</p>, <p>本文2-C</p>], [<p>本文3-A</p>, <p>本文3-B</p>, <p>本文3-C</p>]]
|
248
|
+
|
249
|
+
```
|
6
テキスト修正
test
CHANGED
@@ -112,12 +112,40 @@
|
|
112
112
|
|
113
113
|
|
114
114
|
|
115
|
-
追加のご質問への回答となるコードは以下です。
|
115
|
+
~~追加のご質問への回答となるコードは以下です。~~
|
116
116
|
|
117
117
|
|
118
118
|
|
119
119
|
```python
|
120
120
|
|
121
|
+
(削除しました。)
|
122
|
+
|
123
|
+
```
|
124
|
+
|
125
|
+
~~上記を実行すると以下が表示されます。~~
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
```
|
130
|
+
|
131
|
+
(削除しました)
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
```
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
#### 追記2
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
```python
|
148
|
+
|
121
149
|
from bs4 import BeautifulSoup
|
122
150
|
|
123
151
|
|
@@ -182,13 +210,13 @@
|
|
182
210
|
|
183
211
|
|
184
212
|
|
185
|
-
H3or4_list = soup.findAll(['h3', 'h4'])
|
213
|
+
H3_list = soup.findAll('h3')
|
186
|
-
|
214
|
+
|
187
|
-
P_list = [[] for _ in range(len(H3or4_list))]
|
215
|
+
P_list = [[] for _ in range(len(H3_list))]
|
188
|
-
|
189
|
-
|
190
|
-
|
216
|
+
|
217
|
+
|
218
|
+
|
191
|
-
for i, e in enumerate(H3or4_list):
|
219
|
+
for i, e in enumerate(H3_list):
|
192
220
|
|
193
221
|
while True:
|
194
222
|
|
@@ -202,24 +230,8 @@
|
|
202
230
|
|
203
231
|
|
204
232
|
|
205
|
-
print(H3or4_list)
|
233
|
+
print(H3_list)
|
206
234
|
|
207
235
|
print(P_list)
|
208
236
|
|
209
|
-
|
210
|
-
|
211
|
-
```
|
237
|
+
```
|
212
|
-
|
213
|
-
上記を実行すると以下が表示されます。
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
```
|
218
|
-
|
219
|
-
[<h3>見出し1</h3>, <h3>見出し2</h3>, <h3>見出し3</h3>, <h4>見出し4</h4>]
|
220
|
-
|
221
|
-
[[<p>本文1-A</p>, <p>本文1-B</p>, <p>本文1-C</p>], [<p>本文2-A</p>, <p>本文2-B</p>, <p>本文2-C</p>], [<p>本文3-A</p>, <p>本文3-B</p>, <p>本文3-C</p>], [<p>本文4-A</p>, <p>本文4-B</p>, <p>本文4-C</p>]]
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
```
|
5
テキスト修正
test
CHANGED
@@ -105,3 +105,121 @@
|
|
105
105
|
[[<p>本文1-A</p>, <p>本文1-B</p>, <p>本文1-C</p>], [<p>本文2-A</p>, <p>本文2-B</p>, <p>本文2-C</p>], [<p>本文3-A</p>, <p>本文3-B</p>, <p>本文3-C</p>]]
|
106
106
|
|
107
107
|
```
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
#### 追記
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
追加のご質問への回答となるコードは以下です。
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
```python
|
120
|
+
|
121
|
+
from bs4 import BeautifulSoup
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
html = '''
|
126
|
+
|
127
|
+
<!DOCTYPE html>
|
128
|
+
|
129
|
+
<html lang="ja">
|
130
|
+
|
131
|
+
<head>
|
132
|
+
|
133
|
+
<meta charset="UTF-8">
|
134
|
+
|
135
|
+
<title>test</title>
|
136
|
+
|
137
|
+
</head>
|
138
|
+
|
139
|
+
<body>
|
140
|
+
|
141
|
+
<h3>見出し1</h3>
|
142
|
+
|
143
|
+
<p>本文1-A</p>
|
144
|
+
|
145
|
+
<p>本文1-B</p>
|
146
|
+
|
147
|
+
<p>本文1-C</p>
|
148
|
+
|
149
|
+
<h3>見出し2</h3>
|
150
|
+
|
151
|
+
<p>本文2-A</p>
|
152
|
+
|
153
|
+
<p>本文2-B</p>
|
154
|
+
|
155
|
+
<p>本文2-C</p>
|
156
|
+
|
157
|
+
<h3>見出し3</h3>
|
158
|
+
|
159
|
+
<p>本文3-A</p>
|
160
|
+
|
161
|
+
<p>本文3-B</p>
|
162
|
+
|
163
|
+
<p>本文3-C</p>
|
164
|
+
|
165
|
+
<h4>見出し4</h4>
|
166
|
+
|
167
|
+
<p>本文4-A</p>
|
168
|
+
|
169
|
+
<p>本文4-B</p>
|
170
|
+
|
171
|
+
<p>本文4-C</p>
|
172
|
+
|
173
|
+
</body>
|
174
|
+
|
175
|
+
</html>
|
176
|
+
|
177
|
+
'''
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
soup = BeautifulSoup(html, 'lxml')
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
H3or4_list = soup.findAll(['h3', 'h4'])
|
186
|
+
|
187
|
+
P_list = [[] for _ in range(len(H3or4_list))]
|
188
|
+
|
189
|
+
|
190
|
+
|
191
|
+
for i, e in enumerate(H3or4_list):
|
192
|
+
|
193
|
+
while True:
|
194
|
+
|
195
|
+
e = e.findNext(['p', 'h3', 'h4'])
|
196
|
+
|
197
|
+
if e is None or e.name != 'p':
|
198
|
+
|
199
|
+
break
|
200
|
+
|
201
|
+
P_list[i].append(e)
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
print(H3or4_list)
|
206
|
+
|
207
|
+
print(P_list)
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
```
|
212
|
+
|
213
|
+
上記を実行すると以下が表示されます。
|
214
|
+
|
215
|
+
|
216
|
+
|
217
|
+
```
|
218
|
+
|
219
|
+
[<h3>見出し1</h3>, <h3>見出し2</h3>, <h3>見出し3</h3>, <h4>見出し4</h4>]
|
220
|
+
|
221
|
+
[[<p>本文1-A</p>, <p>本文1-B</p>, <p>本文1-C</p>], [<p>本文2-A</p>, <p>本文2-B</p>, <p>本文2-C</p>], [<p>本文3-A</p>, <p>本文3-B</p>, <p>本文3-C</p>], [<p>本文4-A</p>, <p>本文4-B</p>, <p>本文4-C</p>]]
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
```
|
4
テキスト修正
test
CHANGED
@@ -64,13 +64,11 @@
|
|
64
64
|
|
65
65
|
H3_list = soup.findAll('h3')
|
66
66
|
|
67
|
-
P_list = []
|
67
|
+
P_list = [[] for _ in range(len(H3_list))]
|
68
68
|
|
69
69
|
|
70
70
|
|
71
|
-
for h3 in H3_list:
|
71
|
+
for i, h3 in enumerate(H3_list):
|
72
|
-
|
73
|
-
ps = []
|
74
72
|
|
75
73
|
e = h3
|
76
74
|
|
@@ -82,9 +80,7 @@
|
|
82
80
|
|
83
81
|
break
|
84
82
|
|
85
|
-
ps.append(e)
|
86
|
-
|
87
|
-
P_list.append(ps)
|
83
|
+
P_list[i].append(e)
|
88
84
|
|
89
85
|
|
90
86
|
|
3
テキスト修正
test
CHANGED
@@ -58,21 +58,19 @@
|
|
58
58
|
|
59
59
|
|
60
60
|
|
61
|
+
soup = BeautifulSoup(html, 'lxml')
|
62
|
+
|
63
|
+
|
64
|
+
|
61
|
-
H3_list = []
|
65
|
+
H3_list = soup.findAll('h3')
|
62
66
|
|
63
67
|
P_list = []
|
64
68
|
|
65
69
|
|
66
70
|
|
67
|
-
|
71
|
+
for h3 in H3_list:
|
68
72
|
|
69
|
-
|
70
|
-
|
71
|
-
for h3 in soup.findAll('h3'):
|
72
|
-
|
73
|
-
H3_list.append(h3)
|
74
|
-
|
75
|
-
p_elements = []
|
73
|
+
ps = []
|
76
74
|
|
77
75
|
e = h3
|
78
76
|
|
@@ -84,9 +82,9 @@
|
|
84
82
|
|
85
83
|
break
|
86
84
|
|
87
|
-
p_elements.append(e)
|
85
|
+
ps.append(e)
|
88
86
|
|
89
|
-
P_list.append(p_elements)
|
87
|
+
P_list.append(ps)
|
90
88
|
|
91
89
|
|
92
90
|
|
2
テキスト修正
test
CHANGED
@@ -70,9 +70,9 @@
|
|
70
70
|
|
71
71
|
for h3 in soup.findAll('h3'):
|
72
72
|
|
73
|
-
H3_list.append(h3.text)
|
73
|
+
H3_list.append(h3)
|
74
74
|
|
75
|
-
|
75
|
+
p_elements = []
|
76
76
|
|
77
77
|
e = h3
|
78
78
|
|
@@ -84,9 +84,9 @@
|
|
84
84
|
|
85
85
|
break
|
86
86
|
|
87
|
-
|
87
|
+
p_elements.append(e)
|
88
88
|
|
89
|
-
P_list.append(text)
|
89
|
+
P_list.append(p_elements)
|
90
90
|
|
91
91
|
|
92
92
|
|
@@ -104,6 +104,10 @@
|
|
104
104
|
|
105
105
|
|
106
106
|
|
107
|
-
|
107
|
+
```
|
108
108
|
|
109
|
-
[
|
109
|
+
[<h3>見出し1</h3>, <h3>見出し2</h3>, <h3>見出し3</h3>]
|
110
|
+
|
111
|
+
[[<p>本文1-A</p>, <p>本文1-B</p>, <p>本文1-C</p>], [<p>本文2-A</p>, <p>本文2-B</p>, <p>本文2-C</p>], [<p>本文3-A</p>, <p>本文3-B</p>, <p>本文3-C</p>]]
|
112
|
+
|
113
|
+
```
|
1
テキスト修正
test
CHANGED
@@ -68,11 +68,13 @@
|
|
68
68
|
|
69
69
|
|
70
70
|
|
71
|
-
for
|
71
|
+
for h3 in soup.findAll('h3'):
|
72
72
|
|
73
|
-
H3_list.append(
|
73
|
+
H3_list.append(h3.text)
|
74
74
|
|
75
75
|
text = ''
|
76
|
+
|
77
|
+
e = h3
|
76
78
|
|
77
79
|
while True:
|
78
80
|
|
@@ -85,8 +87,6 @@
|
|
85
87
|
text += e.text
|
86
88
|
|
87
89
|
P_list.append(text)
|
88
|
-
|
89
|
-
|
90
90
|
|
91
91
|
|
92
92
|
|