回答編集履歴
3
a
test
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
```python
|
6
6
|
|
7
7
|
from bs4 import BeautifulSoup
|
8
|
+
|
9
|
+
from bs4.element import NavigableString
|
8
10
|
|
9
11
|
|
10
12
|
|
@@ -46,9 +48,11 @@
|
|
46
48
|
|
47
49
|
for tag in soup.findAll('br'):
|
48
50
|
|
49
|
-
te
|
51
|
+
if isinstance(tag.next_sibling, NavigableString):
|
50
52
|
|
53
|
+
text = tag.next_sibling.strip()
|
54
|
+
|
51
|
-
tag.next_sibling.replace_with(text)
|
55
|
+
tag.next_sibling.replace_with(text)
|
52
56
|
|
53
57
|
|
54
58
|
|
2
あ
test
CHANGED
@@ -16,9 +16,11 @@
|
|
16
16
|
|
17
17
|
<body>
|
18
18
|
|
19
|
-
<
|
19
|
+
<td><strong>三菱<strong>UFJ<br/>
|
20
20
|
|
21
|
+
モルガン・<br/>
|
22
|
+
|
21
|
-
|
23
|
+
スタンレー証券</td>
|
22
24
|
|
23
25
|
</body>
|
24
26
|
|
@@ -34,19 +36,27 @@
|
|
34
36
|
|
35
37
|
|
36
38
|
|
39
|
+
# strong タグを削除
|
40
|
+
|
37
|
-
i
|
41
|
+
[tag.replaceWithChildren() for tag in soup.findAll('strong')]
|
38
42
|
|
39
43
|
|
40
44
|
|
41
|
-
# タグを
|
45
|
+
# br タグ直後の改行を削除
|
42
46
|
|
43
|
-
|
47
|
+
for tag in soup.findAll('br'):
|
48
|
+
|
49
|
+
text = tag.next_sibling.strip()
|
50
|
+
|
51
|
+
tag.next_sibling.replace_with(text)
|
44
52
|
|
45
53
|
|
46
54
|
|
47
|
-
# タグ
|
55
|
+
# br タグを削除
|
48
56
|
|
49
|
-
[tag.replaceWithChildren() for tag in soup.findAll(
|
57
|
+
[tag.replaceWithChildren() for tag in soup.findAll('br')]
|
58
|
+
|
59
|
+
|
50
60
|
|
51
61
|
print('>>> after\n', soup)
|
52
62
|
|
@@ -64,9 +74,11 @@
|
|
64
74
|
|
65
75
|
<body>
|
66
76
|
|
67
|
-
<
|
77
|
+
<td><strong>三菱<strong>UFJ<br/>
|
68
78
|
|
79
|
+
モルガン・<br/>
|
80
|
+
|
69
|
-
<
|
81
|
+
スタンレー証券</strong></strong></td>
|
70
82
|
|
71
83
|
</body>
|
72
84
|
|
@@ -82,9 +94,7 @@
|
|
82
94
|
|
83
95
|
<body>
|
84
96
|
|
85
|
-
<
|
97
|
+
<td>三菱UFJモルガン・スタンレー証券</td>
|
86
|
-
|
87
|
-
<p>テスト</p>
|
88
98
|
|
89
99
|
</body>
|
90
100
|
|
1
あ
test
CHANGED
@@ -40,7 +40,7 @@
|
|
40
40
|
|
41
41
|
# タグをコンテンツごと削除する場合
|
42
42
|
|
43
|
-
#[tag.r
|
43
|
+
#[tag.extract() for tag in soup.findAll(ignore_tags)]
|
44
44
|
|
45
45
|
|
46
46
|
|