回答編集履歴
2
d
test
CHANGED
@@ -87,3 +87,57 @@
|
|
87
87
|
print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC', 'YYY', '', 'TTTSSS', 'RRR', '4', 'XXX']
|
88
88
|
|
89
89
|
```
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
## 追記
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
```python
|
98
|
+
|
99
|
+
html = '''<p><span>ZZZ</span></p>,
|
100
|
+
|
101
|
+
<p>AAA</p>,
|
102
|
+
|
103
|
+
<p>BBB</p>,
|
104
|
+
|
105
|
+
<p>CCC</p>,
|
106
|
+
|
107
|
+
<p class="tags">YYY</p>,
|
108
|
+
|
109
|
+
<p class="list"><a href="/WWW/"><img alt="VVV" src="/UUU"/></a></p>,
|
110
|
+
|
111
|
+
<p class="tags">TTT<br class="sp"/>SSS</p>,
|
112
|
+
|
113
|
+
<p class="hoge"><a class="tagb" href="/socialmedia/">RRR</a></p>,
|
114
|
+
|
115
|
+
<p class="fuga"><a class="typesquare_tags" href="/chronicle/04/">4</a></p>,
|
116
|
+
|
117
|
+
<p class="capion typesquare_tags">XXX</p>'''
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
from bs4 import BeautifulSoup
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
soup = BeautifulSoup(html, 'html.parser')  # name the parser explicitly: avoids GuessedAtParserWarning and parser-dependent results
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
vals = []
|
130
|
+
|
131
|
+
for t in soup.find_all('p'):
|
132
|
+
|
133
|
+
# Take only the first text node that is a direct child of the <p> tag (text nested inside child tags is skipped)
|
134
|
+
|
135
|
+
p_text = t.find(string=True, recursive=False)  # `string=` is the current name of the legacy `text=` filter; recursive=False limits the search to direct children
|
136
|
+
|
137
|
+
if p_text:
|
138
|
+
|
139
|
+
vals.append(p_text)
|
140
|
+
|
141
|
+
print(vals) # ['AAA', 'BBB', 'CCC', 'YYY', 'TTT', 'XXX']
|
142
|
+
|
143
|
+
```
|
1
d
test
CHANGED
@@ -41,3 +41,49 @@
|
|
41
41
|
print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC']
|
42
42
|
|
43
43
|
```
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
## 追記
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
単純に p タグの値だけ取り出すという意味でしたら、以下です。
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
```python
|
56
|
+
|
57
|
+
html = '''<p><span>ZZZ</span></p>,
|
58
|
+
|
59
|
+
<p>AAA</p>,
|
60
|
+
|
61
|
+
<p>BBB</p>,
|
62
|
+
|
63
|
+
<p>CCC</p>,
|
64
|
+
|
65
|
+
<p class="tags">YYY</p>,
|
66
|
+
|
67
|
+
<p class="list"><a href="/WWW/"><img alt="VVV" src="/UUU"/></a></p>,
|
68
|
+
|
69
|
+
<p class="tags">TTT<br class="sp"/>SSS</p>,
|
70
|
+
|
71
|
+
<p class="hoge"><a class="tagb" href="/socialmedia/">RRR</a></p>,
|
72
|
+
|
73
|
+
<p class="fuga"><a class="typesquare_tags" href="/chronicle/04/">4</a></p>,
|
74
|
+
|
75
|
+
<p class="capion typesquare_tags">XXX</p>'''
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
from bs4 import BeautifulSoup
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
soup = BeautifulSoup(html, 'html.parser')  # name the parser explicitly: avoids GuessedAtParserWarning and parser-dependent results
|
84
|
+
|
85
|
+
vals = [t.text for t in soup.find_all('p')]
|
86
|
+
|
87
|
+
print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC', 'YYY', '', 'TTTSSS', 'RRR', '4', 'XXX']
|
88
|
+
|
89
|
+
```
|