回答編集履歴

2

d

2019/04/05 06:53

投稿

tiitoi
tiitoi

スコア21956

test CHANGED
@@ -87,3 +87,57 @@
87
87
  print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC', 'YYY', '', 'TTTSSS', 'RRR', '4', 'XXX']
88
88
 
89
89
  ```
90
+
91
+
92
+
93
+ ## 追記
94
+
95
+
96
+
97
+ ```python
98
+
99
+ html = '''<p><span>ZZZ</span></p>,
100
+
101
+ <p>AAA</p>,
102
+
103
+ <p>BBB</p>,
104
+
105
+ <p>CCC</p>,
106
+
107
+ <p class="tags">YYY</p>,
108
+
109
+ <p class="list"><a href="/WWW/"><img alt="VVV" src="/UUU"/></a></p>,
110
+
111
+ <p class="tags">TTT<br class="sp"/>SSS</p>,
112
+
113
+ <p class="hoge"><a class="tagb" href="/socialmedia/">RRR</a></p>,
114
+
115
+ <p class="fuga"><a class="typesquare_tags" href="/chronicle/04/">4</a></p>,
116
+
117
+ <p class="capion typesquare_tags">XXX</p>'''
118
+
119
+
120
+
121
+ from bs4 import BeautifulSoup
122
+
123
+
124
+
125
+ soup = BeautifulSoup(html)
126
+
127
+
128
+
129
+ vals = []
130
+
131
+ for t in soup.find_all('p'):
132
+
133
+ # p タグ直下の最初のテキストノードのみ取得 (子要素の中にあるテキストは対象外)
134
+
135
+ p_text = t.find(text=True, recursive=False)
136
+
137
+ if p_text:
138
+
139
+ vals.append(p_text)
140
+
141
+ print(vals) # ['AAA', 'BBB', 'CCC', 'YYY', 'TTT', 'XXX']
142
+
143
+ ```

1

d

2019/04/05 06:53

投稿

tiitoi
tiitoi

スコア21956

test CHANGED
@@ -41,3 +41,49 @@
41
41
  print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC']
42
42
 
43
43
  ```
44
+
45
+
46
+
47
+ ## 追記
48
+
49
+
50
+
51
+ 単純に p タグ内のテキストだけを取り出すという意味でしたら、以下のようになります。
52
+
53
+
54
+
55
+ ```python
56
+
57
+ html = '''<p><span>ZZZ</span></p>,
58
+
59
+ <p>AAA</p>,
60
+
61
+ <p>BBB</p>,
62
+
63
+ <p>CCC</p>,
64
+
65
+ <p class="tags">YYY</p>,
66
+
67
+ <p class="list"><a href="/WWW/"><img alt="VVV" src="/UUU"/></a></p>,
68
+
69
+ <p class="tags">TTT<br class="sp"/>SSS</p>,
70
+
71
+ <p class="hoge"><a class="tagb" href="/socialmedia/">RRR</a></p>,
72
+
73
+ <p class="fuga"><a class="typesquare_tags" href="/chronicle/04/">4</a></p>,
74
+
75
+ <p class="capion typesquare_tags">XXX</p>'''
76
+
77
+
78
+
79
+ from bs4 import BeautifulSoup
80
+
81
+
82
+
83
+ soup = BeautifulSoup(html)
84
+
85
+ vals = [t.text for t in soup.find_all('p')]
86
+
87
+ print(vals) # ['ZZZ', 'AAA', 'BBB', 'CCC', 'YYY', '', 'TTTSSS', 'RRR', '4', 'XXX']
88
+
89
+ ```