回答編集履歴

1

BeautifulSoupを追加

2018/11/15 11:58

投稿

barobaro
barobaro

スコア1286

test CHANGED
@@ -29,3 +29,97 @@
29
29
  yield item
30
30
 
31
31
  ```
32
+
33
+
34
+
35
+ BeautifulSoupを使う場合は、次のように書けます。
36
+
37
+
38
+
39
+ ```python
40
+
41
+ from bs4 import BeautifulSoup
42
+
43
+
44
+
45
+ html = """
46
+
47
+ <table>
48
+
49
+ <tbody>
50
+
51
+ <tr>
52
+
53
+ <td>A3</td>
54
+
55
+ <td>B3</td>
56
+
57
+ <td>
58
+
59
+ <img src="../../media/test1.gif"> <!-- ◯ -->
60
+
61
+ <img src="../../media/test2.gif"> <!-- ★ -->
62
+
63
+ </td>
64
+
65
+ </tr>
66
+
67
+ <tr>
68
+
69
+ <td>A2</td>
70
+
71
+ <td>B2</td>
72
+
73
+ <td>
74
+
75
+ <img src="../../media/test3.gif">
76
+
77
+ </td>
78
+
79
+ </tr>
80
+
81
+ </tbody>
82
+
83
+ </table>
84
+
85
+ """
86
+
87
+
88
+
89
+ soup = BeautifulSoup(html, 'html.parser')
90
+
91
+
92
+
93
+ for trs in soup.select('tr'):
94
+
95
+
96
+
97
+     result = []
98
+
99
+
100
+
101
+     for tds in trs.select('td'):
102
+
103
+         if tds.img:
104
+
105
+             for i in tds.select('img'):
106
+
107
+                 result.append(i.get('src'))
108
+
109
+         else:
110
+
111
+             result.append(tds.get_text(strip=True))
112
+
113
+
114
+
115
+     print(result)
116
+
117
+ ```
118
+
119
+
120
+
121
+ 結果
122
+
123
+ ['A3', 'B3', '../../media/test1.gif', '../../media/test2.gif']
124
+
125
+ ['A2', 'B2', '../../media/test3.gif']