質問する | ログイン | 新規登録

回答編集履歴

1

実行結果を付与

2016/07/20 05:03

投稿

hiono
hiono

スコア7

answer CHANGED
@@ -1,16 +1,46 @@
1
- next がclass="b"の間はclass="a"にappend
2
- > for e in doc.findall('.//div[@class="a"]'):
3
- > n = e.getnext()
4
- > while True:
5
- > if n != None and n.get('class') == 'b':
6
- > for img in n:
7
- > if img.tag == 'img':
8
- > e.append(img)
9
- > else:
10
- > break
11
- > n = n.getnext()
12
1
 
2
+ ```shell
3
+ dockerdev@py3:~$ cat a
4
+ #!/usr/bin/env python3
5
+ # -*- coding: utf-8 -*-
6
+ import lxml.html
7
+
8
+ html = """
9
+ <div class='a'><img src='aa.jpg'></div>
10
+ <div class='b'><img src='ab.jpg'></div>
11
+ <div class='b'><img src='ac.jpg'></div>
12
+ <div class='b'><img src='ad.jpg'></div>
13
+ <div class='a'><img src='ba.jpg'></div>
14
+ <div class='b'><img src='bb.jpg'></div>
15
+ <div class='b'><img src='bc.jpg'></div>
16
+ <div class='a'><img src='ca.jpg'></div>
17
+ <div class='b'><img src='cb.jpg'></div>
18
+ """
19
+ doc = lxml.html.fromstring(html)
20
+ # next がclass="b"の間はclass="a"にappend
21
+ for e in doc.findall('.//div[@class="a"]'):
22
+ n = e.getnext()
23
+ while True:
24
+ if n != None and n.get('class') == 'b':
25
+ for img in n:
26
+ if img.tag == 'img':
27
+ e.append(img)
28
+ else:
29
+ break
30
+ n = n.getnext()
13
- 不要なdivを一気に消す
31
+ # 不要なdivを一気に消す
14
- > for e in doc.findall('.//div[@class="b"]'):
32
+ for e in doc.findall('.//div[@class="b"]'):
15
- > parent = e.getparent()
33
+ parent = e.getparent()
16
- > parent.remove(e)
34
+ parent.remove(e)
35
+
36
+ print(lxml.etree.tostring(doc, pretty_print=True).decode('utf8'))
37
+ dockerdev@py3:~$ python3 a
38
+ python3 a
39
+ <div><div class="a"><img src="aa.jpg"/><img src="ab.jpg"/><img src="ac.jpg"/><img src="ad.jpg"/></div>
40
+ <div class="a"><img src="ba.jpg"/><img src="bb.jpg"/><img src="bc.jpg"/></div>
41
+ <div class="a"><img src="ca.jpg"/><img src="cb.jpg"/></div>
42
+ </div>
43
+
44
+ dockerdev@py3:~$
45
+ ```
46
+