回答編集履歴

1

実行結果を付与

2016/07/20 05:03

投稿

hiono

スコア7

test CHANGED
@@ -1,31 +1,93 @@
1
- next がclass="b"の間はclass="a"にappend
2
1
 
3
- > for e in doc.findall('.//div[@class="a"]'):
4
2
 
5
- > n = e.getnext()
3
+ ```shell
6
4
 
7
- > while True:
5
+ dockerdev@py3:~$ cat a
8
6
 
9
- > if n != None and n.get('class') == 'b':
7
+ #!/usr/bin/env python3
10
8
 
11
- > for img in n:
9
+ # -*- coding: utf-8 -*-
12
10
 
13
- > if img.tag == 'img':
14
-
15
- > e.append(img)
16
-
17
- > else:
11
+ import lxml.html
18
-
19
- > break
20
-
21
- > n = n.getnext()
22
12
 
23
13
 
24
14
 
25
- 不要なdivを一気に消す
15
+ html = """
26
16
 
27
- > for e in doc.findall('.//div[@class="b"]'):
17
+ <div class='a'><img src='aa.jpg'></div>
28
18
 
29
- > parent = e.getparent()
19
+ <div class='b'><img src='ab.jpg'></div>
30
20
 
21
+ <div class='b'><img src='ac.jpg'></div>
22
+
23
+ <div class='b'><img src='ad.jpg'></div>
24
+
25
+ <div class='a'><img src='ba.jpg'></div>
26
+
27
+ <div class='b'><img src='bb.jpg'></div>
28
+
29
+ <div class='b'><img src='bc.jpg'></div>
30
+
31
+ <div class='a'><img src='ca.jpg'></div>
32
+
33
+ <div class='b'><img src='cb.jpg'></div>
34
+
35
+ """
36
+
37
+ doc = lxml.html.fromstring(html)
38
+
39
+ # next がclass="b"の間はclass="a"にappend
40
+
41
+ for e in doc.findall('.//div[@class="a"]'):
42
+
43
+ n = e.getnext()
44
+
45
+ while True:
46
+
47
+ if n != None and n.get('class') == 'b':
48
+
49
+ for img in n:
50
+
51
+ if img.tag == 'img':
52
+
53
+ e.append(img)
54
+
55
+ else:
56
+
57
+ break
58
+
59
+ n = n.getnext()
60
+
61
+ # 不要なdivを一気に消す
62
+
63
+ for e in doc.findall('.//div[@class="b"]'):
64
+
65
+ parent = e.getparent()
66
+
31
- > parent.remove(e)
67
+ parent.remove(e)
68
+
69
+
70
+
71
+ print(lxml.etree.tostring(doc, pretty_print=True).decode('utf8'))
72
+
73
+ dockerdev@py3:~$ python3 a
74
+
75
+ python3 a
76
+
77
+ <div><div class="a"><img src="aa.jpg"/><img src="ab.jpg"/><img src="ac.jpg"/><img src="ad.jpg"/></div>
78
+
79
+ <div class="a"><img src="ba.jpg"/><img src="bb.jpg"/><img src="bc.jpg"/></div>
80
+
81
+ <div class="a"><img src="ca.jpg"/><img src="cb.jpg"/></div>
82
+
83
+ </div>
84
+
85
+
86
+
87
+ dockerdev@py3:~$
88
+
89
+ ```
90
+
91
+
92
+
93
+