回答編集履歴
11
さらに修正
test
CHANGED
@@ -116,7 +116,7 @@
|
|
116
116
|
|
117
117
|
} else {
|
118
118
|
|
119
|
-
$textnode = $dom->createTextNode('タイトル無し');
|
119
|
+
$textnode = $dom->createTextNode('');
|
120
120
|
|
121
121
|
}
|
122
122
|
|
@@ -130,7 +130,7 @@
|
|
130
130
|
|
131
131
|
|
132
132
|
|
133
|
-
$figcaption = $dom->createElement('figcaption', trim($textnode->nodeValue));
|
133
|
+
$figcaption = $dom->createElement('figcaption', trim($textnode->nodeValue) ?: 'タイトル無し');
|
134
134
|
|
135
135
|
$figure->appendChild($figcaption);
|
136
136
|
|
10
中身が無かったりした場合への例外対応
test
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
|
23
23
|
echo preg_replace_callback(
|
24
24
|
|
25
|
-
'@<img( class="aa bb cc etc".*?) />\s*+(\S*+)@s',
|
25
|
+
'@<img( class="aa bb cc etc".*?) />\s*+([^\s<]*+)@s',
|
26
26
|
|
27
27
|
function ($m) {
|
28
28
|
|
@@ -110,9 +110,15 @@
|
|
110
110
|
|
111
111
|
|
112
112
|
|
113
|
-
$textnode = $node->nextSibling;
|
113
|
+
if ($textnode = $node->nextSibling and $textnode->nodeType === XML_TEXT_NODE) {
|
114
114
|
|
115
|
-
$node->parentNode->removeChild($textnode);
|
115
|
+
$node->parentNode->removeChild($textnode);
|
116
|
+
|
117
|
+
} else {
|
118
|
+
|
119
|
+
$textnode = $dom->createTextNode('タイトル無し');
|
120
|
+
|
121
|
+
}
|
116
122
|
|
117
123
|
|
118
124
|
|
9
修正
test
CHANGED
@@ -22,7 +22,7 @@
|
|
22
22
|
|
23
23
|
echo preg_replace_callback(
|
24
24
|
|
25
|
-
'@<img( class="aa bb cc etc".*?) />(\S*+)@s',
|
25
|
+
'@<img( class="aa bb cc etc".*?) />\s*+(\S*+)@s',
|
26
26
|
|
27
27
|
function ($m) {
|
28
28
|
|
8
欠点
test
CHANGED
@@ -49,6 +49,10 @@
|
|
49
49
|
|
50
50
|
|
51
51
|
##### DOM版 (細かい差異に依存しないので変更により強いです)
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
(ただしインデントが崩れたり一部文字がHTMLエンティティに置換されてしまうなど,少し弊害があります…)
|
52
56
|
|
53
57
|
|
54
58
|
|
7
m1
test
CHANGED
@@ -26,7 +26,7 @@
|
|
26
26
|
|
27
27
|
function ($m) {
|
28
28
|
|
29
|
-
$f = preg_replace('@
|
29
|
+
$m[1] = preg_replace('@
|
30
30
|
|
31
31
|
\s*+(?:
|
32
32
|
|
@@ -36,7 +36,7 @@
|
|
36
36
|
|
37
37
|
@x', '', $m[1]);
|
38
38
|
|
39
|
-
return "<figure><img$f><figcaption>$m[2]</figcaption></figure>";
|
39
|
+
return "<figure><img$m[1]><figcaption>$m[2]</figcaption></figure>";
|
40
40
|
|
41
41
|
},
|
42
42
|
|
6
正規表現
test
CHANGED
@@ -1,4 +1,54 @@
|
|
1
|
-
|
1
|
+
#### 正規表現版 (変更に弱いですが,決め打ちであればこれで十分です)
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
```php
|
6
|
+
|
7
|
+
<?php
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
$content = <<<EOD
|
12
|
+
|
13
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" alt="○○○" width="○○○" height="○○○" />出典:https://△△△.com
|
14
|
+
|
15
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" alt='xx oo' width="○○○" height="○○○" />出典:https://△△△.com
|
16
|
+
|
17
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" width="○○○" height="○○○" />出典:https://△△△.com
|
18
|
+
|
19
|
+
EOD;
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
echo preg_replace_callback(
|
24
|
+
|
25
|
+
'@<img( class="aa bb cc etc".*?) />(\S*+)@s',
|
26
|
+
|
27
|
+
function ($m) {
|
28
|
+
|
29
|
+
$f = preg_replace('@
|
30
|
+
|
31
|
+
\s*+(?:
|
32
|
+
|
33
|
+
(?:src|alt)(*SKIP)(*FAIL)|[\w-]++
|
34
|
+
|
35
|
+
)=(?:"[^"]*+"|\'[^\']*+\')
|
36
|
+
|
37
|
+
@x', '', $m[1]);
|
38
|
+
|
39
|
+
return "<figure><img$f><figcaption>$m[2]</figcaption></figure>";
|
40
|
+
|
41
|
+
},
|
42
|
+
|
43
|
+
$content
|
44
|
+
|
45
|
+
);
|
46
|
+
|
47
|
+
```
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
##### DOM版 (細かい差異に依存しないので変更により強いです)
|
2
52
|
|
3
53
|
|
4
54
|
|
@@ -10,7 +60,7 @@
|
|
10
60
|
|
11
61
|
// 何かこれらを括っている親要素があると仮定 (もしなければ付加してください)
|
12
62
|
|
13
|
-
$content=<<<EOD
|
63
|
+
$content = <<<EOD
|
14
64
|
|
15
65
|
<root>
|
16
66
|
|
5
補足
test
CHANGED
@@ -80,9 +80,15 @@
|
|
80
80
|
|
81
81
|
|
82
82
|
|
83
|
-
|
83
|
+
$result = $dom->saveXML($dom->documentElement);
|
84
84
|
|
85
|
+
echo $result;
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
// もし最初に親要素を付加したならば,後からそれを除外してください
|
90
|
+
|
85
|
-
echo $dom->saveXML($dom->documentElement);
|
91
|
+
// $result = substr($dom->saveXML($dom->documentElement), 6, -7);
|
86
92
|
|
87
93
|
```
|
88
94
|
|
4
内部エラーを使用
test
CHANGED
@@ -28,13 +28,17 @@
|
|
28
28
|
|
29
29
|
$dom = new DOMDocument;
|
30
30
|
|
31
|
+
libxml_use_internal_errors(true);
|
32
|
+
|
31
|
-
|
33
|
+
$dom->loadHTML(
|
32
34
|
|
33
35
|
mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'),
|
34
36
|
|
35
37
|
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
|
36
38
|
|
37
39
|
);
|
40
|
+
|
41
|
+
libxml_clear_errors();
|
38
42
|
|
39
43
|
$xpath = new DOMXPath($dom);
|
40
44
|
|
3
トリミング
test
CHANGED
@@ -66,7 +66,7 @@
|
|
66
66
|
|
67
67
|
|
68
68
|
|
69
|
-
$figcaption = $dom->createElement('figcaption', $textnode->nodeValue);
|
69
|
+
$figcaption = $dom->createElement('figcaption', trim($textnode->nodeValue));
|
70
70
|
|
71
71
|
$figure->appendChild($figcaption);
|
72
72
|
|
2
つけ忘れ
test
CHANGED
@@ -44,7 +44,7 @@
|
|
44
44
|
|
45
45
|
|
46
46
|
|
47
|
-
foreach ($xpath->query('./@*[not(name()="src")][not(name()="alt")]') as $attr) {
|
47
|
+
foreach ($xpath->query('./@*[not(name()="src")][not(name()="alt")]', $node) as $attr) {
|
48
48
|
|
49
49
|
$node->removeAttribute($attr->name);
|
50
50
|
|
1
訂正
test
CHANGED
@@ -8,13 +8,19 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
|
11
|
+
// 何かこれらを括っている親要素があると仮定 (もしなければ付加してください)
|
12
|
+
|
11
13
|
$content=<<<EOD
|
12
14
|
|
13
|
-
<
|
15
|
+
<root>
|
14
16
|
|
15
|
-
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" alt=
|
17
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" alt="○○○" width="○○○" height="○○○" />出典:https://△△△.com
|
16
18
|
|
19
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" alt='xx oo' width="○○○" height="○○○" />出典:https://△△△.com
|
20
|
+
|
17
|
-
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" width="○○○" height="○○○" />出典:https://△△△.com
|
21
|
+
<img class="aa bb cc etc" src="https://hogehoeg.com/○○○.jpg" width="○○○" height="○○○" />出典:https://△△△.com
|
22
|
+
|
23
|
+
</root>
|
18
24
|
|
19
25
|
EOD;
|
20
26
|
|
@@ -24,7 +30,7 @@
|
|
24
30
|
|
25
31
|
@$dom->loadHTML(
|
26
32
|
|
27
|
-
mb_convert_encoding(
|
33
|
+
mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'),
|
28
34
|
|
29
35
|
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
|
30
36
|
|
@@ -32,7 +38,11 @@
|
|
32
38
|
|
33
39
|
$xpath = new DOMXPath($dom);
|
34
40
|
|
41
|
+
|
42
|
+
|
35
|
-
foreach ($xpath->query('//img[@class="aa bb cc etc"]') as $node) {
|
43
|
+
foreach ($xpath->query('//img[@class="aa bb cc etc"]') as $i => $node) {
|
44
|
+
|
45
|
+
|
36
46
|
|
37
47
|
foreach ($xpath->query('./@*[not(name()="src")][not(name()="alt")]') as $attr) {
|
38
48
|
|
@@ -40,16 +50,36 @@
|
|
40
50
|
|
41
51
|
}
|
42
52
|
|
53
|
+
|
54
|
+
|
55
|
+
$textnode = $node->nextSibling;
|
56
|
+
|
57
|
+
$node->parentNode->removeChild($textnode);
|
58
|
+
|
59
|
+
|
60
|
+
|
43
61
|
$figure = $dom->createElement('figure');
|
44
62
|
|
45
63
|
$node->parentNode->replaceChild($figure, $node);
|
46
64
|
|
47
65
|
$figure->appendChild($node);
|
48
66
|
|
67
|
+
|
68
|
+
|
69
|
+
$figcaption = $dom->createElement('figcaption', $textnode->nodeValue);
|
70
|
+
|
71
|
+
$figure->appendChild($figcaption);
|
72
|
+
|
73
|
+
|
74
|
+
|
49
75
|
}
|
50
76
|
|
51
77
|
|
52
78
|
|
79
|
+
// もし最初に親要素を付加したならば,substrで除外してください
|
80
|
+
|
53
|
-
echo
|
81
|
+
echo $dom->saveXML($dom->documentElement);
|
54
82
|
|
55
83
|
```
|
84
|
+
|
85
|
+
|