回答編集履歴

1

質問の変更に対応しました。

2018/04/26 10:58

投稿

退会済みユーザー
test CHANGED
@@ -1,4 +1,4 @@
1
- Javaで実装したHTMLパーサ[jsoup](https://jsoup.org/)を使うのが便利です。
1
+ Javaで実装したHTMLパーサ[jsoup](https://jsoup.org/)を使うのが便利です。これを使う場合はまず[Download and install jsoup](https://jsoup.org/download)からjsoup-1.11.3.jarをダウンロードしてクラスパスに追加してください。
2
2
 
3
3
  以下のコードは、span要素をすべて取り出し、その中のテキストだけを抽出して出力します。
4
4
 
@@ -6,27 +6,137 @@
6
6
 
7
7
  ```java
8
8
 
9
- String html = "<tr style=\"min-height: 27px\">\r\n" +
10
-
11
- "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
12
-
13
- "<p style=\"margin-left: 30px; line-height: 13.33px; margin-right: 6px; text-align: left\">\r\n" +
14
-
15
- "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">流動資産合計</span>\r\n" +
16
-
17
- "</p>\r\n" +
18
-
19
- .....
20
-
21
- Document doc = Jsoup.parse(html);
22
-
23
- Elements spans = doc.select("span");
24
-
25
- for (Element e : spans)
9
+ package stackoverflow;
26
-
27
- System.out.println(e.text());
28
10
 
29
11
 
12
+
13
+ import org.jsoup.Jsoup;
14
+
15
+ import org.jsoup.nodes.Document;
16
+
17
+ import org.jsoup.nodes.Element;
18
+
19
+ import org.jsoup.select.Elements;
20
+
21
+
22
+
23
+ public class Main {
24
+
25
+
26
+
27
+ public static void main(String[] args) {
28
+
29
+ String html = "<tr style=\"min-height: 27px\">\r\n" +
30
+
31
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
32
+
33
+ "<p style=\"margin-left: 30px; line-height: 13.33px; margin-right: 6px; text-align: left\">\r\n" +
34
+
35
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">流動資産合計</span>\r\n" +
36
+
37
+ "</p>\r\n" +
38
+
39
+ "</td>\r\n" +
40
+
41
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
42
+
43
+ "<p style=\"line-height: 13.33px; text-align: center\">&#160;</p>\r\n" +
44
+
45
+ "</td>\r\n" +
46
+
47
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
48
+
49
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
50
+
51
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">34,303</span>\r\n" +
52
+
53
+ "</p>\r\n" +
54
+
55
+ "</td>\r\n" +
56
+
57
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
58
+
59
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
60
+
61
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">36,762</span>\r\n" +
62
+
63
+ "</p>\r\n" +
64
+
65
+ "</td>\r\n" +
66
+
67
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
68
+
69
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
70
+
71
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">28,016</span>\r\n" +
72
+
73
+ "</p>\r\n" +
74
+
75
+ "</td>\r\n" +
76
+
77
+ "</tr>\r\n" +
78
+
79
+ "<tr style=\"min-height: 27px\">\r\n" +
80
+
81
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
82
+
83
+ "<p style=\"margin-left: 30px; line-height: 13.33px; margin-right: 6px; text-align: left\">\r\n" +
84
+
85
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">流動負債合計</span>\r\n" +
86
+
87
+ "</p>\r\n" +
88
+
89
+ "</td>\r\n" +
90
+
91
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
92
+
93
+ "<p style=\"line-height: 13.33px; text-align: center\">&#160;</p>\r\n" +
94
+
95
+ "</td>\r\n" +
96
+
97
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
98
+
99
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
100
+
101
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">6,917</span>\r\n" +
102
+
103
+ "</p>\r\n" +
104
+
105
+ "</td>\r\n" +
106
+
107
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
108
+
109
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
110
+
111
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">6,809</span>\r\n" +
112
+
113
+ "</p>\r\n" +
114
+
115
+ "</td>\r\n" +
116
+
117
+ "<td style=\"border-left: 1px solid #000000; border-top: 1px solid #000000; border-right: 1px solid #000000; border-bottom: 1px solid #000000; vertical-align: middle\">\r\n" +
118
+
119
+ "<p style=\"line-height: 13.33px; margin-right: 6px; text-align: right\">\r\n" +
120
+
121
+ "<span style=\"font-family: 'MS Mincho'; font-size: 12px\">5,339</span>\r\n" +
122
+
123
+ "</p>\r\n" +
124
+
125
+ "</td>\r\n" +
126
+
127
+ "</tr>";
128
+
129
+ Document doc = Jsoup.parse(html);
130
+
131
+ Elements spans = doc.select("span");
132
+
133
+ for (Element e : spans)
134
+
135
+ System.out.println(e.text());
136
+
137
+ }
138
+
139
+ }
30
140
 
31
141
  ```
32
142