回答編集履歴

2

HtmlAgilityPackの例を追加しました.

2016/08/20 11:17

投稿

tmakita
tmakita

スコア69

test CHANGED
@@ -175,3 +175,79 @@
175
175
 
176
176
 
177
177
  入力はたまたまXHTMLでした.他のダーティーなのでも試した方が良いのでしょうけれども、私の家は冷房なしでこれ以上気力が持ちません... あと普段まずVisual Studioを立ち上げる機会はゼロです.おかしな点があるかもしれません、ご容赦ください.
178
+
179
+
180
+
181
+ [追記]
182
+
183
+ > HtmlAgilityPackを使ってみましたが結果は散々でした.
184
+
185
+
186
+
187
+ HtmlAgilityPackでもいろいろやった末、以下のコードで動きました.エンコーディングの指定が要るようです.
188
+
189
+
190
+
191
+ ```vb
192
+
193
+ Imports System.Text.Encoding
194
+
195
+ Imports System.Net
196
+
197
+ Imports HtmlAgilityPack
198
+
199
+
200
+
201
+ ...
202
+
203
+
204
+
205
+ Sub Main()
206
+
207
+ Dim xw As XmlWriter = CreateXmlWriter("maxTemp.xml")
208
+
209
+ xw.WriteStartElement("tempDataRoot", "")
210
+
211
+ Dim wc As WebClient = New WebClient()
212
+
213
+ wc.Encoding = UTF8
214
+
215
+ Dim htmlSource As String = wc.DownloadString("http://www.data.jma.go.jp/obd/stats/data/mdrr/synopday/data1s.html")
216
+
217
+ Dim doc As HtmlDocument = New HtmlDocument()
218
+
219
+ doc.LoadHtml(htmlSource)
220
+
221
+ Dim trs = doc.DocumentNode.SelectNodes("//table[@class = 'o1']//tr[@class != 'o1h']")
222
+
223
+ For Each tr In trs
224
+
225
+ Dim tds As IEnumerable(Of HtmlNode) = tr.Elements("td")
226
+
227
+ Dim tdArray As HtmlNode() = tds.ToArray
228
+
229
+ Dim regionTd As HtmlNode = tdArray(0)
230
+
231
+ Dim maxTempTd As HtmlNode = tdArray(6)
232
+
233
+ xw.WriteStartElement("tempData")
234
+
235
+ xw.WriteAttributeString("region", regionTd.InnerText)
236
+
237
+ xw.WriteAttributeString("maxTemp", maxTempTd.InnerText)
238
+
239
+ Debug.WriteLine("region={0} max-temp={1}", regionTd.InnerText, maxTempTd.InnerText)
240
+
241
+ xw.WriteEndElement()
242
+
243
+ Next
244
+
245
+ xw.WriteEndElement()
246
+
247
+ xw.Close()
248
+
249
+ End Sub
250
+
251
+
252
+
253
+ ```

1

サンプルプログラムを追加しました.

2016/08/20 11:16

投稿

tmakita
tmakita

スコア69

test CHANGED
@@ -33,3 +33,145 @@
33
33
 
34
34
 
35
35
  以上、少しでもお役に立てば幸いです.
36
+
37
+
38
+
39
+ [追記]
40
+
41
+ コメントに書きましたプログラムです.
42
+
43
+
44
+
45
+ ```vb
46
+
47
+ Imports System.Xml
48
+
49
+ Imports System.Xml.XPath
50
+
51
+ Imports System.Text.Encoding
52
+
53
+ Imports Sgml
54
+
55
+ Module HtmlXPathModule
56
+
57
+ Sub Main()
58
+
59
+ Dim xw As XmlWriter = CreateXmlWriter("maxTemp.xml")
60
+
61
+ xw.WriteStartElement("tempDataRoot", "")
62
+
63
+ Dim sgml As SgmlReader = New SgmlReader()
64
+
65
+ sgml.DocType = "HTML"
66
+
67
+ sgml.Href = "http://www.data.jma.go.jp/obd/stats/data/mdrr/synopday/data1s.html"
68
+
69
+ sgml.IgnoreDtd = True
70
+
71
+ Dim htmlDoc As XDocument = XDocument.Load(sgml)
72
+
73
+ Dim nsTable As NameTable = New NameTable
74
+
75
+ Dim nsMgr As XmlNamespaceManager = New XmlNamespaceManager(nsTable)
76
+
77
+ nsMgr.AddNamespace("xhtml", "http://www.w3.org/1999/xhtml")
78
+
79
+ Dim targetTrs As IEnumerable(Of XElement) = htmlDoc.XPathSelectElements("//xhtml:table[@class = 'o1']//xhtml:tr[@class != 'o1h']", nsMgr)
80
+
81
+ For Each tr As XElement In targetTrs
82
+
83
+ Dim targetTds As IEnumerable(Of XElement) = tr.Elements
84
+
85
+ Dim tdArray As XElement() = targetTds.ToArray
86
+
87
+ Dim region As XElement = tdArray(0)
88
+
89
+ Dim maxTemp As XElement = tdArray(6)
90
+
91
+ xw.WriteStartElement("tempData")
92
+
93
+ xw.WriteAttributeString("region", "", region.Value)
94
+
95
+ xw.WriteAttributeString("maxTemp", "", maxTemp.Value)
96
+
97
+ xw.WriteEndElement()
98
+
99
+ Next
100
+
101
+ xw.WriteEndElement()
102
+
103
+ xw.Close()
104
+
105
+ End Sub
106
+
107
+
108
+
109
+ Function CreateXmlWriter(outputPath As String) As XmlWriter
110
+
111
+ Dim settings As XmlWriterSettings = New XmlWriterSettings()
112
+
113
+ settings.CloseOutput = True
114
+
115
+ settings.ConformanceLevel = ConformanceLevel.Document
116
+
117
+ settings.Encoding = UTF8
118
+
119
+ settings.Indent = False
120
+
121
+ settings.NewLineChars = vbCrLf
122
+
123
+ settings.NewLineHandling = NewLineHandling.None
124
+
125
+ settings.OmitXmlDeclaration = False
126
+
127
+ settings.WriteEndDocumentOnClose = False
128
+
129
+ Dim xw As XmlWriter = XmlWriter.Create(outputPath, settings)
130
+
131
+ Return xw
132
+
133
+ End Function
134
+
135
+
136
+
137
+ End Module
138
+
139
+ ```
140
+
141
+
142
+
143
+ こんなXMLが出ます.(maxTemp.xml)今日は日本列島暑いです.37℃超え(!)のところもあります.
144
+
145
+
146
+
147
+ ```XML
148
+
149
+ <?xml version="1.0" encoding="UTF-8"?>
150
+
151
+ <tempDataRoot>
152
+
153
+ <tempData maxTemp="27.1]" region="札幌"/>
154
+
155
+ <tempData maxTemp="26.9]" region="稚内"/>
156
+
157
+ <tempData maxTemp="26.0]" region="北見枝幸"/>
158
+
159
+ ...
160
+
161
+ <tempData maxTemp="37.1]" region="鹿児島"/>
162
+
163
+ ...
164
+
165
+ <tempData maxTemp="31.3]" region="西表島"/>
166
+
167
+ <tempData maxTemp="33.1]" region="石垣島"/>
168
+
169
+ <tempData maxTemp="" region="昭和"/>
170
+
171
+ </tempDataRoot>
172
+
173
+ ```
174
+
175
+
176
+
177
+ 入力はたまたまXHTMLでした.他のダーティーなのでも試した方が良いのでしょうけれども、私の家は冷房なしでこれ以上気力が持ちません... あと普段まずVisual Studioを立ち上げる機会はゼロです.おかしな点があるかもしれません、ご容赦ください.