回答編集履歴

1

edit

2017/12/12 04:49

投稿

mkgrei
mkgrei

スコア8560

test CHANGED
@@ -21,3 +21,277 @@
21
21
  https://qiita.com/ryu19maki/items/e5a3b470795de883a09a
22
22
 
23
23
  https://pythondatascience.plavox.info/pandas/データフレームを出力する
24
+
25
+
26
+
27
+ ---
28
+
29
+
30
+
31
+ 手元では特に問題なくCSVファイルにできます。
32
+
33
+
34
+
35
+ ```python
36
+
37
+ import pandas as pd
38
+
39
+ from bs4 import BeautifulSoup
40
+
41
+
42
+
43
# Sample HTML: one monthly access-count table (two header rows that use
# rowspan/colspan, four monthly rows, and a final "total" row). It is kept
# verbatim as the input that the parsing code below this literal reads.
a = """
<table class="pro-table pro-table02 mb5 tableCross_1">
<thead>
<tr>
<th class="w7p" rowspan="2"> </th>
<th class="w5p" rowspan="2">合計</th>
<th colspan="9">内訳</th>
<th class="w5p area-disabled" rowspan="2"><a class="gonFloat8 cboxElement" href="#print">地図・クーポン印刷</a></th>
<th class="w5p area-disabled" rowspan="2">チョットぐに追加</th>
<th class="w5p area-disabled" rowspan="2">口コミ</th>
<th class="w5p area-disabled" rowspan="2">お店のブログ</th>
</tr>
<tr>
<th class="w5p">店舗トップ</th>
<th class="w5p"><a class="gonFloat cboxElement" href="#menu">メニュー</a></th>
<th class="w5p"><a class="gonFloat5 cboxElement" href="#seat">席・個室・貸切</a></th>
<th class="w5p"><a class="gonFloat2 cboxElement" href="#photo">写真</a></th>
<th class="w5p">こだわり</th>
<th class="w5p"><a class="gonFloat7 cboxElement" href="#map">地図</a></th>
<th class="w5p">クーポン</th>
<th class="w5p"><a class="gonFloat3 cboxElement" href="#yoyaku">予約</a></th>
<th class="w5p"><a class="gonFloat6 cboxElement" href="#other">その他</a></th>
</tr>
</thead>
<tbody>
<tr>
<th class="txtLeft">2016/11</th>
<td class="txtRight table1item1">8158</td>
<td class="txtRight table1item2">2740</td>
<td class="txtRight table1item3">2199</td>
<td class="txtRight table1item4">998</td>
<td class="txtRight table1item5">420</td>
<td class="txtRight table1item6">214</td>
<td class="txtRight table1item7">542</td>
<td class="txtRight table1item8">242</td>
<td class="txtRight table1item9">790</td>
<td class="txtRight table1item10">13</td>
<td class="txtRight table1item11">49</td>
<td class="txtRight table1item12">0</td>
<td class="txtRight table1item13">111</td>
<td class="txtRight table1item14">0</td>
</tr>
<tr>
<th class="txtLeft">2016/12</th>
<td class="txtRight table1item1">5401</td>
<td class="txtRight table1item2">1909</td>
<td class="txtRight table1item3">1255</td>
<td class="txtRight table1item4">510</td>
<td class="txtRight table1item5">360</td>
<td class="txtRight table1item6">140</td>
<td class="txtRight table1item7">562</td>
<td class="txtRight table1item8">139</td>
<td class="txtRight table1item9">515</td>
<td class="txtRight table1item10">11</td>
<td class="txtRight table1item11">75</td>
<td class="txtRight table1item12">0</td>
<td class="txtRight table1item13">67</td>
<td class="txtRight table1item14">0</td>
</tr>
<tr>
<th class="txtLeft">2017/01</th>
<td class="txtRight table1item1">4584</td>
<td class="txtRight table1item2">1534</td>
<td class="txtRight table1item3">1219</td>
<td class="txtRight table1item4">413</td>
<td class="txtRight table1item5">341</td>
<td class="txtRight table1item6">137</td>
<td class="txtRight table1item7">333</td>
<td class="txtRight table1item8">135</td>
<td class="txtRight table1item9">460</td>
<td class="txtRight table1item10">12</td>
<td class="txtRight table1item11">18</td>
<td class="txtRight table1item12">0</td>
<td class="txtRight table1item13">49</td>
<td class="txtRight table1item14">0</td>
</tr>
<tr>
<th class="txtLeft">2017/02</th>
<td class="txtRight table1item1">4821</td>
<td class="txtRight table1item2">1711</td>
<td class="txtRight table1item3">1243</td>
<td class="txtRight table1item4">407</td>
<td class="txtRight table1item5">335</td>
<td class="txtRight table1item6">134</td>
<td class="txtRight table1item7">299</td>
<td class="txtRight table1item8">169</td>
<td class="txtRight table1item9">513</td>
<td class="txtRight table1item10">10</td>
<td class="txtRight table1item11">22</td>
<td class="txtRight table1item12">0</td>
<td class="txtRight table1item13">60</td>
<td class="txtRight table1item14">0</td>
</tr>
<tr>
<tr class="total">
<th class="txtLeft">合計</th>
<td class="txtRight w100 table1item1">59010</td>
<td class="txtRight w100 table1item2">19188</td>
<td class="txtRight w100 table1item3">14620</td>
<td class="txtRight w100 table1item4">5547</td>
<td class="txtRight w100 table1item5">3837</td>
<td class="txtRight w100 table1item6">1584</td>
<td class="txtRight w100 table1item7">3993</td>
<td class="txtRight w100 table1item8">1521</td>
<td class="txtRight w100 table1item9">8615</td>
<td class="txtRight w100 table1item10">105</td>
<td class="txtRight w100 table1item11">347</td>
<td class="txtRight w100 table1item12">0</td>
<td class="txtRight w100 table1item13">664</td>
<td class="txtRight w100 table1item14">0</td>
</tr>
</tbody>
</table>
"""
270
+
271
+
272
+
273
# Parse the HTML held in `a` into a DataFrame, give the columns flat names,
# and write the result to mydata.csv.
from io import StringIO  # local import so this snippet stays self-contained

# Use the public pd.read_html entry point rather than the internal
# pd.io.html path. Wrapping the markup in StringIO avoids the deprecation
# of passing literal HTML strings (pandas >= 2.1) and is also accepted by
# older versions. read_html returns a list of DataFrames; the sample's
# single table is taken from position 0.
dfs = pd.read_html(StringIO(a))
df = dfs[0]

# Replace the parsed header with one flat name per column (15 in total):
# the row label, the overall total, the nine breakdown items, and the four
# trailing columns.
df.columns = ['date', '合計', '店舗トップ', 'メニュー',
              '席・個室・貸切', '写真', 'こだわり', '地図',
              'クーポン', '予約', 'その他', '地図・クーポン印刷',
              'チョットぐに追加', '口コミ', 'お店のブログ']

df = df.set_index('date')
# Clear the index label so the CSV header's first cell is empty.
# `del df.index.name` raises AttributeError on pandas >= 1.0; assigning
# None works on both old and new versions.
df.index.name = None

df.to_csv('mydata.csv')
296
+
297
+ ```