Question edit history

Revision 3: requested addition
@@ -138,6 +138,128 @@

```log
2018-04-27 23:15:48 [scrapy.utils.log] INFO: Scrapy 1.5.0 started (bot: myproject)
2018-04-27 23:15:48 [scrapy.utils.log] INFO: Versions: lxml 3.5.0.0, libxml2 2.9.3, cssselect 1.0.1, parsel 1.3.1, w3lib 1.18.0, Twisted 17.9.0, Python 3.5.2 (default, Nov 23 2017, 16:37:01) - [GCC 5.4.0 20160609], pyOpenSSL 17.5.0 (OpenSSL 1.1.0g 2 Nov 2017), cryptography 2.1.4, Platform Linux-4.4.0-119-generic-x86_64-with-Ubuntu-16.04-xenial
2018-04-27 23:15:48 [scrapy.crawler] INFO: Overridden settings: {'FEED_FORMAT': 'jl', 'BOT_NAME': 'myproject', 'DOWNLOAD_DELAY': 1, 'NEWSPIDER_MODULE': 'myproject.spiders', 'ROBOTSTXT_OBEY': True, 'FEED_URI': 'test5.jl', 'SPIDER_MODULES': ['myproject.spiders']}
2018-04-27 23:15:48 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole']
2018-04-27 23:15:48 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
 'scrapy.downloadermiddlewares.stats.DownloaderStats']
2018-04-27 23:15:48 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
 'scrapy.spidermiddlewares.referer.RefererMiddleware',
 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
 'scrapy.spidermiddlewares.depth.DepthMiddleware']
2018-04-27 23:15:48 [scrapy.middleware] INFO: Enabled item pipelines:
[]
2018-04-27 23:15:48 [scrapy.core.engine] INFO: Spider opened
2018-04-27 23:15:48 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2018-04-27 23:15:48 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 127.0.0.1:6023
2018-04-27 23:15:49 [scrapy.core.engine] DEBUG: Crawled (404) <GET http://www.bash-tv.com/robots.txt> (referer: None)
2018-04-27 23:15:51 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://www.bash-tv.com/wp/schedule> (referer: None)
2018-04-27 23:15:51 [scrapy.core.engine] INFO: Closing spider (finished)
2018-04-27 23:15:51 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 449,
 'downloader/request_count': 2,
 'downloader/request_method_count/GET': 2,
 'downloader/response_bytes': 199210,
 'downloader/response_count': 2,
 'downloader/response_status_count/200': 1,
 'downloader/response_status_count/404': 1,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2018, 4, 27, 14, 15, 51, 243458),
 'log_count/DEBUG': 3,
 'log_count/INFO': 7,
 'memusage/max': 54681600,
 'memusage/startup': 54681600,
 'response_received_count': 2,
 'scheduler/dequeued': 1,
 'scheduler/dequeued/memory': 1,
 'scheduler/enqueued': 1,
 'scheduler/enqueued/memory': 1,
 'start_time': datetime.datetime(2018, 4, 27, 14, 15, 48, 736407)}
2018-04-27 23:15:51 [scrapy.core.engine] INFO: Spider closed (finished)
```
I want to extract the box for each date, have parse_info parse it, and store the results in an item, but nothing comes out when I try to output to JSON, and judging from the log nothing seems to be getting scraped.
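Two details in this log are worth noting: the target page itself was fetched successfully (`Crawled (200) <GET http://www.bash-tv.com/wp/schedule>`), yet the final stats contain no `item_scraped_count` entry, meaning zero items ever reached the feed exporter (`FEED_URI: test5.jl`). For reference, a minimal sketch of a spider whose `parse` re-yields what `parse_info` produces; the item class and field names are taken from this question, while the spider name and the `place` selector are assumptions:

```python
import scrapy

class SceduleItem(scrapy.Item):
    day = scrapy.Field()
    place = scrapy.Field()

class ScheduleSpider(scrapy.Spider):
    # spider name is an assumption; the URL is the one crawled in the log above
    name = 'schedule'
    start_urls = ['http://www.bash-tv.com/wp/schedule']

    def parse(self, response):
        for info in response.xpath('//div[@class="syuroku_raiten_area"]'):
            # parse_info is a generator: its items only reach the engine
            # (and hence test5.jl) if parse re-yields them
            yield from self.parse_info(info)

    def parse_info(self, info):
        item = SceduleItem()
        item['day'] = info.xpath('.//h3/text()').extract_first()
        item['place'] = info.xpath('.//div[@class="hall_info"]//text()').extract_first()
        yield item
```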
Revision 2: added code
@@ -54,7 +54,7 @@

```diff
         for info in response.xpath('//div[@class="syuroku_raiten_area"]'):
-            self.parse_info(info)
+            yield from self.parse_info(info)
```
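The reason this one-word change matters: a function containing `yield` is a generator function, so a bare call like `self.parse_info(info)` only creates a generator object and discards it, and none of its items ever reach Scrapy; `yield from` iterates the generator and passes each item on. A plain-Python illustration with hypothetical names:

```python
def items():
    yield {'day': '4/28'}   # stands in for what parse_info produces

def broken():
    items()                 # generator created and discarded; its body never runs

def fixed():
    yield from items()      # re-yields everything items() produces

print(broken())             # None  -> the caller receives no items
print(list(fixed()))        # [{'day': '4/28'}]
```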
@@ -68,7 +68,7 @@

```diff
         item = SceduleItem()
-        day = info.xpath('//h3/text()').extract_first()
+        day = info.xpath('//h3/text()').extract_first()  # get the date of the box obtained in parse
         for result in info.xpath('//div[@class="syuroku_raiten_area"]/div[@class="hall_info"]'):
```
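One thing worth noting about the line above: inside a Scrapy selector loop, an XPath that starts with `//` still searches the entire document, so `info.xpath('//h3/text()')` returns the page's first `<h3>` for every box; prefixing it with `.` keeps the search inside the current box. A runnable sketch with hypothetical HTML modeled on the class names in the question:

```python
from scrapy.selector import Selector

html = """
<div class="syuroku_raiten_area"><h3>4/28</h3></div>
<div class="syuroku_raiten_area"><h3>4/29</h3></div>
"""
sel = Selector(text=html)
for info in sel.xpath('//div[@class="syuroku_raiten_area"]'):
    print(info.xpath('//h3/text()').extract_first(),    # '4/28' both times: absolute path searches the whole page
          info.xpath('.//h3/text()').extract_first())   # '4/28' then '4/29': relative to the current box
```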
@@ -90,6 +90,50 @@

````diff
 
 
+```
+
+```items.py
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class SceduleItem(scrapy.Item):
+    day = scrapy.Field()
+    place = scrapy.Field()
+    content = scrapy.Field()
+    shop = scrapy.Field()
+    media = scrapy.Field()
 ```
````
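A side note on how these declarations are used: a `scrapy.Item` behaves like a dict restricted to its declared `Field`s, so every key assigned in the spider must be declared here first. A small sketch with placeholder values:

```python
import scrapy

class SceduleItem(scrapy.Item):
    day = scrapy.Field()

item = SceduleItem()
item['day'] = '4/28'   # OK: 'day' is a declared field
print(dict(item))      # {'day': '4/28'}
item['date'] = '4/28'  # raises KeyError: undeclared fields are rejected
```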
Revision 1: missing information

@@ -98,4 +98,6 @@

```diff
 and judging from the log nothing seems to be getting scraped.
 
-I suspect parse_info is not being called correctly, but I don't really understand what is going on
+I suspect parse_info is not being called correctly, but I don't really understand what is going on.
+
+What should I do to get this program to scrape correctly?
```