質問編集履歴
2
書式を改善いたしました
test
CHANGED
File without changes
|
test
CHANGED
@@ -26,164 +26,164 @@
|
|
26
26
|
|
27
27
|
```ここに言語を入力
|
28
28
|
|
29
|
+
# -*- coding:utf-8 -*-
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
import urllib2
|
36
|
+
|
37
|
+
import codecs
|
38
|
+
|
39
|
+
from bs4 import BeautifulSoup
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
f = codecs.open('horse.csv', 'w', 'utf-8')
|
44
|
+
|
45
|
+
f.write('race_date,kaisai,weather,race_number,race_name,tousuu,frame_number,horse_number,single_win_ratio,popularity,horse_arrival,horse_name,weight,distance,baba, race_time,difference,horse_pass,pace,nobori,horse_weight,win_horse,prize_money'+ u"\n")
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
tpl_url='http://db.netkeiba.com/?pid=jockey_detail&id=00663&page={0}'
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
for i in xrange(1, 5):
|
54
|
+
|
55
|
+
url=tpl_url.format( i )
|
56
|
+
|
57
|
+
soup = BeautifulSoup(urllib2.urlopen(url).read(),"lxml")
|
58
|
+
|
59
|
+
tr_arr = soup.find('div', {'id':'contents_liquid'}).findAll('tbody')
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
for tr in tr_arr:
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
#日付#race_date
|
68
|
+
|
69
|
+
lrg1= tr.findAll('td')[0].string
|
70
|
+
|
71
|
+
#開催#kaisai
|
72
|
+
|
73
|
+
lrg2= tr.findAll('td')[1].string
|
74
|
+
|
75
|
+
#天気#weather
|
76
|
+
|
77
|
+
lrg3= tr.findAll('td')[2].string
|
78
|
+
|
79
|
+
#第○レース#race_number
|
80
|
+
|
81
|
+
lrg4= tr.findAll('td')[3].string
|
82
|
+
|
83
|
+
#レース名#race_name
|
84
|
+
|
85
|
+
lrg5= tr.findAll('td')[4].string
|
86
|
+
|
87
|
+
#映像(この部分、削除したいです)
|
88
|
+
|
89
|
+
lrg6= tr.findAll('td')[5].string
|
90
|
+
|
91
|
+
#頭数#tousuu
|
92
|
+
|
93
|
+
lrg7= tr.findAll('td')[6].string
|
94
|
+
|
95
|
+
#枠番#frame_number
|
96
|
+
|
97
|
+
lrg8= tr.findAll('td')[7].string
|
98
|
+
|
99
|
+
#馬番#horse_number
|
100
|
+
|
101
|
+
lrg9= tr.findAll('td')[8].string
|
102
|
+
|
103
|
+
#単勝#single_win_ratio
|
104
|
+
|
105
|
+
lrg10= tr.findAll('td')[9].string
|
106
|
+
|
107
|
+
#人気#popularity
|
108
|
+
|
109
|
+
lrg11= tr.findAll('td')[10].string
|
110
|
+
|
111
|
+
#着順#horse_arrival
|
112
|
+
|
113
|
+
lrg12= tr.findAll('td')[11].string
|
114
|
+
|
115
|
+
#馬名#horse_name
|
116
|
+
|
117
|
+
lrg13= tr.findAll('td',{'class':'txt_l'}) [1]
|
118
|
+
|
119
|
+
#斤量#weight
|
120
|
+
|
121
|
+
lrg14= tr.findAll('td')[13].string
|
122
|
+
|
123
|
+
#距離#distance
|
124
|
+
|
125
|
+
lrg15= tr.findAll('td')[14].string
|
126
|
+
|
127
|
+
#馬場#baba
|
128
|
+
|
129
|
+
lrg16= tr.findAll('td')[15].string
|
130
|
+
|
131
|
+
#タイム#race_time
|
132
|
+
|
133
|
+
lrg17= tr.findAll('td')[16].string
|
134
|
+
|
135
|
+
#着差#difference
|
136
|
+
|
137
|
+
lrg18= tr.findAll('td')[17].string
|
138
|
+
|
139
|
+
#通過#horse_pass
|
140
|
+
|
141
|
+
lrg19= tr.findAll('td')[18].string
|
142
|
+
|
143
|
+
#ペース#pace
|
144
|
+
|
145
|
+
lrg20= tr.findAll('td')[19].string
|
146
|
+
|
147
|
+
#上り#nobori
|
148
|
+
|
149
|
+
lrg21= tr.findAll('td')[20].string
|
150
|
+
|
151
|
+
#馬体重#horse_weight
|
152
|
+
|
153
|
+
lrg22= tr.findAll('td')[21].string
|
154
|
+
|
155
|
+
#勝ち馬#win_horse
|
156
|
+
|
157
|
+
lrg23= tr.findAll('td',{'class':'txt_l'}) [2]
|
158
|
+
|
159
|
+
#賞金#prize_money
|
160
|
+
|
161
|
+
lrg24= tr.findAll('td')[23].string
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
print lrg1,lrg2,lrg3,lrg4,lrg5,lrg6,lrg7,lrg8,lrg9,lrg10,\
|
166
|
+
|
167
|
+
lrg11,lrg12,lrg13.a.string,lrg14,lrg15,lrg16,lrg17,\
|
168
|
+
|
169
|
+
lrg18,lrg19,lrg20,lrg21,lrg22,lrg23.a.string,lrg24
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
f.close()
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
```
|
180
|
+
|
181
|
+
|
182
|
+
|
29
183
|
コード
|
30
184
|
|
31
185
|
```
|
32
186
|
|
33
|
-
# -*- coding:utf-8 -*-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
import urllib2
|
38
|
-
|
39
|
-
import codecs
|
40
|
-
|
41
|
-
from bs4 import BeautifulSoup
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
f = codecs.open('horse.csv', 'w', 'utf-8')
|
46
|
-
|
47
|
-
f.write('race_date,kaisai,weather,race_number,race_name,tousuu,frame_number,horse_number,single_win_ratio,popularity,horse_arrival,horse_name,weight,distance,baba, race_time,difference,horse_pass,pace,nobori,horse_weight,win_horse,prize_money'+ u"\n")
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
tpl_url='http://db.netkeiba.com/?pid=jockey_detail&id=00663&page={0}'
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
for i in xrange(1, 5):
|
56
|
-
|
57
|
-
url=tpl_url.format( i )
|
58
|
-
|
59
|
-
soup = BeautifulSoup(urllib2.urlopen(url).read(),"lxml")
|
60
|
-
|
61
|
-
tr_arr = soup.find('div', {'id':'contents_liquid'}).findAll('tbody')
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
for tr in tr_arr:
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
#日付#race_date
|
70
|
-
|
71
|
-
lrg1= tr.findAll('td')[0].string
|
72
|
-
|
73
|
-
#開催#kaisai
|
74
|
-
|
75
|
-
lrg2= tr.findAll('td')[1].string
|
76
|
-
|
77
|
-
#天気#weather
|
78
|
-
|
79
|
-
lrg3= tr.findAll('td')[2].string
|
80
|
-
|
81
|
-
#第○レース#race_number
|
82
|
-
|
83
|
-
lrg4= tr.findAll('td')[3].string
|
84
|
-
|
85
|
-
#レース名#race_name
|
86
|
-
|
87
|
-
lrg5= tr.findAll('td')[4].string
|
88
|
-
|
89
|
-
#映像(この部分、削除したいです)
|
90
|
-
|
91
|
-
lrg6= tr.findAll('td')[5].string
|
92
|
-
|
93
|
-
#頭数#tousuu
|
94
|
-
|
95
|
-
lrg7= tr.findAll('td')[6].string
|
96
|
-
|
97
|
-
#枠番#frame_number
|
98
|
-
|
99
|
-
lrg8= tr.findAll('td')[7].string
|
100
|
-
|
101
|
-
#馬番#horse_number
|
102
|
-
|
103
|
-
lrg9= tr.findAll('td')[8].string
|
104
|
-
|
105
|
-
#単勝#single_win_ratio
|
106
|
-
|
107
|
-
lrg10= tr.findAll('td')[9].string
|
108
|
-
|
109
|
-
#人気#popularity
|
110
|
-
|
111
|
-
lrg11= tr.findAll('td')[10].string
|
112
|
-
|
113
|
-
#着順#horse_arrival
|
114
|
-
|
115
|
-
lrg12= tr.findAll('td')[11].string
|
116
|
-
|
117
|
-
#馬名#horse_name
|
118
|
-
|
119
|
-
lrg13= tr.findAll('td',{'class':'txt_l'}) [1]
|
120
|
-
|
121
|
-
#斤量#weight
|
122
|
-
|
123
|
-
lrg14= tr.findAll('td')[13].string
|
124
|
-
|
125
|
-
#距離#distance
|
126
|
-
|
127
|
-
lrg15= tr.findAll('td')[14].string
|
128
|
-
|
129
|
-
#馬場#baba
|
130
|
-
|
131
|
-
lrg16= tr.findAll('td')[15].string
|
132
|
-
|
133
|
-
#タイム#race_time
|
134
|
-
|
135
|
-
lrg17= tr.findAll('td')[16].string
|
136
|
-
|
137
|
-
#着差#difference
|
138
|
-
|
139
|
-
lrg18= tr.findAll('td')[17].string
|
140
|
-
|
141
|
-
#通過#horse_pass
|
142
|
-
|
143
|
-
lrg19= tr.findAll('td')[18].string
|
144
|
-
|
145
|
-
#ペース#pace
|
146
|
-
|
147
|
-
lrg20= tr.findAll('td')[19].string
|
148
|
-
|
149
|
-
#上り#nobori
|
150
|
-
|
151
|
-
lrg21= tr.findAll('td')[20].string
|
152
|
-
|
153
|
-
#馬体重#horse_weight
|
154
|
-
|
155
|
-
lrg22= tr.findAll('td')[21].string
|
156
|
-
|
157
|
-
#勝ち馬#win_horse
|
158
|
-
|
159
|
-
lrg23= tr.findAll('td',{'class':'txt_l'}) [2]
|
160
|
-
|
161
|
-
#賞金#prize_money
|
162
|
-
|
163
|
-
lrg24= tr.findAll('td')[23].string
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
print lrg1,lrg2,lrg3,lrg4,lrg5,lrg6,lrg7,lrg8,lrg9,lrg10,\
|
168
|
-
|
169
|
-
lrg11,lrg12,lrg13.a.string,lrg14,lrg15,lrg16,lrg17,\
|
170
|
-
|
171
|
-
lrg18,lrg19,lrg20,lrg21,lrg22,lrg23.a.string,lrg24
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
f.close()
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
```ここに言語を入力
|
182
|
-
|
183
|
-
コード
|
184
|
-
|
185
|
-
```
|
186
|
-
|
187
187
|
|
188
188
|
|
189
189
|
|
1
コードブロックで囲みました、よろしくお願いいたします。
test
CHANGED
File without changes
|
test
CHANGED
@@ -24,7 +24,11 @@
|
|
24
24
|
|
25
25
|
|
26
26
|
|
27
|
-
|
27
|
+
```ここに言語を入力
|
28
|
+
|
29
|
+
コード
|
30
|
+
|
31
|
+
```
|
28
32
|
|
29
33
|
# -*- coding:utf-8 -*-
|
30
34
|
|
@@ -174,6 +178,18 @@
|
|
174
178
|
|
175
179
|
|
176
180
|
|
181
|
+
```ここに言語を入力
|
182
|
+
|
183
|
+
コード
|
184
|
+
|
185
|
+
```
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
|
190
|
+
|
191
|
+
|
192
|
+
|
177
193
|
(参考)下記のコードが苦慮しておりますスクレイピング対象のHTMLの
|
178
194
|
|
179
195
|
最初の1行部分でございます。
|