質問編集履歴

1

ソースコードを載せました。人気の本(文庫、新書、漫画・・・)のデータを抽出しています。

2019/08/07 04:02

投稿

naomaruJr
naomaruJr

スコア8

test CHANGED
File without changes
test CHANGED
@@ -24,25 +24,187 @@
24
24
 
25
25
  ```Python
26
26
 
27
+
28
+
29
+ import requests
30
+
31
+ from bs4 import BeautifulSoup
32
+
27
33
  import pandas as pd
28
34
 
29
35
  import xlsxwriter
30
36
 
31
-
32
-
33
-
34
-
35
- path = 'path'(excelのパスです)
36
-
37
-
38
-
39
- main_data = pd.DataFrame(data)
40
-
41
-
42
-
43
- with pd.ExcelWriter(path)as xlsxwriter:
44
-
45
-   main_data.to_excel(xlsxwriter,sheet_name='example')
37
+ import openpyxl as opx
38
+
39
+
40
+
41
# Module-level state for the scraper.
pages = 1

# `data` is the shared accumulator; data1..data4 hold one ranking period each.
data = []
data1, data2, data3, data4 = [], [], [], []  # daily, weekly, monthly, yearly

genre = ""

# Target Excel workbook; it is opened here with openpyxl.
path = r"C:パス"
wb = opx.load_workbook(path)
60
+
61
+
62
+
63
+ #メイン処理を行う
64
+
65
def mainData(baseURL, pages, max_pages=5):
    """Scrape ranking entries from consecutive pages of a ranking listing.

    Requests ``baseURL + str(page)`` for every page number from ``pages``
    through ``max_pages`` (inclusive) and reads one entry per ``div.desc``
    element found on each page.

    Args:
        baseURL: URL prefix to which the page number is appended.
        pages: First page number to fetch.
        max_pages: Last page number to fetch, inclusive. Defaults to 5,
            the limit that was previously hard-coded.

    Returns:
        A new list of dicts with the keys "title", "users" and "author",
        in page order. A field is None when its element is missing from
        the entry.
    """
    # Collect into a local list so every call returns an independent result
    # instead of sharing (and leaking rows through) a module-level list.
    rows = []

    for page in range(pages, max_pages + 1):
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(baseURL + str(page), timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        for entry in soup.select("div.desc"):
            title_link = entry.a

            users_box = entry.select_one(".info-users")
            users_span = users_box.span if users_box is not None else None

            author_box = entry.select_one(".itemInfoElmBox")
            author_link = author_box.a if author_box is not None else None

            # Missing nodes become None rather than raising AttributeError,
            # so one malformed entry does not abort the whole scrape.
            rows.append(
                {
                    "title": (
                        title_link.string if title_link is not None else None
                    ),
                    "users": (
                        users_span.get_text(strip=True)
                        if users_span is not None
                        else None
                    ),
                    "author": (
                        author_link.string if author_link is not None else None
                    ),
                }
            )

    return rows
110
+
111
+
112
+
113
+ #データ4回*4(ジャンル)
114
+
115
# Genres to scrape; each one contributes four sheets to the workbook.
GENRES = ("book", "bunko", "shinsho", "comic")

# (sheet-name suffix, URL template) for each ranking period.
# The suffixes are runtime sheet names and are kept exactly as written.
RANKING_URLS = (
    ("デイリー", "https://booklog.jp/ranking/{genre}?page="),
    ("週間", "https://booklog.jp/ranking/weekly/201908/1/{genre}?page="),
    ("月間", "https://booklog.jp/ranking/monthly/201907/{genre}?page="),
    ("年間", "https://booklog.jp/ranking/annual/2018/{genre}?page="),
)

# Scrape everything first so the workbook is only opened for writing once
# all the data is available.
sheets = {}
for genre in GENRES:
    for sheet_suffix, url_template in RANKING_URLS:
        baseURL = url_template.format(genre=genre)
        rows = mainData(baseURL, 1)
        # Rebind the shared module-level accumulator so that rows from this
        # ranking can never carry over into the next call.
        data = []
        sheets[genre + sheet_suffix] = pd.DataFrame(rows)

# Open the writer exactly once: ExcelWriter's default mode replaces the file,
# so opening it once per genre would keep only the last genre's sheets.
with pd.ExcelWriter(path) as writer:
    for sheet_name, frame in sheets.items():
        # Pass the ExcelWriter object itself, not the xlsxwriter module.
        frame.to_excel(writer, sheet_name=sheet_name)
202
+
203
+
204
+
205
+
206
+
207
+
46
208
 
47
209
 
48
210