質問編集履歴
1
ソースコードを載せました。人気の本(文庫、新書、漫画・・・)のデータを抽出しています。
test
CHANGED
File without changes
|
test
CHANGED
@@ -24,25 +24,187 @@
|
|
24
24
|
|
25
25
|
```Python
|
26
26
|
|
27
|
+
|
28
|
+
|
29
|
+
import requests
|
30
|
+
|
31
|
+
from bs4 import BeautifulSoup
|
32
|
+
|
27
33
|
import pandas as pd
|
28
34
|
|
29
35
|
import xlsxwriter
|
30
36
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
pa
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
37
|
+
import openpyxl as opx
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
# --- module-level state shared by the scraping routines ---

pages = 1  # first ranking page to request

# Working buffer filled by mainData(), plus one result list per ranking period.
data = []
data1 = []  # daily ranking
data2 = []  # weekly ranking
data3 = []  # monthly ranking
data4 = []  # annual ranking

genre = ""  # current genre slug ("book", "bunko", "shinsho", "comic")

# Destination Excel workbook (placeholder path left by the author).
path = r"C:パス"

# NOTE(review): wb is loaded but never referenced afterwards — confirm it is needed.
wb = opx.load_workbook(path)
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
# Main processing: scrape Booklog ranking pages.
def mainData(baseURL, pages):
    """Scrape Booklog ranking pages and return the collected book records.

    Args:
        baseURL: ranking URL ending in "?page=" — the page number is appended.
        pages: first page number to fetch; pages up to and including 5 are read.

    Returns:
        list[dict]: one dict per book with keys "title", "users", "author".
    """
    # FIX: accumulate into a local list instead of the module-level ``data``.
    # The original appended to the global, which forced every caller to reset
    # ``data = []`` after each call to avoid results leaking between genres.
    results = []
    while pages <= 5:
        url = baseURL + str(pages)
        OpenURL = requests.get(url)
        soup = BeautifulSoup(OpenURL.content, 'html.parser')

        # Each ranking entry lives in a <div class="desc"> element.
        for div in soup.select("div.desc"):
            title = div.a.string
            users = div.select_one(".info-users").span.get_text(strip=True)
            author = div.select_one(".itemInfoElmBox").a.string
            results.append(
                {
                    "title": title,
                    "users": users,
                    "author": author,
                }
            )
        # FIX: removed the bare ``next`` statement — it only evaluated the
        # builtin and had no effect on the loop.
        pages += 1
    return results
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
# Fetch 4 ranking periods (daily / weekly / monthly / annual) for each of the
# 4 genres and write every result to its own sheet of one workbook.
genres = ["book", "bunko", "shinsho", "comic"]

# FIX: open the ExcelWriter ONCE for the whole run.  The original opened
# ``pd.ExcelWriter(path)`` inside the genre loop, which truncates the file on
# every iteration, so only the last genre's four sheets survived.
with pd.ExcelWriter(path) as writer:
    for genre in genres:
        # Daily ranking.
        baseURL = "https://booklog.jp/ranking/" + genre + "?page="
        data1 = mainData(baseURL, 1)
        data = []  # reset the shared buffer used by the original mainData()

        # Weekly ranking.
        baseURL = "https://booklog.jp/ranking/weekly/201908/1/" + genre + "?page="
        data2 = mainData(baseURL, 1)
        data = []

        # Monthly ranking.
        baseURL = "https://booklog.jp/ranking/monthly/201907/" + genre + "?page="
        data3 = mainData(baseURL, 1)
        data = []

        # Annual ranking.
        baseURL = "https://booklog.jp/ranking/annual/2018/" + genre + "?page="
        data4 = mainData(baseURL, 1)
        data = []

        books1 = pd.DataFrame(data1)
        books2 = pd.DataFrame(data2)
        books3 = pd.DataFrame(data3)
        books4 = pd.DataFrame(data4)

        # FIX: pass the ``writer`` instance to to_excel().  The original passed
        # the ``xlsxwriter`` MODULE, which raises a TypeError at runtime.
        books1.to_excel(writer, sheet_name=genre + 'デイリー')
        books2.to_excel(writer, sheet_name=genre + '週間')
        books3.to_excel(writer, sheet_name=genre + '月間')
        books4.to_excel(writer, sheet_name=genre + '年間')
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
|
206
|
+
|
207
|
+
|
46
208
|
|
47
209
|
|
48
210
|
|