質問編集履歴
2
enumerate追加
title
CHANGED
File without changes
|
body
CHANGED
@@ -74,15 +74,13 @@
|
|
74
74
|
|
75
75
|
alltxt = soup_content.get_text()
|
76
76
|
with open('h2textlink.csv', 'w+',newline='',encoding='utf-8') as f:
|
77
|
-
n = 0
|
78
77
|
writer = csv.writer(f, lineterminator='\n')
|
79
78
|
std_link = 'http://kondou.com/BS4/'
|
80
|
-
for subheading in soup_content.find_all('h2'):
|
79
|
+
for n, subheading in enumerate(soup_content.find_all('h2')):
|
81
80
|
sh = subheading.get_text()
|
82
81
|
h2link = subheading.a['href']
|
83
82
|
|
84
83
|
writer.writerow([n, sh, std_link + h2link])
|
85
|
-
n += 1
|
86
84
|
pass
|
87
85
|
```
|
88
86
|
## コード実行結果
|
1
完成コード追加
title
CHANGED
@@ -1,1 +1,1 @@
|
|
1
|
-
|
1
|
+
h2内のリンクを取得したい。webスクレイピング
|
body
CHANGED
@@ -56,4 +56,40 @@
|
|
56
56
|
...(略)
|
57
57
|
```
|
58
58
|
|
59
|
+
# h2のテキストとリンク取得完了!
|
59
|
-
|
60
|
+
おかげさまでできました! (≧∀≦)
|
61
|
+
|
62
|
+
|
63
|
+
```python
|
64
|
+
# -*- coding: utf-8 -*-
|
65
|
+
from bs4 import BeautifulSoup
|
66
|
+
import requests
|
67
|
+
import csv
|
68
|
+
|
69
|
+
"""
|
70
|
+
csvファイルに<h2>のテキストとリンクを保存
|
71
|
+
"""
|
72
|
+
r = requests.get("http://kondou.com/BS4/index.html")
|
73
|
+
soup_content = BeautifulSoup(r.content, "html.parser")
|
74
|
+
|
75
|
+
alltxt = soup_content.get_text()
|
76
|
+
with open('h2textlink.csv', 'w+',newline='',encoding='utf-8') as f:
|
77
|
+
n = 0
|
78
|
+
writer = csv.writer(f, lineterminator='\n')
|
79
|
+
std_link = 'http://kondou.com/BS4/'
|
80
|
+
for subheading in soup_content.find_all('h2'):
|
81
|
+
sh = subheading.get_text()
|
82
|
+
h2link = subheading.a['href']
|
83
|
+
|
84
|
+
writer.writerow([n, sh, std_link + h2link])
|
85
|
+
n += 1
|
86
|
+
pass
|
87
|
+
```
|
88
|
+
## コード実行結果
|
89
|
+
```csv
|
90
|
+
0,(訳注)石鹸は食べられない¶,http://kondou.com/BS4/#id2
|
91
|
+
1,この文書について¶,http://kondou.com/BS4/#id3
|
92
|
+
2,助けてほしいときは¶,http://kondou.com/BS4/#id5
|
93
|
+
3,インストール後の問題¶,http://kondou.com/BS4/#id9
|
94
|
+
...(略)
|
95
|
+
```
|