質問編集履歴
2
enumerate追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -150,13 +150,11 @@
|
|
150
150
|
|
151
151
|
with open('h2textlink.csv', 'w+',newline='',encoding='utf-8') as f:
|
152
152
|
|
153
|
-
n = 0
|
154
|
-
|
155
153
|
writer = csv.writer(f, lineterminator='\n')
|
156
154
|
|
157
155
|
std_link = 'http://kondou.com/BS4/'
|
158
156
|
|
159
|
-
for subheading in soup_content.find_all('h2'):
|
157
|
+
for n, subheading in enumerate(soup_content.find_all('h2')):
|
160
158
|
|
161
159
|
sh = subheading.get_text()
|
162
160
|
|
@@ -165,8 +163,6 @@
|
|
165
163
|
|
166
164
|
|
167
165
|
writer.writerow([n, sh, std_link + h2link])
|
168
|
-
|
169
|
-
n += 1
|
170
166
|
|
171
167
|
pass
|
172
168
|
|
1
完成コード追加
test
CHANGED
@@ -1 +1 @@
|
|
1
|
-
h
|
1
|
+
h2内のリンクを取得したい。webスクレイピング
|
test
CHANGED
@@ -114,4 +114,76 @@
|
|
114
114
|
|
115
115
|
|
116
116
|
|
117
|
+
# h2のテキストとリンク取得完了!
|
118
|
+
|
117
|
-
|
119
|
+
おかげさまでできました! (≧∀≦)
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
```python
|
126
|
+
|
127
|
+
# -*- coding: utf-8 -*-
|
128
|
+
|
129
|
+
from bs4 import BeautifulSoup
|
130
|
+
|
131
|
+
import requests
|
132
|
+
|
133
|
+
import csv
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
"""
|
138
|
+
|
139
|
+
csvファイルに<h2>のテキストとリンクを保存
|
140
|
+
|
141
|
+
"""
|
142
|
+
|
143
|
+
r = requests.get("http://kondou.com/BS4/index.html")
|
144
|
+
|
145
|
+
soup_content = BeautifulSoup(r.content, "html.parser")
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
alltxt = soup_content.get_text()
|
150
|
+
|
151
|
+
with open('h2textlink.csv', 'w+',newline='',encoding='utf-8') as f:
|
152
|
+
|
153
|
+
n = 0
|
154
|
+
|
155
|
+
writer = csv.writer(f, lineterminator='\n')
|
156
|
+
|
157
|
+
std_link = 'http://kondou.com/BS4/'
|
158
|
+
|
159
|
+
for subheading in soup_content.find_all('h2'):
|
160
|
+
|
161
|
+
sh = subheading.get_text()
|
162
|
+
|
163
|
+
h2link = subheading.a['href']
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
writer.writerow([n, sh, std_link + h2link])
|
168
|
+
|
169
|
+
n += 1
|
170
|
+
|
171
|
+
pass
|
172
|
+
|
173
|
+
```
|
174
|
+
|
175
|
+
## コード実行結果
|
176
|
+
|
177
|
+
```csv
|
178
|
+
|
179
|
+
0,(訳注)石鹸は食べられない¶,http://kondou.com/BS4/#id2
|
180
|
+
|
181
|
+
1,この文書について¶,http://kondou.com/BS4/#id3
|
182
|
+
|
183
|
+
2,助けてほしいときは¶,http://kondou.com/BS4/#id5
|
184
|
+
|
185
|
+
3,インストール後の問題¶,http://kondou.com/BS4/#id9
|
186
|
+
|
187
|
+
...(略)
|
188
|
+
|
189
|
+
```
|