質問編集履歴
2
追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -90,34 +90,34 @@
|
|
90
90
|
|
91
91
|
soup = BeautifulSoup(html,'html.parser')
|
92
92
|
|
93
|
+
i = 0
|
93
94
|
|
95
|
+
for a_tag in soup.find_all('a'):
|
94
96
|
|
95
|
-
|
96
|
-
|
97
|
-
wb = op.load_workbook('スクレイピング.xlsx','w')
|
98
|
-
|
99
|
-
ws = wb.active
|
100
|
-
|
101
|
-
for i in soup.find_all('a'):
|
102
|
-
|
103
|
-
j = (urljoin(base_url, i.get('href')))
|
97
|
+
j = (urljoin(base_url, a_tag.get('href')))
|
104
98
|
|
105
99
|
if j.startswith('javascript'):
|
106
100
|
|
107
101
|
continue
|
108
102
|
|
109
|
-
|
103
|
+
i += 1
|
110
104
|
|
111
105
|
ws['A'+str(i)].value = j
|
112
106
|
|
113
107
|
wb.save('スクレイピング.xlsx')
|
114
108
|
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
115
|
```
|
116
116
|
|
117
117
|
```
|
118
118
|
|
119
|
-
|
119
|
+
ws['A'+str(i)].value = j
|
120
120
|
|
121
|
-
|
121
|
+
AttributeError: 'EmptyCell' object attribute 'value' is read-only
|
122
122
|
|
123
123
|
```
|
1
追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -9,6 +9,18 @@
|
|
9
9
|
ご教授お願いいたします。
|
10
10
|
|
11
11
|
```python
|
12
|
+
|
13
|
+
from urllib import request
|
14
|
+
|
15
|
+
from bs4 import BeautifulSoup
|
16
|
+
|
17
|
+
import requests
|
18
|
+
|
19
|
+
from urllib.parse import urljoin
|
20
|
+
|
21
|
+
import openpyxl as op
|
22
|
+
|
23
|
+
|
12
24
|
|
13
25
|
base_url = "url"
|
14
26
|
|
@@ -49,3 +61,63 @@
|
|
49
61
|
AttributeError: 'list' object has no attribute 'value'
|
50
62
|
|
51
63
|
```
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
回答者様の回答を反映しました。
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
```python
|
72
|
+
|
73
|
+
from urllib import request
|
74
|
+
|
75
|
+
from bs4 import BeautifulSoup
|
76
|
+
|
77
|
+
import requests
|
78
|
+
|
79
|
+
from urllib.parse import urljoin
|
80
|
+
|
81
|
+
import openpyxl as op
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
base_url = "url"
|
86
|
+
|
87
|
+
html = request.urlopen(base_url)
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
soup = BeautifulSoup(html,'html.parser')
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
wb = op.load_workbook('スクレイピング.xlsx','w')
|
98
|
+
|
99
|
+
ws = wb.active
|
100
|
+
|
101
|
+
for i in soup.find_all('a'):
|
102
|
+
|
103
|
+
j = (urljoin(base_url, i.get('href')))
|
104
|
+
|
105
|
+
if j.startswith('javascript'):
|
106
|
+
|
107
|
+
continue
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
ws['A'+str(i)].value = j
|
112
|
+
|
113
|
+
wb.save('スクレイピング.xlsx')
|
114
|
+
|
115
|
+
```
|
116
|
+
|
117
|
+
```
|
118
|
+
|
119
|
+
raise ValueError("{0} is not a valid coordinate or range")
|
120
|
+
|
121
|
+
ValueError: {0} is not a valid coordinate or range
|
122
|
+
|
123
|
+
```
|