質問編集履歴
2
情報の追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
スクレイピングである人物の画像を収集したいです。
|
2
2
|
|
3
|
-
https://
|
3
|
+
https://note.mu/kokoperikyo/n/n8023c7e9e262 を参考にしながら作っています。
|
4
4
|
|
5
5
|
```python
|
6
6
|
|
1
情報の追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -4,9 +4,31 @@
|
|
4
4
|
|
5
5
|
```python
|
6
6
|
|
7
|
-
|
7
|
+
def img_url_list(num):
|
8
8
|
|
9
|
+
"""
|
10
|
+
|
11
|
+
Uses Yahoo image search (this script does not work with Google).
|
12
|
+
|
13
|
+
"""
|
14
|
+
|
15
|
+
num_self = num
|
16
|
+
|
17
|
+
url = 'https://search.yahoo.co.jp/image/search?p=藤田ニコル&oq=藤田&ei=UTF-8&b={}&ktot=5'.format(num_self)
|
18
|
+
|
9
|
-
byte_content, _ = fetcher.fetch(url)
|
19
|
+
byte_content, _ = fetcher.fetch(url)
|
20
|
+
|
21
|
+
structured_page = BeautifulSoup(byte_content.decode('UTF-8'), 'html.parser')
|
22
|
+
|
23
|
+
img_link_elems = structured_page.find_all('a', attrs={'target': 'imagewin'})
|
24
|
+
|
25
|
+
img_urls = [e.get('href') for e in img_link_elems if e.get('href').startswith('http')]
|
26
|
+
|
27
|
+
img_urls = list(set(img_urls))
|
28
|
+
|
29
|
+
num_self += 20
|
30
|
+
|
31
|
+
return img_urls,num_self
|
10
32
|
|
11
33
|
```
|
12
34
|
|