teratail header banner
teratail header banner
質問する ログイン 新規登録

回答編集履歴

1

a

2018/10/16 11:46

投稿

tiitoi
tiitoi

スコア21960

answer CHANGED
@@ -18,25 +18,30 @@
18
18
  def get_data(url):
19
19
  req = request.urlopen(url)
20
20
  soup = BeautifulSoup(req.read(), 'html.parser')
21
-
22
- poem_elem = soup.select('td[height=40] b')
23
- if not poem_elem:
24
- return False # 存在しないID
25
21
 
26
22
  # 俳句
27
- poem_elem = poem_elem[0]
23
+ poem_elem = soup.select('td[height=40] b')[0]
28
24
  poem = poem_elem.text.replace('*', '').strip() # サニタイズ
25
+ if not poem:
26
+ return False # 存在しないID
29
27
  # 作者
30
- author_elem = soup.select('table[width=85%] tr:nth-of-type(1) td:nth-of-type(2)')[0]
28
+ author_elem = soup.select('table[cellspacing="1"] tr:nth-of-type(1) td:nth-of-type(2)')[0]
31
29
  author = author_elem.text.strip() # サニタイズ
32
30
  # 季語
33
- season_word_elem = soup.select('table[width=85%] tr:nth-of-type(2) td:nth-of-type(2)')[0]
31
+ season_word_elem = soup.select('table[cellspacing="1"] tr:nth-of-type(2) td:nth-of-type(2)')[0]
34
32
  season_word = season_word_elem.text.strip() # サニタイズ
35
33
  # 季節
36
- season_elem = soup.select('table[width=85%] tr:nth-of-type(3) td:nth-of-type(2)')[0]
34
+ season_elem = soup.select('table[cellspacing="1"] tr:nth-of-type(3) td:nth-of-type(2)')[0]
37
35
  season = season_elem.text.strip() # サニタイズ
38
-
36
+ # 出典
37
+ source_elem = soup.select('table[cellspacing="1"] tr:nth-of-type(4) td:nth-of-type(2)')[0]
38
+ source = source_elem.text.strip() # サニタイズ
39
+ # 前書
40
+ foreword_elem = soup.select('table[cellspacing="1"] tr:nth-of-type(5) td:nth-of-type(2)')[0]
41
+ foreword = foreword_elem.text.strip() # サニタイズ
42
+
39
- return {'poem': poem, 'author': author, 'season_word': season_word, 'season': season}
43
+ return {'poem': poem, 'author': author, 'season_word': season_word,
44
+ 'season': season, 'source': source, 'foreword': foreword}
40
45
  ```
41
46
 
42
47
  ### URL 取得部分
@@ -61,8 +66,7 @@
61
66
 
62
67
  ```
63
68
  with open('output.csv', 'w', newline='', encoding='utf-8') as f:
64
- fieldnames = ['first_name', 'last_name']
65
- writer = csv.DictWriter(f, fieldnames=['poem', 'author', 'season_word', 'season'])
69
+ writer = csv.DictWriter(f, fieldnames=['poem', 'author', 'season_word', 'season', 'source', 'foreword'])
66
70
  writer.writeheader()
67
71
  writer.writerows(data)
68
72
  ```