■概要
Instagramで指定したハッシュタグの投稿数を自動的に取得するコードを書いています
以下のコードを実行しているのですが、エラーが手で来てしまい
色々調べたのですが、まだまだ初心者で知識が及ばず、皆様のお力をいただけますと幸いです。
■現状のコード
html
1from bs4 import BeautifulSoup 2import requests 3import json 4import re 5import pprint 6 7def get_json_data(url): 8 9 res = requests.get(url) 10 html = BeautifulSoup(res.content, 'html.parser') 11 12 pattern = re.compile('window._sharedData = ({.*?});') 13 script = html.find("script", text=pattern) 14 data = pattern.search(script.text).group(1) 15 json_user_data = json.loads(data) 16 17 return(json_user_data) 18 19def get_hashtag_stats(tags): 20 21 for tag in tags: 22 23 json_hashtag = get_json_data('https://www.instagram.com/explore/tags/'+str(tag)) 24 print(json_hashtag) 25 26 number_of_posts = json_hashtag['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['count'] 27 # timestamp_1st = json_hashtag['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges'][0]['node']['taken_at_timestamp'] 28 # timestamp_1st = datetime.fromtimestamp(timestamp_1st) 29 # timestamp_10th = json_hashtag['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges'][9]['node']['taken_at_timestamp'] 30 # timestamp_10th = datetime.fromtimestamp(timestamp_10th) 31 # 32 # #最新の投稿と10個前の投稿の時間の差を求める 33 # diff = timestamp_1st - timestamp_10th 34 # 35 # post_freq= int(diff.total_seconds()/(10*60)) 36 37 # output_line = [datetime.now().strftime("%d/%m/%Y %H:%M:%S"), str(tag), str(number_of_posts), str(post_freq)] 38 # print(output_line) 39 # 40 # with open(r'/Users/sakailab/Desktop/Report_Python/hashtags_stats.csv', 'a') as f: 41 # writer = csv.writer(f) 42 # writer.writerow(output_line) 43 44 if sleep_timer != 0: 45 time.sleep(sleep_timer) 46 47 return() 48 49def clean_hashtags(tags): 50 51 if tags[0] == '#': 52 53 tags = tags.replace(' ', '') 54 hash_array = tags.split('#') 55 56 else: 57 58 hash_array = tags.split(',') 59 taglist = [a for a in hash_array if a != ''] 60 61 return(taglist) 62 63 64if __name__ == "__main__": 65 66 hashtags = "#LondonNature #britain #centrallondon #england" 67 print(hashtags) 68 get_hashtag_stats(clean_hashtags(hashtags)) 69 # pprint(taglist)
■エラー
html
1Traceback (most recent call last): 2 File "hashtag_posts.py", line 44, in <module> 3 get_hashtag_posts(clean_hashtags(hashtags)) 4 File "hashtag_posts.py", line 21, in get_hashtag_posts 5 json_hashtag = get_json_data('https://www.instagram.com/explore/tags/'+str(tag)) 6 File "hashtag_posts.py", line 13, in get_json_data 7 data = pattern.search(script.text).group(1) 8AttributeError: 'NoneType' object has no attribute 'group'
■参考にしたもの
https://note.com/masarusuzuki/n/n5a9216060fd8
よろしくお願いいたします。