前提・実現したいこと

TwitterAPIを利用して特定のツイートに対するリプライの取得を行っております。
リプライ情報の中で、テキスト文以外にユーザーidやユーザー名を取得しようとし、実装中に以下のエラーメッセージが発生しました。
また、このデータ(ユーザーid・ユーザー名・テキスト等)をDataFrameを用いて表にし、かつcsvの形で保存したいです。

発生している問題・エラーメッセージ

ValueError                                Traceback (most recent call last)
~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in create_block_manager_from_blocks(blocks, axes)
   1661                 blocks = [
-> 1662                     make_block(values=blocks[0], placement=slice(0, len(axes[0])))
   1663                 ]

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
   2713 
-> 2714     return klass(values, ndim=ndim, placement=placement)
   2715 

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
   2369 
-> 2370         super().__init__(values, ndim=ndim, placement=placement)
   2371 

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
    129             raise ValueError(
--> 130                 f"Wrong number of items passed {len(self.values)}, "
    131                 f"placement implies {len(self.mgr_locs)}"

ValueError: Wrong number of items passed 1, placement implies 6

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-78-0f418a9fed69> in <module>
      1 if __name__ == '__main__':
----> 2     main()

<ipython-input-61-f197ee9a00ff> in main()
     13     range = 100 # 検索回数の上限値(最大180/15分でリセット)
     14     # ツイート検索・リプライの抽出
---> 15     tweets = search_tweets(CK, CKS, AT, ATS, user_id, tweet_id, count, range)
     16 
     17 

<ipython-input-77-a76659ff5e8f> in search_tweets(CK, CKS, AT, ATS, user_id, tweet_id, count, range)
     36                                       'UserID',
     37                                       'UserScreenName',
---> 38                                       'PostMessage']) 
     39 
     40         df.to_csv('jr1.csv', encoding="utf-8") # CSV形式で保存

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    521                     mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
    522                 else:
--> 523                     mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    524             else:
    525                 mgr = init_dict({}, index, columns, dtype=dtype)

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
    232         block_values = [values]
    233 
--> 234     return create_block_manager_from_blocks(block_values, [columns, index])
    235 
    236 

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in create_block_manager_from_blocks(blocks, axes)
   1670         blocks = [getattr(b, "values", b) for b in blocks]
   1671         tot_items = sum(b.shape[0] for b in blocks)
-> 1672         raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
   1673 
   1674 

ValueError: Shape of passed values is (6, 1), indices imply (6, 6)

csvも保存されておらずかつDataFrameも表示されません。

該当のソースコード

python
1import urllib
2from requests_oauthlib import OAuth1
3import requests
4import sys
5import pandas as pd
6from datetime import datetime, timedelta
7
8def main():
9
10    # APIの秘密鍵
11    CK = '*****'
12    CKS = '*****'
13    AT = '*****'
14    ATS = '*****'
15    # ユーザー・ツイートID
16    user_id = '*****'
17    tweet_id = '*****' # str型で指定
18    # 検索時のパラメーター
19    count = 100 # 一回あたりの検索数(最大100/デフォルトは15)
20    range = 100 # 検索回数の上限値(最大180/15分でリセット)
21    # ツイート検索・リプライの抽出
22    tweets = search_tweets(CK, CKS, AT, ATS, user_id, tweet_id, count, range)
23    # 抽出結果を表示
24    #ツイート数が52個までの意味
25    print(tweets[0:52])
26
27def search_tweets(CK, CKS, AT, ATS, user_id, tweet_id, count, range):
28    # 文字列設定
29    user_id += ' exclude:retweets' # RTは除く
30    user_id = urllib.parse.quote_plus(user_id)
31    # リクエスト
32    url = "https://api.twitter.com/1.1/search/tweets.json?lang=ja&q="+user_id+"&count="+str(count)
33    auth = OAuth1(CK, CKS, AT, ATS)
34    response = requests.get(url, auth=auth)
35    data = response.json()['statuses']
36    # ２回目以降のリクエスト
37    cnt = 0
38    reply_cnt = 0
39    tweets = []
40    while True:
41        if len(data) == 0:
42            break
43        cnt += 1
44        if cnt > range:
45            break
46        for tweet in data:
47            if tweet['in_reply_to_status_id_str'] == tweet_id: # ツイートIDに一致するものを抽出
48                tweets.append(tweet['id_str'])#ツイートID
49                tweets.append('{0:%Y-%m-%d %H:%M:%S}'.format(datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") + timedelta(hours=9)))# 投稿日
50                tweets.append(tweet['user']['name'])# ユーザー名
51                tweets.append(tweet['user']['id_str'])# ユーザーID
52                tweets.append(tweet['user']['screen_name'])# ユーザー表示名
53                tweets.append(tweet['text']) # ツイート内容
54                
55                reply_cnt += 1
56             maxid = int(tweet["id"]) - 1
57        
58    
59        df = pd.DataFrame(tweets,columns=['TweetID',
60                                      'PostedTime',
61                                      'UserName',
62                                      'UserID',
63                                      'UserScreenName',
64                                      'PostMessage']) 
65        
66        df.to_csv('jr1.csv', encoding="utf-8") # CSV形式で保存
67            
68        url = "https://api.twitter.com/1.1/search/tweets.json?lang=ja&q="+user_id+"&count="+str(count)+"&max_id="+str(maxid)
69        response = requests.get(url, auth=auth)
70        try:
71            data = response.json()['statuses']
72        except KeyError: # リクエスト回数が上限に達した場合のデータのエラー処理
73            print('上限まで検索しました')
74            break
75    print('検索回数 :', cnt)
76    print('リプライ数 :', reply_cnt)
77    print(df)
78
79    return tweets
80
81if __name__ == '__main__':
82    main()    
83

補足情報（FW/ツールのバージョンなど）

Jupiterで実行しています。
どなたか力をお貸しください。

以下jeanbiego様が回答してくださったものから修正したverです。
変更したdef search_tweetsの所のみ掲載します。

python
1def search_tweets(CK, CKS, AT, ATS, user_id, tweet_id, count, range):
2    # 文字列設定
3    user_id += ' exclude:retweets' # RTは除く
4    user_id = urllib.parse.quote_plus(user_id)
5    # リクエスト
6    url = "https://api.twitter.com/1.1/search/tweets.json?lang=ja&q="+user_id+"&count="+str(count)
7    auth = OAuth1(CK, CKS, AT, ATS)
8    response = requests.get(url, auth=auth)
9    data = response.json()['statuses']
10    # ２回目以降のリクエスト
11    cnt = 0
12    reply_cnt = 0
13    tweets = []
14    while True:
15        if len(data) == 0:
16            break
17        cnt += 1
18        if cnt > range:
19            break
20        for tweet in data:
21            if tweet['in_reply_to_status_id_str'] == tweet_id: # ツイートIDに一致するものを抽出
22                tweets.append([tweet['id_str']])#ツイートID
23                tweets.append(['{0:%Y-%m-%d %H:%M:%S}'.format(datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y") + timedelta(hours=9))])# 投稿日
24                tweets.append([tweet['user']['name']])# ユーザー名
25                tweets.append([tweet['user']['id_str']])# ユーザーID
26                tweets.append([tweet['user']['screen_name']])# ユーザー表示名
27                tweets.append([tweet['text']]) # ツイート内容
28                
29                reply_cnt += 1
30                
31            maxid = int(tweet["id"]) - 1    
32    
33        url = "https://api.twitter.com/1.1/search/tweets.json?lang=ja&q="+user_id+"&count="+str(count)+"&max_id="+str(maxid)
34        response = requests.get(url, auth=auth)
35        
36        try:
37            data = response.json()['statuses']
38        except KeyError: # リクエスト回数が上限に達した場合のデータのエラー処理
39            print('上限まで検索しました')
40            break
41    
42    df = pd.DataFrame([tweets],columns=['TweetID',
43                                        'PostedTime',
44                                        'UserName',
45                                        'UserID',
46                                        'UserScreenName',
47                                        'PostMessage'])
48    
49    df.to_csv('jr1.csv', encoding="utf-8") # CSV形式で保存
50    print('検索回数 :', cnt)
51    print('リプライ数 :', reply_cnt)
52    print(df)
53    
54    return tweets
55

エラー文です。tweetsの中に横並びで324個のデータが入ってしまっているようです。
全文載せようとしましたが字数制限にひっかかってしまいました。



ValueError: 6 columns passed, passed data had 324 columns

何度も申し訳ありません。分かる方がいらっしゃればよろしくお願いいたします。

jeanbiego

2020/09/13 23:44

エラーは全文を記載するようにしてください。

jeanbiego

2020/09/14 06:24

エラーの全文というのは、traceback（どのファイルの何行目にエラーが発生しているか）も含めて記載してくださいという意図でした。

naochan1027

2020/09/14 07:30

意図を理解せず申し訳ありませんでした。また、回答ありがとうございます。今から回答いただいた部分を修正してみます。

行動規範の内容に同意します

回答1件

ベストアンサー

Tracebackみないと正確なところはわかりませんが、多分ここかなあというところを。

エラー例：カラム1個×インデックス6個のデータフレームに、カラム6個を指定しようとしている

python3
1import pandas as pd
2tweets = [*range(6)]  # [0, 1, 2, 3, 4, 5]
3df = pd.DataFrame(tweets,columns=['TweetID',
4                                      'PostedTime',
5                                      'UserName',
6                                      'UserID',
7                                      'UserScreenName',
8                                      'PostMessage']) 
9print(df)
10# ValueError: Shape of passed values is (6, 1), indices imply (6, 6)

たとえば、tweetsを[tweets]と指定してやると横長のデータフレームになります。これならエラーは出ません。

python3
1import pandas as pd
2tweets = [*range(6)]  # [0, 1, 2, 3, 4, 5]
3df = pd.DataFrame([tweets],columns=['TweetID',
4                                      'PostedTime',
5                                      'UserName',
6                                      'UserID',
7                                      'UserScreenName',
8                                      'PostMessage']) 
9print(df)
10# TweetID  PostedTime  UserName  UserID  UserScreenName  PostMessage
11# 0        0           1         2       3               4            5

追記

一度データフレームを作成してから、転置（行と列を入れ替え）して、カラム名をセットしています。

python3
1import pandas as pd
2
3# テスト用データ
4tweets = list()
5for i in range(6):
6    accounts = list()
7    for a in "ABCDE":
8        accounts.append(a+str(i))
9    tweets.append(accounts)
10print(tweets)
11#[['A0', 'B0', 'C0', 'D0', 'E0'],
12# ['A1', 'B1', 'C1', 'D1', 'E1'],
13# ['A2', 'B2', 'C2', 'D2', 'E2'],
14# ['A3', 'B3', 'C3', 'D3', 'E3'],
15# ['A4', 'B4', 'C4', 'D4', 'E4'],
16# ['A5', 'B5', 'C5', 'D5', 'E5']]
17    
18df = pd.DataFrame(tweets).T  # 転置
19df.columns=['TweetID',
20          'PostedTime',
21          'UserName',
22          'UserID',
23          'UserScreenName',
24          'PostMessage']
25print(df)
26
27#  TweetID PostedTime UserName UserID UserScreenName PostMessage
28#0      A0         A1       A2     A3             A4          A5
29#1      B0         B1       B2     B3             B4          B5
30#2      C0         C1       C2     C3             C4          C5
31#3      D0         D1       D2     D3             D4          D5
32#4      E0         E1       E2     E3             E4          E5

投稿2020/09/14 06:35

編集2020/09/14 10:40

jeanbiego

総合スコア3966

naochan1027

2020/09/14 08:22

エラーの原因を理解することができました。大変助かりました。おかげさまでコードを修正したところ、"ValueError: 6 columns passed, passed data had 324 columns"といったエラーがでたため、54個のアカウントから6項目のデータを取得できているのですが、これを1アカウントずつ行にしたいです。何度も申し訳ありませんがもし分かるようであれば教えていただきたいです。 (補足情報のところに修正したコードとエラーを追加いたしました。)