python
1sum_array = np.zeros((0,1280)) 2for i in range(len(result_)): 3 4 a=summary(result_[i], race_info_[i])[0] 5 sum_array = np.vstack((sum_array,a))
result_とrace_info_というリストには、それぞれ7869個のURLが要素として入っています。
この上記のコードを実行すると以下のようなエラーが出てしまいます。
TypeError Traceback (most recent call last) ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\nanops.py in f(values, axis, skipna, **kwds) 127 else: --> 128 result = alt(values, axis=axis, skipna=skipna, **kwds) 129 except Exception: ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\nanops.py in reduction(values, axis, skipna) 506 else: --> 507 result = getattr(values, meth)(axis) 508 ~\Anaconda3\envs\chainer\lib\site-packages\numpy\core\_methods.py in _amax(a, axis, out, keepdims, initial) 27 initial=_NoValue): ---> 28 return umr_maximum(a, axis, None, out, keepdims, initial) 29 TypeError: '>=' not supported between instances of 'str' and 'float' During handling of the above exception, another exception occurred: TypeError Traceback (most recent call last) <ipython-input-11-b187769d2892> in <module>() 7 for i in range(len(result_)): 8 ----> 9 a=summary(result_[i], race_info_[i])[0] 10 sum_array = np.vstack((sum_array,a)) <ipython-input-7-121d16024538> in summary(result_, race_info_) 35 df_['len'] = df_['len']/df_['len'].max() 36 df_['time'] = df_['time']/df_['time'].max() ---> 37 df_['gap'] = pd.Series(df_['gap'],dtype=float)/pd.Series(df_['gap'],dtype=float).max() 38 df_['rank'] = pd.Series(df_['rank'],dtype=float)/pd.Series(df_['rank'],dtype=float).max() 39 df_['cnt'] = pd.Series(df_['cnt'],dtype=float)/pd.Series(df_['cnt'],dtype=float).max() ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\generic.py in stat_func(self, axis, skipna, level, numeric_only, **kwargs) 9611 skipna=skipna) 9612 return self._reduce(f, name, axis=axis, skipna=skipna, -> 9613 numeric_only=numeric_only) 9614 9615 return set_function_name(stat_func, name, cls) ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\series.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds) 3219 'numeric_only.'.format(name)) 3220 with np.errstate(all='ignore'): -> 3221 return op(delegate, skipna=skipna, **kwds) 3222 3223 return delegate._reduce(op=op, name=name, axis=axis, skipna=skipna, ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\nanops.py in f(values, axis, skipna, **kwds) 129 except Exception: 130 try: --> 131 result = alt(values, axis=axis, skipna=skipna, **kwds) 132 except ValueError as e: 133 # we want to transform an object array ~\Anaconda3\envs\chainer\lib\site-packages\pandas\core\nanops.py in reduction(values, axis, skipna) 505 result = np.nan 506 else: --> 507 result = getattr(values, meth)(axis) 508 509 result = _wrap_results(result, dtype) ~\Anaconda3\envs\chainer\lib\site-packages\numpy\core\_methods.py in _amax(a, axis, out, keepdims, initial) 26 def _amax(a, axis=None, out=None, keepdims=False, 27 initial=_NoValue): ---> 28 return umr_maximum(a, axis, None, out, keepdims, initial) 29 30 def _amin(a, axis=None, out=None, keepdims=False, TypeError: '>=' not supported between instances of 'str' and 'float'
以下が関連しているsummary()です。
python
1def summary(result_, race_info_): 2 going_ = get_race_data(result_)[0] 3 wether_ = get_race_data(result_)[1] 4 len_ = get_race_data(result_)[2] 5 win = get_race_data(result_)[3] 6 7 link_lst = horse_page_link(race_info_) 8 9 df_ = pd.DataFrame() 10 11 #頭数足りない時用の0埋めデータフレーム作り 12 fill_z = np.zeros((10,8)) 13 zero_ = pd.DataFrame(fill_z) 14 zero_.rename(columns={0:'place',1:'len',2:'time',3:'gap',4:'wether',5:'going',6:'rank',7:'cnt'},inplace=True) 15 #レース数足りない時用の0埋めデータフレーム作り 16 fill_z_ = np.zeros((1,8)) 17 zero_row = pd.DataFrame(fill_z_) 18 zero_row.rename(columns={0:'place',1:'len',2:'time',3:'gap',4:'wether',5:'going',6:'rank',7:'cnt'},inplace=True) 19 20 #df_に全特徴量をまとめる 21 for i in range(len(link_lst)): 22 if len(uma_info(link_lst[i], going_, wether_, len_).index) < 10: 23 df_ = df_.append(uma_info(link_lst[i], going_, wether_, len_)) 24 for x in range(10 - len(uma_info(link_lst[i], going_, wether_, len_).index)): 25 df_ = df_.append(zero_row) 26 else: 27 df_ = df_.append(uma_info(link_lst[i], going_, wether_, len_)) 28 29 #16頭立てじゃないとき用の0埋め 30 for i in range(16-len(link_lst)): 31 df_ = df_.append(zero_) 32 33 34 #各インプット正規化 35 df_['len'] = df_['len']/df_['len'].max() 36 df_['time'] = df_['time']/df_['time'].max() 37 df_['gap'] = pd.Series(df_['gap'],dtype=float)/pd.Series(df_['gap'],dtype=float).max() 38 df_['rank'] = pd.Series(df_['rank'],dtype=float)/pd.Series(df_['rank'],dtype=float).max() 39 df_['cnt'] = pd.Series(df_['cnt'],dtype=float)/pd.Series(df_['cnt'],dtype=float).max() 40 41 #df_をflattenデータにする 42 data_summary = df_.round(5).values.flatten() 43 44 return data_summary ,win
df_['gap']に入る要素がエラーの原因であるのはわかるのですが、最初に記載したコード内のfor文の何回転目でエラーが起きているか知ることはできるのでしょうか?
それとも愚直にdf_['gap']内を探すしかないのでしょうか?
Jupyter Notebookを使用してます。
とりあえず一通り完成させたいという思いから、汚いコードになっています。申し訳ありません。
よろしくお願いいたします。

回答3件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。