#実現したいこと
コードを完成させて顧客情報と位置情報よりマッピングして視覚化した分析ができるようにしたい。
#発生している問題・エラーメッセージ
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in create_block_manager_from_arrays(arrays, names, axes) 1693 try: -> 1694 blocks = form_blocks(arrays, names, axes) 1695 mgr = BlockManager(blocks, axes) ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in form_blocks(arrays, names, axes) 1782 if len(items_dict["ObjectBlock"]) > 0: -> 1783 object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) 1784 blocks.extend(object_blocks) ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in _simple_blockify(tuples, dtype) 1826 """ -> 1827 values, placement = _stack_arrays(tuples, dtype) 1828 ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in _stack_arrays(tuples, dtype) 1875 for i, arr in enumerate(arrays): -> 1876 stacked[i] = _asarray_compat(arr) 1877 ValueError: could not broadcast input array from shape (27234) into shape (2) During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-10-554ffd055ff2> in <module> 149 150 x_list3=pd.concat([price, x_list], axis=1) --> 151 sns.pairplot(x_list3, hue="Manhattan_dummy") 152 153 ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/axisgrid.py in pairplot(data, hue, hue_order, palette, vars, x_vars, y_vars, kind, diag_kind, markers, height, aspect, dropna, plot_kws, diag_kws, grid_kws, size) 2119 if kind == "scatter": 2120 from .relational import scatterplot # Avoid circular import -> 2121 plotter(scatterplot, **plot_kws) 2122 elif kind == "reg": 2123 from .regression import regplot # Avoid circular import ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/axisgrid.py in map_offdiag(self, func, **kwargs) 1510 """ 1511 -> 1512 self.map_lower(func, **kwargs) 1513 self.map_upper(func, **kwargs) 1514 return self ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/axisgrid.py in map_lower(self, func, **kwargs) 1440 color = self.palette[k] if kw_color is None else kw_color 1441 func(data_k[x_var], data_k[y_var], label=label_k, -> 1442 color=color, **kwargs) 1443 1444 self._clean_axis(ax) ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/relational.py in scatterplot(x, y, hue, style, size, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, markers, style_order, x_bins, y_bins, units, estimator, ci, n_boot, alpha, x_jitter, y_jitter, legend, ax, **kwargs) 1333 x_bins=x_bins, y_bins=y_bins, 1334 estimator=estimator, ci=ci, n_boot=n_boot, -> 1335 alpha=alpha, x_jitter=x_jitter, y_jitter=y_jitter, legend=legend, 1336 ) 1337 ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/relational.py in __init__(self, x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, x_bins, y_bins, units, estimator, ci, n_boot, alpha, x_jitter, y_jitter, legend) 850 851 plot_data = self.establish_variables( --> 852 x, y, hue, size, style, units, data 853 ) 854 ~/opt/anaconda3/lib/python3.7/site-packages/seaborn/relational.py in establish_variables(self, x, y, hue, size, style, units, data) 155 units=units 156 ) --> 157 plot_data = pd.DataFrame(plot_data) 158 159 # Option 3: ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy) 409 ) 410 elif isinstance(data, dict): --> 411 mgr = init_dict(data, index, columns, dtype=dtype) 412 elif isinstance(data, ma.MaskedArray): 413 import numpy.ma.mrecords as mrecords ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype) 255 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays 256 ] --> 257 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) 258 259 ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype) 85 axes = [ensure_index(columns), index] 86 ---> 87 return create_block_manager_from_arrays(arrays, arr_names, axes) 88 89 ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in create_block_manager_from_arrays(arrays, names, axes) 1697 return mgr 1698 except ValueError as e: -> 1699 construction_error(len(arrays), arrays[0].shape, axes, e) 1700 1701 ~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in construction_error(tot_items, block_shape, axes, e) 1717 raise ValueError("Empty data passed with indices specified.") 1718 raise ValueError( -> 1719 "Shape of passed values is {0}, indices imply {1}".format(passed, implied) 1720 ) 1721 ValueError: Shape of passed values is (2, 6), indices imply (27234, 6)
#該当のソースコード
コード
import pandas as pd input_book = pd.ExcelFile('AB_NYC_2019_2.xlsx') input_sheet_name = input_book.sheet_names num_sheet = len(input_sheet_name) print(input_sheet_name) print("Sheet の数:", num_sheet) input_sheet_df = input_book.parse(input_sheet_name[0]) input_sheet_df.head(10) import matplotlib.pyplot as plt import scipy.stats manhattan_dummy=input_sheet_df.Manhattan_dummy private_dummy=input_sheet_df.private_dummy home_dummy=input_sheet_df.home_dummy shared_dummy=input_sheet_df.shared_dummy price=input_sheet_df.price minimum_nights=input_sheet_df.minimum_nights number_of_reviews=input_sheet_df.number_of_reviews reviews_per_month=input_sheet_df.reviews_per_month calculated_host_listings_count=input_sheet_df.calculated_host_listings_count availability_365=input_sheet_df.availability_365 result = scipy.stats.linregress(number_of_reviews,price) print('傾き=', result.slope.round(4),'切片=', result.intercept.round(4), '信頼係数=', result.rvalue.round(4), 'p値=', result.pvalue.round(4), '標準偏差=', result.stderr.round(4)) result_slope = result.slope result_intercept = result.intercept plt.plot(number_of_reviews, [result_slope*u + result_intercept for u in number_of_reviews]) plt.scatter(number_of_reviews,price) plt.title('price and number_of_reviews in Airbnb Dataset') plt.ylabel('price') plt.xlabel('number of reviews') plt.show() import statsmodels.api as sm model = sm.OLS(price, sm.add_constant(number_of_reviews)) result = model.fit() print(result.summary()) print('p-values\n', results.pvalues) import seaborn as sns plt.figure(figsize=(12, 9)) equation_df=pd.concat([manhattan_dummy, private_dummy, home_dummy, shared_dummy, price, minimum_nights, number_of_reviews, reviews_per_month, calculated_host_listings_count, availability_365], axis=1) sns.heatmap(equation_df.pct_change().corr(), annot=True, cmap='Blues') import numpy as np import statsmodels.api as sm from sklearn import linear_model, datasets from sklearn.linear_model import LinearRegression price = pd.DataFrame(equation_df.price) x_list = equation_df.drop("price",1) x_list = equation_df.drop("reviews_per_month",1) x_list = x_list.drop("shared_dummy",1) x_list = x_list.drop("home_dummy",1) x_list = x_list.drop(x_list.columns[np.isnan(x_list).any()], axis=1) model = sm.OLS(price, sm.add_constant(x_list)) result =model.fit() print(result.summary()) print(result.pvalues) from statsmodels.stats.outliers_influence import variance_inflation_factor num_cols = model.exog.shape[1] print(num_cols) #説明変数の列数 vifs = [variance_inflation_factor(model.exog, i) for i in range(0, num_cols)] pdv = pd.DataFrame(vifs, index=model.exog_names, columns=["VIF"]) print(pdv) x_list3=pd.concat([price, x_list], axis=1) sns.pairplot(x_list3, hue="Manhattan_dummy")
#読み込むデータファイル
https://www.dropbox.com/s/ypv4uwe8723x36o/AB_NYC_2019_2.xlsx?dl=0
データサンプルはこちら(3行分)
id name host_id host_name neighbourhood_group Manhattan_dummy neighbourhood latitude longitude room_type private_dummy home_dummy shared_dummy price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365
0 2539 Clean & quiet apt home by the park 2787 John Brooklyn 0 Kensington 40.64749 -73.97237 Private room 1 0 0 149 1 9 2018-10-19 0.21 6 365
1 2595 Skylit Midtown Castle 2845 Jennifer Manhattan 1 Midtown 40.75362 -73.98377 Entire home/apt 0 1 0 225 1 45 2019-05-21 0.38 2 355
2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 Elisabeth Manhattan 1 Harlem 40.80902 -73.94190 Private room 1 0 0 150 3 0 NaT NaN 1 365
#自分で調べたことや試したこと
類例を索しましたが、うまく見つけられませんでした。
2日前にPython/notebookを初め、DLもその際にしましたので、macの最新version(3.7?)を使用しています。
初心者で詳しくないですが、おそらく、このvalue errorのため?、途中でコードの実行が止まっているいるかと思われます。
サポート頂ける方に深謝します!
回答1件
あなたの回答
tips
プレビュー