前提・実現したいこと
python 3.6系で、pandasのdataframeを使って、特定の列(list)と、特定の列の値がいくら以上(辞書(列名:条件(xx以上)))という条件でフィルタリングして、結果のdfを返す関数を作りたいです。
イメージは下記のようなものとなります。dictのkey(列名)と、listの列名は、
集合としては、listのほうが大きい(もしくは等しい)です。(まずlistでフィルタして、その中で、辞書でフィルタする感じ)
発生している問題・エラーメッセージ
上記を作ろうと思い、コードを描いてみたのですが、下記エラーが発生します。
「k」(辞書のkey)は存在するのにdefineされてないと出て、「???」となっています。。。
非常に基礎的なことで引っかかっているような気もして、質問するのも申し訳ありませんが、ご教示いただけますと幸甚に存じます。
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\scope.py in resolve(self, key, is_local) 180 if self.has_resolvers: --> 181 return self.resolvers[key] 182 C:\ProgramData\Anaconda3\lib\collections\__init__.py in __getitem__(self, key) 882 pass --> 883 return self.__missing__(key) # support subclasses that define __missing__ 884 C:\ProgramData\Anaconda3\lib\collections\__init__.py in __missing__(self, key) 874 def __missing__(self, key): --> 875 raise KeyError(key) 876 KeyError: 'k' During handling of the above exception, another exception occurred: KeyError Traceback (most recent call last) C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\scope.py in resolve(self, key, is_local) 191 # e.g., df[df > 0] --> 192 return self.temps[key] 193 except KeyError: KeyError: 'k' During handling of the above exception, another exception occurred: UndefinedVariableError Traceback (most recent call last) <ipython-input-21-77582b1647bc> in <module>() ----> 1 df2 = filter_df(df, {'列1':9, '列2':15}, ['列1', '列2']) <ipython-input-20-393068bc75a5> in filter_df(src_df, dict_row, lst_col) 4 ret_df = src_df[lst_col] 5 for k, v in dict_row.items(): ----> 6 print(ret_df.query('k > @v')) 7 8 return ret_df C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in query(self, expr, inplace, **kwargs) 2845 kwargs['level'] = kwargs.pop('level', 0) + 1 2846 kwargs['target'] = None -> 2847 res = self.eval(expr, **kwargs) 2848 2849 try: C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in eval(self, expr, inplace, **kwargs) 2960 kwargs['target'] = self 2961 kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers) -> 2962 return _eval(expr, inplace=inplace, **kwargs) 2963 2964 def select_dtypes(self, include=None, exclude=None): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace) 289 290 parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, --> 291 truediv=truediv) 292 293 # construct the engine and evaluate the parsed expression C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in __init__(self, expr, engine, parser, env, truediv, level) 737 self.env.scope['truediv'] = truediv 738 self._visitor = _parsers[parser](self.env, self.engine, self.parser) --> 739 self.terms = self.parse() 740 741 @property C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in parse(self) 754 def parse(self): 755 """Parse an expression""" --> 756 return self._visitor.visit(self.expr) 757 758 @property C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs) 319 method = 'visit_' + node.__class__.__name__ 320 visitor = getattr(self, method) --> 321 return visitor(node, **kwargs) 322 323 def visit_Module(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit_Module(self, node, **kwargs) 325 raise SyntaxError('only a single expression is allowed') 326 expr = node.body[0] --> 327 return self.visit(expr, **kwargs) 328 329 def visit_Expr(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs) 319 method = 'visit_' + node.__class__.__name__ 320 visitor = getattr(self, method) --> 321 return visitor(node, **kwargs) 322 323 def visit_Module(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit_Expr(self, node, **kwargs) 328 329 def visit_Expr(self, node, **kwargs): --> 330 return self.visit(node.value, **kwargs) 331 332 def _rewrite_membership_op(self, node, left, right): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs) 319 method = 'visit_' + node.__class__.__name__ 320 visitor = getattr(self, method) --> 321 return visitor(node, **kwargs) 322 323 def visit_Module(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit_Compare(self, node, **kwargs) 654 op = self.translate_In(ops[0]) 655 binop = ast.BinOp(op=op, left=node.left, right=comps[0]) --> 656 return self.visit(binop) 657 658 # recursive case: we have a chained comparison, a CMP b CMP c, etc. C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs) 319 method = 'visit_' + node.__class__.__name__ 320 visitor = getattr(self, method) --> 321 return visitor(node, **kwargs) 322 323 def visit_Module(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit_BinOp(self, node, **kwargs) 423 424 def visit_BinOp(self, node, **kwargs): --> 425 op, op_class, left, right = self._maybe_transform_eq_ne(node) 426 left, right = self._maybe_downcast_constants(left, right) 427 return self._maybe_evaluate_binop(op, op_class, left, right) C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in _maybe_transform_eq_ne(self, node, left, right) 360 def _maybe_transform_eq_ne(self, node, left=None, right=None): 361 if left is None: --> 362 left = self.visit(node.left, side='left') 363 if right is None: 364 right = self.visit(node.right, side='right') C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit(self, node, **kwargs) 319 method = 'visit_' + node.__class__.__name__ 320 visitor = getattr(self, method) --> 321 return visitor(node, **kwargs) 322 323 def visit_Module(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expr.py in visit_Name(self, node, **kwargs) 437 438 def visit_Name(self, node, **kwargs): --> 439 return self.term_type(node.id, self.env, **kwargs) 440 441 def visit_NameConstant(self, node, **kwargs): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\ops.py in __init__(self, name, env, side, encoding) 56 self.is_local = (tname.startswith(_LOCAL_TAG) or 57 tname in _DEFAULT_GLOBALS) ---> 58 self._value = self._resolve_name() 59 self.encoding = encoding 60 C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\ops.py in _resolve_name(self) 73 74 def _resolve_name(self): ---> 75 res = self.env.resolve(self.local_name, is_local=self.is_local) 76 self.update(res) 77 C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\scope.py in resolve(self, key, is_local) 192 return self.temps[key] 193 except KeyError: --> 194 raise compu.ops.UndefinedVariableError(key, is_local) 195 196 def swapkey(self, old_key, new_key, new_value=None): UndefinedVariableError: name 'k' is not defined
該当のソースコード
Python3
1def filter_df(src_df, dict_row, lst_col): 2 3 # 列のフィルタ 4 ret_df = src_df[lst_col] 5 for k, v in dict_row.items(): 6 print(ret_df.query('k > @v')) 7 8 return ret_df 9 10 11df = pd.DataFrame(data = np.arange(21).reshape(7,3), columns=['列1', '列2', '列3']) 12 13df2 = filter_df(df, {'列1':9, '列2':15}, ['列1', '列2'])
試したこと
以下のサイトを確認しつつ、jupyterで試行錯誤してみた。
https://note.nkmk.me/python-pandas-query/
補足情報(FW/ツールのバージョンなど)
python 3.6.6
pandas 0.23.1
numpy 1.14.5
回答2件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2019/04/10 03:48