Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2 

3""" 

4Top level ``eval`` module. 

5""" 

6 

7import tokenize 

8from typing import Optional 

9import warnings 

10 

11from pandas._libs.lib import no_default 

12from pandas.util._validators import validate_bool_kwarg 

13 

14from pandas.core.computation.engines import _engines 

15from pandas.core.computation.expr import Expr, _parsers 

16from pandas.core.computation.parsing import tokenize_string 

17from pandas.core.computation.scope import ensure_scope 

18 

19from pandas.io.formats.printing import pprint_thing 

20 

21 

22def _check_engine(engine: Optional[str]) -> str: 

23 """ 

24 Make sure a valid engine is passed. 

25 

26 Parameters 

27 ---------- 

28 engine : str 

29 

30 Raises 

31 ------ 

32 KeyError 

33 * If an invalid engine is passed 

34 ImportError 

35 * If numexpr was requested but doesn't exist 

36 

37 Returns 

38 ------- 

39 string engine 

40 """ 

41 from pandas.core.computation.check import _NUMEXPR_INSTALLED 

42 

43 if engine is None: 

44 if _NUMEXPR_INSTALLED: 

45 engine = "numexpr" 

46 else: 

47 engine = "python" 

48 

49 if engine not in _engines: 

50 valid = list(_engines.keys()) 

51 raise KeyError( 

52 f"Invalid engine {repr(engine)} passed, valid engines are {valid}" 

53 ) 

54 

55 # TODO: validate this in a more general way (thinking of future engines 

56 # that won't necessarily be import-able) 

57 # Could potentially be done on engine instantiation 

58 if engine == "numexpr": 

59 if not _NUMEXPR_INSTALLED: 

60 raise ImportError( 

61 "'numexpr' is not installed or an " 

62 "unsupported version. Cannot use " 

63 "engine='numexpr' for query/eval " 

64 "if 'numexpr' is not installed" 

65 ) 

66 

67 return engine 

68 

69 

70def _check_parser(parser: str): 

71 """ 

72 Make sure a valid parser is passed. 

73 

74 Parameters 

75 ---------- 

76 parser : str 

77 

78 Raises 

79 ------ 

80 KeyError 

81 * If an invalid parser is passed 

82 """ 

83 

84 if parser not in _parsers: 

85 raise KeyError( 

86 f"Invalid parser {repr(parser)} passed, " 

87 f"valid parsers are {_parsers.keys()}" 

88 ) 

89 

90 

91def _check_resolvers(resolvers): 

92 if resolvers is not None: 

93 for resolver in resolvers: 

94 if not hasattr(resolver, "__getitem__"): 

95 name = type(resolver).__name__ 

96 raise TypeError( 

97 f"Resolver of type {repr(name)} does not " 

98 f"implement the __getitem__ method" 

99 ) 

100 

101 

102def _check_expression(expr): 

103 """ 

104 Make sure an expression is not an empty string 

105 

106 Parameters 

107 ---------- 

108 expr : object 

109 An object that can be converted to a string 

110 

111 Raises 

112 ------ 

113 ValueError 

114 * If expr is an empty string 

115 """ 

116 if not expr: 

117 raise ValueError("expr cannot be an empty string") 

118 

119 

120def _convert_expression(expr) -> str: 

121 """ 

122 Convert an object to an expression. 

123 

124 This function converts an object to an expression (a unicode string) and 

125 checks to make sure it isn't empty after conversion. This is used to 

126 convert operators to their string representation for recursive calls to 

127 :func:`~pandas.eval`. 

128 

129 Parameters 

130 ---------- 

131 expr : object 

132 The object to be converted to a string. 

133 

134 Returns 

135 ------- 

136 str 

137 The string representation of an object. 

138 

139 Raises 

140 ------ 

141 ValueError 

142 * If the expression is empty. 

143 """ 

144 s = pprint_thing(expr) 

145 _check_expression(s) 

146 return s 

147 

148 

149def _check_for_locals(expr: str, stack_level: int, parser: str): 

150 

151 at_top_of_stack = stack_level == 0 

152 not_pandas_parser = parser != "pandas" 

153 

154 if not_pandas_parser: 

155 msg = "The '@' prefix is only supported by the pandas parser" 

156 elif at_top_of_stack: 

157 msg = ( 

158 "The '@' prefix is not allowed in " 

159 "top-level eval calls, \nplease refer to " 

160 "your variables by name without the '@' " 

161 "prefix" 

162 ) 

163 

164 if at_top_of_stack or not_pandas_parser: 

165 for toknum, tokval in tokenize_string(expr): 

166 if toknum == tokenize.OP and tokval == "@": 

167 raise SyntaxError(msg) 

168 

169 

170def eval( 

171 expr, 

172 parser="pandas", 

173 engine: Optional[str] = None, 

174 truediv=no_default, 

175 local_dict=None, 

176 global_dict=None, 

177 resolvers=(), 

178 level=0, 

179 target=None, 

180 inplace=False, 

181): 

182 """ 

183 Evaluate a Python expression as a string using various backends. 

184 

185 The following arithmetic operations are supported: ``+``, ``-``, ``*``, 

186 ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following 

187 boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). 

188 Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, 

189 :keyword:`or`, and :keyword:`not` with the same semantics as the 

190 corresponding bitwise operators. :class:`~pandas.Series` and 

191 :class:`~pandas.DataFrame` objects are supported and behave as they would 

192 with plain ol' Python evaluation. 

193 

194 Parameters 

195 ---------- 

196 expr : str 

197 The expression to evaluate. This string cannot contain any Python 

198 `statements 

199 <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__, 

200 only Python `expressions 

201 <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__. 

202 parser : {'pandas', 'python'}, default 'pandas' 

203 The parser to use to construct the syntax tree from the expression. The 

204 default of ``'pandas'`` parses code slightly different than standard 

205 Python. Alternatively, you can parse an expression using the 

206 ``'python'`` parser to retain strict Python semantics. See the 

207 :ref:`enhancing performance <enhancingperf.eval>` documentation for 

208 more details. 

209 engine : {'python', 'numexpr'}, default 'numexpr' 

210 

211 The engine used to evaluate the expression. Supported engines are 

212 

213 - None : tries to use ``numexpr``, falls back to ``python`` 

214 - ``'numexpr'``: This default engine evaluates pandas objects using 

215 numexpr for large speed ups in complex expressions 

216 with large frames. 

217 - ``'python'``: Performs operations as if you had ``eval``'d in top 

218 level python. This engine is generally not that useful. 

219 

220 More backends may be available in the future. 

221 

222 truediv : bool, optional 

223 Whether to use true division, like in Python >= 3. 

224 deprecated:: 1.0.0 

225 

226 local_dict : dict or None, optional 

227 A dictionary of local variables, taken from locals() by default. 

228 global_dict : dict or None, optional 

229 A dictionary of global variables, taken from globals() by default. 

230 resolvers : list of dict-like or None, optional 

231 A list of objects implementing the ``__getitem__`` special method that 

232 you can use to inject an additional collection of namespaces to use for 

233 variable lookup. For example, this is used in the 

234 :meth:`~DataFrame.query` method to inject the 

235 ``DataFrame.index`` and ``DataFrame.columns`` 

236 variables that refer to their respective :class:`~pandas.DataFrame` 

237 instance attributes. 

238 level : int, optional 

239 The number of prior stack frames to traverse and add to the current 

240 scope. Most users will **not** need to change this parameter. 

241 target : object, optional, default None 

242 This is the target object for assignment. It is used when there is 

243 variable assignment in the expression. If so, then `target` must 

244 support item assignment with string keys, and if a copy is being 

245 returned, it must also support `.copy()`. 

246 inplace : bool, default False 

247 If `target` is provided, and the expression mutates `target`, whether 

248 to modify `target` inplace. Otherwise, return a copy of `target` with 

249 the mutation. 

250 

251 Returns 

252 ------- 

253 ndarray, numeric scalar, DataFrame, Series 

254 

255 Raises 

256 ------ 

257 ValueError 

258 There are many instances where such an error can be raised: 

259 

260 - `target=None`, but the expression is multiline. 

261 - The expression is multiline, but not all them have item assignment. 

262 An example of such an arrangement is this: 

263 

264 a = b + 1 

265 a + 2 

266 

267 Here, there are expressions on different lines, making it multiline, 

268 but the last line has no variable assigned to the output of `a + 2`. 

269 - `inplace=True`, but the expression is missing item assignment. 

270 - Item assignment is provided, but the `target` does not support 

271 string item assignment. 

272 - Item assignment is provided and `inplace=False`, but the `target` 

273 does not support the `.copy()` method 

274 

275 See Also 

276 -------- 

277 DataFrame.query 

278 DataFrame.eval 

279 

280 Notes 

281 ----- 

282 The ``dtype`` of any objects involved in an arithmetic ``%`` operation are 

283 recursively cast to ``float64``. 

284 

285 See the :ref:`enhancing performance <enhancingperf.eval>` documentation for 

286 more details. 

287 """ 

288 

289 inplace = validate_bool_kwarg(inplace, "inplace") 

290 

291 if truediv is not no_default: 

292 warnings.warn( 

293 "The `truediv` parameter in pd.eval is deprecated and will be " 

294 "removed in a future version.", 

295 FutureWarning, 

296 stacklevel=2, 

297 ) 

298 

299 if isinstance(expr, str): 

300 _check_expression(expr) 

301 exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""] 

302 else: 

303 exprs = [expr] 

304 multi_line = len(exprs) > 1 

305 

306 if multi_line and target is None: 

307 raise ValueError( 

308 "multi-line expressions are only valid in the " 

309 "context of data, use DataFrame.eval" 

310 ) 

311 engine = _check_engine(engine) 

312 _check_parser(parser) 

313 _check_resolvers(resolvers) 

314 

315 ret = None 

316 first_expr = True 

317 target_modified = False 

318 

319 for expr in exprs: 

320 expr = _convert_expression(expr) 

321 _check_for_locals(expr, level, parser) 

322 

323 # get our (possibly passed-in) scope 

324 env = ensure_scope( 

325 level + 1, 

326 global_dict=global_dict, 

327 local_dict=local_dict, 

328 resolvers=resolvers, 

329 target=target, 

330 ) 

331 

332 parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) 

333 

334 # construct the engine and evaluate the parsed expression 

335 eng = _engines[engine] 

336 eng_inst = eng(parsed_expr) 

337 ret = eng_inst.evaluate() 

338 

339 if parsed_expr.assigner is None: 

340 if multi_line: 

341 raise ValueError( 

342 "Multi-line expressions are only valid " 

343 "if all expressions contain an assignment" 

344 ) 

345 elif inplace: 

346 raise ValueError("Cannot operate inplace if there is no assignment") 

347 

348 # assign if needed 

349 assigner = parsed_expr.assigner 

350 if env.target is not None and assigner is not None: 

351 target_modified = True 

352 

353 # if returning a copy, copy only on the first assignment 

354 if not inplace and first_expr: 

355 try: 

356 target = env.target.copy() 

357 except AttributeError: 

358 raise ValueError("Cannot return a copy of the target") 

359 else: 

360 target = env.target 

361 

362 # TypeError is most commonly raised (e.g. int, list), but you 

363 # get IndexError if you try to do this assignment on np.ndarray. 

364 # we will ignore numpy warnings here; e.g. if trying 

365 # to use a non-numeric indexer 

366 try: 

367 with warnings.catch_warnings(record=True): 

368 # TODO: Filter the warnings we actually care about here. 

369 target[assigner] = ret 

370 except (TypeError, IndexError): 

371 raise ValueError("Cannot assign expression output to target") 

372 

373 if not resolvers: 

374 resolvers = ({assigner: ret},) 

375 else: 

376 # existing resolver needs updated to handle 

377 # case of mutating existing column in copy 

378 for resolver in resolvers: 

379 if assigner in resolver: 

380 resolver[assigner] = ret 

381 break 

382 else: 

383 resolvers += ({assigner: ret},) 

384 

385 ret = None 

386 first_expr = False 

387 

388 # We want to exclude `inplace=None` as being False. 

389 if inplace is False: 

390 return target if target_modified else ret