Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Low-dependency indexing utilities. 

3""" 

4import warnings 

5 

6import numpy as np 

7 

8from pandas._typing import Any, AnyArrayLike 

9 

10from pandas.core.dtypes.common import ( 

11 is_array_like, 

12 is_bool_dtype, 

13 is_extension_array_dtype, 

14 is_integer_dtype, 

15 is_list_like, 

16) 

17from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries 

18 

19# ----------------------------------------------------------- 

20# Indexer Identification 

21 

22 

def is_list_like_indexer(key) -> bool:
    """
    Report whether ``key`` is a list-like indexer other than a NamedTuple.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True when ``key`` is list-like and is either not a tuple at all or
        is exactly of type ``tuple``. False for tuple subclasses (such as
        NamedTuples) and for anything that is not list-like.
    """
    if not is_list_like(key):
        return False

    # Tuple subclasses (e.g. NamedTuples) can themselves be indexers, so they
    # are not treated as list-like here; a plain tuple still is.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass

37 

38 

def is_scalar_indexer(indexer, arr_value) -> bool:
    """
    Return True if we are all scalar indexers.

    NOTE(review): the body does not test for scalars. The only expression
    that can produce True is the zero-length ``np.ndarray`` test, which is
    the same expression ``is_empty_indexer`` uses. Confirm the intended
    behavior before relying on this function's name.

    Parameters
    ----------
    indexer : object
        The indexer to inspect.
    arr_value : object
        Only its ``ndim`` attribute is read.

    Returns
    -------
    bool
    """
    if arr_value.ndim == 1:
        if not isinstance(indexer, tuple):
            # wrap a lone indexer so it can be scanned like a tuple of them
            indexer = tuple([indexer])
            return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
    # every remaining combination is reported as False
    return False

52 

53 

def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
    """
    Determine whether ``indexer`` selects nothing.

    Parameters
    ----------
    indexer : object
        The indexer to inspect.
    arr_value : np.ndarray
        Only its ``ndim`` is consulted.

    Returns
    -------
    bool
        True if ``indexer`` is list-like with length zero, or if
        ``arr_value`` is 1-dimensional and ``indexer`` (or any element of
        it, when it is a tuple) is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    if arr_value.ndim != 1:
        return False

    # Treat a lone indexer as a one-element tuple so both forms share a path.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

74 

75 

76# ----------------------------------------------------------- 

77# Indexer Validation 

78 

79 

def check_setitem_lengths(indexer, value, values) -> None:
    """
    Validate that ``value`` and ``indexer`` have matching lengths.

    A special case is allowed when the indexer is a boolean ndarray whose
    number of True entries equals the length of ``value``; no exception is
    raised in that situation.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and its length does
        not match the length of a list-like ``value``.
    """
    if not is_list_like(value):
        # Both checks below only apply to list-like values.
        return

    if isinstance(indexer, (np.ndarray, list)):
        if len(indexer) == len(value):
            return

        # A boolean mask is fine as long as it selects len(value) positions.
        mask_selects_all_values = (
            isinstance(indexer, np.ndarray)
            and indexer.dtype == np.bool_
            and len(indexer[indexer]) == len(value)
        )
        if not mask_selects_all_values:
            raise ValueError(
                "cannot set using a list-like indexer "
                "with a different length than the value"
            )

    elif isinstance(indexer, slice):
        # The slice is only compared when the destination is non-empty.
        if len(values) and len(value) != length_of_indexer(indexer, values):
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )

127 

128 

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray or sequence of int
        Positions to validate. Non-ndarray input is coerced with
        ``np.asarray`` so that plain lists are accepted as well.
    n : int
        Length of the array being indexed.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If any index is smaller than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK
    >>> validate_indices([1, -2], 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)
    >>> validate_indices([1, 2, 3], 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    >>> validate_indices([-1, -1], 0)  # OK
    >>> validate_indices([0, 1], 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # Lists and tuples have no .min()/.max(); coercing is a no-op for ndarrays.
    indices = np.asarray(indices)

    # min()/max() raise on empty arrays, and there is nothing to check anyway.
    if len(indices):
        min_idx = indices.min()
        if min_idx < -1:
            msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
            raise ValueError(msg)

        max_idx = indices.max()
        if max_idx >= n:
            raise IndexError("indices are out-of-bounds")

167 

168 

169# ----------------------------------------------------------- 

170# Indexer Conversion 

171 

172 

def maybe_convert_indices(indices, n: int):
    """
    Translate possibly-negative positions into valid, non-negative ones.

    Negative entries are shifted by ``n``. If any entry is still outside
    ``[0, n)`` after that shift, an IndexError is raised.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert. A list is converted to an
        ndarray first.
    n : int
        Number of elements in the array that we are indexing.

    Returns
    -------
    array-like
        Non-negative indices corresponding to the ones passed in. The input
        array is returned as-is when it holds no negative entries; otherwise
        a shifted copy is returned and the input is left untouched.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would be a float array, which cannot be used
            # for indexing, so hand back an empty integer array instead.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Work on a copy so the caller's array is never modified.
        indices = indices.copy()
        indices[negative] += n

    out_of_range = (indices < 0) | (indices >= n)
    if out_of_range.any():
        raise IndexError("indices are out-of-bounds")
    return indices

215 

216 

217# ----------------------------------------------------------- 

218# Unsorted 

219 

220 

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the length of a single non-tuple indexer which could be a slice.

    Parameters
    ----------
    indexer : object
        The indexer whose length is wanted.
    target : sized object, optional
        The object being indexed. Only its length is used, and only when
        ``indexer`` is a slice.

    Returns
    -------
    int
        For a slice with a ``target``, the number of positions the slice
        selects from a sequence of ``len(target)`` elements. For a Series,
        Index, ndarray or list, its ``len``. For an indexer that is not a
        list-like indexer, 1.

    Raises
    ------
    AssertionError
        If ``indexer`` is a list-like indexer of a type not handled above.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices applies Python's own clamping rules, so empty
        # slices yield 0 (never a negative number) and negative steps with
        # open or out-of-range bounds are counted correctly.
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)):
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")

253 

254 

def deprecate_ndim_indexing(result):
    """
    Emit a DeprecationWarning when ``result`` has more than one dimension.

    GH#27125: an indexer such as ``idx[:, None]`` expands the dimensionality,
    but that cannot be done while keeping an index, so an ndarray is
    currently returned instead. That behavior is deprecated (GH#30588).

    Parameters
    ----------
    result : object
        Result of an indexing operation; its dimensionality is obtained
        with ``np.ndim``.
    """
    if np.ndim(result) <= 1:
        return

    message = (
        "Support for multi-dimensional indexing (e.g. `index[:, None]`) "
        "on an Index is deprecated and will be removed in a future "
        "version. Convert to a numpy array before indexing instead."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=3)

272 

273 

274# ----------------------------------------------------------- 

275# Public indexer validation 

276 

277 

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray or object
        The validated indexer as a numpy array that can be used to index.
        Input that is not list-like, and any tuple, is returned unchanged.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer's dtype is
        neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None).
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing).
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # missing values in a boolean mask select nothing
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                "Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
    elif is_integer_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            # Keep the underlying conversion failure attached as the cause.
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err
    else:
        raise IndexError("arrays used as indices must be of integer or boolean type")

    return indexer