1""" define the IntervalIndex """ 

2from operator import le, lt 

3import textwrap 

4from typing import Any, Optional, Tuple, Union 

5 

6import numpy as np 

7 

8from pandas._config import get_option 

9 

10from pandas._libs import Timedelta, Timestamp, lib 

11from pandas._libs.interval import Interval, IntervalMixin, IntervalTree 

12from pandas._typing import AnyArrayLike 

13from pandas.util._decorators import Appender, Substitution, cache_readonly 

14from pandas.util._exceptions import rewrite_exception 

15 

16from pandas.core.dtypes.cast import ( 

17 find_common_type, 

18 infer_dtype_from_scalar, 

19 maybe_downcast_to_dtype, 

20) 

21from pandas.core.dtypes.common import ( 

22 ensure_platform_int, 

23 is_categorical, 

24 is_datetime64tz_dtype, 

25 is_datetime_or_timedelta_dtype, 

26 is_dtype_equal, 

27 is_float, 

28 is_float_dtype, 

29 is_integer, 

30 is_integer_dtype, 

31 is_interval_dtype, 

32 is_list_like, 

33 is_number, 

34 is_object_dtype, 

35 is_scalar, 

36) 

37from pandas.core.dtypes.generic import ABCSeries 

38from pandas.core.dtypes.missing import isna 

39 

40from pandas.core.algorithms import take_1d 

41from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs 

42import pandas.core.common as com 

43import pandas.core.indexes.base as ibase 

44from pandas.core.indexes.base import ( 

45 Index, 

46 InvalidIndexError, 

47 _index_shared_docs, 

48 default_pprint, 

49 ensure_index, 

50 maybe_extract_name, 

51) 

52from pandas.core.indexes.datetimes import DatetimeIndex, date_range 

53from pandas.core.indexes.extension import ExtensionIndex, inherit_names 

54from pandas.core.indexes.multi import MultiIndex 

55from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range 

56from pandas.core.ops import get_op_result_name 

57 

58from pandas.tseries.frequencies import to_offset 

59from pandas.tseries.offsets import DateOffset 

60 

61_VALID_CLOSED = {"left", "right", "both", "neither"} 

62_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

63 

64_index_doc_kwargs.update( 

65 dict( 

66 klass="IntervalIndex", 

67 qualname="IntervalIndex", 

68 target_klass="IntervalIndex or list of Intervals", 

69 name=textwrap.dedent( 

70 """\ 

71 name : object, optional 

72 Name to be stored in the index. 

73 """ 

74 ), 

75 ) 

76) 

77 

78 

79def _get_next_label(label): 

80 dtype = getattr(label, "dtype", type(label)) 

81 if isinstance(label, (Timestamp, Timedelta)): 

82 dtype = "datetime64" 

83 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

84 return label + np.timedelta64(1, "ns") 

85 elif is_integer_dtype(dtype): 

86 return label + 1 

87 elif is_float_dtype(dtype): 

88 return np.nextafter(label, np.infty) 

89 else: 

90 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

91 

92 

93def _get_prev_label(label): 

94 dtype = getattr(label, "dtype", type(label)) 

95 if isinstance(label, (Timestamp, Timedelta)): 

96 dtype = "datetime64" 

97 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

98 return label - np.timedelta64(1, "ns") 

99 elif is_integer_dtype(dtype): 

100 return label - 1 

101 elif is_float_dtype(dtype): 

102 return np.nextafter(label, -np.infty) 

103 else: 

104 raise TypeError(f"cannot determine previous label for type {repr(type(label))}")

105 

106 
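# A minimal sketch of what these helpers return (values shown are
# illustrative, assuming the imports at the top of this module):
# >>> _get_next_label(5)                        # integer labels step by 1
# 6
# >>> _get_prev_label(Timestamp("2020-01-01"))  # datetime-likes step by 1ns
# Timestamp('2019-12-31 23:59:59.999999999')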

107def _new_IntervalIndex(cls, d): 

108 """ 

109 This is called upon unpickling, rather than the default which doesn't have 

110 arguments and breaks __new__. 

111 """ 

112 return cls.from_arrays(**d) 

113 

114 

115class SetopCheck: 

116 """ 

117 This is called to decorate the set operations of IntervalIndex 

118 to perform the type check in advance. 

119 """ 

120 

121 def __init__(self, op_name): 

122 self.op_name = op_name 

123 

124 def __call__(self, setop): 

125 def func(intvidx_self, other, sort=False): 

126 intvidx_self._assert_can_do_setop(other) 

127 other = ensure_index(other) 

128 

129 if not isinstance(other, IntervalIndex): 

130 result = getattr(intvidx_self.astype(object), self.op_name)(other) 

131 if self.op_name in ("difference",): 

132 result = result.astype(intvidx_self.dtype) 

133 return result 

134 elif intvidx_self.closed != other.closed: 

135 raise ValueError( 

136 "can only do set operations between two IntervalIndex " 

137 "objects that are closed on the same side" 

138 ) 

139 

140 # GH 19016: ensure set op will not return a prohibited dtype 

141 subtypes = [intvidx_self.dtype.subtype, other.dtype.subtype] 

142 common_subtype = find_common_type(subtypes) 

143 if is_object_dtype(common_subtype): 

144 raise TypeError( 

145 f"can only do {self.op_name} between two IntervalIndex " 

146 "objects that have compatible dtypes" 

147 ) 

148 

149 return setop(intvidx_self, other, sort) 

150 

151 return func 

152 
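# Rough sketch of how SetopCheck is applied further below (e.g. on
# ``intersection``): the wrapper validates ``other`` (same ``closed``,
# compatible subtype) before the wrapped set operation runs.
# >>> @SetopCheck(op_name="intersection")
# ... def intersection(self, other, sort=False):
# ...     ...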

153 

154@Appender( 

155 _interval_shared_docs["class"] 

156 % dict( 

157 klass="IntervalIndex", 

158 summary="Immutable index of intervals that are closed on the same side.", 

159 name=_index_doc_kwargs["name"], 

160 versionadded="0.20.0", 

161 extra_attributes="is_overlapping\nvalues\n", 

162 extra_methods="", 

163 examples=textwrap.dedent( 

164 """\ 

165 Examples 

166 -------- 

167 A new ``IntervalIndex`` is typically constructed using 

168 :func:`interval_range`: 

169 

170 >>> pd.interval_range(start=0, end=5) 

171 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

172 closed='right', 

173 dtype='interval[int64]') 

174 

175 It may also be constructed using one of the constructor 

176 methods: :meth:`IntervalIndex.from_arrays`, 

177 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. 

178 

179 See further examples in the doc strings of ``interval_range`` and the 

180 mentioned constructor methods. 

181 """ 

182 ), 

183 ) 

184) 

185@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) 

186@inherit_names( 

187 [ 

188 "__len__", 

189 "__array__", 

190 "overlaps", 

191 "contains", 

192 "size", 

193 "dtype", 

194 "left", 

195 "right", 

196 "length", 

197 ], 

198 IntervalArray, 

199) 

200@inherit_names( 

201 ["is_non_overlapping_monotonic", "mid", "_ndarray_values", "closed"], 

202 IntervalArray, 

203 cache=True, 

204) 

205class IntervalIndex(IntervalMixin, ExtensionIndex): 

206 _typ = "intervalindex" 

207 _comparables = ["name"] 

208 _attributes = ["name", "closed"] 

209 

210 # we would like our indexing holder to defer to us 

211 _defer_to_indexing = True 

212 

213 # Immutable, so we are able to cache computations like isna in '_mask' 

214 _mask = None 

215 

216 # -------------------------------------------------------------------- 

217 # Constructors 

218 

219 def __new__( 

220 cls, 

221 data, 

222 closed=None, 

223 dtype=None, 

224 copy: bool = False, 

225 name=None, 

226 verify_integrity: bool = True, 

227 ): 

228 

229 name = maybe_extract_name(name, data, cls) 

230 

231 with rewrite_exception("IntervalArray", cls.__name__): 

232 array = IntervalArray( 

233 data, 

234 closed=closed, 

235 copy=copy, 

236 dtype=dtype, 

237 verify_integrity=verify_integrity, 

238 ) 

239 

240 return cls._simple_new(array, name) 

241 

242 @classmethod 

243 def _simple_new(cls, array, name, closed=None): 

244 """ 

245 Construct from an IntervalArray 

246 

247 Parameters 

248 ---------- 

249 array : IntervalArray 

250 name : str 

251 Attached as result.name 

252 closed : Any 

253 Ignored. 

254 """ 

255 result = IntervalMixin.__new__(cls) 

256 result._data = array 

257 result.name = name 

258 result._no_setting_name = False 

259 result._reset_identity() 

260 return result 

261 

262 @classmethod 

263 @Appender( 

264 _interval_shared_docs["from_breaks"] 

265 % dict( 

266 klass="IntervalIndex", 

267 examples=textwrap.dedent( 

268 """\ 

269 Examples 

270 -------- 

271 >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) 

272 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

273 closed='right', 

274 dtype='interval[int64]') 

275 """ 

276 ), 

277 ) 

278 ) 

279 def from_breaks( 

280 cls, breaks, closed: str = "right", name=None, copy: bool = False, dtype=None 

281 ): 

282 with rewrite_exception("IntervalArray", cls.__name__): 

283 array = IntervalArray.from_breaks( 

284 breaks, closed=closed, copy=copy, dtype=dtype 

285 ) 

286 return cls._simple_new(array, name=name) 

287 

288 @classmethod 

289 @Appender( 

290 _interval_shared_docs["from_arrays"] 

291 % dict( 

292 klass="IntervalIndex", 

293 examples=textwrap.dedent( 

294 """\ 

295 Examples 

296 -------- 

297 >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) 

298 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

299 closed='right', 

300 dtype='interval[int64]') 

301 """ 

302 ), 

303 ) 

304 ) 

305 def from_arrays( 

306 cls, 

307 left, 

308 right, 

309 closed: str = "right", 

310 name=None, 

311 copy: bool = False, 

312 dtype=None, 

313 ): 

314 with rewrite_exception("IntervalArray", cls.__name__): 

315 array = IntervalArray.from_arrays( 

316 left, right, closed, copy=copy, dtype=dtype 

317 ) 

318 return cls._simple_new(array, name=name) 

319 

320 @classmethod 

321 @Appender( 

322 _interval_shared_docs["from_tuples"] 

323 % dict( 

324 klass="IntervalIndex", 

325 examples=textwrap.dedent( 

326 """\ 

327 Examples 

328 -------- 

329 >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) 

330 IntervalIndex([(0, 1], (1, 2]], 

331 closed='right', 

332 dtype='interval[int64]') 

333 """ 

334 ), 

335 ) 

336 ) 

337 def from_tuples( 

338 cls, data, closed: str = "right", name=None, copy: bool = False, dtype=None 

339 ): 

340 with rewrite_exception("IntervalArray", cls.__name__): 

341 arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) 

342 return cls._simple_new(arr, name=name) 

343 

344 # -------------------------------------------------------------------- 

345 

346 @Appender(_index_shared_docs["_shallow_copy"]) 

347 def _shallow_copy(self, left=None, right=None, **kwargs): 

348 result = self._data._shallow_copy(left=left, right=right) 

349 attributes = self._get_attributes_dict() 

350 attributes.update(kwargs) 

351 return self._simple_new(result, **attributes) 

352 

353 @cache_readonly 

354 def _isnan(self): 

355 """ 

356 Return a mask indicating if each value is NA. 

357 """ 

358 if self._mask is None: 

359 self._mask = isna(self.left) 

360 return self._mask 

361 

362 @cache_readonly 

363 def _engine(self): 

364 left = self._maybe_convert_i8(self.left) 

365 right = self._maybe_convert_i8(self.right) 

366 return IntervalTree(left, right, closed=self.closed) 

367 

368 def __contains__(self, key) -> bool: 

369 """ 

370 Return a boolean indicating whether the key is in the index.

371 Only Interval objects are accepted.

372 

373 Parameters 

374 ---------- 

375 key : Interval 

376 

377 Returns 

378 ------- 

379 bool 

380 """ 

381 if not isinstance(key, Interval): 

382 return False 

383 

384 try: 

385 self.get_loc(key) 

386 return True 

387 except KeyError: 

388 return False 

389 

390 @cache_readonly 

391 def _multiindex(self): 

392 return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) 

393 

394 @cache_readonly 

395 def values(self): 

396 """ 

397 Return the IntervalIndex's data as an IntervalArray. 

398 """ 

399 return self._data 

400 

401 @cache_readonly 

402 def _values(self): 

403 return self._data 

404 

405 @property 

406 def _has_complex_internals(self): 

407 # used to avoid libreduction code paths, which raise or require conversion 

408 return True 

409 

410 def __array_wrap__(self, result, context=None): 

411 # we don't want the superclass implementation 

412 return result 

413 

414 def __reduce__(self): 

415 d = dict(left=self.left, right=self.right) 

416 d.update(self._get_attributes_dict()) 

417 return _new_IntervalIndex, (type(self), d), None 

418 

419 @Appender(_index_shared_docs["astype"]) 

420 def astype(self, dtype, copy=True): 

421 with rewrite_exception("IntervalArray", type(self).__name__): 

422 new_values = self.values.astype(dtype, copy=copy) 

423 if is_interval_dtype(new_values): 

424 return self._shallow_copy(new_values.left, new_values.right) 

425 return Index.astype(self, dtype, copy=copy) 

426 
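# Illustrative sketch, assuming pandas is imported as pd: casting to another
# interval subtype stays an IntervalIndex, other dtypes fall back to
# Index.astype.
# >>> pd.interval_range(0, 2).astype("interval[float64]")
# IntervalIndex([(0.0, 1.0], (1.0, 2.0]],
#               closed='right',
#               dtype='interval[float64]')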

427 @property 

428 def inferred_type(self) -> str: 

429 """Return a string of the type inferred from the values""" 

430 return "interval" 

431 

432 @Appender(Index.memory_usage.__doc__) 

433 def memory_usage(self, deep: bool = False) -> int: 

434 # we don't use an explicit engine 

435 # so return the bytes here 

436 return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) 

437 

438 @cache_readonly 

439 def is_monotonic(self) -> bool: 

440 """ 

441 Return True if the IntervalIndex is monotonic increasing (only equal or 

442 increasing values), else False 

443 """ 

444 return self.is_monotonic_increasing 

445 

446 @cache_readonly 

447 def is_monotonic_increasing(self) -> bool: 

448 """ 

449 Return True if the IntervalIndex is monotonic increasing (only equal or 

450 increasing values), else False 

451 """ 

452 return self._engine.is_monotonic_increasing 

453 

454 @cache_readonly 

455 def is_monotonic_decreasing(self) -> bool: 

456 """ 

457 Return True if the IntervalIndex is monotonic decreasing (only equal or 

458 decreasing values), else False 

459 """ 

460 return self[::-1].is_monotonic_increasing 

461 

462 @cache_readonly 

463 def is_unique(self): 

464 """ 

465 Return True if the IntervalIndex contains unique elements, else False. 

466 """ 

467 left = self.left 

468 right = self.right 

469 

470 if self.isna().sum() > 1: 

471 return False 

472 

473 if left.is_unique or right.is_unique: 

474 return True 

475 

476 seen_pairs = set() 

477 check_idx = np.where(left.duplicated(keep=False))[0] 

478 for idx in check_idx: 

479 pair = (left[idx], right[idx]) 

480 if pair in seen_pairs: 

481 return False 

482 seen_pairs.add(pair) 

483 

484 return True 

485 
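# Illustrative sketch, assuming pandas is imported as pd: a duplicate requires
# the full (left, right) pair to repeat; a shared endpoint alone is fine.
# >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 2)]).is_unique
# True
# >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 2), (0, 1)]).is_unique
# False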

486 @property 

487 def is_overlapping(self): 

488 """ 

489 Return True if the IntervalIndex has overlapping intervals, else False. 

490 

491 Two intervals overlap if they share a common point, including closed 

492 endpoints. Intervals that only have an open endpoint in common do not 

493 overlap. 

494 

495 .. versionadded:: 0.24.0 

496 

497 Returns 

498 ------- 

499 bool 

500 Boolean indicating if the IntervalIndex has overlapping intervals. 

501 

502 See Also 

503 -------- 

504 Interval.overlaps : Check whether two Interval objects overlap. 

505 IntervalIndex.overlaps : Check an IntervalIndex elementwise for 

506 overlaps. 

507 

508 Examples 

509 -------- 

510 >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)]) 

511 >>> index 

512 IntervalIndex([(0, 2], (1, 3], (4, 5]], 

513 closed='right', 

514 dtype='interval[int64]') 

515 >>> index.is_overlapping 

516 True 

517 

518 Intervals that share closed endpoints overlap: 

519 

520 >>> index = pd.interval_range(0, 3, closed='both') 

521 >>> index 

522 IntervalIndex([[0, 1], [1, 2], [2, 3]], 

523 closed='both', 

524 dtype='interval[int64]') 

525 >>> index.is_overlapping 

526 True 

527 

528 Intervals that only have an open endpoint in common do not overlap: 

529 

530 >>> index = pd.interval_range(0, 3, closed='left') 

531 >>> index 

532 IntervalIndex([[0, 1), [1, 2), [2, 3)], 

533 closed='left', 

534 dtype='interval[int64]') 

535 >>> index.is_overlapping 

536 False 

537 """ 

538 # GH 23309 

539 return self._engine.is_overlapping 

540 

541 @Appender(_index_shared_docs["_convert_scalar_indexer"]) 

542 def _convert_scalar_indexer(self, key, kind=None): 

543 if kind == "iloc": 

544 return super()._convert_scalar_indexer(key, kind=kind) 

545 return key 

546 

547 def _maybe_cast_slice_bound(self, label, side, kind): 

548 return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) 

549 

550 @Appender(_index_shared_docs["_convert_list_indexer"]) 

551 def _convert_list_indexer(self, keyarr, kind=None): 

552 """ 

553 we are passed a list-like indexer. Return the 

554 indexer for matching intervals. 

555 """ 

556 locs = self.get_indexer_for(keyarr) 

557 

558 # we have missing values 

559 if (locs == -1).any(): 

560 raise KeyError 

561 

562 return locs 

563 

564 def _can_reindex(self, indexer: np.ndarray) -> None: 

565 """ 

566 Check if we are allowing reindexing with this particular indexer. 

567 

568 Parameters 

569 ---------- 

570 indexer : an integer indexer 

571 

572 Raises 

573 ------ 

574 ValueError if the axis is overlapping

575 """ 

576 

577 # trying to reindex on an axis with duplicates 

578 if self.is_overlapping and len(indexer): 

579 raise ValueError("cannot reindex from an overlapping axis") 

580 

581 def _needs_i8_conversion(self, key): 

582 """ 

583 Check if a given key needs i8 conversion. Conversion is necessary for 

584 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An 

585 Interval-like requires conversion if its endpoints are one of the

586 aforementioned types. 

587 

588 Assumes that any list-like data has already been cast to an Index. 

589 

590 Parameters 

591 ---------- 

592 key : scalar or Index-like 

593 The key that should be checked for i8 conversion 

594 

595 Returns 

596 ------- 

597 bool 

598 """ 

599 if is_interval_dtype(key) or isinstance(key, Interval): 

600 return self._needs_i8_conversion(key.left) 

601 

602 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex) 

603 return isinstance(key, i8_types) 

604 
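# Minimal sketch, assuming pandas is imported as pd: datetime/timedelta-like
# keys need i8 conversion before the IntervalTree engine, numeric keys do not.
# >>> idx = pd.interval_range(pd.Timestamp("2020-01-01"), periods=3)
# >>> idx._needs_i8_conversion(pd.Timestamp("2020-01-02"))
# True
# >>> pd.interval_range(0, 3)._needs_i8_conversion(1.5)
# False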

605 def _maybe_convert_i8(self, key): 

606 """ 

607 Maybe convert a given key to its equivalent i8 value(s). Used as a

608 preprocessing step prior to IntervalTree queries (self._engine), which 

609 expects numeric data. 

610 

611 Parameters 

612 ---------- 

613 key : scalar or list-like 

614 The key that should maybe be converted to i8. 

615 

616 Returns 

617 ------- 

618 scalar or list-like 

619 The original key if no conversion occurred, int if converted scalar, 

620 Int64Index if converted list-like. 

621 """ 

622 original = key 

623 if is_list_like(key): 

624 key = ensure_index(key) 

625 

626 if not self._needs_i8_conversion(key): 

627 return original 

628 

629 scalar = is_scalar(key) 

630 if is_interval_dtype(key) or isinstance(key, Interval): 

631 # convert left/right and reconstruct 

632 left = self._maybe_convert_i8(key.left) 

633 right = self._maybe_convert_i8(key.right) 

634 constructor = Interval if scalar else IntervalIndex.from_arrays 

635 return constructor(left, right, closed=self.closed) 

636 

637 if scalar: 

638 # Timestamp/Timedelta 

639 key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) 

640 else: 

641 # DatetimeIndex/TimedeltaIndex 

642 key_dtype, key_i8 = key.dtype, Index(key.asi8) 

643 if key.hasnans: 

644 # convert NaT from its i8 value to np.nan so it's not viewed

645 # as a valid value, maybe causing errors (e.g. is_overlapping) 

646 key_i8 = key_i8.where(~key._isnan) 

647 

648 # ensure consistency with IntervalIndex subtype 

649 subtype = self.dtype.subtype 

650 

651 if not is_dtype_equal(subtype, key_dtype): 

652 raise ValueError( 

653 f"Cannot index an IntervalIndex of subtype {subtype} with " 

654 f"values of dtype {key_dtype}" 

655 ) 

656 

657 return key_i8 

658 
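# Rough sketch, assuming pandas is imported as pd: a Timestamp key queried
# against a datetime64[ns] subtype comes back as its nanosecond integer.
# >>> idx = pd.interval_range(pd.Timestamp("2020-01-01"), periods=2)
# >>> idx._maybe_convert_i8(pd.Timestamp("2020-01-01 12:00"))
# 1577880000000000000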

659 def _check_method(self, method): 

660 if method is None: 

661 return 

662 

663 if method in ["bfill", "backfill", "pad", "ffill", "nearest"]: 

664 raise NotImplementedError( 

665 f"method {method} not yet implemented for IntervalIndex" 

666 ) 

667 

668 raise ValueError("Invalid fill method") 

669 

670 def _searchsorted_monotonic(self, label, side, exclude_label=False): 

671 if not self.is_non_overlapping_monotonic: 

672 raise KeyError( 

673 "can only get slices from an IntervalIndex if bounds are " 

674 "non-overlapping and all monotonic increasing or decreasing" 

675 ) 

676 

677 if isinstance(label, IntervalMixin): 

678 raise NotImplementedError("Interval objects are not currently supported") 

679 

680 # GH 20921: "not is_monotonic_increasing" for the second condition 

681 # instead of "is_monotonic_decreasing" to account for single element 

682 # indexes being both increasing and decreasing 

683 if (side == "left" and self.left.is_monotonic_increasing) or ( 

684 side == "right" and not self.left.is_monotonic_increasing 

685 ): 

686 sub_idx = self.right 

687 if self.open_right or exclude_label: 

688 label = _get_next_label(label) 

689 else: 

690 sub_idx = self.left 

691 if self.open_left or exclude_label: 

692 label = _get_prev_label(label) 

693 

694 return sub_idx._searchsorted_monotonic(label, side) 

695 

696 def get_loc( 

697 self, key: Any, method: Optional[str] = None, tolerance=None 

698 ) -> Union[int, slice, np.ndarray]: 

699 """ 

700 Get integer location, slice or boolean mask for requested label. 

701 

702 Parameters 

703 ---------- 

704 key : label 

705 method : {None}, optional 

706 * default: matches where the label is within an interval only. 

707 

708 Returns 

709 ------- 

710 int if unique index, slice if monotonic index, else mask 

711 

712 Examples 

713 -------- 

714 >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) 

715 >>> index = pd.IntervalIndex([i1, i2]) 

716 >>> index.get_loc(1) 

717 0 

718 

719 You can also supply a point inside an interval. 

720 

721 >>> index.get_loc(1.5) 

722 1 

723 

724 If a label is in several intervals, you get the locations of all the 

725 relevant intervals. 

726 

727 >>> i3 = pd.Interval(0, 2) 

728 >>> overlapping_index = pd.IntervalIndex([i1, i2, i3]) 

729 >>> overlapping_index.get_loc(0.5) 

730 array([ True, False, True]) 

731 

732 Only exact matches will be returned if an interval is provided. 

733 

734 >>> index.get_loc(pd.Interval(0, 1)) 

735 0 

736 """ 

737 self._check_method(method) 

738 

739 # list-like are invalid labels for II but in some cases may work, e.g 

740 # single element array of comparable type, so guard against them early 

741 if is_list_like(key): 

742 raise KeyError(key) 

743 

744 if isinstance(key, Interval): 

745 if self.closed != key.closed: 

746 raise KeyError(key) 

747 mask = (self.left == key.left) & (self.right == key.right) 

748 else: 

749 # assume scalar 

750 op_left = le if self.closed_left else lt 

751 op_right = le if self.closed_right else lt 

752 try: 

753 mask = op_left(self.left, key) & op_right(key, self.right) 

754 except TypeError: 

755 # scalar is not comparable to II subtype --> invalid label 

756 raise KeyError(key) 

757 

758 matches = mask.sum() 

759 if matches == 0: 

760 raise KeyError(key) 

761 elif matches == 1: 

762 return mask.argmax() 

763 return lib.maybe_booleans_to_slice(mask.view("u1")) 

764 

765 @Substitution( 

766 **dict( 

767 _index_doc_kwargs, 

768 **{ 

769 "raises_section": textwrap.dedent( 

770 """ 

771 Raises 

772 ------ 

773 NotImplementedError 

774 If any method argument other than the default of 

775 None is specified as these are not yet implemented. 

776 """ 

777 ) 

778 }, 

779 ) 

780 ) 

781 @Appender(_index_shared_docs["get_indexer"]) 

782 def get_indexer( 

783 self, 

784 target: AnyArrayLike, 

785 method: Optional[str] = None, 

786 limit: Optional[int] = None, 

787 tolerance: Optional[Any] = None, 

788 ) -> np.ndarray: 

789 

790 self._check_method(method) 

791 

792 if self.is_overlapping: 

793 raise InvalidIndexError( 

794 "cannot handle overlapping indices; " 

795 "use IntervalIndex.get_indexer_non_unique" 

796 ) 

797 

798 target_as_index = ensure_index(target) 

799 

800 if isinstance(target_as_index, IntervalIndex): 

801 # equal indexes -> 1:1 positional match 

802 if self.equals(target_as_index): 

803 return np.arange(len(self), dtype="intp") 

804 

805 # different closed or incompatible subtype -> no matches 

806 common_subtype = find_common_type( 

807 [self.dtype.subtype, target_as_index.dtype.subtype] 

808 ) 

809 if self.closed != target_as_index.closed or is_object_dtype(common_subtype): 

810 return np.repeat(np.intp(-1), len(target_as_index)) 

811 

812 # non-overlapping -> at most one match per interval in target_as_index 

813 # want exact matches -> need both left/right to match, so defer to 

814 # left/right get_indexer, compare elementwise, equality -> match 

815 left_indexer = self.left.get_indexer(target_as_index.left) 

816 right_indexer = self.right.get_indexer(target_as_index.right) 

817 indexer = np.where(left_indexer == right_indexer, left_indexer, -1) 

818 elif is_categorical(target_as_index): 

819 # get an indexer for unique categories then propagate to codes via take_1d 

820 categories_indexer = self.get_indexer(target_as_index.categories) 

821 indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1) 

822 elif not is_object_dtype(target_as_index): 

823 # homogeneous scalar index: use IntervalTree 

824 target_as_index = self._maybe_convert_i8(target_as_index) 

825 indexer = self._engine.get_indexer(target_as_index.values) 

826 else: 

827 # heterogeneous scalar index: defer elementwise to get_loc 

828 # (non-overlapping so get_loc guarantees scalar or KeyError)

829 indexer = [] 

830 for key in target_as_index: 

831 try: 

832 loc = self.get_loc(key) 

833 except KeyError: 

834 loc = -1 

835 indexer.append(loc) 

836 

837 return ensure_platform_int(indexer) 

838 
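# Illustrative sketch for a non-overlapping index, assuming pandas is imported
# as pd: unmatched targets map to -1.
# >>> idx = pd.interval_range(0, 3)       # (0, 1], (1, 2], (2, 3]
# >>> idx.get_indexer([0.5, 1.5, 3.5])
# array([ 0,  1, -1])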

839 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) 

840 def get_indexer_non_unique( 

841 self, target: AnyArrayLike 

842 ) -> Tuple[np.ndarray, np.ndarray]: 

843 target_as_index = ensure_index(target) 

844 

845 # check that target_as_index IntervalIndex is compatible 

846 if isinstance(target_as_index, IntervalIndex): 

847 common_subtype = find_common_type( 

848 [self.dtype.subtype, target_as_index.dtype.subtype] 

849 ) 

850 if self.closed != target_as_index.closed or is_object_dtype(common_subtype): 

851 # different closed or incompatible subtype -> no matches 

852 return ( 

853 np.repeat(-1, len(target_as_index)), 

854 np.arange(len(target_as_index)), 

855 ) 

856 

857 if is_object_dtype(target_as_index) or isinstance( 

858 target_as_index, IntervalIndex 

859 ): 

860 # target_as_index might contain intervals: defer elementwise to get_loc 

861 indexer, missing = [], [] 

862 for i, key in enumerate(target_as_index): 

863 try: 

864 locs = self.get_loc(key) 

865 if isinstance(locs, slice): 

866 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") 

867 locs = np.array(locs, ndmin=1) 

868 except KeyError: 

869 missing.append(i) 

870 locs = np.array([-1]) 

871 indexer.append(locs) 

872 indexer = np.concatenate(indexer) 

873 else: 

874 target_as_index = self._maybe_convert_i8(target_as_index) 

875 indexer, missing = self._engine.get_indexer_non_unique( 

876 target_as_index.values 

877 ) 

878 

879 return ensure_platform_int(indexer), ensure_platform_int(missing) 

880 

881 def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray: 

882 """ 

883 Guaranteed return of an indexer even when overlapping. 

884 

885 This dispatches to get_indexer or get_indexer_non_unique 

886 as appropriate. 

887 

888 Returns 

889 ------- 

890 numpy.ndarray 

891 List of indices. 

892 """ 

893 if self.is_overlapping: 

894 return self.get_indexer_non_unique(target)[0] 

895 return self.get_indexer(target, **kwargs) 

896 
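# Sketch of the dispatch above with an overlapping index, assuming pandas is
# imported as pd: 1.5 falls in both intervals, so both positions come back
# (via get_indexer_non_unique).
# >>> pd.IntervalIndex.from_tuples([(0, 2), (1, 3)]).get_indexer_for([1.5])
# array([0, 1])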

897 @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs) 

898 def get_value(self, series: ABCSeries, key: Any) -> Any: 

899 

900 if com.is_bool_indexer(key): 

901 loc = key 

902 elif is_list_like(key): 

903 if self.is_overlapping: 

904 loc, missing = self.get_indexer_non_unique(key) 

905 if len(missing): 

906 raise KeyError 

907 else: 

908 loc = self.get_indexer(key) 

909 elif isinstance(key, slice): 

910 if not (key.step is None or key.step == 1): 

911 raise ValueError("cannot support not-default step in a slice") 

912 loc = self._convert_slice_indexer(key, kind="getitem") 

913 else: 

914 loc = self.get_loc(key) 

915 return series.iloc[loc] 

916 

917 @Appender(_index_shared_docs["where"]) 

918 def where(self, cond, other=None): 

919 if other is None: 

920 other = self._na_value 

921 values = np.where(cond, self.values, other) 

922 return self._shallow_copy(values) 

923 

924 def delete(self, loc): 

925 """ 

926 Return a new IntervalIndex with the passed location(s) deleted.

927 

928 Returns 

929 ------- 

930 IntervalIndex 

931 """ 

932 new_left = self.left.delete(loc) 

933 new_right = self.right.delete(loc) 

934 return self._shallow_copy(new_left, new_right) 

935 

936 def insert(self, loc, item): 

937 """ 

938 Return a new IntervalIndex inserting new item at location. Follows 

939 Python list.append semantics for negative values. Only Interval 

940 objects and NA can be inserted into an IntervalIndex 

941 

942 Parameters 

943 ---------- 

944 loc : int 

945 item : object 

946 

947 Returns 

948 ------- 

949 IntervalIndex 

950 """ 

951 if isinstance(item, Interval): 

952 if item.closed != self.closed: 

953 raise ValueError( 

954 "inserted item must be closed on the same side as the index" 

955 ) 

956 left_insert = item.left 

957 right_insert = item.right 

958 elif is_scalar(item) and isna(item): 

959 # GH 18295 

960 left_insert = right_insert = item 

961 else: 

962 raise ValueError( 

963 "can only insert Interval objects and NA into an IntervalIndex" 

964 ) 

965 

966 new_left = self.left.insert(loc, left_insert) 

967 new_right = self.right.insert(loc, right_insert) 

968 return self._shallow_copy(new_left, new_right) 

969 
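# Minimal sketch, assuming pandas is imported as pd: only an Interval closed
# on the same side (or NA) may be inserted.
# >>> idx = pd.interval_range(0, 2)
# >>> idx.insert(1, pd.Interval(5, 6))
# IntervalIndex([(0, 1], (5, 6], (1, 2]],
#               closed='right',
#               dtype='interval[int64]')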

970 def _concat_same_dtype(self, to_concat, name): 

971 """ 

972 Assert that all indexes have the same .closed attribute;

973 zero-length indexes are allowed as well.

974 """ 

975 if not len({i.closed for i in to_concat if len(i)}) == 1: 

976 raise ValueError( 

977 "can only append two IntervalIndex objects " 

978 "that are closed on the same side" 

979 ) 

980 return super()._concat_same_dtype(to_concat, name) 

981 

982 @Appender(_index_shared_docs["take"] % _index_doc_kwargs) 

983 def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): 

984 result = self._data.take( 

985 indices, axis=axis, allow_fill=allow_fill, fill_value=fill_value, **kwargs 

986 ) 

987 return self._shallow_copy(result) 

988 

989 def __getitem__(self, value): 

990 result = self._data[value] 

991 if isinstance(result, IntervalArray): 

992 return self._shallow_copy(result) 

993 else: 

994 # scalar 

995 return result 

996 

997 # -------------------------------------------------------------------- 

998 # Rendering Methods 

999 # __repr__ associated methods are based on MultiIndex 

1000 

1001 def _format_with_header(self, header, **kwargs): 

1002 return header + list(self._format_native_types(**kwargs)) 

1003 

1004 def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): 

1005 # GH 28210: use base method but with different default na_rep 

1006 return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) 

1007 

1008 def _format_data(self, name=None): 

1009 

1010 # TODO: integrate with categorical and make generic 

1011 # name argument is unused here; just for compat with base / categorical 

1012 n = len(self) 

1013 max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) 

1014 

1015 formatter = str 

1016 

1017 if n == 0: 

1018 summary = "[]" 

1019 elif n == 1: 

1020 first = formatter(self[0]) 

1021 summary = f"[{first}]" 

1022 elif n == 2: 

1023 first = formatter(self[0]) 

1024 last = formatter(self[-1]) 

1025 summary = f"[{first}, {last}]" 

1026 else: 

1027 

1028 if n > max_seq_items: 

1029 n = min(max_seq_items // 2, 10) 

1030 head = [formatter(x) for x in self[:n]] 

1031 tail = [formatter(x) for x in self[-n:]] 

1032 head_joined = ", ".join(head) 

1033 tail_joined = ", ".join(tail) 

1034 summary = f"[{head_joined} ... {tail_joined}]" 

1035 else: 

1036 tail = [formatter(x) for x in self] 

1037 joined = ", ".join(tail) 

1038 summary = f"[{joined}]" 

1039 

1040 return summary + "," + self._format_space() 

1041 

1042 def _format_attrs(self): 

1043 attrs = [("closed", repr(self.closed))] 

1044 if self.name is not None: 

1045 attrs.append(("name", default_pprint(self.name))) 

1046 attrs.append(("dtype", f"'{self.dtype}'")) 

1047 return attrs 

1048 

1049 def _format_space(self) -> str: 

1050 space = " " * (len(type(self).__name__) + 1) 

1051 return f"\n{space}" 

1052 

1053 # -------------------------------------------------------------------- 

1054 

1055 def argsort(self, *args, **kwargs): 

1056 return np.lexsort((self.right, self.left)) 

1057 
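# Sketch, assuming pandas is imported as pd: lexsort orders primarily by the
# left endpoint, then by the right.
# >>> pd.IntervalIndex.from_tuples([(1, 3), (0, 2), (0, 1)]).argsort()
# array([2, 1, 0])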

1058 def equals(self, other) -> bool: 

1059 """ 

1060 Determines if two IntervalIndex objects contain the same elements. 

1061 """ 

1062 if self.is_(other): 

1063 return True 

1064 

1065 # if we can coerce to an II 

1066 # then we can compare 

1067 if not isinstance(other, IntervalIndex): 

1068 if not is_interval_dtype(other): 

1069 return False 

1070 other = Index(getattr(other, "values", other))

1071 

1072 return ( 

1073 self.left.equals(other.left) 

1074 and self.right.equals(other.right) 

1075 and self.closed == other.closed 

1076 ) 

1077 

1078 @Appender(_index_shared_docs["intersection"]) 

1079 @SetopCheck(op_name="intersection") 

1080 def intersection( 

1081 self, other: "IntervalIndex", sort: bool = False 

1082 ) -> "IntervalIndex": 

1083 if self.left.is_unique and self.right.is_unique: 

1084 taken = self._intersection_unique(other) 

1085 elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1: 

1086 # Swap other/self if other is unique and self does not have 

1087 # multiple NaNs 

1088 taken = other._intersection_unique(self) 

1089 else: 

1090 # duplicates 

1091 taken = self._intersection_non_unique(other) 

1092 

1093 if sort is None: 

1094 taken = taken.sort_values() 

1095 

1096 return taken 

1097 
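# Rough sketch, assuming pandas is imported as pd and both indexes are closed
# on the same side:
# >>> a = pd.IntervalIndex.from_tuples([(0, 1), (1, 2), (2, 3)])
# >>> b = pd.IntervalIndex.from_tuples([(1, 2), (3, 4)])
# >>> a.intersection(b)
# IntervalIndex([(1, 2]],
#               closed='right',
#               dtype='interval[int64]')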

1098 def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex": 

1099 """ 

1100 Used when the IntervalIndex has no duplicate left or right

1101 endpoints.

1102 Return the intersection with another IntervalIndex.

1103 

1104 Parameters 

1105 ---------- 

1106 other : IntervalIndex 

1107 

1108 Returns 

1109 ------- 

1110 IntervalIndex 

1111 """ 

1112 lindexer = self.left.get_indexer(other.left) 

1113 rindexer = self.right.get_indexer(other.right) 

1114 

1115 match = (lindexer == rindexer) & (lindexer != -1) 

1116 indexer = lindexer.take(match.nonzero()[0]) 

1117 

1118 return self.take(indexer) 

1119 

1120 def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": 

1121 """ 

1122 Used when the IntervalIndex has duplicate endpoints on the left

1123 and/or right side.

1124 Return the intersection with another IntervalIndex. 

1125 

1126 Parameters 

1127 ---------- 

1128 other : IntervalIndex 

1129 

1130 Returns 

1131 ------- 

1132 IntervalIndex 

1133 """ 

1134 mask = np.zeros(len(self), dtype=bool) 

1135 

1136 if self.hasnans and other.hasnans: 

1137 first_nan_loc = np.arange(len(self))[self.isna()][0] 

1138 mask[first_nan_loc] = True 

1139 

1140 other_tups = set(zip(other.left, other.right)) 

1141 for i, tup in enumerate(zip(self.left, self.right)): 

1142 if tup in other_tups: 

1143 mask[i] = True 

1144 

1145 return self[mask] 

1146 

1147 def _setop(op_name: str, sort=None): 

1148 @SetopCheck(op_name=op_name) 

1149 def func(self, other, sort=sort): 

1150 result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort) 

1151 result_name = get_op_result_name(self, other) 

1152 

1153 # GH 19101: ensure empty results have correct dtype 

1154 if result.empty: 

1155 result = result.values.astype(self.dtype.subtype) 

1156 else: 

1157 result = result.values 

1158 

1159 return type(self).from_tuples(result, closed=self.closed, name=result_name) 

1160 

1161 return func 

1162 
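# Sketch of a set operation built by _setop (union/difference/
# symmetric_difference are assigned below), assuming pandas is imported as pd:
# >>> a = pd.interval_range(0, 2)
# >>> b = pd.interval_range(1, 3)
# >>> a.union(b)
# IntervalIndex([(0, 1], (1, 2], (2, 3]],
#               closed='right',
#               dtype='interval[int64]')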

1163 @property 

1164 def is_all_dates(self) -> bool: 

1165 """ 

1166 This is False even when left/right contain datetime-like objects, 

1167 as the check is done on the Interval itself 

1168 """ 

1169 return False 

1170 

1171 union = _setop("union") 

1172 difference = _setop("difference") 

1173 symmetric_difference = _setop("symmetric_difference") 

1174 

1175 # TODO: arithmetic operations 

1176 

1177 # GH#30817 until IntervalArray implements inequalities, get them from Index 

1178 def __lt__(self, other): 

1179 return Index.__lt__(self, other) 

1180 

1181 def __le__(self, other): 

1182 return Index.__le__(self, other) 

1183 

1184 def __gt__(self, other): 

1185 return Index.__gt__(self, other) 

1186 

1187 def __ge__(self, other): 

1188 return Index.__ge__(self, other) 

1189 

1190 

1191IntervalIndex._add_logical_methods_disabled() 

1192 

1193 

1194def _is_valid_endpoint(endpoint) -> bool: 

1195 """ 

1196 Helper for interval_range to check if start/end are valid types. 

1197 """ 

1198 return any( 

1199 [ 

1200 is_number(endpoint), 

1201 isinstance(endpoint, Timestamp), 

1202 isinstance(endpoint, Timedelta), 

1203 endpoint is None, 

1204 ] 

1205 ) 

1206 

1207 

1208def _is_type_compatible(a, b) -> bool: 

1209 """ 

1210 Helper for interval_range to check type compat of start/end/freq. 

1211 """ 

1212 is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) 

1213 is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) 

1214 return ( 

1215 (is_number(a) and is_number(b)) 

1216 or (is_ts_compat(a) and is_ts_compat(b)) 

1217 or (is_td_compat(a) and is_td_compat(b)) 

1218 or com.any_none(a, b) 

1219 ) 

1220 

1221 

1222def interval_range( 

1223 start=None, end=None, periods=None, freq=None, name=None, closed="right" 

1224): 

1225 """ 

1226 Return a fixed frequency IntervalIndex. 

1227 

1228 Parameters 

1229 ---------- 

1230 start : numeric or datetime-like, default None 

1231 Left bound for generating intervals. 

1232 end : numeric or datetime-like, default None 

1233 Right bound for generating intervals. 

1234 periods : int, default None 

1235 Number of periods to generate. 

1236 freq : numeric, str, or DateOffset, default None 

1237 The length of each interval. Must be consistent with the type of start 

1238 and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 

1239 for numeric and 'D' for datetime-like. 

1240 name : str, default None 

1241 Name of the resulting IntervalIndex. 

1242 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

1243 Whether the intervals are closed on the left-side, right-side, both 

1244 or neither. 

1245 

1246 Returns 

1247 ------- 

1248 IntervalIndex 

1249 

1250 See Also 

1251 -------- 

1252 IntervalIndex : An Index of intervals that are all closed on the same side. 

1253 

1254 Notes 

1255 ----- 

1256 Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, 

1257 exactly three must be specified. If ``freq`` is omitted, the resulting 

1258 ``IntervalIndex`` will have ``periods`` linearly spaced elements between 

1259 ``start`` and ``end``, inclusively. 

1260 

1261 To learn more about datetime-like frequency strings, please see `this link 

1262 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

1263 

1264 Examples 

1265 -------- 

1266 Numeric ``start`` and ``end`` are supported.

1267 

1268 >>> pd.interval_range(start=0, end=5) 

1269 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

1270 closed='right', dtype='interval[int64]') 

1271 

1272 Additionally, datetime-like input is also supported. 

1273 

1274 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1275 ... end=pd.Timestamp('2017-01-04')) 

1276 IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], 

1277 (2017-01-03, 2017-01-04]], 

1278 closed='right', dtype='interval[datetime64[ns]]') 

1279 

1280 The ``freq`` parameter specifies the frequency between the left and right

1281 endpoints of the individual intervals within the ``IntervalIndex``. For 

1282 numeric ``start`` and ``end``, the frequency must also be numeric. 

1283 

1284 >>> pd.interval_range(start=0, periods=4, freq=1.5) 

1285 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1286 closed='right', dtype='interval[float64]') 

1287 

1288 Similarly, for datetime-like ``start`` and ``end``, the frequency must be 

1289 convertible to a DateOffset. 

1290 

1291 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1292 ... periods=3, freq='MS') 

1293 IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], 

1294 (2017-03-01, 2017-04-01]], 

1295 closed='right', dtype='interval[datetime64[ns]]') 

1296 

1297 Specify ``start``, ``end``, and ``periods``; the frequency is generated 

1298 automatically (linearly spaced). 

1299 

1300 >>> pd.interval_range(start=0, end=6, periods=4) 

1301 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1302 closed='right', 

1303 dtype='interval[float64]') 

1304 

1305 The ``closed`` parameter specifies which endpoints of the individual 

1306 intervals within the ``IntervalIndex`` are closed. 

1307 

1308 >>> pd.interval_range(end=5, periods=4, closed='both') 

1309 IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], 

1310 closed='both', dtype='interval[int64]') 

1311 """ 

1312 start = com.maybe_box_datetimelike(start) 

1313 end = com.maybe_box_datetimelike(end) 

1314 endpoint = start if start is not None else end 

1315 

1316 if freq is None and com.any_none(periods, start, end): 

1317 freq = 1 if is_number(endpoint) else "D" 

1318 

1319 if com.count_not_none(start, end, periods, freq) != 3: 

1320 raise ValueError( 

1321 "Of the four parameters: start, end, periods, and " 

1322 "freq, exactly three must be specified" 

1323 ) 

1324 

1325 if not _is_valid_endpoint(start): 

1326 raise ValueError(f"start must be numeric or datetime-like, got {start}") 

1327 elif not _is_valid_endpoint(end): 

1328 raise ValueError(f"end must be numeric or datetime-like, got {end}") 

1329 

1330 if is_float(periods): 

1331 periods = int(periods) 

1332 elif not is_integer(periods) and periods is not None: 

1333 raise TypeError(f"periods must be a number, got {periods}") 

1334 

1335 if freq is not None and not is_number(freq): 

1336 try: 

1337 freq = to_offset(freq) 

1338 except ValueError: 

1339 raise ValueError( 

1340 f"freq must be numeric or convertible to DateOffset, got {freq}" 

1341 ) 

1342 

1343 # verify type compatibility 

1344 if not all( 

1345 [ 

1346 _is_type_compatible(start, end), 

1347 _is_type_compatible(start, freq), 

1348 _is_type_compatible(end, freq), 

1349 ] 

1350 ): 

1351 raise TypeError("start, end, freq need to be type compatible") 

1352 

1353 # +1 to convert interval count to breaks count (n breaks = n-1 intervals) 

1354 if periods is not None: 

1355 periods += 1 

1356 

1357 if is_number(endpoint): 

1358 # force consistency between start/end/freq (lower end if freq skips it) 

1359 if com.all_not_none(start, end, freq): 

1360 end -= (end - start) % freq 

1361 

1362 # compute the period/start/end if unspecified (at most one) 

1363 if periods is None: 

1364 periods = int((end - start) // freq) + 1 

1365 elif start is None: 

1366 start = end - (periods - 1) * freq 

1367 elif end is None: 

1368 end = start + (periods - 1) * freq 

1369 

1370 breaks = np.linspace(start, end, periods) 

1371 if all(is_integer(x) for x in com.not_none(start, end, freq)): 

1372 # np.linspace always produces float output 

1373 breaks = maybe_downcast_to_dtype(breaks, "int64") 

1374 else: 

1375 # delegate to the appropriate range function 

1376 if isinstance(endpoint, Timestamp): 

1377 range_func = date_range 

1378 else: 

1379 range_func = timedelta_range 

1380 

1381 breaks = range_func(start=start, end=end, periods=periods, freq=freq) 

1382 

1383 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)