Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py : 19%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import date, datetime, time, timedelta, tzinfo
2import operator
3from typing import Optional
4import warnings
6import numpy as np
8from pandas._libs import NaT, Timestamp, index as libindex, lib, tslib as libts
9from pandas._libs.tslibs import ccalendar, fields, parsing, timezones
10from pandas.util._decorators import Appender, Substitution, cache_readonly
12from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
13from pandas.core.dtypes.dtypes import DatetimeTZDtype
14from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
16from pandas.core.accessor import delegate_names
17from pandas.core.arrays.datetimes import (
18 DatetimeArray,
19 tz_to_dtype,
20 validate_tz_from_dtype,
21)
22from pandas.core.base import _shared_docs
23import pandas.core.common as com
24from pandas.core.indexes.base import Index, maybe_extract_name
25from pandas.core.indexes.datetimelike import (
26 DatetimelikeDelegateMixin,
27 DatetimeTimedeltaMixin,
28)
29from pandas.core.indexes.extension import inherit_names
30from pandas.core.ops import get_op_result_name
31import pandas.core.tools.datetimes as tools
33from pandas.tseries.frequencies import Resolution, to_offset
34from pandas.tseries.offsets import Nano, prefix_mapping
def _new_DatetimeIndex(cls, d):
    """
    Reconstruct a DatetimeIndex during unpickling.

    The default unpickling path takes no arguments and breaks ``__new__``,
    so ``__reduce__`` routes through this helper instead.
    """
    if "data" in d and not isinstance(d["data"], DatetimeIndex):
        # Integrity was verified before pickling; rebuild cheaply via
        # _simple_new instead of the full constructor.
        data = d.pop("data")
        return cls._simple_new(data, **d)

    with warnings.catch_warnings():
        # TODO: if the contents of **d were known, _simple_new could be
        # used here as well.
        warnings.simplefilter("ignore")
        return cls.__new__(cls, **d)
class DatetimeDelegateMixin(DatetimelikeDelegateMixin):
    # Declares which DatetimeArray attributes are delegated to the Index.
    # Most attrs are dispatched via datetimelike_{ops,methods}.
    # Some are "raw" methods, i.e. the result is not re-boxed in an Index.
    # We also have a few "extra" attrs, which may or may not be raw,
    # which we don't want to expose in the .dt accessor.
    _extra_methods = ["to_period", "to_perioddelta", "to_julian_date", "strftime"]
    _extra_raw_methods = [
        "to_pydatetime",
        "_local_timestamps",
        "_has_same_tz",
        "_format_native_types",
        "__iter__",
    ]
    _extra_raw_properties = ["_box_func", "tz", "tzinfo", "dtype"]
    _delegated_properties = DatetimeArray._datetimelike_ops + _extra_raw_properties
    _delegated_methods = (
        DatetimeArray._datetimelike_methods + _extra_methods + _extra_raw_methods
    )
    _raw_properties = (
        {"date", "time", "timetz"}
        | set(DatetimeArray._bool_ops)
        | set(_extra_raw_properties)
    )
    _raw_methods = set(_extra_raw_methods)
@inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True)
@inherit_names(
    [
        "_bool_ops",
        "_object_ops",
        "_field_ops",
        "_datetimelike_ops",
        "_datetimelike_methods",
    ],
    DatetimeArray,
)
@delegate_names(
    DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property"
)
@delegate_names(
    DatetimeArray,
    DatetimeDelegateMixin._delegated_methods,
    typ="method",
    overwrite=True,
)
class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin):
    """
    Immutable ndarray of datetime64 data, represented internally as int64, and
    which can be boxed to Timestamp objects that are subclasses of datetime and
    carry metadata such as frequency information.

    Parameters
    ----------
    data : array-like (1-dimensional), optional
        Optional datetime-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or pandas offset object, optional
        One of pandas date offset strings or corresponding objects. The string
        'infer' can be passed in order to set the frequency of the index as the
        inferred frequency upon creation.
    tz : pytz.timezone or dateutil.tz.tzfile
    ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from 03:00
        DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
        and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
        dictates how ambiguous times should be handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for ambiguous
          times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous times.
    name : object
        Name to be stored in the index.
    dayfirst : bool, default False
        If True, parse dates in `data` with the day first order.
    yearfirst : bool, default False
        If True parse dates in `data` with the year first order.

    Attributes
    ----------
    year
    month
    day
    hour
    minute
    second
    microsecond
    nanosecond
    date
    time
    timetz
    dayofyear
    weekofyear
    week
    dayofweek
    weekday
    quarter
    tz
    freq
    freqstr
    is_month_start
    is_month_end
    is_quarter_start
    is_quarter_end
    is_year_start
    is_year_end
    is_leap_year
    inferred_freq

    Methods
    -------
    normalize
    strftime
    snap
    tz_convert
    tz_localize
    round
    floor
    ceil
    to_period
    to_perioddelta
    to_pydatetime
    to_series
    to_frame
    month_name
    day_name
    mean

    See Also
    --------
    Index : The base pandas Index type.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.
    to_datetime : Convert argument to datetime.
    date_range : Create a fixed-frequency DatetimeIndex.

    Notes
    -----
    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
    """

    _typ = "datetimeindex"

    _engine_type = libindex.DatetimeEngine
    _supports_partial_string_indexing = True

    _comparables = ["name", "freqstr", "tz"]
    _attributes = ["name", "tz", "freq"]

    _is_numeric_dtype = False
    _infer_as_myclass = True

    # timezone of the index; None for tz-naive data
    tz: Optional[tzinfo]

    # --------------------------------------------------------------------
    # Constructors
    def __new__(
        cls,
        data=None,
        freq=None,
        tz=None,
        normalize=False,
        closed=None,
        ambiguous="raise",
        dayfirst=False,
        yearfirst=False,
        dtype=None,
        copy=False,
        name=None,
    ):
        # Main constructor: validate that `data` is a collection, then
        # delegate all parsing/coercion to DatetimeArray._from_sequence.

        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}() must be called with a "
                f"collection of some kind, {repr(data)} was passed"
            )

        # - Cases checked above all return/raise before reaching here - #

        name = maybe_extract_name(name, data, cls)

        dtarr = DatetimeArray._from_sequence(
            data,
            dtype=dtype,
            copy=copy,
            tz=tz,
            freq=freq,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
        )

        # Wrap the validated array without re-running validation.
        subarr = cls._simple_new(dtarr, name=name, freq=dtarr.freq, tz=dtarr.tz)
        return subarr
    @classmethod
    def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
        """
        We require that we have a dtype compatible with the values;
        if we are passed a non-dtype-compatible input, coerce it using
        the constructor.
        """
        if isinstance(values, DatetimeArray):
            if tz:
                tz = validate_tz_from_dtype(dtype, tz)
                dtype = DatetimeTZDtype(tz=tz)
            elif dtype is None:
                dtype = _NS_DTYPE

            values = DatetimeArray(values, freq=freq, dtype=dtype)
            tz = values.tz
            freq = values.freq
            values = values._data

        # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
        if isinstance(values, DatetimeIndex):
            values = values._data

        dtype = tz_to_dtype(tz)
        dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype)
        assert isinstance(dtarr, DatetimeArray)

        # Bypass __new__/__init__ and set attributes directly.
        result = object.__new__(cls)
        result._data = dtarr
        result.name = name
        result._no_setting_name = False
        # For groupby perf. See note in indexes/base about _index_data
        result._index_data = dtarr._data
        result._reset_identity()
        return result
294 # --------------------------------------------------------------------
    def __array__(self, dtype=None) -> np.ndarray:
        """Return the underlying data as an ndarray (numpy array protocol)."""
        return np.asarray(self._data, dtype=dtype)
    @cache_readonly
    def _is_dates_only(self) -> bool:
        """
        Return a boolean if we are only dates (and don't have a timezone)

        Returns
        -------
        bool
        """
        # local import to avoid a circular dependency with pandas.io.formats
        from pandas.io.formats.format import _is_dates_only

        return _is_dates_only(self.values) and self.tz is None
    def __reduce__(self):
        """Support pickling; unpickling goes through _new_DatetimeIndex."""

        # we use a special reduce here because we need
        # to simply set the .tz (and not reinterpret it)

        d = dict(data=self._data)
        d.update(self._get_attributes_dict())
        return _new_DatetimeIndex, (type(self), d), None
321 def _convert_for_op(self, value):
322 """
323 Convert value to be insertable to ndarray.
324 """
325 if self._has_same_tz(value):
326 return Timestamp(value).asm8
327 raise ValueError("Passed item and index have different timezone")
329 # --------------------------------------------------------------------
330 # Rendering Methods
    def _mpl_repr(self):
        # how to represent ourselves to matplotlib: box the int64 data into
        # python datetimes (respecting tz)
        return libts.ints_to_pydatetime(self.asi8, self.tz)
    @property
    def _formatter_func(self):
        # Per-element formatter used when rendering the index; picks a
        # date-only format when no element carries a time component.
        from pandas.io.formats.format import _get_format_datetime64

        formatter = _get_format_datetime64(is_dates_only=self._is_dates_only)
        return lambda x: f"'{formatter(x, tz=self.tz)}'"
343 # --------------------------------------------------------------------
344 # Set Operation Methods
    def union_many(self, others):
        """
        A bit of a hack to accelerate unioning a collection of indexes.
        """
        this = self

        for other in others:
            # Once the running result degrades to a non-DatetimeIndex,
            # fall back to the generic Index union for the rest.
            if not isinstance(this, DatetimeIndex):
                this = Index.union(this, other)
                continue

            if not isinstance(other, DatetimeIndex):
                try:
                    other = DatetimeIndex(other)
                except TypeError:
                    pass

            this, other = this._maybe_utc_convert(other)

            if this._can_fast_union(other):
                this = this._fast_union(other)
            else:
                dtype = this.dtype
                this = Index.union(this, other)
                if isinstance(this, DatetimeIndex):
                    # TODO: we shouldn't be setting attributes like this;
                    # in all the tests this equality already holds
                    this._data._dtype = dtype
        return this
    def _wrap_setop_result(self, other, result):
        # Box a set-operation result, keeping tz but explicitly dropping
        # freq (freq=None): the result of a set op need not be regular.
        name = get_op_result_name(self, other)
        return self._shallow_copy(result, name=name, freq=None, tz=self.tz)
380 # --------------------------------------------------------------------
382 def _get_time_micros(self):
383 values = self.asi8
384 if self.tz is not None and not timezones.is_utc(self.tz):
385 values = self._data._local_timestamps()
386 return fields.get_time_micros(values)
    def to_series(self, keep_tz=lib.no_default, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys
        useful with map for returning an indexer based on an index.

        Parameters
        ----------
        keep_tz : optional, defaults True
            Return the data keeping the timezone.

            If keep_tz is True:

              If the timezone is not set, the resulting
              Series will have a datetime64[ns] dtype.

              Otherwise the Series will have an datetime64[ns, tz] dtype; the
              tz will be preserved.

            If keep_tz is False:

              Series will have a datetime64[ns] dtype. TZ aware
              objects will have the tz removed.

            .. versionchanged:: 1.0.0
                The default value is now True. In a future version,
                this keyword will be removed entirely. Stop passing the
                argument to obtain the future behavior and silence the warning.

        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
        """
        from pandas import Series

        if index is None:
            index = self._shallow_copy()
        if name is None:
            name = self.name

        # Any explicit keep_tz (True or False) is deprecated; only the
        # sentinel lib.no_default avoids a warning.
        if keep_tz is not lib.no_default:
            if keep_tz:
                warnings.warn(
                    "The 'keep_tz' keyword in DatetimeIndex.to_series "
                    "is deprecated and will be removed in a future version. "
                    "You can stop passing 'keep_tz' to silence this warning.",
                    FutureWarning,
                    stacklevel=2,
                )
            else:
                warnings.warn(
                    "Specifying 'keep_tz=False' is deprecated and this "
                    "option will be removed in a future release. If "
                    "you want to remove the timezone information, you "
                    "can do 'idx.tz_convert(None)' before calling "
                    "'to_series'.",
                    FutureWarning,
                    stacklevel=2,
                )
        else:
            keep_tz = True

        if keep_tz and self.tz is not None:
            # preserve the tz & copy
            values = self.copy(deep=True)
        else:
            # tz-naive (or tz dropped): plain datetime64[ns] ndarray
            values = self.values.copy()

        return Series(values, index=index, name=name)
463 def snap(self, freq="S"):
464 """
465 Snap time stamps to nearest occurring frequency.
467 Returns
468 -------
469 DatetimeIndex
470 """
471 # Superdumb, punting on any optimizing
472 freq = to_offset(freq)
474 snapped = np.empty(len(self), dtype=_NS_DTYPE)
476 for i, v in enumerate(self):
477 s = v
478 if not freq.is_on_offset(s):
479 t0 = freq.rollback(s)
480 t1 = freq.rollforward(s)
481 if abs(s - t0) < abs(t1 - s):
482 s = t0
483 else:
484 s = t1
485 snapped[i] = s
487 # we know it conforms; skip check
488 return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq)
490 def _parsed_string_to_bounds(self, reso, parsed):
491 """
492 Calculate datetime bounds for parsed time string and its resolution.
494 Parameters
495 ----------
496 reso : Resolution
497 Resolution provided by parsed string.
498 parsed : datetime
499 Datetime from parsed string.
501 Returns
502 -------
503 lower, upper: pd.Timestamp
505 """
506 valid_resos = {
507 "year",
508 "month",
509 "quarter",
510 "day",
511 "hour",
512 "minute",
513 "second",
514 "minute",
515 "second",
516 "microsecond",
517 }
518 if reso not in valid_resos:
519 raise KeyError
520 if reso == "year":
521 start = Timestamp(parsed.year, 1, 1)
522 end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999)
523 elif reso == "month":
524 d = ccalendar.get_days_in_month(parsed.year, parsed.month)
525 start = Timestamp(parsed.year, parsed.month, 1)
526 end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999)
527 elif reso == "quarter":
528 qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
529 d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month
530 start = Timestamp(parsed.year, parsed.month, 1)
531 end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999)
532 elif reso == "day":
533 start = Timestamp(parsed.year, parsed.month, parsed.day)
534 end = start + timedelta(days=1) - Nano(1)
535 elif reso == "hour":
536 start = Timestamp(parsed.year, parsed.month, parsed.day, parsed.hour)
537 end = start + timedelta(hours=1) - Nano(1)
538 elif reso == "minute":
539 start = Timestamp(
540 parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute
541 )
542 end = start + timedelta(minutes=1) - Nano(1)
543 elif reso == "second":
544 start = Timestamp(
545 parsed.year,
546 parsed.month,
547 parsed.day,
548 parsed.hour,
549 parsed.minute,
550 parsed.second,
551 )
552 end = start + timedelta(seconds=1) - Nano(1)
553 elif reso == "microsecond":
554 start = Timestamp(
555 parsed.year,
556 parsed.month,
557 parsed.day,
558 parsed.hour,
559 parsed.minute,
560 parsed.second,
561 parsed.microsecond,
562 )
563 end = start + timedelta(microseconds=1) - Nano(1)
564 # GH 24076
565 # If an incoming date string contained a UTC offset, need to localize
566 # the parsed date to this offset first before aligning with the index's
567 # timezone
568 if parsed.tzinfo is not None:
569 if self.tz is None:
570 raise ValueError(
571 "The index must be timezone aware when indexing "
572 "with a date string with a UTC offset"
573 )
574 start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz)
575 end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz)
576 elif self.tz is not None:
577 start = start.tz_localize(self.tz)
578 end = end.tz_localize(self.tz)
579 return start, end
    def _partial_date_slice(
        self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True
    ):
        """
        Slice the index by a partially-specified (string-parsed) date.

        Parameters
        ----------
        reso : str
        parsed : datetime
            Datetime obtained from the parsed string.
        use_lhs : bool, default True
        use_rhs : bool, default True

        Returns
        -------
        slice or ndarray of int positions

        Raises
        ------
        KeyError
            If the requested resolution is disallowed or out of range.
        """
        is_monotonic = self.is_monotonic
        if (
            is_monotonic
            and reso in ["day", "hour", "minute", "second"]
            and self._resolution >= Resolution.get_reso(reso)
        ):
            # These resolution/monotonicity validations came from GH3931,
            # GH3452 and GH2369.

            # See also GH14826
            raise KeyError

        if reso == "microsecond":
            # _partial_date_slice doesn't allow microsecond resolution, but
            # _parsed_string_to_bounds allows it.
            raise KeyError

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        stamps = self.asi8

        if is_monotonic:

            # we are out of range
            if len(stamps) and (
                (use_lhs and t1.value < stamps[0] and t2.value < stamps[0])
                or ((use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1]))
            ):
                raise KeyError

            # a monotonic (sorted) series can be sliced
            left = stamps.searchsorted(t1.value, side="left") if use_lhs else None
            right = stamps.searchsorted(t2.value, side="right") if use_rhs else None

            return slice(left, right)

        lhs_mask = (stamps >= t1.value) if use_lhs else True
        rhs_mask = (stamps <= t2.value) if use_rhs else True

        # non-monotonic: try to find the dates via a boolean mask
        return (lhs_mask & rhs_mask).nonzero()[0]
    def _maybe_promote(self, other):
        # Promote an index inferred as plain dates to DatetimeIndex;
        # self is returned unchanged.
        if other.inferred_type == "date":
            other = DatetimeIndex(other)
        return self, other
    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing
        """

        if isinstance(key, (datetime, np.datetime64)):
            return self.get_value_maybe_box(series, key)

        if isinstance(key, time):
            # a bare time-of-day selects all matching stamps
            locs = self.indexer_at_time(key)
            return series.take(locs)

        try:
            value = Index.get_value(self, series, key)
        except KeyError:
            # fallbacks: string (partial-date) slice, then boxed lookup
            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            try:
                return self.get_value_maybe_box(series, key)
            except (TypeError, ValueError, KeyError):
                raise KeyError(key)
        else:
            return com.maybe_box(self, value, series, key)
    def get_value_maybe_box(self, series, key):
        # needed to localize naive datetimes before the engine lookup
        if self.tz is not None:
            key = Timestamp(key)
            if key.tzinfo is not None:
                key = key.tz_convert(self.tz)
            else:
                key = key.tz_localize(self.tz)
        elif not isinstance(key, Timestamp):
            key = Timestamp(key)
        values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
        return com.maybe_box(self, values, series, key)
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        if isinstance(key, datetime):
            # needed to localize naive datetimes
            if key.tzinfo is None:
                key = Timestamp(key, tz=self.tz)
            else:
                key = Timestamp(key).tz_convert(self.tz)
            return Index.get_loc(self, key, method, tolerance)

        elif isinstance(key, timedelta):
            # GH#20464
            raise TypeError(
                f"Cannot index {type(self).__name__} with {type(key).__name__}"
            )

        if isinstance(key, time):
            if method is not None:
                raise NotImplementedError(
                    "cannot yet lookup inexact labels when key is a time object"
                )
            return self.indexer_at_time(key)

        # fallback chain: plain lookup -> string (partial-date) slice ->
        # Timestamp coercion + lookup
        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError, OverflowError):
                pass

            try:
                stamp = Timestamp(key)
                if stamp.tzinfo is not None and self.tz is not None:
                    stamp = stamp.tz_convert(self.tz)
                else:
                    stamp = stamp.tz_localize(self.tz)
                return Index.get_loc(self, stamp, method, tolerance)
            except KeyError:
                raise KeyError(key)
            except ValueError as e:
                # list-like tolerance size must match target index size
                if "list-like" in str(e):
                    raise e
                raise KeyError(key)
    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to datetime according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        assert kind in ["ix", "loc", "getitem", None]

        if is_float(label) or isinstance(label, time) or is_integer(label):
            self._invalid_indexer("slice", label)

        if isinstance(label, str):
            freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
            _, parsed, reso = parsing.parse_time_string(label, freq)
            lower, upper = self._parsed_string_to_bounds(reso, parsed)
            # lower, upper form the half-open interval:
            #   [parsed, parsed + 1 freq)
            # because label may be passed to searchsorted
            # the bounds need swapped if index is reverse sorted and has a
            # length > 1 (is_monotonic_decreasing gives True for empty
            # and length 1 index)
            if self._is_strictly_monotonic_decreasing and len(self) > 1:
                return upper if side == "left" else lower
            return lower if side == "left" else upper
        else:
            return label
776 def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
777 freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
778 _, parsed, reso = parsing.parse_time_string(key, freq)
779 loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
780 return loc
    def slice_indexer(self, start=None, end=None, step=None, kind=None):
        """
        Return indexer for specified label slice.
        Index.slice_indexer, customized to handle time slicing.

        In addition to functionality provided by Index.slice_indexer, does the
        following:

        - if both `start` and `end` are instances of `datetime.time`, it
          invokes `indexer_between_time`
        - if `start` and `end` are both either string or None perform
          value-based selection in non-monotonic cases.

        """
        # For historical reasons DatetimeIndex supports slices between two
        # instances of datetime.time as if it were applying a slice mask to
        # an array of (self.hour, self.minute, self.seconds, self.microsecond).
        if isinstance(start, time) and isinstance(end, time):
            if step is not None and step != 1:
                raise ValueError("Must have step size of 1 with time slices")
            return self.indexer_between_time(start, end)

        if isinstance(start, time) or isinstance(end, time):
            raise KeyError("Cannot mix time and non-time slice keys")

        # Pandas supports slicing with dates, treated as datetimes at midnight.
        # https://github.com/pandas-dev/pandas/issues/31501
        if isinstance(start, date) and not isinstance(start, datetime):
            start = datetime.combine(start, time(0, 0))
        if isinstance(end, date) and not isinstance(end, datetime):
            end = datetime.combine(end, time(0, 0))

        try:
            return Index.slice_indexer(self, start, end, step, kind=kind)
        except KeyError:
            # For historical reasons DatetimeIndex by default supports
            # value-based partial (aka string) slices on non-monotonic arrays,
            # let's try that.
            if (start is None or isinstance(start, str)) and (
                end is None or isinstance(end, str)
            ):
                mask = True
                if start is not None:
                    start_casted = self._maybe_cast_slice_bound(start, "left", kind)
                    mask = start_casted <= self

                if end is not None:
                    end_casted = self._maybe_cast_slice_bound(end, "right", kind)
                    mask = (self <= end_casted) & mask

                indexer = mask.nonzero()[0][::step]
                if len(indexer) == len(self):
                    return slice(None)
                else:
                    return indexer
            else:
                raise
840 # --------------------------------------------------------------------
    @Substitution(klass="DatetimeIndex")
    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        # Coerce `value` into something compatible with self._data, then
        # delegate the actual search to the underlying array.
        # NOTE: `sorter` is accepted for signature compatibility but is not
        # forwarded to the underlying searchsorted call.
        if isinstance(value, (np.ndarray, Index)):
            if not type(self._data)._is_recognized_dtype(value):
                raise TypeError(
                    "searchsorted requires compatible dtype or scalar, "
                    f"not {type(value).__name__}"
                )
            value = type(self._data)(value)
            self._data._check_compatible_with(value)

        elif isinstance(value, self._data._recognized_scalars):
            self._data._check_compatible_with(value)
            value = self._data._scalar_type(value)

        elif not isinstance(value, DatetimeArray):
            raise TypeError(
                "searchsorted requires compatible dtype or scalar, "
                f"not {type(value).__name__}"
            )

        return self._data.searchsorted(value, side=side)
866 def is_type_compatible(self, typ) -> bool:
867 return typ == self.inferred_type or typ == "datetime"
    @property
    def inferred_type(self) -> str:
        """Always "datetime64" for a DatetimeIndex."""
        # b/c datetime is represented as microseconds since the epoch, make
        # sure we can't have ambiguous indexing
        return "datetime64"
    def insert(self, loc, item):
        """
        Make new Index inserting new item at location

        Parameters
        ----------
        loc : int
        item : object
            if not either a Python datetime or a numpy integer-like, returned
            Index dtype will be object rather than datetime.

        Returns
        -------
        new_index : Index
        """
        if isinstance(item, self._data._recognized_scalars):
            item = self._data._scalar_type(item)
        elif is_valid_nat_for_dtype(item, self.dtype):
            # GH 18295
            item = self._na_value
        elif is_scalar(item) and isna(item):
            # i.e. timedelta64("NaT")
            raise TypeError(
                f"cannot insert {type(self).__name__} with incompatible label"
            )

        freq = None
        if isinstance(item, self._data._scalar_type) or item is NaT:
            self._data._check_compatible_with(item, setitem=True)

            # check freq can be preserved on edge cases: only inserting at
            # either end, one freq-step away, keeps the index regular
            if self.size and self.freq is not None:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            item = item.asm8

        try:
            new_i8s = np.concatenate(
                (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)
            )
            return self._shallow_copy(new_i8s, freq=freq)
        except (AttributeError, TypeError):

            # fall back to object index
            if isinstance(item, str):
                return self.astype(object).insert(loc, item)
            raise TypeError(
                f"cannot insert {type(self).__name__} with incompatible label"
            )
    def indexer_at_time(self, time, asof=False):
        """
        Return index locations of index values at particular time of day
        (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or str
            datetime.time or string in appropriate format ("%H:%M", "%H%M",
            "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
            "%I%M%S%p").

        Returns
        -------
        values_at_time : array of integers

        See Also
        --------
        indexer_between_time, DataFrame.at_time
        """
        if asof:
            raise NotImplementedError("'asof' argument is not supported")

        if isinstance(time, str):
            from dateutil.parser import parse

            time = parse(time).time()

        if time.tzinfo:
            # tz-aware time requires a tz-aware index; compare in that tz
            if self.tz is None:
                raise ValueError("Index must be timezone aware.")
            time_micros = self.tz_convert(time.tzinfo)._get_time_micros()
        else:
            time_micros = self._get_time_micros()
        micros = _time_to_micros(time)
        return (micros == time_micros).nonzero()[0]
966 def indexer_between_time(
967 self, start_time, end_time, include_start=True, include_end=True
968 ):
969 """
970 Return index locations of values between particular times of day
971 (e.g., 9:00-9:30AM).
973 Parameters
974 ----------
975 start_time, end_time : datetime.time, str
976 datetime.time or string in appropriate format ("%H:%M", "%H%M",
977 "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
978 "%I%M%S%p").
979 include_start : bool, default True
980 include_end : bool, default True
982 Returns
983 -------
984 values_between_time : array of integers
986 See Also
987 --------
988 indexer_at_time, DataFrame.between_time
989 """
990 start_time = tools.to_time(start_time)
991 end_time = tools.to_time(end_time)
992 time_micros = self._get_time_micros()
993 start_micros = _time_to_micros(start_time)
994 end_micros = _time_to_micros(end_time)
996 if include_start and include_end:
997 lop = rop = operator.le
998 elif include_start:
999 lop = operator.le
1000 rop = operator.lt
1001 elif include_end:
1002 lop = operator.lt
1003 rop = operator.le
1004 else:
1005 lop = rop = operator.lt
1007 if start_time <= end_time:
1008 join_op = operator.and_
1009 else:
1010 join_op = operator.or_
1012 mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros))
1014 return mask.nonzero()[0]
# Arithmetic and logical reductions are not meaningful for a DatetimeIndex;
# install the disabled (raising) versions of those auto-generated methods.
DatetimeIndex._add_numeric_methods_disabled()
DatetimeIndex._add_logical_methods_disabled()
def date_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    tz=None,
    normalize=False,
    name=None,
    closed=None,
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex.

    Parameters
    ----------
    start : str or datetime-like, optional
        Left bound for generating dates.
    end : str or datetime-like, optional
        Right bound for generating dates.
    periods : int, optional
        Number of periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency strings can have multiples, e.g. '5H'. See
        :ref:`here <timeseries.offset_aliases>` for a list of
        frequency aliases.
    tz : str or tzinfo, optional
        Time zone name for returning localized DatetimeIndex, for example
        'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
        timezone-naive.
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    closed : {None, 'left', 'right'}, optional
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None, the default).
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    rng : DatetimeIndex

    See Also
    --------
    DatetimeIndex : An immutable container for datetimes.
    timedelta_range : Return a fixed frequency TimedeltaIndex.
    period_range : Return a fixed frequency PeriodIndex.
    interval_range : Return a fixed frequency IntervalIndex.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Specify `start` and `end`, with the default daily frequency.

    >>> pd.date_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start` and `periods`, the number of periods (days).

    >>> pd.date_range(start='1/1/2018', periods=8)
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
                  dtype='datetime64[ns]', freq='D')

    Specify `start`, `end`, and `periods`; the frequency is generated
    automatically (linearly spaced).

    >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
    DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
                   '2018-04-27 00:00:00'],
                  dtype='datetime64[ns]', freq=None)

    Changed the `freq` (frequency) to ``'M'`` (month end frequency).

    >>> pd.date_range(start='1/1/2018', periods=5, freq='M')
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
                   '2018-05-31'],
                  dtype='datetime64[ns]', freq='M')

    Specify `tz` to set the timezone.

    >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo')
    DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
                   '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
                   '2018-01-05 00:00:00+09:00'],
                  dtype='datetime64[ns, Asia/Tokyo]', freq='D')

    Use ``closed='left'`` to exclude `end` if it falls on the boundary.

    >>> pd.date_range(start='2017-01-01', end='2017-01-04', closed='left')
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
                  dtype='datetime64[ns]', freq='D')
    """
    # If the caller gave fewer than all three of start/end/periods and no
    # frequency, fall back to calendar-daily spacing; when all three are
    # present, freq=None means "linearly spaced", so leave it alone.
    if freq is None and com.any_none(periods, start, end):
        freq = "D"

    # The array layer does the actual range generation and validation.
    arr = DatetimeArray._generate_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        closed=closed,
        **kwargs,
    )

    # Wrap without re-validating: the array was built consistently above.
    return DatetimeIndex._simple_new(arr, tz=arr.tz, freq=arr.freq, name=name)
def bdate_range(
    start=None,
    end=None,
    periods=None,
    freq="B",
    tz=None,
    normalize=True,
    name=None,
    weekmask=None,
    holidays=None,
    closed=None,
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex, with business day as the default
    frequency.

    Parameters
    ----------
    start : str or datetime-like, default None
        Left bound for generating dates.
    end : str or datetime-like, default None
        Right bound for generating dates.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, default 'B' (business daily)
        Frequency strings can have multiples, e.g. '5H'.
    tz : str or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Beijing.
    normalize : bool, default True
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    weekmask : str or None, default None
        Weekmask of valid business days, passed to ``numpy.busdaycalendar``,
        only used when custom frequency strings are passed. The default
        value None is equivalent to 'Mon Tue Wed Thu Fri'.

        .. versionadded:: 0.21.0

    holidays : list-like or None, default None
        Dates to exclude from the set of valid business days, passed to
        ``numpy.busdaycalendar``, only used when custom frequency strings
        are passed.

        .. versionadded:: 0.21.0

    closed : str, default None
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None).
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    DatetimeIndex

    Notes
    -----
    Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. Specifying ``freq`` is a requirement
    for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not
    desired.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Note how the two weekend days are skipped in the result.

    >>> pd.bdate_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08'],
              dtype='datetime64[ns]', freq='B')
    """
    if freq is None:
        msg = "freq must be specified for bdate_range; use date_range instead"
        raise TypeError(msg)

    if isinstance(freq, str) and freq.startswith("C"):
        # Custom business-day frequency ('C', 'CBMS', ...): look up the
        # offset class for the prefix and attach the caller's calendar.
        try:
            weekmask = weekmask or "Mon Tue Wed Thu Fri"
            freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
        except (KeyError, TypeError) as err:
            # KeyError: unknown prefix; TypeError: offset class doesn't
            # accept holidays/weekmask. Chain the cause for debuggability.
            msg = f"invalid custom frequency string: {freq}"
            raise ValueError(msg) from err
    elif holidays or weekmask:
        # Calendar arguments are meaningless without a custom frequency.
        msg = (
            "a custom frequency string is required when holidays or "
            f"weekmask are passed, got frequency {freq}"
        )
        raise ValueError(msg)

    return date_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        name=name,
        closed=closed,
        **kwargs,
    )
1293def _time_to_micros(time):
1294 seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
1295 return 1000000 * seconds + time.microsecond