Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

"""
Helper functions to generate range-like data for DatetimeArray
(and possibly TimedeltaArray/PeriodArray)
"""

5 

from datetime import tzinfo
from typing import Optional, Tuple

import numpy as np

from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp

from pandas.tseries.offsets import DateOffset, Tick, generate_range

13 

14 

def generate_regular_range(
    start: Optional[Timestamp],
    end: Optional[Timestamp],
    periods: Optional[int],
    freq: DateOffset,
) -> Tuple[np.ndarray, Optional[tzinfo]]:
    """
    Generate a range of dates with the spans between dates described by
    the given `freq` DateOffset.

    Parameters
    ----------
    start : Timestamp or None
        first point of produced date range
    end : Timestamp or None
        last point of produced date range
    periods : int or None
        number of periods in produced date range
    freq : DateOffset
        describes space between dates in produced date range

    Returns
    -------
    values : ndarray[np.int64]
        nanosecond unix timestamps
    tz : tzinfo or None
        the ``tz`` attribute of `start` if `start` is given, otherwise that
        of `end`

    Raises
    ------
    ValueError
        If `freq` is a Tick, `periods` is given, and both `start` and `end`
        are None.
    """
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1
            # end.tz == start.tz by this point due to _generate implementation
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = _generate_range_overflow_safe(b, periods, stride, side="start")
            tz = start.tz
        elif end is not None:
            # np.arange excludes its stop value, so step one stride past `end`
            e = Timestamp(end).value + stride
            b = _generate_range_overflow_safe(e, periods, stride, side="end")
            tz = end.tz
        else:
            raise ValueError(
                "at least 'start' or 'end' should be specified "
                "if a 'period' is given."
            )

        with np.errstate(over="raise"):
            # If the range is sufficiently large, np.arange may overflow
            # and incorrectly return an empty array if not caught.
            try:
                values = np.arange(b, e, stride, dtype=np.int64)
            except FloatingPointError:
                # Build the half-open range [b, e) by hand.  Compare with
                # </> rather than != so the loop also terminates when `e`
                # is not an exact multiple of `stride` away from `b` (as in
                # the `periods is None` branch above).
                xdr = [b]
                if stride > 0:
                    while xdr[-1] < e:
                        xdr.append(xdr[-1] + stride)
                elif stride < 0:
                    while xdr[-1] > e:
                        xdr.append(xdr[-1] + stride)
                elif b != e:
                    # a zero stride can never reach `e`; surface the error
                    raise
                # the last element is the first one at or past `e`; drop it
                values = np.array(xdr[:-1], dtype=np.int64)

    else:
        tz = None
        # start and end should have the same timezone by this point
        if start is not None:
            tz = start.tz
        elif end is not None:
            tz = end.tz

        xdr = generate_range(start=start, end=end, periods=periods, offset=freq)

        values = np.array([x.value for x in xdr], dtype=np.int64)

    return values, tz

84 

85 

86def _generate_range_overflow_safe( 

87 endpoint: int, periods: int, stride: int, side: str = "start" 

88) -> int: 

89 """ 

90 Calculate the second endpoint for passing to np.arange, checking 

91 to avoid an integer overflow. Catch OverflowError and re-raise 

92 as OutOfBoundsDatetime. 

93 

94 Parameters 

95 ---------- 

96 endpoint : int 

97 nanosecond timestamp of the known endpoint of the desired range 

98 periods : int 

99 number of periods in the desired range 

100 stride : int 

101 nanoseconds between periods in the desired range 

102 side : {'start', 'end'} 

103 which end of the range `endpoint` refers to 

104 

105 Returns 

106 ------- 

107 other_end : int 

108 

109 Raises 

110 ------ 

111 OutOfBoundsDatetime 

112 """ 

113 # GH#14187 raise instead of incorrectly wrapping around 

114 assert side in ["start", "end"] 

115 

116 i64max = np.uint64(np.iinfo(np.int64).max) 

117 msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" 

118 

119 with np.errstate(over="raise"): 

120 # if periods * strides cannot be multiplied within the *uint64* bounds, 

121 # we cannot salvage the operation by recursing, so raise 

122 try: 

123 addend = np.uint64(periods) * np.uint64(np.abs(stride)) 

124 except FloatingPointError: 

125 raise OutOfBoundsDatetime(msg) 

126 

127 if np.abs(addend) <= i64max: 

128 # relatively easy case without casting concerns 

129 return _generate_range_overflow_safe_signed(endpoint, periods, stride, side) 

130 

131 elif (endpoint > 0 and side == "start" and stride > 0) or ( 

132 endpoint < 0 and side == "end" and stride > 0 

133 ): 

134 # no chance of not-overflowing 

135 raise OutOfBoundsDatetime(msg) 

136 

137 elif side == "end" and endpoint > i64max and endpoint - stride <= i64max: 

138 # in _generate_regular_range we added `stride` thereby overflowing 

139 # the bounds. Adjust to fix this. 

140 return _generate_range_overflow_safe( 

141 endpoint - stride, periods - 1, stride, side 

142 ) 

143 

144 # split into smaller pieces 

145 mid_periods = periods // 2 

146 remaining = periods - mid_periods 

147 assert 0 < remaining < periods, (remaining, periods, endpoint, stride) 

148 

149 midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side) 

150 return _generate_range_overflow_safe(midpoint, remaining, stride, side) 

151 

152 

153def _generate_range_overflow_safe_signed( 

154 endpoint: int, periods: int, stride: int, side: str 

155) -> int: 

156 """ 

157 A special case for _generate_range_overflow_safe where `periods * stride` 

158 can be calculated without overflowing int64 bounds. 

159 """ 

160 assert side in ["start", "end"] 

161 if side == "end": 

162 stride *= -1 

163 

164 with np.errstate(over="raise"): 

165 addend = np.int64(periods) * np.int64(stride) 

166 try: 

167 # easy case with no overflows 

168 return np.int64(endpoint) + addend 

169 except (FloatingPointError, OverflowError): 

170 # with endpoint negative and addend positive we risk 

171 # FloatingPointError; with reversed signed we risk OverflowError 

172 pass 

173 

174 # if stride and endpoint had opposite signs, then endpoint + addend 

175 # should never overflow. so they must have the same signs 

176 assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0) 

177 

178 if stride > 0: 

179 # watch out for very special case in which we just slightly 

180 # exceed implementation bounds, but when passing the result to 

181 # np.arange will get a result slightly within the bounds 

182 result = np.uint64(endpoint) + np.uint64(addend) 

183 i64max = np.uint64(np.iinfo(np.int64).max) 

184 assert result > i64max 

185 if result <= i64max + np.uint64(stride): 

186 return result 

187 

188 raise OutOfBoundsDatetime( 

189 f"Cannot generate range with {side}={endpoint} and periods={periods}" 

190 )