Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from datetime import timedelta 

2import re 

3from typing import Dict, Optional 

4import warnings 

5 

6import numpy as np 

7from pytz import AmbiguousTimeError 

8 

9from pandas._libs.algos import unique_deltas 

10from pandas._libs.tslibs import Timedelta, Timestamp 

11from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday 

12from pandas._libs.tslibs.fields import build_field_sarray 

13import pandas._libs.tslibs.frequencies as libfreqs 

14from pandas._libs.tslibs.offsets import _offset_to_period_map 

15import pandas._libs.tslibs.resolution as libresolution 

16from pandas._libs.tslibs.resolution import Resolution 

17from pandas._libs.tslibs.timezones import UTC 

18from pandas._libs.tslibs.tzconversion import tz_convert 

19from pandas.util._decorators import cache_readonly 

20 

21from pandas.core.dtypes.common import ( 

22 is_datetime64_dtype, 

23 is_period_arraylike, 

24 is_timedelta64_dtype, 

25) 

26from pandas.core.dtypes.generic import ABCSeries 

27 

28from pandas.core.algorithms import unique 

29 

30from pandas.tseries.offsets import ( 

31 DateOffset, 

32 Day, 

33 Hour, 

34 Micro, 

35 Milli, 

36 Minute, 

37 Nano, 

38 Second, 

39 prefix_mapping, 

40) 

41 

# Time-unit conversion factors. _ONE_MICRO == 1000 implies the base unit is
# the nanosecond, matching the i8 values read via ``.asi8`` below.
_ONE_MICRO = 1000
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

#: cache of previously seen offsets, keyed by normalized rule name
_offset_map: Dict[str, DateOffset] = {}

54 

55 

def get_period_alias(offset_str: str) -> Optional[str]:
    """
    Alias to closest period strings BQ->Q etc.

    Returns None when no period alias exists for ``offset_str``.
    """
    try:
        return _offset_to_period_map[offset_str]
    except KeyError:
        return None

61 

62 

# Map Timedelta.components field names to unit DateOffsets; to_offset() uses
# this to translate a timedelta into a sum of scaled offsets.
_name_to_offset_map = {
    "days": Day(1),
    "hours": Hour(1),
    "minutes": Minute(1),
    "seconds": Second(1),
    "milliseconds": Milli(1),
    "microseconds": Micro(1),
    "nanoseconds": Nano(1),
}

72 

73 

def to_offset(freq) -> Optional[DateOffset]:
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None

    Returns
    -------
    DateOffset
        None if freq is None.

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset

    Examples
    --------
    >>> to_offset('5min')
    <5 * Minutes>

    >>> to_offset('1D1H')
    <25 * Hours>

    >>> to_offset(('W', 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, 'B'))
    <2 * BusinessDays>

    >>> to_offset(datetime.timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, DateOffset):
        return freq

    if isinstance(freq, tuple):
        # Accept either (name, stride) or (stride, name); normalize so that
        # `name` holds the string element.
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, str):
            name, stride = stride, name
        name, _ = libfreqs._base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        # Decompose the timedelta into its components and sum the
        # corresponding unit offsets, skipping zero-valued components.
        delta = None
        freq = Timedelta(freq)
        try:
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except ValueError as err:
            # chain the cause so the original failure stays in the traceback
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err

    else:
        # String form, e.g. "2H30T": split into (sep, stride, name) triples
        # using the library frequency pattern and sum the resulting offsets.
        delta = None
        stride_sign = None
        try:
            splitted = re.split(libfreqs.opattern, freq)
            if splitted[-1] != "" and not splitted[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            for sep, stride, name in zip(
                splitted[0::4], splitted[1::4], splitted[2::4]
            ):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = libfreqs._lite_rule_alias.get(name) or name
                # the sign of the first stride applies to the whole expression
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith("-") else 1
                if not stride:
                    stride = 1
                if prefix in Resolution._reso_str_bump_map:
                    # fractional strides (e.g. "2.5H") get bumped to a finer
                    # resolution with an integer stride
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = _get_offset(name)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except (ValueError, TypeError) as err:
            # chain the cause so the original failure stays in the traceback
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq)) from err

    if delta is None:
        raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    return delta

183 

184 

def get_offset(name: str) -> DateOffset:
    """
    Return DateOffset object associated with rule name.

    .. deprecated:: 1.0.0

    Examples
    --------
    get_offset('EOM') --> BMonthEnd(1)
    """
    msg = (
        "get_offset is deprecated and will be removed in a future version, "
        "use to_offset instead"
    )
    warnings.warn(msg, FutureWarning, stacklevel=2)
    return _get_offset(name)

202 

203 

def _get_offset(name: str) -> DateOffset:
    """
    Return DateOffset object associated with rule name.

    Results are memoized in the module-level ``_offset_map``.

    Examples
    --------
    _get_offset('EOM') --> BMonthEnd(1)
    """
    alias_map = libfreqs._lite_rule_alias
    if name in libfreqs._dont_uppercase:
        name = alias_map.get(name, name)
    else:
        name = name.upper()
        name = alias_map.get(name, name)
        name = alias_map.get(name.lower(), name)

    if name not in _offset_map:
        try:
            tokens = name.split("-")
            klass = prefix_mapping[tokens[0]]
            # handles case where there's no suffix (and will TypeError if too
            # many '-')
            offset = klass._from_name(*tokens[1:])
        except (ValueError, TypeError, KeyError):
            # bad prefix or suffix
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name))
        # cache
        _offset_map[name] = offset

    return _offset_map[name]

233 

234 

235# --------------------------------------------------------------------- 

236# Period codes 

237 

238 

def infer_freq(index, warn: bool = True) -> Optional[str]:
    """
    Infer the most likely frequency given the input index. If the frequency is
    uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
        If passed a Series will use the values of the series (NOT THE INDEX).
    warn : bool, default True

    Returns
    -------
    str or None
        None if no discernible frequency.

    Raises
    ------
    TypeError
        If the index is not datetime-like.
    ValueError
        If there are fewer than three values.
    """
    import pandas as pd

    if isinstance(index, ABCSeries):
        vals = index._values
        datetime_like = (
            is_datetime64_dtype(vals)
            or is_timedelta64_dtype(vals)
            or vals.dtype == object
        )
        if not datetime_like:
            raise TypeError(
                "cannot infer freq from a non-convertible dtype "
                f"on a Series of {index.dtype}"
            )
        index = vals

    freq_inferer: _FrequencyInferer
    if is_period_arraylike(index):
        raise TypeError(
            "PeriodIndex given. Check the `freq` attribute "
            "instead of using infer_freq."
        )
    if is_timedelta64_dtype(index):
        # Allow TimedeltaIndex and TimedeltaArray
        freq_inferer = _TimedeltaFrequencyInferer(index, warn=warn)
        return freq_inferer.get_freq()

    if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError(
                f"cannot infer freq from a non-convertible index type {type(index)}"
            )
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            # retry on the raw i8 values to sidestep ambiguous local times
            index = pd.DatetimeIndex(index.asi8)

    freq_inferer = _FrequencyInferer(index, warn=warn)
    return freq_inferer.get_freq()

298 

299 

class _FrequencyInferer:
    """
    Infer a frequency string from a datetime-like index.

    ``get_freq`` is the entry point; the cached properties below hold
    intermediate results shared between the various rule checks.
    (Original author's note: "Not sure if I can avoid the state machine
    here".)
    """

    def __init__(self, index, warn: bool = True):
        self.index = index
        # integer (i8) view of the index values
        self.values = index.asi8

        # This moves the values, which are implicitly in UTC, to the
        # timezone so they are in local time
        if hasattr(index, "tz"):
            if index.tz is not None:
                self.values = tz_convert(self.values, UTC, index.tz)

        self.warn = warn

        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        self.is_monotonic = (
            self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
        )

    @cache_readonly
    def deltas(self):
        # unique gaps between consecutive (tz-adjusted) values
        return unique_deltas(self.values)

    @cache_readonly
    def deltas_asi8(self):
        # unique gaps of the raw .asi8 values (no tz adjustment); unlike
        # `deltas`, these are stable across DST transitions
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self) -> bool:
        # True when consecutive values are evenly spaced
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self) -> bool:
        # evenly spaced in terms of the raw .asi8 values
        return len(self.deltas_asi8) == 1

    def get_freq(self) -> Optional[str]:
        """
        Find the appropriate frequency string to describe the inferred
        frequency of self.values

        Returns
        -------
        str or None
        """
        # A frequency is only defined for monotonic indexes without repeats.
        if not self.is_monotonic or not self.index._is_unique:
            return None

        delta = self.deltas[0]
        if _is_multiple(delta, _ONE_DAY):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return "BH"
        # Possibly intraday frequency. Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions. See #8772
        elif not self.is_unique_asi8:
            return None

        # Fall through from coarsest to finest sub-daily unit.
        delta = self.deltas_asi8[0]
        if _is_multiple(delta, _ONE_HOUR):
            # Hours
            return _maybe_add_count("H", delta / _ONE_HOUR)
        elif _is_multiple(delta, _ONE_MINUTE):
            # Minutes
            return _maybe_add_count("T", delta / _ONE_MINUTE)
        elif _is_multiple(delta, _ONE_SECOND):
            # Seconds
            return _maybe_add_count("S", delta / _ONE_SECOND)
        elif _is_multiple(delta, _ONE_MILLI):
            # Milliseconds
            return _maybe_add_count("L", delta / _ONE_MILLI)
        elif _is_multiple(delta, _ONE_MICRO):
            # Microseconds
            return _maybe_add_count("U", delta / _ONE_MICRO)
        else:
            # Nanoseconds
            return _maybe_add_count("N", delta)

    @cache_readonly
    def day_deltas(self):
        # deltas expressed in (possibly fractional) days
        return [x / _ONE_DAY for x in self.deltas]

    @cache_readonly
    def hour_deltas(self):
        # deltas expressed in (possibly fractional) hours
        return [x / _ONE_HOUR for x in self.deltas]

    @cache_readonly
    def fields(self):
        # structured array of datetime fields (e.g. "Y", "M") for self.values
        return build_field_sarray(self.values)

    @cache_readonly
    def rep_stamp(self):
        # representative Timestamp (first value) used to anchor aliases
        return Timestamp(self.values[0])

    def month_position_check(self):
        # classify the stamps' position within the month (start/end,
        # calendar/business) -- see libresolution.month_position_check
        return libresolution.month_position_check(self.fields, self.index.dayofweek)

    @cache_readonly
    def mdiffs(self):
        # unique gaps measured in whole months
        nmonths = self.fields["Y"] * 12 + self.fields["M"]
        return unique_deltas(nmonths.astype("i8"))

    @cache_readonly
    def ydiffs(self):
        # unique gaps measured in whole years
        return unique_deltas(self.fields["Y"].astype("i8"))

    def _infer_daily_rule(self) -> Optional[str]:
        # Try progressively finer rules: annual -> quarterly -> monthly ->
        # weekly/daily -> business daily -> week-of-month.
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.ydiffs[0]
            month = MONTH_ALIASES[self.rep_stamp.month]
            alias = f"{annual_rule}-{month}"
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            nquarters = self.mdiffs[0] / 3
            # map month-of-quarter (rep_stamp.month % 3) to the month alias
            # anchoring the quarter cycle
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            alias = f"{quarterly_rule}-{month}"
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            days = self.deltas[0] / _ONE_DAY
            if days % 7 == 0:
                # Weekly
                day = int_to_weekday[self.rep_stamp.weekday()]
                return _maybe_add_count(f"W-{day}", days / 7)
            else:
                return _maybe_add_count("D", days)

        if self._is_business_daily():
            return "B"

        wom_rule = self._get_wom_rule()
        if wom_rule:
            return wom_rule

        return None

    def _get_annual_rule(self) -> Optional[str]:
        # year-to-year gaps must all be equal
        if len(self.ydiffs) > 1:
            return None

        # all stamps must fall in the same calendar month
        if len(unique(self.fields["M"])) > 1:
            return None

        pos_check = self.month_position_check()
        return {"cs": "AS", "bs": "BAS", "ce": "A", "be": "BA"}.get(pos_check)

    def _get_quarterly_rule(self) -> Optional[str]:
        # month-to-month gaps must all be equal ...
        if len(self.mdiffs) > 1:
            return None

        # ... and be a whole number of quarters
        if not self.mdiffs[0] % 3 == 0:
            return None

        pos_check = self.month_position_check()
        return {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}.get(pos_check)

    def _get_monthly_rule(self) -> Optional[str]:
        # month-to-month gaps must all be equal
        if len(self.mdiffs) > 1:
            return None
        pos_check = self.month_position_check()
        return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check)

    def _is_business_daily(self) -> bool:
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        shifts = np.diff(self.index.asi8)
        shifts = np.floor_divide(shifts, _ONE_DAY)
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
        # every 3-day gap must land on Monday (weekday 0, i.e. a weekend
        # skip) and every 1-day gap must land on Tue..Fri
        return np.all(
            ((weekdays == 0) & (shifts == 3))
            | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))
        )

    def _get_wom_rule(self) -> Optional[str]:
        # wdiffs = unique(np.diff(self.index.week))
        # We also need -47, -49, -48 to catch index spanning year boundary
        # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
        #     return None

        # all stamps must fall on the same weekday ...
        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        # ... and in the same week-of-month
        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) == 0 or len(week_of_months) > 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]

        return f"WOM-{week}{wd}"

513 

514 

class _TimedeltaFrequencyInferer(_FrequencyInferer):
    """Frequency inferer variant used for timedelta-like indexes."""

    def _infer_daily_rule(self):
        # Only a single unique delta can yield a daily-or-coarser rule;
        # otherwise no frequency is inferable.
        if not self.is_unique:
            return None
        days = self.deltas[0] / _ONE_DAY
        if days % 7:
            return _maybe_add_count("D", days)
        # Weekly
        weekday = int_to_weekday[self.rep_stamp.weekday()]
        return _maybe_add_count(f"W-{weekday}", days / 7)

526 

527 

528def _is_multiple(us, mult: int) -> bool: 

529 return us % mult == 0 

530 

531 

532def _maybe_add_count(base: str, count: float) -> str: 

533 if count != 1: 

534 assert count == int(count) 

535 count = int(count) 

536 return f"{count}{base}" 

537 else: 

538 return base