Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np 

2import pandas as pd 

3from scipy.stats.distributions import chi2, norm 

4from statsmodels.graphics import utils 

5 

6 

7def _calc_survfunc_right(time, status, weights=None, entry=None, compress=True, 

8 retall=True): 

9 """ 

10 Calculate the survival function and its standard error for a single 

11 group. 

12 """ 

13 

14 # Convert the unique times to ranks (0, 1, 2, ...) 

15 if entry is None: 

16 utime, rtime = np.unique(time, return_inverse=True) 

17 else: 

18 tx = np.concatenate((time, entry)) 

19 utime, rtime = np.unique(tx, return_inverse=True) 

20 rtime = rtime[0:len(time)] 

21 

22 # Number of deaths at each unique time. 

23 ml = len(utime) 

24 if weights is None: 

25 d = np.bincount(rtime, weights=status, minlength=ml) 

26 else: 

27 d = np.bincount(rtime, weights=status*weights, minlength=ml) 

28 

29 # Size of risk set just prior to each event time. 

30 if weights is None: 

31 n = np.bincount(rtime, minlength=ml) 

32 else: 

33 n = np.bincount(rtime, weights=weights, minlength=ml) 

34 if entry is not None: 

35 n = np.cumsum(n) - n 

36 rentry = np.searchsorted(utime, entry, side='left') 

37 if weights is None: 

38 n0 = np.bincount(rentry, minlength=ml) 

39 else: 

40 n0 = np.bincount(rentry, weights=weights, minlength=ml) 

41 n0 = np.cumsum(n0) - n0 

42 n = n0 - n 

43 else: 

44 n = np.cumsum(n[::-1])[::-1] 

45 

46 # Only retain times where an event occurred. 

47 if compress: 

48 ii = np.flatnonzero(d > 0) 

49 d = d[ii] 

50 n = n[ii] 

51 utime = utime[ii] 

52 

53 # The survival function probabilities. 

54 sp = 1 - d / n.astype(np.float64) 

55 ii = sp < 1e-16 

56 sp[ii] = 1e-16 

57 sp = np.log(sp) 

58 sp = np.cumsum(sp) 

59 sp = np.exp(sp) 

60 sp[ii] = 0 

61 

62 if not retall: 

63 return sp, utime, rtime, n, d 

64 

65 # Standard errors 

66 if weights is None: 

67 # Greenwood's formula 

68 denom = n * (n - d) 

69 denom = np.clip(denom, 1e-12, np.inf) 

70 se = d / denom.astype(np.float64) 

71 se[(n == d) | (n == 0)] = np.nan 

72 se = np.cumsum(se) 

73 se = np.sqrt(se) 

74 locs = np.isfinite(se) | (sp != 0) 

75 se[locs] *= sp[locs] 

76 se[~locs] = np.nan 

77 else: 

78 # Tsiatis' (1981) formula 

79 se = d / (n * n).astype(np.float64) 

80 se = np.cumsum(se) 

81 se = np.sqrt(se) 

82 

83 return sp, se, utime, rtime, n, d 

84 

85 

86def _calc_incidence_right(time, status, weights=None): 

87 """ 

88 Calculate the cumulative incidence function and its standard error. 

89 """ 

90 

91 # Calculate the all-cause survival function. 

92 status0 = (status >= 1).astype(np.float64) 

93 sp, utime, rtime, n, d = _calc_survfunc_right(time, status0, weights, 

94 compress=False, retall=False) 

95 

96 ngrp = int(status.max()) 

97 

98 # Number of cause-specific deaths at each unique time. 

99 d = [] 

100 for k in range(ngrp): 

101 status0 = (status == k + 1).astype(np.float64) 

102 if weights is None: 

103 d0 = np.bincount(rtime, weights=status0, minlength=len(utime)) 

104 else: 

105 d0 = np.bincount(rtime, weights=status0*weights, 

106 minlength=len(utime)) 

107 d.append(d0) 

108 

109 # The cumulative incidence function probabilities. 

110 ip = [] 

111 sp0 = np.r_[1, sp[:-1]] / n 

112 for k in range(ngrp): 

113 ip0 = np.cumsum(sp0 * d[k]) 

114 ip.append(ip0) 

115 

116 # The standard error of the cumulative incidence function. 

117 if weights is not None: 

118 return ip, None, utime 

119 se = [] 

120 da = sum(d) 

121 for k in range(ngrp): 

122 

123 ra = da / (n * (n - da)) 

124 v = ip[k]**2 * np.cumsum(ra) 

125 v -= 2 * ip[k] * np.cumsum(ip[k] * ra) 

126 v += np.cumsum(ip[k]**2 * ra) 

127 

128 ra = (n - d[k]) * d[k] / n 

129 v += np.cumsum(sp0**2 * ra) 

130 

131 ra = sp0 * d[k] / n 

132 v -= 2 * ip[k] * np.cumsum(ra) 

133 v += 2 * np.cumsum(ip[k] * ra) 

134 

135 se.append(np.sqrt(v)) 

136 

137 return ip, se, utime 

138 

139 

140def _checkargs(time, status, entry, freq_weights, exog): 

141 

142 if len(time) != len(status): 

143 raise ValueError("time and status must have the same length") 

144 

145 if entry is not None and (len(entry) != len(time)): 

146 msg = "entry times and event times must have the same length" 

147 raise ValueError(msg) 

148 

149 if entry is not None and np.any(entry >= time): 

150 msg = "Entry times must not occur on or after event times" 

151 raise ValueError(msg) 

152 

153 if freq_weights is not None and (len(freq_weights) != len(time)): 

154 raise ValueError("weights, time and status must have the same length") 

155 

156 if exog is not None and (exog.shape[0] != len(time)): 

157 raise ValueError("the rows of exog should align with time") 

158 

159 

160class CumIncidenceRight(object): 

161 """ 

162 Estimation and inference for a cumulative incidence function. 

163 

164 If J = 1, 2, ... indicates the event type, the cumulative 

165 incidence function for cause j is: 

166 

167 I(t, j) = P(T <= t and J=j) 

168 

169 Only right censoring is supported. If frequency weights are provided, 

170 the point estimate is returned without a standard error. 

171 

172 Parameters 

173 ---------- 

174 time : array_like 

175 An array of times (censoring times or event times) 

176 status : array_like 

177 If status >= 1 indicates which event occurred at time t. If 

178 status = 0, the subject was censored at time t. 

179 title : str 

180 Optional title used for plots and summary output. 

181 freq_weights : array_like 

182 Optional frequency weights 

183 exog : array_like 

184 Optional, if present used to account for violation of 

185 independent censoring. 

186 bw_factor : float 

187 Band-width multiplier for kernel-based estimation. Only 

188 used if exog is provided. 

189 dimred : bool 

190 If True, proportional hazards regression models are used to 

191 reduce exog to two columns by predicting overall events and 

192 censoring in two separate models. If False, exog is used 

193 directly for calculating kernel weights without dimension 

194 reduction. 

195 

196 Attributes 

197 ---------- 

198 times : array_like 

199 The distinct times at which the incidence rates are estimated 

200 cinc : list of arrays 

201 cinc[k-1] contains the estimated cumulative incidence rates 

202 for outcome k=1,2,... 

203 cinc_se : list of arrays 

204 The standard errors for the values in `cinc`. Not available when 

205 exog and/or frequency weights are provided. 

206 

207 Notes 

208 ----- 

209 When exog is provided, a local estimate of the cumulative incidence 

210 rate around each point is provided, and these are averaged to 

211 produce an estimate of the marginal cumulative incidence 

212 functions. The procedure is analogous to that described in Zeng 

213 (2004) for estimation of the marginal survival function. The 

214 approach removes bias resulting from dependent censoring when the 

215 censoring becomes independent conditioned on the columns of exog. 

216 

217 References 

218 ---------- 

219 The Stata stcompet procedure: 

220 http://www.stata-journal.com/sjpdf.html?articlenum=st0059 

221 

222 Dinse, G. E. and M. G. Larson. 1986. A note on semi-Markov models 

223 for partially censored data. Biometrika 73: 379-386. 

224 

225 Marubini, E. and M. G. Valsecchi. 1995. Analysing Survival Data 

226 from Clinical Trials and Observational Studies. Chichester, UK: 

227 John Wiley & Sons. 

228 

229 D. Zeng (2004). Estimating marginal survival function by 

230 adjusting for dependent censoring using many covariates. Annals 

231 of Statistics 32:4. 

232 https://arxiv.org/pdf/math/0409180.pdf 

233 """ 

234 

235 def __init__(self, time, status, title=None, freq_weights=None, 

236 exog=None, bw_factor=1., dimred=True): 

237 

238 _checkargs(time, status, None, freq_weights, None) 

239 time = self.time = np.asarray(time) 

240 status = self.status = np.asarray(status) 

241 if freq_weights is not None: 

242 freq_weights = self.freq_weights = np.asarray(freq_weights) 

243 

244 if exog is not None: 

245 from ._kernel_estimates import _kernel_cumincidence 

246 exog = self.exog = np.asarray(exog) 

247 nobs = exog.shape[0] 

248 kw = nobs**(-1/3.0) * bw_factor 

249 kfunc = lambda x: np.exp(-x**2 / kw**2).sum(1) 

250 x = _kernel_cumincidence(time, status, exog, kfunc, freq_weights, 

251 dimred) 

252 self.times = x[0] 

253 self.cinc = x[1] 

254 return 

255 

256 x = _calc_incidence_right(time, status, freq_weights) 

257 self.cinc = x[0] 

258 self.cinc_se = x[1] 

259 self.times = x[2] 

260 self.title = "" if not title else title 

261 

262 

263class SurvfuncRight(object): 

264 """ 

265 Estimation and inference for a survival function. 

266 

267 The survival function S(t) = P(T > t) is the probability that an 

268 event time T is greater than t. 

269 

270 This class currently only supports right censoring. 

271 

272 Parameters 

273 ---------- 

274 time : array_like 

275 An array of times (censoring times or event times) 

276 status : array_like 

277 Status at the event time, status==1 is the 'event' 

278 (e.g. death, failure), meaning that the event 

279 occurs at the given value in `time`; status==0 

280 indicates that censoring has occurred, meaning that 

281 the event occurs after the given value in `time`. 

282 entry : array_like, optional An array of entry times for handling 

283 left truncation (the subject is not in the risk set on or 

284 before the entry time) 

285 title : str 

286 Optional title used for plots and summary output. 

287 freq_weights : array_like 

288 Optional frequency weights 

289 exog : array_like 

290 Optional, if present used to account for violation of 

291 independent censoring. 

292 bw_factor : float 

293 Band-width multiplier for kernel-based estimation. Only used 

294 if exog is provided. 

295 

296 Attributes 

297 ---------- 

298 surv_prob : array_like 

299 The estimated value of the survivor function at each time 

300 point in `surv_times`. 

301 surv_prob_se : array_like 

302 The standard errors for the values in `surv_prob`. Not available 

303 if exog is provided. 

304 surv_times : array_like 

305 The points where the survival function changes. 

306 n_risk : array_like 

307 The number of subjects at risk just before each time value in 

308 `surv_times`. Not available if exog is provided. 

309 n_events : array_like 

310 The number of events (e.g. deaths) that occur at each point 

311 in `surv_times`. Not available if exog is provided. 

312 

313 Notes 

314 ----- 

315 If exog is None, the standard Kaplan-Meier estimator is used. If 

316 exog is not None, a local estimate of the marginal survival 

317 function around each point is constructed, and these are then 

318 averaged. This procedure gives an estimate of the marginal 

319 survival function that accounts for dependent censoring as long as 

320 the censoring becomes independent when conditioning on the 

321 covariates in exog. See Zeng et al. (2004) for details. 

322 

323 References 

324 ---------- 

325 D. Zeng (2004). Estimating marginal survival function by 

326 adjusting for dependent censoring using many covariates. Annals 

327 of Statistics 32:4. 

328 https://arxiv.org/pdf/math/0409180.pdf 

329 """ 

330 

331 def __init__(self, time, status, entry=None, title=None, 

332 freq_weights=None, exog=None, bw_factor=1.): 

333 

334 _checkargs(time, status, entry, freq_weights, exog) 

335 time = self.time = np.asarray(time) 

336 status = self.status = np.asarray(status) 

337 if freq_weights is not None: 

338 freq_weights = self.freq_weights = np.asarray(freq_weights) 

339 

340 if entry is not None: 

341 entry = self.entry = np.asarray(entry) 

342 

343 if exog is not None: 

344 if entry is not None: 

345 raise ValueError("exog and entry cannot both be present") 

346 from ._kernel_estimates import _kernel_survfunc 

347 exog = self.exog = np.asarray(exog) 

348 nobs = exog.shape[0] 

349 kw = nobs**(-1/3.0) * bw_factor 

350 kfunc = lambda x: np.exp(-x**2 / kw**2).sum(1) 

351 x = _kernel_survfunc(time, status, exog, kfunc, freq_weights) 

352 self.surv_prob = x[0] 

353 self.surv_times = x[1] 

354 return 

355 

356 x = _calc_survfunc_right(time, status, weights=freq_weights, 

357 entry=entry) 

358 

359 self.surv_prob = x[0] 

360 self.surv_prob_se = x[1] 

361 self.surv_times = x[2] 

362 self.n_risk = x[4] 

363 self.n_events = x[5] 

364 self.title = "" if not title else title 

365 

366 def plot(self, ax=None): 

367 """ 

368 Plot the survival function. 

369 

370 Examples 

371 -------- 

372 Change the line color: 

373 

374 >>> import statsmodels.api as sm 

375 >>> data = sm.datasets.get_rdataset("flchain", "survival").data 

376 >>> df = data.loc[data.sex == "F", :] 

377 >>> sf = sm.SurvfuncRight(df["futime"], df["death"]) 

378 >>> fig = sf.plot() 

379 >>> ax = fig.get_axes()[0] 

380 >>> li = ax.get_lines() 

381 >>> li[0].set_color('purple') 

382 >>> li[1].set_color('purple') 

383 

384 Do not show the censoring points: 

385 

386 >>> fig = sf.plot() 

387 >>> ax = fig.get_axes()[0] 

388 >>> li = ax.get_lines() 

389 >>> li[1].set_visible(False) 

390 """ 

391 

392 return plot_survfunc(self, ax) 

393 

394 def quantile(self, p): 

395 """ 

396 Estimated quantile of a survival distribution. 

397 

398 Parameters 

399 ---------- 

400 p : float 

401 The probability point at which the quantile 

402 is determined. 

403 

404 Returns the estimated quantile. 

405 """ 

406 

407 # SAS uses a strict inequality here. 

408 ii = np.flatnonzero(self.surv_prob < 1 - p) 

409 

410 if len(ii) == 0: 

411 return np.nan 

412 

413 return self.surv_times[ii[0]] 

414 

415 def quantile_ci(self, p, alpha=0.05, method='cloglog'): 

416 """ 

417 Returns a confidence interval for a survival quantile. 

418 

419 Parameters 

420 ---------- 

421 p : float 

422 The probability point for which a confidence interval is 

423 determined. 

424 alpha : float 

425 The confidence interval has nominal coverage probability 

426 1 - `alpha`. 

427 method : str 

428 Function to use for g-transformation, must be ... 

429 

430 Returns 

431 ------- 

432 lb : float 

433 The lower confidence limit. 

434 ub : float 

435 The upper confidence limit. 

436 

437 Notes 

438 ----- 

439 The confidence interval is obtained by inverting Z-tests. The 

440 limits of the confidence interval will always be observed 

441 event times. 

442 

443 References 

444 ---------- 

445 The method is based on the approach used in SAS, documented here: 

446 

447 http://support.sas.com/documentation/cdl/en/statug/68162/HTML/default/viewer.htm#statug_lifetest_details03.htm 

448 """ 

449 

450 tr = norm.ppf(1 - alpha / 2) 

451 

452 method = method.lower() 

453 if method == "cloglog": 

454 g = lambda x: np.log(-np.log(x)) 

455 gprime = lambda x: -1 / (x * np.log(x)) 

456 elif method == "linear": 

457 g = lambda x: x 

458 gprime = lambda x: 1 

459 elif method == "log": 

460 g = lambda x: np.log(x) 

461 gprime = lambda x: 1 / x 

462 elif method == "logit": 

463 g = lambda x: np.log(x / (1 - x)) 

464 gprime = lambda x: 1 / (x * (1 - x)) 

465 elif method == "asinsqrt": 

466 g = lambda x: np.arcsin(np.sqrt(x)) 

467 gprime = lambda x: 1 / (2 * np.sqrt(x) * np.sqrt(1 - x)) 

468 else: 

469 raise ValueError("unknown method") 

470 

471 r = g(self.surv_prob) - g(1 - p) 

472 r /= (gprime(self.surv_prob) * self.surv_prob_se) 

473 

474 ii = np.flatnonzero(np.abs(r) <= tr) 

475 if len(ii) == 0: 

476 return np.nan, np.nan 

477 

478 lb = self.surv_times[ii[0]] 

479 

480 if ii[-1] == len(self.surv_times) - 1: 

481 ub = np.inf 

482 else: 

483 ub = self.surv_times[ii[-1] + 1] 

484 

485 return lb, ub 

486 

487 def summary(self): 

488 """ 

489 Return a summary of the estimated survival function. 

490 

491 The summary is a dataframe containing the unique event times, 

492 estimated survival function values, and related quantities. 

493 """ 

494 

495 df = pd.DataFrame(index=self.surv_times) 

496 df.index.name = "Time" 

497 df["Surv prob"] = self.surv_prob 

498 df["Surv prob SE"] = self.surv_prob_se 

499 df["num at risk"] = self.n_risk 

500 df["num events"] = self.n_events 

501 

502 return df 

503 

504 def simultaneous_cb(self, alpha=0.05, method="hw", transform="log"): 

505 """ 

506 Returns a simultaneous confidence band for the survival function. 

507 

508 Parameters 

509 ---------- 

510 alpha : float 

511 `1 - alpha` is the desired simultaneous coverage 

512 probability for the confidence region. Currently alpha 

513 must be set to 0.05, giving 95% simultaneous intervals. 

514 method : str 

515 The method used to produce the simultaneous confidence 

516 band. Only the Hall-Wellner (hw) method is currently 

517 implemented. 

518 transform : str 

519 The used to produce the interval (note that the returned 

520 interval is on the survival probability scale regardless 

521 of which transform is used). Only `log` and `arcsin` are 

522 implemented. 

523 

524 Returns 

525 ------- 

526 lcb : array_like 

527 The lower confidence limits corresponding to the points 

528 in `surv_times`. 

529 ucb : array_like 

530 The upper confidence limits corresponding to the points 

531 in `surv_times`. 

532 """ 

533 

534 method = method.lower() 

535 if method != "hw": 

536 msg = "only the Hall-Wellner (hw) method is implemented" 

537 raise ValueError(msg) 

538 

539 if alpha != 0.05: 

540 raise ValueError("alpha must be set to 0.05") 

541 

542 transform = transform.lower() 

543 s2 = self.surv_prob_se**2 / self.surv_prob**2 

544 nn = self.n_risk 

545 if transform == "log": 

546 denom = np.sqrt(nn) * np.log(self.surv_prob) 

547 theta = 1.3581 * (1 + nn * s2) / denom 

548 theta = np.exp(theta) 

549 lcb = self.surv_prob**(1/theta) 

550 ucb = self.surv_prob**theta 

551 elif transform == "arcsin": 

552 k = 1.3581 

553 k *= (1 + nn * s2) / (2 * np.sqrt(nn)) 

554 k *= np.sqrt(self.surv_prob / (1 - self.surv_prob)) 

555 f = np.arcsin(np.sqrt(self.surv_prob)) 

556 v = np.clip(f - k, 0, np.inf) 

557 lcb = np.sin(v)**2 

558 v = np.clip(f + k, -np.inf, np.pi/2) 

559 ucb = np.sin(v)**2 

560 else: 

561 raise ValueError("Unknown transform") 

562 

563 return lcb, ucb 

564 

565 

566def survdiff(time, status, group, weight_type=None, strata=None, 

567 entry=None, **kwargs): 

568 """ 

569 Test for the equality of two survival distributions. 

570 

571 Parameters 

572 ---------- 

573 time : array_like 

574 The event or censoring times. 

575 status : array_like 

576 The censoring status variable, status=1 indicates that the 

577 event occurred, status=0 indicates that the observation was 

578 censored. 

579 group : array_like 

580 Indicators of the two groups 

581 weight_type : str 

582 The following weight types are implemented: 

583 None (default) : logrank test 

584 fh : Fleming-Harrington, weights by S^(fh_p), 

585 requires exponent fh_p to be provided as keyword 

586 argument; the weights are derived from S defined at 

587 the previous event time, and the first weight is 

588 always 1. 

589 gb : Gehan-Breslow, weights by the number at risk 

590 tw : Tarone-Ware, weights by the square root of the number 

591 at risk 

592 strata : array_like 

593 Optional stratum indicators for a stratified test 

594 entry : array_like 

595 Entry times to handle left truncation. The subject is not in 

596 the risk set on or before the entry time. 

597 

598 Returns 

599 ------- 

600 chisq : The chi-square (1 degree of freedom) distributed test 

601 statistic value 

602 pvalue : The p-value for the chi^2 test 

603 """ 

604 

605 # TODO: extend to handle more than two groups 

606 

607 time = np.asarray(time) 

608 status = np.asarray(status) 

609 group = np.asarray(group) 

610 

611 gr = np.unique(group) 

612 if len(gr) != 2: 

613 raise ValueError("logrank only supports two groups") 

614 

615 if strata is None: 

616 obs, var = _survdiff(time, status, group, weight_type, gr, 

617 entry, **kwargs) 

618 else: 

619 strata = np.asarray(strata) 

620 stu = np.unique(strata) 

621 obs, var = 0., 0. 

622 for st in stu: 

623 # could be more efficient? 

624 ii = (strata == st) 

625 obs1, var1 = _survdiff(time[ii], status[ii], group[ii], 

626 weight_type, gr, entry, **kwargs) 

627 obs += obs1 

628 var += var1 

629 

630 zstat = obs / np.sqrt(var) 

631 

632 # The chi^2 test statistic and p-value. 

633 chisq = zstat**2 

634 pvalue = 1 - chi2.cdf(chisq, 1) 

635 

636 return chisq, pvalue 

637 

638 

639def _survdiff(time, status, group, weight_type, gr, entry=None, 

640 **kwargs): 

641 # logrank test for one stratum 

642 

643 # Get the unique times. 

644 if entry is None: 

645 utimes, rtimes = np.unique(time, return_inverse=True) 

646 else: 

647 utimes, rtimes = np.unique(np.concatenate((time, entry)), 

648 return_inverse=True) 

649 rtimes = rtimes[0:len(time)] 

650 

651 # Split entry times by group if present (should use pandas groupby) 

652 tse = [(gr[0], None), (gr[1], None)] 

653 if entry is not None: 

654 for k in 0, 1: 

655 ii = (group == gr[k]) 

656 entry1 = entry[ii] 

657 tse[k] = (gr[k], entry1) 

658 

659 # Event count and risk set size at each time point, per group and overall. 

660 # TODO: should use Pandas groupby 

661 nrisk, obsv = [], [] 

662 ml = len(utimes) 

663 for g, entry0 in tse: 

664 

665 mk = (group == g) 

666 n = np.bincount(rtimes, weights=mk, minlength=ml) 

667 

668 ob = np.bincount(rtimes, weights=status*mk, minlength=ml) 

669 obsv.append(ob) 

670 

671 if entry is not None: 

672 n = np.cumsum(n) - n 

673 rentry = np.searchsorted(utimes, entry0, side='left') 

674 n0 = np.bincount(rentry, minlength=ml) 

675 n0 = np.cumsum(n0) - n0 

676 nr = n0 - n 

677 else: 

678 nr = np.cumsum(n[::-1])[::-1] 

679 

680 nrisk.append(nr) 

681 

682 obs = sum(obsv) 

683 nrisk_tot = sum(nrisk) 

684 

685 # The variance of event counts in the first group. 

686 r = nrisk[0] / np.clip(nrisk_tot, 1e-10, np.inf) 

687 denom = nrisk_tot - 1 

688 denom = np.clip(denom, 1e-10, np.inf) 

689 var = obs * r * (1 - r) * (nrisk_tot - obs) / denom 

690 

691 # The expected number of events in the first group. 

692 exp1 = obs * r 

693 

694 weights = None 

695 if weight_type is not None: 

696 weight_type = weight_type.lower() 

697 if weight_type == "gb": 

698 weights = nrisk_tot 

699 elif weight_type == "tw": 

700 weights = np.sqrt(nrisk_tot) 

701 elif weight_type == "fh": 

702 if "fh_p" not in kwargs: 

703 msg = "weight_type type 'fh' requires specification of fh_p" 

704 raise ValueError(msg) 

705 fh_p = kwargs["fh_p"] 

706 # Calculate the survivor function directly to avoid the 

707 # overhead of creating a SurvfuncRight object 

708 sp = 1 - obs / nrisk_tot.astype(np.float64) 

709 sp = np.log(sp) 

710 sp = np.cumsum(sp) 

711 sp = np.exp(sp) 

712 weights = sp**fh_p 

713 weights = np.roll(weights, 1) 

714 weights[0] = 1 

715 else: 

716 raise ValueError("weight_type not implemented") 

717 

718 # The Z-scale test statistic (compare to normal reference 

719 # distribution). 

720 ix = np.flatnonzero(nrisk_tot > 1) 

721 if weights is None: 

722 obs = np.sum(obsv[0][ix] - exp1[ix]) 

723 var = np.sum(var[ix]) 

724 else: 

725 obs = np.dot(weights[ix], obsv[0][ix] - exp1[ix]) 

726 var = np.dot(weights[ix]**2, var[ix]) 

727 

728 return obs, var 

729 

730 

731def plot_survfunc(survfuncs, ax=None): 

732 """ 

733 Plot one or more survivor functions. 

734 

735 Parameters 

736 ---------- 

737 survfuncs : object or array_like 

738 A single SurvfuncRight object, or a list or SurvfuncRight 

739 objects that are plotted together. 

740 

741 Returns 

742 ------- 

743 A figure instance on which the plot was drawn. 

744 

745 Examples 

746 -------- 

747 Add a legend: 

748 

749 >>> import statsmodels.api as sm 

750 >>> from statsmodels.duration.survfunc import plot_survfunc 

751 >>> data = sm.datasets.get_rdataset("flchain", "survival").data 

752 >>> df = data.loc[data.sex == "F", :] 

753 >>> sf0 = sm.SurvfuncRight(df["futime"], df["death"]) 

754 >>> sf1 = sm.SurvfuncRight(3.0 * df["futime"], df["death"]) 

755 >>> fig = plot_survfunc([sf0, sf1]) 

756 >>> ax = fig.get_axes()[0] 

757 >>> ax.set_position([0.1, 0.1, 0.64, 0.8]) 

758 >>> ha, lb = ax.get_legend_handles_labels() 

759 >>> leg = fig.legend((ha[0], ha[1]), (lb[0], lb[1]), 'center right') 

760 

761 Change the line colors: 

762 

763 >>> fig = plot_survfunc([sf0, sf1]) 

764 >>> ax = fig.get_axes()[0] 

765 >>> ax.set_position([0.1, 0.1, 0.64, 0.8]) 

766 >>> ha, lb = ax.get_legend_handles_labels() 

767 >>> ha[0].set_color('purple') 

768 >>> ha[1].set_color('orange') 

769 """ 

770 

771 fig, ax = utils.create_mpl_ax(ax) 

772 

773 # If we have only a single survival function to plot, put it into 

774 # a list. 

775 try: 

776 assert(type(survfuncs[0]) is SurvfuncRight) 

777 except: 

778 survfuncs = [survfuncs] 

779 

780 for gx, sf in enumerate(survfuncs): 

781 

782 # The estimated survival function does not include a point at 

783 # time 0, include it here for plotting. 

784 surv_times = np.concatenate(([0], sf.surv_times)) 

785 surv_prob = np.concatenate(([1], sf.surv_prob)) 

786 

787 # If the final times are censoring times they are not included 

788 # in the survival function so we add them here 

789 mxt = max(sf.time) 

790 if mxt > surv_times[-1]: 

791 surv_times = np.concatenate((surv_times, [mxt])) 

792 surv_prob = np.concatenate((surv_prob, [surv_prob[-1]])) 

793 

794 label = getattr(sf, "title", "Group %d" % (gx + 1)) 

795 

796 li, = ax.step(surv_times, surv_prob, '-', label=label, lw=2, 

797 where='post') 

798 

799 # Plot the censored points. 

800 ii = np.flatnonzero(np.logical_not(sf.status)) 

801 ti = np.unique(sf.time[ii]) 

802 jj = np.searchsorted(surv_times, ti) - 1 

803 sp = surv_prob[jj] 

804 ax.plot(ti, sp, '+', ms=12, color=li.get_color(), 

805 label=label + " points") 

806 

807 ax.set_ylim(0, 1.01) 

808 

809 return fig